
"""Enhanced Gamified Debate Agent with AI Judge Integration.

This module extends the basic gamified debate with AI judge panels for sophisticated
winner determination and performance evaluation.
"""

import logging
from typing import Any, Literal

from haive.agents.conversation.debate.state import DebateState
from langchain_core.messages import SystemMessage
from langgraph.types import Command
from pydantic import Field

from haive.games.debate_v2.agent import GameDebateAgent
from haive.games.debate_v2.judges import (
    DebateJudgingPanel,
    DebateJudgment,
    create_academic_judges,
    create_public_judges,
    create_tournament_judges,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class JudgedGameDebateAgent(GameDebateAgent):
    """Gamified debate agent with AI judge panel integration.

    This agent extends the basic GameDebateAgent with sophisticated AI judge
    evaluation for more nuanced winner determination and detailed performance
    feedback from multiple judge perspectives.
    """

    # Judge system configuration
    use_ai_judges: bool = Field(
        default=True,
        description="Whether to use AI judge panel for winner determination",
    )
    judge_panel_type: Literal["tournament", "academic", "public", "custom"] = Field(
        default="tournament", description="Type of judge panel to use"
    )
    num_judges: int = Field(
        default=3,
        ge=1,
        le=15,
        description="Number of judges in the panel (odd numbers recommended to avoid ties)",
    )
    custom_judges: DebateJudgingPanel | None = Field(
        default=None, description="Custom judge panel if using custom type"
    )

    # Scoring combination
    combine_auto_and_judge_scoring: bool = Field(
        default=True,
        description="Whether to combine automatic scoring with judge evaluation",
    )
    auto_scoring_weight: float = Field(
        default=0.3, ge=0.0, le=1.0, description="Weight of automatic scoring (0.0-1.0)"
    )
    judge_scoring_weight: float = Field(
        default=0.7, ge=0.0, le=1.0, description="Weight of judge scoring (0.0-1.0)"
    )

    # Judgment storage
    final_judgment: DebateJudgment | None = Field(
        default=None, description="Final judgment from AI judge panel"
    )
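    # Note: auto_scoring_weight and judge_scoring_weight above are validated
    # independently (each in [0.0, 1.0]); nothing here enforces that they sum
    # to 1.0, so callers overriding the 0.3/0.7 defaults should keep the pair
    # normalized if combined scores are meant to stay on a 0-100 scale.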
    def setup_agent(self) -> None:
        """Set up the judged debate agent with its judge panel."""
        super().setup_agent()

        # Initialize judge panel
        if self.use_ai_judges and not self.custom_judges:
            self.custom_judges = self._create_judge_panel()

        logger.debug(
            f"JudgedGameDebateAgent setup with judge panel: {self.judge_panel_type}"
        )
    def _create_judge_panel(self) -> DebateJudgingPanel:
        """Create the appropriate judge panel based on configuration."""
        # Warn about even numbers
        if self.num_judges % 2 == 0:
            logger.warning(
                f"Even number of judges ({self.num_judges}) can cause tie votes. "
                f"Consider using {self.num_judges + 1} judges."
            )

        if self.judge_panel_type == "tournament":
            return create_tournament_judges(self.num_judges)
        elif self.judge_panel_type == "academic":
            return create_academic_judges(self.num_judges)
        elif self.judge_panel_type == "public":
            return create_public_judges(self.num_judges)
        else:  # custom
            if self.custom_judges:
                return self.custom_judges
            logger.warning(
                "Custom judge panel requested but not provided; using tournament panel"
            )
            return create_tournament_judges(self.num_judges)
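    # For example (hypothetical values): num_judges=4 triggers the tie warning
    # above and suggests 5 judges, while judge_panel_type="custom" with no
    # custom_judges set falls back to a tournament panel of the same size.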
    async def conclude_conversation(self, state: DebateState) -> Command:
        """Enhanced conclusion with AI judge evaluation."""
        logger.info("🏛️ Starting AI judge evaluation for debate conclusion...")

        # Get base conclusion (with automatic scoring)
        base_conclusion = super().conclude_conversation(state)
        base_updates = (
            base_conclusion.update if hasattr(base_conclusion, "update") else {}
        )

        # Perform AI judge evaluation if enabled
        if self.use_ai_judges and self.custom_judges:
            try:
                judgment = await self._get_ai_judge_evaluation(state)
                self.final_judgment = judgment

                # Combine scoring if enabled
                if self.combine_auto_and_judge_scoring:
                    combined_scores = self._combine_scoring_methods(state, judgment)
                    base_updates["player_scores"] = combined_scores
                    base_updates["final_judgment"] = judgment
                else:
                    # Use judge scoring only
                    judge_scores = self._extract_judge_scores(judgment)
                    base_updates["player_scores"] = judge_scores
                    base_updates["final_judgment"] = judgment

                # Create enhanced conclusion message
                enhanced_conclusion = self._create_enhanced_conclusion(state, judgment)
                base_updates["messages"] = [enhanced_conclusion]

            except Exception as e:
                logger.error(f"AI judge evaluation failed: {e}")
                # Fall back to automatic scoring
                logger.info("Falling back to automatic scoring only")

        # Update with enhanced results
        enhanced_updates = {
            **base_updates,
            "conversation_ended": True,
            "current_phase": "judged_complete",
            "game_phase": "judged_complete",
        }

        return Command(update=enhanced_updates)
    async def _get_ai_judge_evaluation(self, state: DebateState) -> DebateJudgment:
        """Get evaluation from the AI judge panel."""
        # Extract debate information
        players = list((self.debate_positions or {}).keys())
        positions = self.debate_positions or {}

        # Create debate transcript
        transcript = self._create_debate_transcript(state)

        # Get judgment from panel
        judgment = await self.custom_judges.judge_debate(
            topic=self.topic or "Unknown Topic",
            players=players,
            positions=positions,
            debate_transcript=transcript,
        )

        logger.info(f"🏆 AI Judges declare winner: {judgment.overall_winner}")
        logger.info(f"📊 Margin of victory: {judgment.margin_of_victory:.1%}")
        logger.info(f"🤝 Judge consensus: {judgment.consensus_level:.1%}")

        return judgment

    def _create_debate_transcript(self, state: DebateState) -> str:
        """Create a formatted transcript for judge evaluation."""
        transcript_parts = [
            f"🎯 DEBATE TOPIC: {self.topic}",
            f"👥 PARTICIPANTS: {', '.join((self.debate_positions or {}).keys())}",
            "",
            "📜 FULL DEBATE TRANSCRIPT:",
            "=" * 50,
        ]

        # Add all messages with speaker identification
        for i, message in enumerate(state.messages):
            if hasattr(message, "name") and message.name:
                speaker = message.name
            elif message.type == "human":
                speaker = "Moderator"
            elif message.type == "system":
                speaker = "System"
            else:
                speaker = f"Speaker_{i}"

            content = str(message.content)
            if len(content) > 50:  # Only add substantial content
                transcript_parts.extend([f"\n[{speaker.upper()}]:", content, ""])

        return "\n".join(transcript_parts)

    def _combine_scoring_methods(
        self, state: DebateState, judgment: DebateJudgment
    ) -> dict[str, float]:
        """Combine automatic scoring with AI judge scores."""
        auto_scores = getattr(state, "player_scores", {})
        judge_scores = self._extract_judge_scores(judgment)

        combined_scores = {}
        all_players = set(auto_scores.keys()) | set(judge_scores.keys())

        for player in all_players:
            auto_score = auto_scores.get(player, 0)
            judge_score = judge_scores.get(player, 0)

            # Normalize scores to the same 0-100 scale
            auto_normalized = min(100, auto_score * 2)  # Auto scores are typically 0-50
            judge_normalized = judge_score * 100 / 60  # Judge scores are 0-60

            # Weighted combination
            combined = (
                auto_normalized * self.auto_scoring_weight
                + judge_normalized * self.judge_scoring_weight
            )
            combined_scores[player] = round(combined, 1)

        return combined_scores

    def _extract_judge_scores(self, judgment: DebateJudgment) -> dict[str, float]:
        """Extract average judge scores for each player."""
        player_scores = {}

        for player, scores in judgment.judge_scores.items():
            if scores:
                avg_score = sum(score.total_score for score in scores) / len(scores)
                player_scores[player] = avg_score

        return player_scores

    def _create_enhanced_conclusion(
        self, state: DebateState, judgment: DebateJudgment
    ) -> SystemMessage:
        """Create an enhanced conclusion message with judge evaluation."""
        # Get final scores (either combined or judge-only)
        final_scores = getattr(state, "player_scores", {})

        summary_parts = [
            "🏛️ **JUDGED DEBATE TOURNAMENT - FINAL DECISION** 🏛️",
            "",
            f"📋 **Topic**: {self.topic}",
            f"👥 **Participants**: {', '.join(judgment.players)}",
            f"⚖️ **Judge Panel**: {self.judge_panel_type.title()} "
            f"({len(self.custom_judges.judges) if self.custom_judges else self.num_judges} judges)",
            "",
            f"🏆 **OFFICIAL WINNER**: {judgment.overall_winner}",
            f"📊 **Margin of Victory**: {judgment.margin_of_victory:.1%}",
            f"🤝 **Judge Consensus**: {judgment.consensus_level:.1%}",
            "",
            "🏅 **FINAL SCORES**:",
        ]

        # Add final scores, highest first
        sorted_scores = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)
        for rank, (player, score) in enumerate(sorted_scores, 1):
            position = (self.debate_positions or {}).get(player, "Unknown")
            medal = "🥇" if rank == 1 else "🥈" if rank == 2 else "🥉"
            score_type = (
                "Combined" if self.combine_auto_and_judge_scoring else "Judge Panel"
            )
            summary_parts.append(
                f"  {medal} {player} ({position}): {score} points ({score_type})"
            )

        # Add judge panel summary
        summary_parts.extend(
            ["", "🏛️ **JUDGE PANEL EVALUATION**:", judgment.judgment_summary, ""]
        )

        # Add individual judge insights (a sample of reasoning for the first two players)
        if judgment.judge_scores:
            summary_parts.append("👨‍⚖️ **INDIVIDUAL JUDGE INSIGHTS**:")
            for player in judgment.players[:2]:
                player_scores = judgment.judge_scores.get(player, [])
                if player_scores:
                    best_reasoning = max(player_scores, key=lambda x: x.confidence)
                    summary_parts.append(
                        f"• **{player}** (Judge {best_reasoning.judge_name}): "
                        f"{best_reasoning.reasoning[:200]}..."
                    )

        # Tournament information
        if self.tournament_mode:
            summary_parts.extend(
                [
                    "",
                    "🏆 **TOURNAMENT ADVANCEMENT**:",
                    f"  • Match ID: {self.match_id or 'Unknown'}",
                    f"  • Bracket: {self.bracket_position or 'Unknown'}",
                    f"  • Advancing Player: {judgment.overall_winner}",
                    f"  • Victory Type: "
                    f"{'Decisive' if judgment.margin_of_victory > 0.3 else 'Close'}",
                ]
            )

        return SystemMessage(content="\n".join(summary_parts))
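    # Scoring note (illustrative arithmetic, using the scales assumed in
    # _combine_scoring_methods): an automatic score of 40 normalizes to
    # min(100, 40 * 2) = 80, a judge total of 45/60 normalizes to 75, and the
    # default weights give 0.3 * 80 + 0.7 * 75 = 76.5 combined points.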
    @classmethod
    def create_judged_tournament_match(
        cls,
        topic: str,
        player_a: tuple[str, str],  # (name, position)
        player_b: tuple[str, str],  # (name, position)
        match_id: str,
        judge_panel_type: Literal["tournament", "academic", "public"] = "tournament",
        num_judges: int = 3,
        bracket_position: str = "tournament",
        **kwargs,
    ) -> "JudgedGameDebateAgent":
        """Create a tournament debate match with AI judge evaluation."""
        # Use the base factory method and enhance with judge configuration
        base_kwargs = {
            **kwargs,
            "use_ai_judges": True,
            "judge_panel_type": judge_panel_type,
            "num_judges": num_judges,
            "combine_auto_and_judge_scoring": True,
            "auto_scoring_weight": 0.3,
            "judge_scoring_weight": 0.7,
        }

        # Create base tournament match
        agent = cls.create_tournament_match(
            topic=topic,
            player_a=player_a,
            player_b=player_b,
            match_id=match_id,
            bracket_position=bracket_position,
            **base_kwargs,
        )

        # Override class type
        agent.__class__ = cls

        return agent
    def get_judge_panel_info(self) -> dict[str, Any]:
        """Get information about the current judge panel."""
        if not self.custom_judges:
            return {"status": "No judge panel configured"}

        return {
            "panel_type": self.judge_panel_type,
            "judge_count": len(self.custom_judges.judges),
            "judges": [
                {
                    "name": judge.name,
                    "type": judge.judge_type.value,
                    "expertise": judge.expertise_area,
                    "strictness": judge.strictness_level,
                }
                for judge in self.custom_judges.judges
            ],
            "scoring_method": (
                "Combined" if self.combine_auto_and_judge_scoring else "Judge-only"
            ),
            "weights": (
                {
                    "automatic": self.auto_scoring_weight,
                    "judges": self.judge_scoring_weight,
                }
                if self.combine_auto_and_judge_scoring
                else {"judges": 1.0}
            ),
        }
    def __repr__(self) -> str:
        """String representation of the judged debate agent."""
        mode_str = "Judged Tournament" if self.tournament_mode else "Judged Game"
        judge_info = (
            f"({self.judge_panel_type} panel)"
            if self.use_ai_judges
            else "(auto-scoring)"
        )
        positions = ", ".join(
            f"{name}={pos[:15]}..."
            for name, pos in (self.debate_positions or {}).items()
        )
        return (
            f"{mode_str}DebateAgent{judge_info}(topic='{self.topic}', "
            f"positions=[{positions}], scoring={self.scoring_enabled})"
        )
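

# Example usage (a minimal sketch; the factory and accessor below are defined
# in this module, but the topic, player names, positions, and match id are
# illustrative placeholders):
#
#     agent = JudgedGameDebateAgent.create_judged_tournament_match(
#         topic="AI systems should be open-sourced",
#         player_a=("Ada", "Pro: openness accelerates safety research"),
#         player_b=("Ben", "Con: open releases increase misuse risk"),
#         match_id="semifinal-1",
#         judge_panel_type="academic",
#         num_judges=5,
#     )
#     print(agent.get_judge_panel_info())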