Source code for haive.core.utils.enhanced_naming

#!/usr/bin/env python3
"""Enhanced naming utilities for complex type annotations and tool names.

This module provides advanced utilities for sanitizing complex Python type annotations
and generic classes into OpenAI-compliant tool names with descriptive transformations.

Key Features:
- Handles nested generics: List[Dict[str, Task]] -> list_dict_str_task_nested_generic
- Provides transformation descriptions for debugging
- Handles Union, Optional, and complex type annotations
- Maintains type hierarchy information in the name
- Supports custom naming strategies

Examples:
    Complex type handling::

        from haive.core.utils.enhanced_naming import enhanced_sanitize_tool_name

        # Nested generics
        result, desc = enhanced_sanitize_tool_name("List[Dict[str, Task]]")
        # Returns: ("list_dict_str_task_nested_generic", "3-level nested generic with 4 type parameters")

        # Union types
        result, desc = enhanced_sanitize_tool_name("Union[str, List[Task]]")
        # Returns: ("union_str_list_task_generic", "Union type with 2 alternatives")

        # Optional types
        result, desc = enhanced_sanitize_tool_name("Optional[Plan[Task]]")
        # Returns: ("optional_plan_task_generic", "Optional type wrapping Plan[Task] generic")
"""

import logging
import re
from dataclasses import dataclass
from typing import Any

logger = logging.getLogger(__name__)

# Import base functionality
from haive.core.utils.naming import (
    _convert_to_snake_case,
    _ensure_openai_compliance,
    _final_cleanup,
    sanitize_tool_name,
)



[docs]
@dataclass
class NamingTransformation:
    """Metadata about a naming transformation."""

    original_name: str
    final_name: str
    transformation_type: str
    complexity_level: int
    type_parameters: list[str]
    nesting_depth: int
    description: str
    warnings: list[str]




[docs]
class EnhancedGenericParser:
    """Advanced parser for complex generic type annotations."""

    # Enhanced patterns for different type constructs
    PATTERNS = {
        "simple_generic": re.compile(r"^(\w+)\[([^\[\]]+)\]$"),
        "nested_generic": re.compile(r"^(\w+)\[(.*)\]$"),
        "union_type": re.compile(r"^Union\[(.*)\]$"),
        "optional_type": re.compile(r"^Optional\[(.*)\]$"),
        "literal_type": re.compile(r"^Literal\[(.*)\]$"),
    }


[docs]
    def parse_complex_type(self, type_name: str) -> NamingTransformation:
        """Parse complex type annotations into naming transformation."""
        original = type_name.strip()
        warnings = []

        # Detect the type of generic
        transformation_type, parsed_data = self._identify_type_pattern(original)

        if transformation_type == "nested_generic":
            result = self._handle_nested_generic(original, parsed_data, warnings)
        elif transformation_type == "union_type":
            result = self._handle_union_type(original, parsed_data, warnings)
        elif transformation_type == "optional_type":
            result = self._handle_optional_type(original, parsed_data, warnings)
        elif transformation_type == "simple_generic":
            result = self._handle_simple_generic(original, parsed_data, warnings)
        else:
            # Fall back to basic sanitization
            result = self._handle_unknown_type(original, warnings)

        return result


    def _identify_type_pattern(self, type_name: str) -> tuple[str, dict[str, Any]]:
        """Identify which pattern the type matches."""
        # Check for Union types first
        if union_match := self.PATTERNS["union_type"].match(type_name):
            return "union_type", {"content": union_match.group(1)}

        # Check for Optional types
        if optional_match := self.PATTERNS["optional_type"].match(type_name):
            return "optional_type", {"content": optional_match.group(1)}

        # Check for complex nested generics
        if nested_match := self.PATTERNS["nested_generic"].match(type_name):
            base_class = nested_match.group(1)
            content = nested_match.group(2)

            # Check if content contains nested brackets (indicating complexity)
            bracket_count = content.count("[") + content.count("]")
            if bracket_count > 0:
                return "nested_generic", {"base": base_class, "content": content}
            else:
                return "simple_generic", {"base": base_class, "content": content}

        return "unknown", {"content": type_name}

    def _handle_nested_generic(
        self, original: str, data: dict, warnings: list[str]
    ) -> NamingTransformation:
        """Handle complex nested generics like List[Dict[str, Task]]."""
        base_class = data["base"]
        content = data["content"]

        # Recursively parse the nested content
        type_parameters = self._extract_nested_parameters(content)
        nesting_depth = self._calculate_nesting_depth(content)

        if nesting_depth > 3:
            warnings.append(
                f"Very deep nesting ({nesting_depth} levels) may be hard to understand"
            )

        # Build descriptive name
        param_parts = []
        for param in type_parameters:
            # Recursively handle each parameter
            if "[" in param and "]" in param:
                # This parameter is itself generic
                sub_result = self.parse_complex_type(param)
                param_parts.append(sub_result.final_name.replace("_generic", ""))
            else:
                # Simple parameter
                param_parts.append(_convert_to_snake_case(param.strip()))

        # Combine into final name
        base_snake = _convert_to_snake_case(base_class)
        combined_params = "_".join(param_parts)

        if nesting_depth > 1:
            final_name = f"{base_snake}_{combined_params}_nested_generic"
            description = f"{nesting_depth}-level nested generic with {len(type_parameters)} type parameters"
        else:
            final_name = f"{base_snake}_{combined_params}_generic"
            description = f"Generic type with {len(type_parameters)} parameters"

        return NamingTransformation(
            original_name=original,
            final_name=_ensure_openai_compliance(final_name),
            transformation_type="nested_generic",
            complexity_level=nesting_depth,
            type_parameters=type_parameters,
            nesting_depth=nesting_depth,
            description=description,
            warnings=warnings,
        )

    def _handle_union_type(
        self, original: str, data: dict, warnings: list[str]
    ) -> NamingTransformation:
        """Handle Union types like Union[str, List[Task]]."""
        content = data["content"]

        # Split union alternatives
        alternatives = self._split_union_alternatives(content)

        if len(alternatives) > 4:
            warnings.append(
                f"Union with {len(alternatives)} alternatives may be too complex"
            )

        # Process each alternative
        alt_parts = []
        for alt in alternatives:
            alt = alt.strip()
            if "[" in alt and "]" in alt:
                # Generic alternative
                sub_result = self.parse_complex_type(alt)
                alt_parts.append(sub_result.final_name.replace("_generic", ""))
            else:
                # Simple alternative
                alt_parts.append(_convert_to_snake_case(alt))

        final_name = f"union_{'_'.join(alt_parts)}_generic"
        description = f"Union type with {len(alternatives)} alternatives"

        return NamingTransformation(
            original_name=original,
            final_name=_ensure_openai_compliance(final_name),
            transformation_type="union_type",
            complexity_level=1,
            type_parameters=alternatives,
            nesting_depth=1,
            description=description,
            warnings=warnings,
        )

    def _handle_optional_type(
        self, original: str, data: dict, warnings: list[str]
    ) -> NamingTransformation:
        """Handle Optional types like Optional[Plan[Task]]."""
        content = data["content"].strip()

        # Process the wrapped type
        if "[" in content and "]" in content:
            # Generic wrapped type
            sub_result = self.parse_complex_type(content)
            wrapped_name = sub_result.final_name.replace("_generic", "")
            description = f"Optional type wrapping {sub_result.description}"
        else:
            # Simple wrapped type
            wrapped_name = _convert_to_snake_case(content)
            description = f"Optional type wrapping {content}"

        final_name = f"optional_{wrapped_name}_generic"

        return NamingTransformation(
            original_name=original,
            final_name=_ensure_openai_compliance(final_name),
            transformation_type="optional_type",
            complexity_level=1,
            type_parameters=[content],
            nesting_depth=1,
            description=description,
            warnings=warnings,
        )

    def _handle_simple_generic(
        self, original: str, data: dict, warnings: list[str]
    ) -> NamingTransformation:
        """Handle simple generics like Plan[Task] or Plan[Task, Status]."""
        base_class = data["base"]
        content = data["content"]

        # Split multiple parameters
        parameters = [p.strip() for p in content.split(",")]

        # Convert to snake case
        base_snake = _convert_to_snake_case(base_class)
        param_parts = [_convert_to_snake_case(p) for p in parameters]

        final_name = f"{base_snake}_{'_'.join(param_parts)}_generic"
        description = f"Simple generic with {len(parameters)} type parameter{'s' if len(parameters) > 1 else ''}"

        return NamingTransformation(
            original_name=original,
            final_name=_ensure_openai_compliance(final_name),
            transformation_type="simple_generic",
            complexity_level=1,
            type_parameters=parameters,
            nesting_depth=1,
            description=description,
            warnings=warnings,
        )

    def _handle_unknown_type(
        self, original: str, warnings: list[str]
    ) -> NamingTransformation:
        """Handle unknown/complex types by falling back to basic sanitization."""
        warnings.append("Unknown type pattern, using basic sanitization")

        # Use the original sanitize_tool_name as fallback
        final_name = sanitize_tool_name(original)

        return NamingTransformation(
            original_name=original,
            final_name=final_name,
            transformation_type="unknown",
            complexity_level=0,
            type_parameters=[],
            nesting_depth=0,
            description="Unknown type pattern, basic sanitization applied",
            warnings=warnings,
        )

    def _extract_nested_parameters(self, content: str) -> list[str]:
        """Extract parameters from nested generic content."""
        parameters = []
        current_param = ""
        bracket_depth = 0

        for char in content:
            if char == "[":
                bracket_depth += 1
                current_param += char
            elif char == "]":
                bracket_depth -= 1
                current_param += char
            elif char == "," and bracket_depth == 0:
                # Top-level comma, new parameter
                if current_param.strip():
                    parameters.append(current_param.strip())
                current_param = ""
            else:
                current_param += char

        # Add final parameter
        if current_param.strip():
            parameters.append(current_param.strip())

        return parameters

    def _calculate_nesting_depth(self, content: str) -> int:
        """Calculate the maximum nesting depth in the type."""
        max_depth = 0
        current_depth = 0

        for char in content:
            if char == "[":
                current_depth += 1
                max_depth = max(max_depth, current_depth)
            elif char == "]":
                current_depth -= 1

        return max_depth + 1  # +1 for the outer level

    def _split_union_alternatives(self, content: str) -> list[str]:
        """Split Union alternatives respecting nested brackets."""
        alternatives = []
        current_alt = ""
        bracket_depth = 0

        for char in content:
            if char == "[":
                bracket_depth += 1
                current_alt += char
            elif char == "]":
                bracket_depth -= 1
                current_alt += char
            elif char == "," and bracket_depth == 0:
                # Top-level comma, new alternative
                if current_alt.strip():
                    alternatives.append(current_alt.strip())
                current_alt = ""
            else:
                current_alt += char

        # Add final alternative
        if current_alt.strip():
            alternatives.append(current_alt.strip())

        return alternatives




[docs]
def enhanced_sanitize_tool_name(
    raw_name: str, include_metadata: bool = True, max_complexity: int = 5
) -> tuple[str, NamingTransformation | None]:
    """Enhanced tool name sanitization with metadata and complex type support.

    Args:
        raw_name: Raw tool name from __name__ or type annotation
        include_metadata: Whether to return transformation metadata
        max_complexity: Maximum complexity level to allow (warns if exceeded)

    Returns:
        Tuple of (sanitized_name, transformation_metadata)

    Examples:
        >>> name, meta = enhanced_sanitize_tool_name("List[Dict[str, Task]]")
        >>> print(name)
        'list_dict_str_task_nested_generic'
        >>> print(meta.description)
        '3-level nested generic with 4 type parameters'

        >>> name, meta = enhanced_sanitize_tool_name("Union[str, Optional[Plan[Task]]]")
        >>> print(name)
        'union_str_optional_plan_task_generic'
        >>> print(meta.complexity_level)
        2
    """
    if not raw_name or not isinstance(raw_name, str):
        simple_result = sanitize_tool_name(raw_name)
        if include_metadata:
            return simple_result, None
        return simple_result, None

    parser = EnhancedGenericParser()

    # Check if this looks like a complex type
    if any(
        pattern in raw_name
        for pattern in ["[", "Union", "Optional", "Literal", "Dict", "List"]
    ):
        # Use enhanced parsing
        transformation = parser.parse_complex_type(raw_name)

        # Check complexity warnings
        if transformation.complexity_level > max_complexity:
            transformation.warnings.append(
                f"Complexity level {transformation.complexity_level} exceeds recommended maximum {max_complexity}"
            )

        # Apply final cleanup
        final_name = _final_cleanup(transformation.final_name)
        transformation.final_name = final_name

        if include_metadata:
            return final_name, transformation
        return final_name, None
    else:
        # Use simple sanitization
        simple_result = sanitize_tool_name(raw_name)

        if include_metadata:
            # Create minimal metadata for simple case
            metadata = NamingTransformation(
                original_name=raw_name,
                final_name=simple_result,
                transformation_type="simple",
                complexity_level=0,
                type_parameters=[],
                nesting_depth=0,
                description="Simple name sanitization",
                warnings=[],
            )
            return simple_result, metadata
        return simple_result, None




[docs]
def analyze_naming_complexity(type_names: list[str]) -> dict[str, Any]:
    """Analyze naming complexity across multiple type names.

    Args:
        type_names: List of type names to analyze

    Returns:
        Dictionary with complexity analysis results

    Examples:
        >>> results = analyze_naming_complexity([
        ...     "Plan[Task]",
        ...     "List[Dict[str, Task]]",
        ...     "Union[str, Optional[Plan[Task]]]"
        ... ])
        >>> print(f"Average complexity: {results['average_complexity']}")
        >>> print(f"Most complex: {results['most_complex']['name']}")
    """
    results = {
        "total_count": len(type_names),
        "transformations": [],
        "complexity_distribution": {},
        "total_warnings": 0,
        "most_complex": None,
        "average_complexity": 0.0,
        "recommendations": [],
    }

    max_complexity = 0
    total_complexity = 0

    for name in type_names:
        _, transformation = enhanced_sanitize_tool_name(name, include_metadata=True)

        if transformation:
            results["transformations"].append(transformation)

            # Track complexity
            complexity = transformation.complexity_level
            total_complexity += complexity

            if complexity > max_complexity:
                max_complexity = complexity
                results["most_complex"] = {
                    "name": name,
                    "complexity": complexity,
                    "description": transformation.description,
                }

            # Count complexity levels
            level_key = f"level_{complexity}"
            results["complexity_distribution"][level_key] = (
                results["complexity_distribution"].get(level_key, 0) + 1
            )

            # Count warnings
            results["total_warnings"] += len(transformation.warnings)

    # Calculate averages
    if results["total_count"] > 0:
        results["average_complexity"] = total_complexity / results["total_count"]

    # Generate recommendations
    if results["average_complexity"] > 2:
        results["recommendations"].append(
            "Consider simplifying type annotations for better tool naming"
        )

    if results["total_warnings"] > 0:
        results["recommendations"].append(
            f"Review {results['total_warnings']} naming warnings"
        )

    if max_complexity > 4:
        results["recommendations"].append(
            "Some types are very complex - consider breaking them down"
        )

    return results



# Convenience functions for common use cases

[docs]
def sanitize_pydantic_model_name_enhanced(
    model,
) -> tuple[str, NamingTransformation | None]:
    """Enhanced Pydantic model name sanitization with metadata."""
    if hasattr(model, "__name__"):
        raw_name = model.__name__
    elif hasattr(model, "model_config") and hasattr(model.model_config, "title"):
        raw_name = model.model_config.title
    else:
        raw_name = str(model)

    return enhanced_sanitize_tool_name(raw_name)




[docs]
def get_naming_suggestions_enhanced(
    raw_name: str, count: int = 5
) -> list[dict[str, Any]]:
    """Get multiple enhanced naming suggestions with metadata."""
    suggestions = []

    # Base suggestion
    base_name, base_meta = enhanced_sanitize_tool_name(raw_name)
    suggestions.append(
        {
            "name": base_name,
            "strategy": "enhanced_default",
            "metadata": base_meta,
            "description": (
                base_meta.description if base_meta else "Enhanced default sanitization"
            ),
        }
    )

    # Alternative strategies
    if count > 1:
        # Try with different complexity tolerance
        alt_name, alt_meta = enhanced_sanitize_tool_name(raw_name, max_complexity=10)
        if alt_name != base_name:
            suggestions.append(
                {
                    "name": alt_name,
                    "strategy": "high_complexity",
                    "metadata": alt_meta,
                    "description": "Allow higher complexity transformations",
                }
            )

    # Add tool suffix versions
    remaining_count = count - len(suggestions)
    for i in range(remaining_count):
        suffix_name = f"{base_name}_tool_{i+1}" if i > 0 else f"{base_name}_tool"
        suggestions.append(
            {
                "name": suffix_name,
                "strategy": f"tool_suffix_{i+1}",
                "metadata": None,
                "description": f"Base name with tool suffix variant {i+1}",
            }
        )

    return suggestions[:count]