r"""Robust naming utilities for OpenAI compliance and tool naming.
This module provides comprehensive utilities for sanitizing and transforming
class names, especially generic classes, to be compatible with OpenAI's
function calling requirements and follow consistent naming conventions.
Key Features:
- Handles generic classes (e.g., Plan[Task] -> plan_task_generic)
- Converts CamelCase to snake_case consistently
- Ensures OpenAI API compliance (pattern: ^[a-zA-Z0-9_\\.-]+$)
- Handles edge cases like acronyms, numbers, and special characters
- Provides reverse mapping capabilities for debugging
Example:
Basic usage::
from haive.core.utils.naming import sanitize_tool_name
# Generic class handling
name = sanitize_tool_name("Plan[Task]")
# Returns: "plan_task_generic"
# CamelCase conversion
name = sanitize_tool_name("MyComplexModel")
# Returns: "my_complex_model"
# OpenAI compliance
name = sanitize_tool_name("Invalid-Name[With]Brackets!")
# Returns: "invalid_name_with_brackets"
"""
import logging
import re
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Whole-string pattern for an OpenAI-style function name: one or more ASCII
# letters, digits, underscores, dots or dashes.
# NOTE(review): no function visible in this section references this
# constant -- confirm it is used elsewhere before relying on it.
OPENAI_VALID_PATTERN = re.compile(r"^[a-zA-Z0-9_.-]+$")
# Common acronyms that should be treated as single words when converting
# CamelCase names to snake_case (consumed by _convert_to_snake_case).
# This is a set, so iteration order is not defined; "OAuth" is the only
# mixed-case entry.
COMMON_ACRONYMS = {
"HTML",
"XML",
"JSON",
"API",
"URL",
"HTTP",
"HTTPS",
"SQL",
"CSV",
"PDF",
"UUID",
"JWT",
"OAuth",
"SMTP",
"FTP",
"SSH",
"AWS",
"GCP",
"AI",
"ML",
"NLP",
"LLM",
"GPT",
"CPU",
"GPU",
}
[docs]
def create_openai_compliant_name(raw_name: str, suffix: str = None) -> str:
    """Build an OpenAI-compliant name, optionally tagged with a suffix.

    The raw name is passed through ``sanitize_tool_name``. When a non-empty
    ``suffix`` is supplied it is sanitized the same way and joined to the
    base name with a single underscore.

    Args:
        raw_name: Raw name to process.
        suffix: Optional suffix to append (e.g., 'tool', 'generic'). ``None``
            or an empty string means "no suffix".

    Returns:
        The sanitized name, followed by ``_<sanitized suffix>`` when a
        suffix was given.

    Examples:
        >>> create_openai_compliant_name("Plan[Task]", "tool")
        'plan_task_generic_tool'
        >>> create_openai_compliant_name("MyModel")
        'my_model'
    """
    sanitized = sanitize_tool_name(raw_name)
    # Guard clause: nothing to append for None / "".
    if not suffix:
        return sanitized
    return "_".join((sanitized, sanitize_tool_name(suffix)))
def _handle_generic_classes(name: str) -> str:
    """Rewrite generic syntax such as ``Plan[Task]`` as ``Plan_Task_generic``.

    The (whitespace-stripped) name must look like ``Base[T1, T2, ...]``
    optionally followed by extra text. The base, every type parameter and
    the alphanumeric remainder of the extra text are joined with underscores
    and tagged with a trailing ``generic``.

    Args:
        name: Candidate class name.

    Returns:
        The rewritten name when the generic pattern matches; otherwise
        ``name`` exactly as it was passed in (not stripped).
    """
    parsed = re.match(r"^(\w+)\[(\w+(?:,\s*\w+)*)\](.*)$", name.strip())
    if parsed is None:
        return name

    base_class, type_params, trailing = parsed.groups()

    # Plan[Task,Status]Extra! -> ["Plan", "Task", "Status", "Extra", "generic"]
    pieces = [base_class]
    pieces.extend(param.strip() for param in type_params.split(","))

    # Only letters and digits of the trailing text survive; drop it entirely
    # when nothing is left.
    extra = re.sub(r"[^a-zA-Z0-9]", "", trailing)
    if extra:
        pieces.append(extra)
    pieces.append("generic")

    result = "_".join(pieces)
    logger.debug(f"Generic class detected: {name} -> {result}")
    return result
def _convert_to_snake_case(name: str, preserve_acronyms: bool = True) -> str:
    """Convert CamelCase to snake_case with smart acronym handling.

    With ``preserve_acronyms`` enabled, every entry of ``COMMON_ACRONYMS``
    that appears in its canonical spelling as a whole word is lowercased as
    a unit and separated from its neighbours with underscores, e.g.
    ``XMLToJSON -> xml_to_json``, ``HTTPServer -> http_server`` and
    ``OAuthToken -> oauth_token``. The remaining CamelCase boundaries are
    then split by the generic rules and the result is lowercased.

    An acronym only counts as a whole word when it is not preceded by an
    ASCII uppercase letter and is followed by an ASCII uppercase letter or
    the end of the string. Matching is case-sensitive, so ordinary words
    that merely contain the letters of an acronym ("Rapid", "Email") are
    left intact.

    Args:
        name: Name to convert.
        preserve_acronyms: When true, apply the acronym pass before the
            generic CamelCase rules; when false, use the generic rules only.

    Returns:
        The lowercased snake_case form of ``name``. Runs of underscores are
        not collapsed here.
    """
    text = name

    if preserve_acronyms and COMMON_ACRONYMS:
        # Longest first so "HTTPS" is tried before "HTTP"; the name is a
        # tiebreaker because COMMON_ACRONYMS is a set with no stable order.
        ordered = sorted(COMMON_ACRONYMS, key=lambda item: (-len(item), item))
        alternation = "|".join(re.escape(item) for item in ordered)
        # One case-sensitive pass. If a longer alternative fails the
        # lookahead (e.g. "HTTPS" in "HTTPServer"), the regex engine falls
        # back to the next alternative ("HTTP").
        acronym_pattern = re.compile(
            rf"(?<![A-Z])(?:{alternation})(?=[A-Z]|$)"
        )

        def _lower_acronym(match: re.Match) -> str:
            start, end = match.span()
            source = match.string
            # Keep the word boundary on both sides: lowercasing the acronym
            # would otherwise hide it from the generic rules below.
            lead = "_" if start > 0 and source[start - 1].isalnum() else ""
            trail = "_" if source[end:end + 1].isupper() else ""
            return f"{lead}{match.group(0).lower()}{trail}"

        text = acronym_pattern.sub(_lower_acronym, text)

    # Generic CamelCase rules.
    # Run of capitals followed by a capitalised word: "HTTPParser" -> "HTTP_Parser".
    text = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", text)
    # Lowercase letter or digit followed by a capital: "myModel" -> "my_Model".
    text = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", text)
    return text.lower()
def _ensure_openai_compliance(name: str) -> str:
r"""Ensure the name complies with OpenAI's function name requirements.
OpenAI requires function names to match: ^[a-zA-Z0-9_\\.-]+$
"""
# Remove any invalid characters, keeping only alphanumeric, underscore, dot, dash
compliant = re.sub(r"[^a-zA-Z0-9_.-]", "_", name)
# Ensure it doesn't start with a number
if compliant and compliant[0].isdigit():
compliant = f"tool_{compliant}"
return compliant
def _final_cleanup(name: str) -> str:
    """Tidy a sanitized name: underscores, empty result, and length.

    Args:
        name: Name to clean up.

    Returns:
        ``name`` with runs of underscores collapsed to one and leading or
        trailing underscores removed. If nothing remains, ``"unnamed_tool"``.
        If more than 64 characters remain, a warning is logged and the name
        is cut to its first 61 characters followed by ``"..."``.
    """
    collapsed = re.sub(r"_+", "_", name).strip("_")

    if not collapsed:
        return "unnamed_tool"
    if len(collapsed) <= 64:
        return collapsed

    logger.warning(
        f"Tool name '{collapsed}' is very long ({len(collapsed)} chars), truncating"
    )
    # 61 kept characters + 3 dots = 64; the dots mark the truncation.
    return collapsed[:61] + "..."
[docs]
def get_name_suggestions(raw_name: str, count: int = 3) -> list[str]:
    """Offer up to ``count`` alternative sanitized names for ``raw_name``.

    Candidates are produced in this order:

    1. the plain ``sanitize_tool_name`` result;
    2. (``count > 1``) the name with a ``tool`` suffix, if it differs;
    3. (``count > 2`` and the raw name is exactly ``Base[Param]``) the
       sanitized form of ``Param_Base_generic``, if not already present;
    4. numbered variants ``<base>_<n>`` until ``count`` entries exist.

    Args:
        raw_name: Original name to generate suggestions for.
        count: Number of suggestions to generate.

    Returns:
        List of suggested names, truncated to ``count`` entries.
    """
    base = sanitize_tool_name(raw_name)
    candidates = [base]

    if count > 1:
        with_tool = create_openai_compliant_name(raw_name, "tool")
        if with_tool != base:
            candidates.append(with_tool)

    wants_reversed = count > 2 and "[" in raw_name and "]" in raw_name
    if wants_reversed:
        # Only the simple single-parameter form qualifies for the swap.
        simple_generic = re.match(r"^(\w+)\[(\w+)\]$", raw_name.strip())
        if simple_generic:
            base_class, type_param = simple_generic.groups()
            flipped = sanitize_tool_name(f"{type_param}_{base_class}_generic")
            if flipped not in candidates:
                candidates.append(flipped)

    # Pad with numbered variants; the number is the current list length.
    while len(candidates) < count:
        candidates.append(f"{base}_{len(candidates)}")

    return candidates[:count]
[docs]
def create_name_mapping(original_names: list[str]) -> dict[str, str]:
    """Create a mapping from original names to unique sanitized names.

    Useful for batch processing and maintaining mappings for debugging.
    When two different originals sanitize to the same name, later ones get
    a numeric suffix (``_1``, ``_2``, ...) so every value in the mapping is
    unique. An original that appears more than once in the input keeps the
    name it was given the first time, rather than colliding with itself and
    being remapped to a suffixed name.

    Args:
        original_names: List of original names to process.

    Returns:
        Dictionary mapping original -> sanitized names, in first-seen order.

    Examples:
        >>> create_name_mapping(["Plan[Task]", "MyModel", "HTTPParser"])
        {
            'Plan[Task]': 'plan_task_generic',
            'MyModel': 'my_model',
            'HTTPParser': 'http_parser'
        }
    """
    mapping: dict[str, str] = {}
    used_names: set[str] = set()

    for original in original_names:
        if original in mapping:
            # Repeated input: the first assignment stands.
            continue

        base_sanitized = sanitize_tool_name(original)
        sanitized = base_sanitized

        # Resolve collisions with names already handed out.
        counter = 1
        while sanitized in used_names:
            sanitized = f"{base_sanitized}_{counter}"
            counter += 1

        mapping[original] = sanitized
        used_names.add(sanitized)
        logger.debug("Mapped '%s' -> '%s'", original, sanitized)

    return mapping
# Convenience functions for common patterns
[docs]
def sanitize_class_name(cls) -> str:
    """Sanitize the name of a class (or a name string) for tool usage.

    Args:
        cls: Class object or class name string. Objects exposing
            ``__name__`` contribute that attribute; anything else is
            converted with ``str()``.

    Returns:
        The result of ``sanitize_tool_name`` on the chosen raw name.
    """
    raw_name = cls.__name__ if hasattr(cls, "__name__") else str(cls)
    return sanitize_tool_name(raw_name)
[docs]
def sanitize_pydantic_model_name(model) -> str:
    """Sanitize a Pydantic model's name for tool usage.

    The raw name is chosen in this order:

    1. ``model.__name__`` when the attribute exists (the usual case for a
       model class);
    2. a non-empty ``title`` from ``model.model_config``, read as a
       dictionary key when the config is a dict and as an attribute
       otherwise;
    3. ``str(model)`` as a last resort.

    Args:
        model: Pydantic model class (objects without ``__name__`` are
            handled through the fallbacks above).

    Returns:
        Sanitized name suitable for OpenAI tools.
    """
    if hasattr(model, "__name__"):
        raw_name = model.__name__
    else:
        config = getattr(model, "model_config", None)
        if isinstance(config, dict):
            # Dict-style config: "title" is a key, not an attribute, so an
            # attribute check would never find it.
            title = config.get("title")
        else:
            title = getattr(config, "title", None)
        # A missing or empty title must not reach the sanitizer as None.
        raw_name = title if title else str(model)
    return sanitize_tool_name(raw_name)