# Source: haive.core.schema.compatibility.langchain_converters

"""LangChain-specific type converters for documents, messages, and prompts."""

from __future__ import annotations

import hashlib
import json
from typing import Any

from langchain_core.documents import Document
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    FunctionMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)
from langchain_core.prompts import (
    BasePromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
)

from haive.core.schema.compatibility.converters import TypeConverter
from haive.core.schema.compatibility.types import ConversionContext, ConversionQuality


class MessageConverter(TypeConverter):
    """Converter for LangChain message types.

    Converts between ``BaseMessage`` subclasses (Human/AI/System/Tool/
    Function), recording warnings, errors, and lost fields on the supplied
    ``ConversionContext``.
    """

    # Message type priority for lossy conversions.
    # Lower level = more general; converting toward a lower level loses
    # information, converting toward a higher level may lack required fields.
    MESSAGE_HIERARCHY = {
        BaseMessage: 0,
        SystemMessage: 1,
        HumanMessage: 1,
        AIMessage: 1,
        ToolMessage: 2,
        FunctionMessage: 2,
    }

    @property
    def name(self) -> str:
        """Return the unique registry name of this converter.

        Returns:
            The string ``"langchain_message_converter"``.
        """
        return "langchain_message_converter"

    @property
    def priority(self) -> int:
        """Return the converter priority.

        Returns:
            ``10`` — high priority so message conversions win over generic ones.
        """
        return 10

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if we can convert between message types.

        Args:
            source_type: Candidate source type.
            target_type: Candidate target type.

        Returns:
            True when both types are ``BaseMessage`` subclasses.
        """
        try:
            return issubclass(source_type, BaseMessage) and issubclass(
                target_type, BaseMessage
            )
        except TypeError:
            # issubclass raises for non-class arguments (e.g. typing constructs).
            return False

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality between two message types.

        Args:
            source_type: Source message type.
            target_type: Target message type.

        Returns:
            A ``ConversionQuality`` classification for the conversion.
        """
        if source_type == target_type:
            return ConversionQuality.LOSSLESS

        # Converting to the base type discards subclass identity.
        if target_type == BaseMessage:
            return ConversionQuality.LOSSY

        # Unknown types default to level 99 (treated as most specific).
        source_level = self.MESSAGE_HIERARCHY.get(source_type, 99)
        target_level = self.MESSAGE_HIERARCHY.get(target_type, 99)

        if source_level == target_level:
            return ConversionQuality.SAFE
        if source_level < target_level:
            # General -> specific: required fields (e.g. tool_call_id) may be missing.
            return ConversionQuality.UNSAFE
        # Specific -> general drops information.
        return ConversionQuality.LOSSY

    def convert(self, value: BaseMessage, context: ConversionContext) -> BaseMessage:
        """Convert *value* to the message type named in ``context.target_type``.

        Args:
            value: The message to convert.
            context: Conversion context; receives warnings/errors and the
                dotted target type name.

        Returns:
            The converted message, or *value* unchanged when no conversion
            applies or the conversion fails (the failure is recorded on
            *context*).
        """
        target_type_name = context.target_type.split(".")[-1]
        target_type = self._get_message_type(target_type_name)

        # Already the requested type - no conversion needed.
        if isinstance(value, target_type):
            return value

        # Special conversions that need more than content/kwargs.
        if isinstance(value, AIMessage) and target_type == ToolMessage:
            return self._ai_to_tool(value, context)
        if isinstance(value, ToolMessage) and target_type == AIMessage:
            return self._tool_to_ai(value, context)

        # Extract core data only once the generic paths are reached
        # (the special-case paths above never use these).
        content = value.content
        additional_kwargs = value.additional_kwargs.copy()

        if isinstance(value, HumanMessage | AIMessage) and target_type == SystemMessage:
            context.add_warning("Converting user/assistant message to system message")
            return SystemMessage(content=content, additional_kwargs=additional_kwargs)

        # Generic conversion.
        try:
            if target_type == HumanMessage:
                return HumanMessage(
                    content=content, additional_kwargs=additional_kwargs
                )
            if target_type == AIMessage:
                return AIMessage(content=content, additional_kwargs=additional_kwargs)
            if target_type == SystemMessage:
                return SystemMessage(
                    content=content, additional_kwargs=additional_kwargs
                )
            if target_type == BaseMessage:
                # Keep original type but track as "converted".
                context.add_warning(f"Keeping as {type(value).__name__}")
                return value
            # Direct instantiation; additional_kwargs are spread so that
            # required constructor fields (e.g. ToolMessage.tool_call_id)
            # can be supplied from the source message's extras.
            return target_type(content=content, **additional_kwargs)
        except Exception as e:
            context.add_error(f"Conversion failed: {e!s}")
            return value

    def _ai_to_tool(self, ai_msg: AIMessage, context: ConversionContext) -> ToolMessage:
        """Convert AIMessage to ToolMessage.

        Uses the first tool call when present (remaining tool calls are
        tracked as lost); otherwise synthesizes a ToolMessage and marks it
        ``synthetic`` in ``additional_kwargs``.
        """
        if hasattr(ai_msg, "tool_calls") and ai_msg.tool_calls:
            tool_call = ai_msg.tool_calls[0]
            # Only the first call can be represented; record the rest as lost.
            context.track_lost_field("additional_tool_calls", ai_msg.tool_calls[1:])

            return ToolMessage(
                content=ai_msg.content or json.dumps(tool_call.get("args", {})),
                tool_call_id=tool_call.get("id", self._generate_id(ai_msg.content)),
                additional_kwargs=ai_msg.additional_kwargs,
            )

        # No tool calls - generate synthetic tool message.
        context.add_warning(
            "No tool calls in AIMessage, creating synthetic ToolMessage"
        )
        return ToolMessage(
            content=ai_msg.content,
            tool_call_id=self._generate_id(ai_msg.content),
            additional_kwargs={
                **ai_msg.additional_kwargs,
                "synthetic": True,
                "original_type": "AIMessage",
            },
        )

    def _tool_to_ai(
        self, tool_msg: ToolMessage, context: ConversionContext
    ) -> AIMessage:
        """Convert ToolMessage to AIMessage, preserving the tool_call_id in kwargs."""
        return AIMessage(
            content=f"Tool Response [{tool_msg.tool_call_id}]: {tool_msg.content}",
            additional_kwargs={
                **tool_msg.additional_kwargs,
                "was_tool_message": True,
                "tool_call_id": tool_msg.tool_call_id,
            },
        )

    def _get_message_type(self, type_name: str) -> type:
        """Get message type from string name; unknown names fall back to BaseMessage."""
        type_map = {
            "BaseMessage": BaseMessage,
            "HumanMessage": HumanMessage,
            "AIMessage": AIMessage,
            "SystemMessage": SystemMessage,
            "ToolMessage": ToolMessage,
            "FunctionMessage": FunctionMessage,
        }
        return type_map.get(type_name, BaseMessage)

    def _generate_id(self, content: str) -> str:
        """Generate a short, deterministic ID from content.

        MD5 is used purely as a non-cryptographic fingerprint.
        NOTE(review): assumes ``content`` is a str — multimodal message
        content (lists) would fail ``.encode()``; confirm upstream.
        """
        return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()[:8]
class DocumentConverter(TypeConverter):
    """Converter for Document-related conversions.

    Handles ``Document`` <-> message, ``Document`` <-> ``dict``, and
    ``Document`` <-> ``str`` conversions, tracking lost metadata on the
    supplied ``ConversionContext``.
    """

    @property
    def name(self) -> str:
        """Return the unique registry name of this converter.

        Returns:
            The string ``"langchain_document_converter"``.
        """
        return "langchain_document_converter"

    @property
    def priority(self) -> int:
        """Return the converter priority.

        Returns:
            ``10`` — high priority for document conversions.
        """
        return 10

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if conversion is possible.

        Supported pairs: Document <-> BaseMessage subclasses, and
        Document <-> dict/str (both directions).
        """
        # Document to Message conversions
        if source_type == Document and issubclass(target_type, BaseMessage):
            return True

        # Message to Document conversions
        if issubclass(source_type, BaseMessage) and target_type == Document:
            return True

        # Document to dict/str
        if source_type == Document and target_type in [dict, str]:
            return True

        # dict/str to Document
        return bool(source_type in [dict, str] and target_type == Document)

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality.

        Document <-> Message and Document -> str are lossy (metadata is
        flattened or dropped); everything else supported here is safe.
        """
        if (source_type == Document and issubclass(target_type, BaseMessage)) or (
            issubclass(source_type, BaseMessage) and target_type == Document
        ):
            return ConversionQuality.LOSSY

        # Document -> dict keeps both page_content and metadata.
        if source_type == Document and target_type == dict:
            return ConversionQuality.SAFE

        # Document -> str drops metadata.
        if source_type == Document and target_type == str:
            return ConversionQuality.LOSSY

        return ConversionQuality.SAFE

    def convert(self, value: Any, context: ConversionContext) -> Any:
        """Perform conversion, dispatching on the runtime type of *value*.

        Returns *value* unchanged when no conversion branch matches.
        """
        target_type_name = context.target_type.split(".")[-1]

        # Document to Message / dict / str
        if isinstance(value, Document):
            if target_type_name in ["HumanMessage", "BaseMessage"]:
                return self._doc_to_human_message(value, context)
            if target_type_name == "AIMessage":
                return self._doc_to_ai_message(value, context)
            if target_type_name == "dict":
                return self._doc_to_dict(value, context)
            if target_type_name == "str":
                return self._doc_to_str(value, context)

        # Message to Document
        elif isinstance(value, BaseMessage):
            return self._message_to_doc(value, context)

        # Dict to Document
        elif isinstance(value, dict):
            return self._dict_to_doc(value, context)

        # String to Document
        elif isinstance(value, str):
            return self._str_to_doc(value, context)

        return value

    def _doc_to_human_message(
        self, doc: Document, context: ConversionContext
    ) -> HumanMessage:
        """Convert Document to HumanMessage.

        Appends a ``[Source: ...]`` suffix from metadata and records the
        full metadata as a lost field (it survives only in additional_kwargs).
        """
        metadata_str = ""
        if doc.metadata:
            metadata_str = f"\n[Source: {doc.metadata.get('source', 'unknown')}]"
            context.track_lost_field("full_metadata", doc.metadata)

        return HumanMessage(
            content=doc.page_content + metadata_str,
            additional_kwargs={
                "source": "document",
                "doc_metadata": doc.metadata,
            },
        )

    def _doc_to_ai_message(
        self, doc: Document, context: ConversionContext
    ) -> AIMessage:
        """Convert Document to AIMessage, keeping metadata in additional_kwargs."""
        return AIMessage(
            content=f"Document content: {doc.page_content}",
            additional_kwargs={
                "source": "document",
                "doc_metadata": doc.metadata,
            },
        )

    def _doc_to_dict(self, doc: Document, context: ConversionContext) -> dict:
        """Convert Document to a plain dict (lossless for content + metadata)."""
        return {
            "page_content": doc.page_content,
            "metadata": doc.metadata,
            "type": "Document",
        }

    def _doc_to_str(self, doc: Document, context: ConversionContext) -> str:
        """Convert Document to string; metadata is dropped and tracked as lost."""
        if doc.metadata:
            context.track_lost_field("metadata", doc.metadata)
        return doc.page_content

    def _message_to_doc(self, msg: BaseMessage, context: ConversionContext) -> Document:
        """Convert Message to Document, folding additional_kwargs into metadata."""
        metadata = {
            "source": "message",
            "message_type": type(msg).__name__,
            **msg.additional_kwargs,
        }

        return Document(
            page_content=msg.content,
            metadata=metadata,
        )

    def _dict_to_doc(self, data: dict, context: ConversionContext) -> Document:
        """Convert dict to Document.

        Recognizes ``page_content``/``metadata`` and ``content`` key layouts;
        any other dict is stringified into page_content with itself as metadata.
        """
        if "page_content" in data:
            return Document(
                page_content=data["page_content"],
                metadata=data.get("metadata", {}),
            )
        if "content" in data:
            return Document(
                page_content=data["content"],
                metadata={k: v for k, v in data.items() if k != "content"},
            )
        # Treat entire dict as metadata.
        return Document(
            page_content=str(data),
            metadata=data,
        )

    def _str_to_doc(self, text: str, context: ConversionContext) -> Document:
        """Convert string to Document with a synthetic ``source`` marker."""
        return Document(
            page_content=text,
            metadata={"source": "string"},
        )
class PromptConverter(TypeConverter):
    """Converter for Prompt-related conversions.

    Handles str <-> prompt templates and conversions between
    ``PromptTemplate`` and ``ChatPromptTemplate``, plus message lists to
    ``ChatPromptTemplate``.
    """

    @property
    def name(self) -> str:
        """Return the unique registry name of this converter.

        Returns:
            The string ``"langchain_prompt_converter"``.
        """
        return "langchain_prompt_converter"

    @property
    def priority(self) -> int:
        """Return the converter priority.

        Returns:
            ``10`` — high priority for prompt conversions.
        """
        return 10

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if conversion is possible.

        Supported pairs: str <-> BasePromptTemplate subclasses, prompt
        template to prompt template, and list -> ChatPromptTemplate.
        """
        # String to Prompt
        if source_type == str and issubclass(target_type, BasePromptTemplate):
            return True

        # Prompt to String
        if issubclass(source_type, BasePromptTemplate) and target_type == str:
            return True

        # Between prompt types
        if issubclass(source_type, BasePromptTemplate) and issubclass(
            target_type, BasePromptTemplate
        ):
            return True

        # Messages to ChatPrompt
        return bool(source_type == list and target_type == ChatPromptTemplate)

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality.

        Identity is lossless; anything involving raw strings is lossy
        (template structure / input variables are lost); the rest is safe.
        """
        if source_type == target_type:
            return ConversionQuality.LOSSLESS

        # String conversions are lossy (lose template info).
        if str in (source_type, target_type):
            return ConversionQuality.LOSSY

        # PromptTemplate <-> ChatPromptTemplate
        if (source_type == PromptTemplate and target_type == ChatPromptTemplate) or (
            source_type == ChatPromptTemplate and target_type == PromptTemplate
        ):
            return ConversionQuality.SAFE

        return ConversionQuality.SAFE

    def convert(self, value: Any, context: ConversionContext) -> Any:
        """Perform conversion, dispatching on the runtime type of *value*.

        Note: the subclass checks (PromptTemplate, ChatPromptTemplate) run
        BEFORE the generic BasePromptTemplate -> str branch; in the previous
        ordering the BasePromptTemplate test shadowed its subclasses, making
        prompt-to-prompt conversions unreachable.

        Returns *value* unchanged when no conversion branch matches.
        """
        target_type_name = context.target_type.split(".")[-1]

        # String to Prompt
        if isinstance(value, str):
            if target_type_name == "PromptTemplate":
                return PromptTemplate.from_template(value)
            if target_type_name == "ChatPromptTemplate":
                return ChatPromptTemplate.from_template(value)

        # PromptTemplate to ChatPromptTemplate (must precede the generic
        # BasePromptTemplate branch — PromptTemplate is a subclass of it).
        elif isinstance(value, PromptTemplate) and target_type_name == "ChatPromptTemplate":
            return ChatPromptTemplate.from_template(value.template)

        # ChatPromptTemplate to PromptTemplate
        elif isinstance(value, ChatPromptTemplate) and target_type_name == "PromptTemplate":
            # Flatten to single template
            context.add_warning("Flattening ChatPromptTemplate to PromptTemplate")
            template_parts = []
            for msg in value.messages:
                if hasattr(msg, "prompt") and hasattr(msg.prompt, "template"):
                    template_parts.append(msg.prompt.template)
                elif isinstance(msg, MessagesPlaceholder):
                    # Represent the placeholder as a template variable.
                    template_parts.append(f"{{{msg.variable_name}}}")
            return PromptTemplate.from_template("\n".join(template_parts))

        # Prompt to String (generic fallback for any BasePromptTemplate)
        elif isinstance(value, BasePromptTemplate):
            if target_type_name == "str":
                # Try to get template string
                if hasattr(value, "template"):
                    return value.template
                context.add_warning("Complex prompt converted to string representation")
                return str(value)

        # List of messages to ChatPromptTemplate
        elif isinstance(value, list):
            if target_type_name == "ChatPromptTemplate":
                return ChatPromptTemplate.from_messages(value)

        return value
def register_langchain_converters(registry: Any | None = None) -> None:
    """Register all LangChain converters with the global registry.

    Args:
        registry: Accepted for API compatibility but not consulted here —
            converters always go to the global registry.
            NOTE(review): confirm whether a caller-supplied registry should
            be honored instead.
    """
    # Imported lazily to avoid a circular import at module load time.
    from haive.core.schema.compatibility.converters import register_converter

    for converter_cls in (MessageConverter, DocumentConverter, PromptConverter):
        register_converter(converter_cls())