"""LangChain-specific type converters for documents, messages, and prompts."""
from __future__ import annotations
import hashlib
import json
from typing import Any
from langchain_core.documents import Document
from langchain_core.messages import (
AIMessage,
BaseMessage,
FunctionMessage,
HumanMessage,
SystemMessage,
ToolMessage,
)
from langchain_core.prompts import (
BasePromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
PromptTemplate,
)
from haive.core.schema.compatibility.converters import TypeConverter
from haive.core.schema.compatibility.types import ConversionContext, ConversionQuality
[docs]
class MessageConverter(TypeConverter):
    """Converter between LangChain message types.

    Handles any pair of ``BaseMessage`` subclasses. AIMessage <-> ToolMessage
    and Human/AI -> SystemMessage have dedicated handling; every other pair
    goes through a generic "copy content and additional_kwargs" path.
    """

    # Message type priority for lossy conversions. A lower level means a more
    # general type; types missing from this table are treated as level 99.
    MESSAGE_HIERARCHY = {
        BaseMessage: 0,
        SystemMessage: 1,
        HumanMessage: 1,
        AIMessage: 1,
        ToolMessage: 2,
        FunctionMessage: 2,
    }

    @property
    def name(self) -> str:
        """Return the name of this converter.

        Returns:
            The constant string ``"langchain_message_converter"``.
        """
        return "langchain_message_converter"

    @property
    def priority(self) -> int:
        """Return the priority of this converter.

        Returns:
            The constant ``10``.
        """
        return 10  # High priority for message conversions

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if we can convert between message types.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            True when both types are subclasses of ``BaseMessage``. False
            otherwise, including when either argument is not a class.
        """
        try:
            return issubclass(source_type, BaseMessage) and issubclass(
                target_type, BaseMessage
            )
        except TypeError:
            # issubclass() raises TypeError for non-class arguments.
            return False

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            LOSSLESS for identical types, LOSSY when the target is
            ``BaseMessage`` or sits at a lower hierarchy level than the
            source, SAFE when both sit at the same level, and UNSAFE when the
            target sits at a higher level than the source.
        """
        if source_type == target_type:
            return ConversionQuality.LOSSLESS
        # Converting to base type is lossy
        if target_type == BaseMessage:
            return ConversionQuality.LOSSY
        # Check hierarchy levels
        source_level = self.MESSAGE_HIERARCHY.get(source_type, 99)
        target_level = self.MESSAGE_HIERARCHY.get(target_type, 99)
        if source_level == target_level:
            return ConversionQuality.SAFE
        if source_level < target_level:
            # Converting from general to specific
            return ConversionQuality.UNSAFE
        # Converting from specific to general
        return ConversionQuality.LOSSY

    def convert(self, value: BaseMessage, context: ConversionContext) -> BaseMessage:
        """Convert between message types.

        The target class is looked up from the last dotted component of
        ``context.target_type``. Names that are not in the lookup table
        resolve to ``BaseMessage``, in which case the value is returned
        unchanged by the "same type" check below.

        Args:
            value: Message to convert.
            context: Conversion context; supplies the target type name and
                receives warnings, errors and lost-field records.

        Returns:
            The converted message, or ``value`` itself when it already is an
            instance of the target type or when construction of the target
            fails (the failure is recorded with ``context.add_error``).
        """
        target_type_name = context.target_type.split(".")[-1]
        target_type = self._get_message_type(target_type_name)
        # Same type - no conversion
        if isinstance(value, target_type):
            return value
        # Extract core data; copy so the new message does not share the dict
        content = value.content
        additional_kwargs = value.additional_kwargs.copy()
        # Special conversions
        if isinstance(value, AIMessage) and target_type == ToolMessage:
            return self._ai_to_tool(value, context)
        if isinstance(value, ToolMessage) and target_type == AIMessage:
            return self._tool_to_ai(value, context)
        if isinstance(value, (HumanMessage, AIMessage)) and target_type == SystemMessage:
            context.add_warning("Converting user/assistant message to system message")
            return SystemMessage(content=content, additional_kwargs=additional_kwargs)
        # Generic conversion
        try:
            if target_type == HumanMessage:
                return HumanMessage(
                    content=content, additional_kwargs=additional_kwargs
                )
            if target_type == AIMessage:
                return AIMessage(content=content, additional_kwargs=additional_kwargs)
            if target_type == SystemMessage:
                return SystemMessage(
                    content=content, additional_kwargs=additional_kwargs
                )
            if target_type == BaseMessage:
                # Keep original type but track as "converted"
                context.add_warning(f"Keeping as {type(value).__name__}")
                return value
            # Try direct instantiation; the extra kwargs are spread as
            # constructor arguments here rather than passed as one dict.
            return target_type(content=content, **additional_kwargs)
        except Exception as e:
            # Best effort: report the failure and hand back the input.
            context.add_error(f"Conversion failed: {e!s}")
            return value

    def _ai_to_tool(self, ai_msg: AIMessage, context: ConversionContext) -> ToolMessage:
        """Convert AIMessage to ToolMessage.

        When the message carries tool calls, the first one supplies the
        ``tool_call_id`` (and, if the message content is empty, its JSON
        encoded ``args`` become the content). Remaining tool calls are
        recorded as lost. Without tool calls a synthetic ToolMessage is
        built with an id derived from the content.

        Args:
            ai_msg: Source message.
            context: Conversion context receiving warnings / lost fields.

        Returns:
            The resulting ToolMessage.
        """
        tool_calls = getattr(ai_msg, "tool_calls", None)
        if tool_calls:
            tool_call = tool_calls[0]
            dropped_calls = tool_calls[1:]
            # Only report a loss when something is actually dropped.
            if dropped_calls:
                context.track_lost_field("additional_tool_calls", dropped_calls)
            # `or` (rather than a .get default) also covers an "id" key that
            # is present but None/empty, and avoids hashing when not needed.
            tool_call_id = tool_call.get("id") or self._generate_id(ai_msg.content)
            return ToolMessage(
                content=ai_msg.content or json.dumps(tool_call.get("args", {})),
                tool_call_id=tool_call_id,
                additional_kwargs=dict(ai_msg.additional_kwargs),
            )
        # No tool calls - generate synthetic tool message
        context.add_warning(
            "No tool calls in AIMessage, creating synthetic ToolMessage"
        )
        return ToolMessage(
            content=ai_msg.content,
            tool_call_id=self._generate_id(ai_msg.content),
            additional_kwargs={
                **ai_msg.additional_kwargs,
                "synthetic": True,
                "original_type": "AIMessage",
            },
        )

    def _tool_to_ai(
        self, tool_msg: ToolMessage, context: ConversionContext
    ) -> AIMessage:
        """Convert ToolMessage to AIMessage.

        The tool call id is embedded in the content text and also stored in
        ``additional_kwargs`` together with a ``was_tool_message`` marker.

        Args:
            tool_msg: Source message.
            context: Conversion context (not used by this conversion).

        Returns:
            The resulting AIMessage.
        """
        return AIMessage(
            content=f"Tool Response [{tool_msg.tool_call_id}]: {tool_msg.content}",
            additional_kwargs={
                **tool_msg.additional_kwargs,
                "was_tool_message": True,
                "tool_call_id": tool_msg.tool_call_id,
            },
        )

    def _get_message_type(self, type_name: str) -> type:
        """Get message type from string name.

        Args:
            type_name: Bare class name such as ``"AIMessage"``.

        Returns:
            The matching message class, or ``BaseMessage`` for unknown names.
        """
        type_map = {
            "BaseMessage": BaseMessage,
            "HumanMessage": HumanMessage,
            "AIMessage": AIMessage,
            "SystemMessage": SystemMessage,
            "ToolMessage": ToolMessage,
            "FunctionMessage": FunctionMessage,
        }
        return type_map.get(type_name, BaseMessage)

    def _generate_id(self, content: Any) -> str:
        """Generate a short deterministic ID from message content.

        Args:
            content: Message content. Non-string content (for example a list
                of content blocks) is serialized to text before hashing.

        Returns:
            The first 8 hex characters of the MD5 digest of the content.
        """
        if not isinstance(content, str):
            try:
                content = json.dumps(content, sort_keys=True)
            except (TypeError, ValueError):
                # Not JSON-serializable; fall back to the repr-like form.
                content = str(content)
        # The digest is only an identifier, not a security measure.
        digest = hashlib.md5(content.encode(), usedforsecurity=False)
        return digest.hexdigest()[:8]
[docs]
class DocumentConverter(TypeConverter):
    """Converter for Document-related conversions.

    Supports Document <-> message, Document <-> dict and Document <-> str.
    """

    @property
    def name(self) -> str:
        """Return the name of this converter.

        Returns:
            The constant string ``"langchain_document_converter"``.
        """
        return "langchain_document_converter"

    @property
    def priority(self) -> int:
        """Return the priority of this converter.

        Returns:
            The constant ``10``.
        """
        return 10

    @staticmethod
    def _is_message_type(candidate: Any) -> bool:
        """Return True when ``candidate`` is a class derived from BaseMessage."""
        try:
            return issubclass(candidate, BaseMessage)
        except TypeError:
            # issubclass() raises TypeError for non-class arguments.
            return False

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if conversion is possible.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            True for Document -> message/dict/str and for
            message/dict/str -> Document; False otherwise, including when an
            argument is not a class.
        """
        # Document to Message / dict / str
        if source_type == Document:
            return self._is_message_type(target_type) or target_type in (dict, str)
        # Message / dict / str to Document
        if target_type == Document:
            return self._is_message_type(source_type) or source_type in (dict, str)
        return False

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            LOSSY for Document <-> message and Document -> str; SAFE for
            everything else (including Document -> dict).
        """
        # Document <-> Message is lossy (metadata handling)
        doc_to_message = source_type == Document and self._is_message_type(target_type)
        message_to_doc = target_type == Document and self._is_message_type(source_type)
        if doc_to_message or message_to_doc:
            return ConversionQuality.LOSSY
        # Document -> str is lossy
        if source_type == Document and target_type == str:
            return ConversionQuality.LOSSY
        return ConversionQuality.SAFE

    def convert(self, value: Any, context: ConversionContext) -> Any:
        """Perform conversion.

        Dispatches on the runtime type of ``value``. For Document inputs the
        last dotted component of ``context.target_type`` selects the output.

        Args:
            value: A Document, message, dict or str.
            context: Conversion context; supplies the target type name and
                receives warnings, errors and lost-field records.

        Returns:
            The converted value. A Document whose target is not supported is
            returned unchanged after recording an error on ``context``; any
            other unrecognized value is returned unchanged.
        """
        target_type_name = context.target_type.split(".")[-1]
        # Document to Message / dict / str
        if isinstance(value, Document):
            if target_type_name in ("HumanMessage", "BaseMessage"):
                return self._doc_to_human_message(value, context)
            if target_type_name == "AIMessage":
                return self._doc_to_ai_message(value, context)
            if target_type_name == "dict":
                return self._doc_to_dict(value, context)
            if target_type_name == "str":
                return self._doc_to_str(value, context)
            # Do not hand back an unconverted Document silently.
            context.add_error(
                f"Unsupported Document conversion target: {target_type_name}"
            )
            return value
        # Message to Document
        if isinstance(value, BaseMessage):
            return self._message_to_doc(value, context)
        # Dict to Document
        if isinstance(value, dict):
            return self._dict_to_doc(value, context)
        # String to Document
        if isinstance(value, str):
            return self._str_to_doc(value, context)
        return value

    def _doc_to_human_message(
        self, doc: Document, context: ConversionContext
    ) -> HumanMessage:
        """Convert Document to HumanMessage.

        When the document has metadata, a ``[Source: ...]`` line is appended
        to the content and the metadata is recorded as a lost field.

        Args:
            doc: Source document.
            context: Conversion context receiving lost-field records.

        Returns:
            HumanMessage carrying the page content; the metadata is also
            stored under ``additional_kwargs["doc_metadata"]``.
        """
        # Include metadata in message
        metadata_str = ""
        if doc.metadata:
            metadata_str = f"\n[Source: {doc.metadata.get('source', 'unknown')}]"
            context.track_lost_field("full_metadata", doc.metadata)
        return HumanMessage(
            content=doc.page_content + metadata_str,
            additional_kwargs={
                "source": "document",
                "doc_metadata": doc.metadata,
            },
        )

    def _doc_to_ai_message(
        self, doc: Document, context: ConversionContext
    ) -> AIMessage:
        """Convert Document to AIMessage.

        Args:
            doc: Source document.
            context: Conversion context (not used by this conversion).

        Returns:
            AIMessage whose content is the page content prefixed with
            ``"Document content: "``.
        """
        return AIMessage(
            content=f"Document content: {doc.page_content}",
            additional_kwargs={
                "source": "document",
                "doc_metadata": doc.metadata,
            },
        )

    def _doc_to_dict(self, doc: Document, context: ConversionContext) -> dict:
        """Convert Document to dict.

        Args:
            doc: Source document.
            context: Conversion context (not used by this conversion).

        Returns:
            Dict with ``page_content``, ``metadata`` and ``type`` keys.
        """
        return {
            "page_content": doc.page_content,
            "metadata": doc.metadata,
            "type": "Document",
        }

    def _doc_to_str(self, doc: Document, context: ConversionContext) -> str:
        """Convert Document to string.

        Args:
            doc: Source document.
            context: Conversion context; non-empty metadata is recorded as a
                lost field.

        Returns:
            The document's page content.
        """
        if doc.metadata:
            context.track_lost_field("metadata", doc.metadata)
        return doc.page_content

    def _message_to_doc(self, msg: BaseMessage, context: ConversionContext) -> Document:
        """Convert Message to Document.

        Args:
            msg: Source message.
            context: Conversion context; receives a warning when the message
                content is not a plain string.

        Returns:
            Document whose metadata holds ``source``, ``message_type`` and
            the message's ``additional_kwargs`` (which may override the
            first two keys).
        """
        page_content = msg.content
        if not isinstance(page_content, str):
            # Message content is not always a plain string; serialize it so
            # the page content is text.
            context.add_warning(
                "Non-string message content serialized to text for Document"
            )
            try:
                page_content = json.dumps(page_content)
            except (TypeError, ValueError):
                page_content = str(page_content)
        metadata = {
            "source": "message",
            "message_type": type(msg).__name__,
            **msg.additional_kwargs,
        }
        return Document(
            page_content=page_content,
            metadata=metadata,
        )

    def _dict_to_doc(self, data: dict, context: ConversionContext) -> Document:
        """Convert dict to Document.

        Three layouts are recognized, checked in this order:
        a ``page_content`` key (with optional ``metadata``), a ``content``
        key (all other keys become metadata), or anything else (the whole
        dict becomes metadata and its ``str()`` the content).

        Args:
            data: Source mapping.
            context: Conversion context (not used by this conversion).

        Returns:
            The resulting Document.
        """
        # Handle different dict formats
        if "page_content" in data:
            return Document(
                page_content=data["page_content"],
                # `or {}` also covers an explicit ``"metadata": None``.
                metadata=data.get("metadata") or {},
            )
        if "content" in data:
            return Document(
                page_content=data["content"],
                metadata={k: v for k, v in data.items() if k != "content"},
            )
        # Treat entire dict as metadata
        return Document(
            page_content=str(data),
            metadata=data,
        )

    def _str_to_doc(self, text: str, context: ConversionContext) -> Document:
        """Convert string to Document.

        Args:
            text: Text used as the page content.
            context: Conversion context (not used by this conversion).

        Returns:
            Document with ``metadata={"source": "string"}``.
        """
        return Document(
            page_content=text,
            metadata={"source": "string"},
        )
[docs]
class PromptConverter(TypeConverter):
    """Converter for Prompt-related conversions.

    Supports str <-> prompt template, PromptTemplate <-> ChatPromptTemplate
    and list of messages -> ChatPromptTemplate.
    """

    @property
    def name(self) -> str:
        """Return the name of this converter.

        Returns:
            The constant string ``"langchain_prompt_converter"``.
        """
        return "langchain_prompt_converter"

    @property
    def priority(self) -> int:
        """Return the priority of this converter.

        Returns:
            The constant ``10``.
        """
        return 10

    @staticmethod
    def _is_prompt_type(candidate: Any) -> bool:
        """Return True when ``candidate`` is a class derived from BasePromptTemplate."""
        try:
            return issubclass(candidate, BasePromptTemplate)
        except TypeError:
            # issubclass() raises TypeError for non-class arguments.
            return False

    def can_convert(self, source_type: type, target_type: type) -> bool:
        """Check if conversion is possible.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            True for str -> prompt, prompt -> str, prompt -> prompt and
            list -> ChatPromptTemplate; False otherwise, including when an
            argument is not a class.
        """
        source_is_prompt = self._is_prompt_type(source_type)
        target_is_prompt = self._is_prompt_type(target_type)
        # String to Prompt
        if source_type == str and target_is_prompt:
            return True
        # Prompt to String
        if source_is_prompt and target_type == str:
            return True
        # Between prompt types
        if source_is_prompt and target_is_prompt:
            return True
        # Messages to ChatPrompt
        return bool(source_type == list and target_type == ChatPromptTemplate)

    def get_quality(self, source_type: type, target_type: type) -> ConversionQuality:
        """Determine conversion quality.

        Args:
            source_type: Type of the value to convert.
            target_type: Type requested by the caller.

        Returns:
            LOSSLESS for identical types, LOSSY when either side is ``str``
            (template information is lost), SAFE for everything else.
        """
        # Same type
        if source_type == target_type:
            return ConversionQuality.LOSSLESS
        # String conversions are lossy (lose template info)
        if str in (source_type, target_type):
            return ConversionQuality.LOSSY
        # All remaining pairs, including PromptTemplate <-> ChatPromptTemplate
        return ConversionQuality.SAFE

    def convert(self, value: Any, context: ConversionContext) -> Any:
        """Perform conversion.

        Dispatches on the runtime type of ``value`` and on the last dotted
        component of ``context.target_type``.

        Args:
            value: A str, prompt template, or list of messages.
            context: Conversion context; supplies the target type name and
                receives warnings.

        Returns:
            The converted value, or ``value`` unchanged when no rule matches
            the value/target combination.
        """
        target_type_name = context.target_type.split(".")[-1]
        # String to Prompt
        if isinstance(value, str):
            if target_type_name == "PromptTemplate":
                return PromptTemplate.from_template(value)
            if target_type_name == "ChatPromptTemplate":
                return ChatPromptTemplate.from_template(value)
            return value
        # All prompt sources are handled in this single branch. Testing the
        # base class first in an elif chain would shadow the PromptTemplate
        # and ChatPromptTemplate cases, since both derive from it.
        if isinstance(value, BasePromptTemplate):
            if target_type_name == "str":
                return self._prompt_to_str(value, context)
            # PromptTemplate to ChatPromptTemplate
            if isinstance(value, PromptTemplate):
                if target_type_name == "ChatPromptTemplate":
                    return ChatPromptTemplate.from_template(value.template)
            # ChatPromptTemplate to PromptTemplate
            elif isinstance(value, ChatPromptTemplate):
                if target_type_name == "PromptTemplate":
                    return self._flatten_chat_prompt(value, context)
            return value
        # List of messages to ChatPromptTemplate
        if isinstance(value, list) and target_type_name == "ChatPromptTemplate":
            return ChatPromptTemplate.from_messages(value)
        return value

    def _prompt_to_str(
        self, prompt: BasePromptTemplate, context: ConversionContext
    ) -> str:
        """Return the template text of a prompt, or its str() as a fallback."""
        # Try to get template string
        if hasattr(prompt, "template"):
            return prompt.template
        context.add_warning("Complex prompt converted to string representation")
        return str(prompt)

    def _flatten_chat_prompt(
        self, chat_prompt: ChatPromptTemplate, context: ConversionContext
    ) -> PromptTemplate:
        """Join the message templates of a chat prompt into one PromptTemplate."""
        context.add_warning("Flattening ChatPromptTemplate to PromptTemplate")
        template_parts = []
        for msg in chat_prompt.messages:
            if hasattr(msg, "prompt") and hasattr(msg.prompt, "template"):
                template_parts.append(msg.prompt.template)
            elif isinstance(msg, MessagesPlaceholder):
                # Placeholders become a plain ``{variable_name}`` variable.
                template_parts.append(f"{{{msg.variable_name}}}")
            # Entries matching neither case contribute nothing.
        return PromptTemplate.from_template("\n".join(template_parts))
[docs]
def register_langchain_converters(registry: Any | None = None) -> None:
    """Register every LangChain converter defined in this module.

    One fresh instance each of ``MessageConverter``, ``DocumentConverter``
    and ``PromptConverter`` is handed to ``register_converter``, in that
    order.

    Args:
        registry: Accepted for interface compatibility; this function does
            not read it.
    """
    # Deferred import (kept local to avoid circular imports)
    from haive.core.schema.compatibility.converters import register_converter

    converter_classes = (MessageConverter, DocumentConverter, PromptConverter)
    for converter_cls in converter_classes:
        register_converter(converter_cls())