Source code for haive.core.engine.retriever.providers.MetalRetrieverConfig

"""Metal Retriever implementation for the Haive framework.

This module provides a configuration class for the Metal retriever,
which uses Metal's vector search infrastructure for high-performance
similarity search. Metal provides a managed vector database service
optimized for production use cases.

The MetalRetriever works by:
1. Connecting to a Metal index
2. Performing vector similarity search
3. Supporting metadata filtering and search
4. Providing production-ready vector infrastructure

This retriever is particularly useful when you:
- Need managed vector search infrastructure
- Are building production vector search applications
- Want optimized performance and scaling
- Need a reliable vector database service
- Are building recommendation or search systems

The implementation integrates with LangChain's MetalRetriever while
providing a consistent Haive configuration interface with secure API key management.
"""

import os
import warnings
from typing import Any

from langchain_core.documents import Document
from pydantic import Field, SecretStr

from haive.core.common.mixins.secure_config import SecureConfigMixin
from haive.core.engine.retriever.retriever import BaseRetrieverConfig
from haive.core.engine.retriever.types import RetrieverType


@BaseRetrieverConfig.register(RetrieverType.METAL)
class MetalRetrieverConfig(SecureConfigMixin, BaseRetrieverConfig):
    """Configuration for Metal retriever in the Haive framework.

    This retriever uses Metal's vector search infrastructure to provide
    high-performance similarity search with managed scaling and reliability.

    Attributes:
        retriever_type (RetrieverType): The type of retriever (always METAL).
        index_id (str): Metal index ID for the vector collection.
        api_key (Optional[SecretStr]): Metal API key (auto-resolved from
            METAL_API_KEY).
        metal_client_id (Optional[SecretStr]): Metal client ID (falls back to
            the METAL_CLIENT_ID environment variable when not provided).
        provider (str): Provider name used for API key resolution.
        k (int): Number of documents to retrieve (1-100).
        filters (Optional[Dict]): Metadata filters for search results.
        include_values (bool): Whether to include vector values in response.
        include_metadata (bool): Whether to include metadata in response.
        namespace (Optional[str]): Metal namespace for partitioning data.
        top_k (Optional[int]): Alias for ``k``; overrides ``k`` when set to a
            non-zero value.

    Note:
        LangChain's ``MetalRetriever`` is built from a ``metal_sdk`` client
        plus a ``params`` dict that is forwarded as keyword arguments to the
        client's ``search`` call. ``instantiate()`` forwards ``k``/``top_k``
        as ``limit``. ``filters``, ``namespace``, ``include_values`` and
        ``include_metadata`` are accepted by this config but are not
        forwarded; a warning is emitted when they are set to non-default
        values.

    Examples:
        >>> from haive.core.engine.retriever import MetalRetrieverConfig
        >>>
        >>> # Create the Metal retriever config
        >>> config = MetalRetrieverConfig(
        ...     name="metal_retriever",
        ...     index_id="my-metal-index-123",
        ...     k=10
        ... )
        >>>
        >>> # Instantiate and use the retriever
        >>> retriever = config.instantiate()
        >>> docs = retriever.invoke("machine learning algorithms")
        >>>
        >>> # Example with metadata filtering (see Note above)
        >>> filtered_config = MetalRetrieverConfig(
        ...     name="filtered_metal_retriever",
        ...     index_id="my-metal-index-123",
        ...     k=5,
        ...     filters={
        ...         "category": "technology",
        ...         "published_year": {"$gte": 2020}
        ...     }
        ... )
    """

    retriever_type: RetrieverType = Field(
        default=RetrieverType.METAL, description="The type of retriever"
    )

    # Metal configuration
    index_id: str = Field(..., description="Metal index ID for the vector collection")

    # API configuration with SecureConfigMixin
    api_key: SecretStr | None = Field(
        default=None, description="Metal API key (auto-resolved from METAL_API_KEY)"
    )
    metal_client_id: SecretStr | None = Field(
        default=None, description="Metal client ID (auto-resolved from METAL_CLIENT_ID)"
    )

    # Provider for SecureConfigMixin
    provider: str = Field(
        default="metal", description="Provider name for API key resolution"
    )

    # Search parameters
    k: int = Field(
        default=10, ge=1, le=100, description="Number of documents to retrieve"
    )
    filters: dict[str, Any] | None = Field(
        default=None, description="Metadata filters for search results"
    )

    # Advanced search parameters
    include_values: bool = Field(
        default=True, description="Whether to include vector values in response"
    )
    include_metadata: bool = Field(
        default=True, description="Whether to include metadata in response"
    )

    # Metal-specific parameters
    namespace: str | None = Field(
        default=None, description="Metal namespace for partitioning data"
    )
    top_k: int | None = Field(
        default=None, description="Alias for k parameter (for compatibility)"
    )

    def get_input_fields(self) -> dict[str, tuple[type, Any]]:
        """Return input field definitions for Metal retriever."""
        return {
            "query": (str, Field(description="Vector search query for Metal")),
        }

    def get_output_fields(self) -> dict[str, tuple[type, Any]]:
        """Return output field definitions for Metal retriever."""
        return {
            "documents": (
                list[Document],
                Field(
                    default_factory=list,
                    description="Documents from Metal vector search",
                ),
            ),
        }

    def _resolve_client_id(self) -> str | None:
        """Return the Metal client ID from the field or METAL_CLIENT_ID."""
        if self.metal_client_id:
            return self.metal_client_id.get_secret_value()
        # The field description and the error message both promise this
        # environment fallback, so honour it here.
        return os.environ.get("METAL_CLIENT_ID") or None

    def _warn_unforwarded_options(self) -> None:
        """Warn about options that are set but not passed to the retriever."""
        # NOTE(review): the Metal SDK search call is believed to accept only
        # index_id / ids_only / limit as keyword arguments - confirm against
        # the installed metal_sdk version before forwarding anything else.
        candidates = (
            ("filters", bool(self.filters)),
            ("namespace", bool(self.namespace)),
            ("include_values", not self.include_values),
            ("include_metadata", not self.include_metadata),
        )
        ignored = [name for name, is_set in candidates if is_set]
        if ignored:
            warnings.warn(
                "MetalRetrieverConfig options not forwarded to MetalRetriever: "
                + ", ".join(ignored),
                UserWarning,
                stacklevel=3,
            )

    def instantiate(self) -> Any:
        """Create a Metal retriever from this configuration.

        Returns:
            MetalRetriever: Instantiated retriever ready for vector search.

        Raises:
            ImportError: If required packages are not available.
            ValueError: If API key or configuration is invalid.
        """
        try:
            from langchain_community.retrievers import MetalRetriever
            from metal_sdk.metal import Metal
        except ImportError as err:
            # Chain the cause so the missing module name is not lost.
            raise ImportError(
                "MetalRetriever requires langchain-community and metal_sdk packages. "
                "Install with: pip install langchain-community metal_sdk"
            ) from err

        # Get API credentials using SecureConfigMixin
        api_key = self.get_api_key()
        if not api_key:
            raise ValueError(
                "Metal API key is required. Set METAL_API_KEY environment variable "
                "or provide api_key parameter."
            )

        client_id = self._resolve_client_id()
        if not client_id:
            raise ValueError(
                "Metal client ID is required. Set METAL_CLIENT_ID environment variable "
                "or provide metal_client_id parameter."
            )

        self._warn_unforwarded_options()

        # MetalRetriever takes a ready-made SDK client rather than raw
        # credentials, so build the client first.
        client = Metal(api_key, client_id, self.index_id)

        # Use top_k if specified (compatibility); a falsy top_k falls back to k.
        limit = self.top_k or self.k

        return MetalRetriever(client=client, params={"limit": limit})