Source code for haive.core.engine.retriever.providers.MergerRetrieverConfig
"""Merger Retriever implementation for the Haive framework.

This module provides a configuration class for the Merger retriever,
which combines and merges results from multiple retrievers to provide
comprehensive and deduplicated search results.

The MergerRetriever works by:
1. Running multiple retrievers in parallel on the same query
2. Collecting all results from different retrieval strategies
3. Merging and deduplicating results based on content or metadata
4. Applying optional ranking and filtering to the merged results

This retriever is particularly useful when you:
- Need to combine results from different retrieval approaches
- Want comprehensive coverage across multiple data sources
- Are building systems that need to deduplicate overlapping results
- Are implementing federated search across different backends

The implementation integrates with LangChain's MergerRetriever while
providing a consistent Haive configuration interface with flexible merging options.
"""

from typing import Any

from pydantic import Field

from haive.core.engine.retriever.retriever import BaseRetrieverConfig
from haive.core.engine.retriever.types import RetrieverType
@BaseRetrieverConfig.register(RetrieverType.MERGER)
class MergerRetrieverConfig(BaseRetrieverConfig):
    """Configuration for the Merger retriever in the Haive framework.

    Holds the configurations of several component retrievers and builds a
    LangChain ``MergerRetriever`` from their instantiated counterparts. How
    the component results are combined is decided entirely by that LangChain
    class; this configuration only assembles its inputs.

    Attributes:
        retriever_type (RetrieverType): The type of retriever (always MERGER).
        retrievers (list[BaseRetrieverConfig]): Retriever configurations whose
            results are merged. At least two are required.
        max_results (int): Maximum number of results to return after merging
            (1-200, default 20). See the review note on the field below.

    Examples:
        >>> from haive.core.engine.retriever import MergerRetrieverConfig
        >>> from haive.core.engine.retriever.providers.BM25RetrieverConfig import BM25RetrieverConfig
        >>> from haive.core.engine.retriever.providers.VectorStoreRetrieverConfig import VectorStoreRetrieverConfig
        >>>
        >>> # Create individual retrievers
        >>> bm25_config = BM25RetrieverConfig(name="bm25", documents=docs, k=10)
        >>> vector_config = VectorStoreRetrieverConfig(name="vector", vectorstore_config=vs_config, k=10)
        >>>
        >>> # Create merger retriever
        >>> config = MergerRetrieverConfig(
        ...     name="merger_retriever",
        ...     retrievers=[bm25_config, vector_config],
        ...     max_results=15
        ... )
        >>>
        >>> # Instantiate and use the retriever
        >>> retriever = config.instantiate()
        >>> docs = retriever.get_relevant_documents("machine learning algorithms")
    """

    retriever_type: RetrieverType = Field(
        default=RetrieverType.MERGER, description="The type of retriever"
    )

    # Core configuration
    # NOTE(review): `min_items` is the Pydantic v1 spelling; Pydantic v2 prefers
    # `min_length` for collections. Confirm the project's Pydantic version
    # before switching.
    retrievers: list[BaseRetrieverConfig] = Field(
        ...,
        min_items=2,
        description="List of retriever configurations to merge results from",
    )

    # Result limiting
    # NOTE(review): `instantiate()` below does not pass this value on to the
    # MergerRetriever it builds, so nothing in this class enforces the limit.
    # Confirm whether a caller applies it or whether this is an omission.
    max_results: int = Field(
        default=20,
        ge=1,
        le=200,
        description="Maximum number of results to return after merging",
    )

    def get_input_fields(self) -> dict[str, tuple[type, Any]]:
        """Return input field definitions for Merger retriever.

        Returns:
            A mapping with the single key ``"query"`` whose value is a
            ``(str, Field(...))`` pair describing the query input.
        """
        return {
            "query": (
                str,
                Field(description="Query for merged retrieval across multiple sources"),
            ),
        }

    def get_output_fields(self) -> dict[str, tuple[type, Any]]:
        """Return output field definitions for Merger retriever.

        Returns:
            A mapping with the single key ``"documents"`` whose value is a
            ``(list[Any], Field(...))`` pair; the field defaults to an empty
            list.
        """
        return {
            "documents": (
                list[Any],  # List[Document] but avoiding import
                Field(
                    default_factory=list,
                    description="Merged and deduplicated documents from multiple retrievers",
                ),
            ),
        }

    def instantiate(self) -> Any:
        """Create a Merger retriever from this configuration.

        Each configuration in ``self.retrievers`` is instantiated in order and
        the resulting retrievers are handed to LangChain's ``MergerRetriever``.

        Returns:
            MergerRetriever: Retriever built from the instantiated components.

        Raises:
            ImportError: If ``MergerRetriever`` cannot be imported from
                ``langchain.retrievers``. The original import error is chained
                as the cause.
            ValueError: If fewer than two retrievers are configured, or if any
                component retriever fails to instantiate. In the latter case
                the underlying exception is chained as the cause.
        """
        # Imported lazily so the module can be loaded without langchain installed.
        try:
            from langchain.retrievers import MergerRetriever
        except ImportError as e:
            raise ImportError(
                "MergerRetriever requires langchain package. "
                "Install with: pip install langchain"
            ) from e

        # Check the count before building anything so an invalid configuration
        # fails without paying for component instantiation. The field
        # constraint covers construction; this guards later mutation.
        if len(self.retrievers) < 2:
            raise ValueError(
                f"MergerRetriever requires at least 2 retrievers, got {len(self.retrievers)}"
            )

        # Instantiate all component retrievers
        instantiated_retrievers = []
        for retriever_config in self.retrievers:
            try:
                retriever = retriever_config.instantiate()
            except Exception as e:
                # Re-raise as ValueError to name the failing component, keeping
                # the original exception attached for debugging.
                raise ValueError(
                    f"Failed to instantiate retriever {retriever_config.name}: {e}"
                ) from e
            instantiated_retrievers.append(retriever)

        return MergerRetriever(retrievers=instantiated_retrievers)