Source code for haive.core.engine.retriever.providers.RePhraseQueryRetrieverConfig
"""Rephrase Query Retriever implementation for the Haive framework.

This module provides a configuration class for the Rephrase Query retriever,
which reformulates user queries using an LLM to improve retrieval performance
by creating more effective search queries.

The RePhraseQueryRetriever works by:

1. Taking the user's original query as input
2. Using an LLM to rephrase the query for better search effectiveness
3. Running the rephrased query against the base retriever
4. Returning documents found using the improved query

This retriever is particularly useful when:

- User queries are poorly formulated or ambiguous
- You need to improve search effectiveness through query optimization
- You are building systems that need to handle natural language queries better
- You want to bridge the gap between user intent and retrieval effectiveness

The implementation integrates with LangChain's RePhraseQueryRetriever while
providing a consistent Haive configuration interface with LLM integration.
"""

from typing import Any

from pydantic import Field

from haive.core.engine.aug_llm import AugLLMConfig
from haive.core.engine.retriever.retriever import BaseRetrieverConfig
from haive.core.engine.retriever.types import RetrieverType
@BaseRetrieverConfig.register(RetrieverType.REPHRASE_QUERY)
class RePhraseQueryRetrieverConfig(BaseRetrieverConfig):
    """Configuration for Rephrase Query retriever in the Haive framework.

    This retriever reformulates user queries using an LLM to improve retrieval
    performance by creating more effective search queries.

    Attributes:
        retriever_type (RetrieverType): The type of retriever (always REPHRASE_QUERY).
        base_retriever (BaseRetrieverConfig): The underlying retriever to query
            with the rephrased query.
        llm_config (AugLLMConfig): LLM configuration for query rephrasing.
        prompt_template (Optional[str]): Custom prompt template for rephrasing.
            It is wrapped in a ``PromptTemplate`` whose only input variable is
            ``question``, so the template text must use ``{question}`` as the
            placeholder for the original query.

    Examples:
        >>> from haive.core.engine.retriever import RePhraseQueryRetrieverConfig
        >>> from haive.core.engine.retriever.providers.VectorStoreRetrieverConfig import VectorStoreRetrieverConfig
        >>> from haive.core.engine.aug_llm import AugLLMConfig
        >>>
        >>> # Create base retriever and LLM config
        >>> base_config = VectorStoreRetrieverConfig(name="base", vectorstore_config=vs_config)
        >>> llm_config = AugLLMConfig(model_name="gpt-3.5-turbo", provider="openai")
        >>>
        >>> # Create rephrase query retriever
        >>> config = RePhraseQueryRetrieverConfig(
        ...     name="rephrase_retriever",
        ...     base_retriever=base_config,
        ...     llm_config=llm_config
        ... )
        >>>
        >>> # Instantiate and use the retriever
        >>> retriever = config.instantiate()
        >>> docs = retriever.get_relevant_documents("machine learning stuff")
    """

    retriever_type: RetrieverType = Field(
        default=RetrieverType.REPHRASE_QUERY, description="The type of retriever"
    )

    # Core configuration
    base_retriever: BaseRetrieverConfig = Field(
        ..., description="Base retriever configuration to query with rephrased query"
    )
    llm_config: AugLLMConfig = Field(
        ..., description="LLM configuration for query rephrasing"
    )

    # Prompt customization
    prompt_template: str | None = Field(
        default=None,
        description="Custom prompt template for rephrasing (uses default if None)",
    )

    def get_input_fields(self) -> dict[str, tuple[type, Any]]:
        """Return input field definitions for Rephrase Query retriever.

        Returns:
            A mapping with a single ``"query"`` entry: the field type (``str``)
            paired with its pydantic ``Field`` descriptor.
        """
        return {
            "query": (
                str,
                Field(description="Original query to be rephrased and searched"),
            ),
        }

    def get_output_fields(self) -> dict[str, tuple[type, Any]]:
        """Return output field definitions for Rephrase Query retriever.

        Returns:
            A mapping with a single ``"documents"`` entry: the field type
            (``list[Any]``) paired with a pydantic ``Field`` descriptor that
            defaults to an empty list.
        """
        return {
            "documents": (
                list[Any],  # List[Document] but avoiding import
                Field(
                    default_factory=list,
                    description="Documents retrieved using the rephrased query",
                ),
            ),
        }

    def instantiate(self) -> Any:
        """Create a Rephrase Query retriever from this configuration.

        Instantiates the base retriever and the LLM from their configs, wraps
        the optional custom prompt in a ``PromptTemplate``, and builds the
        retriever through ``RePhraseQueryRetriever.from_llm``.

        Returns:
            RePhraseQueryRetriever: Instantiated retriever ready for query
            rephrasing retrieval.

        Raises:
            ImportError: If required packages are not available.
            ValueError: If the base retriever or the LLM cannot be instantiated.
        """
        # Imported lazily so the config class itself stays importable without
        # langchain installed.
        try:
            from langchain.retrievers.re_phraser import RePhraseQueryRetriever
        except ImportError as exc:
            raise ImportError(
                "RePhraseQueryRetriever requires langchain package. "
                "Install with: pip install langchain"
            ) from exc

        # Instantiate the base retriever
        try:
            base_retriever = self.base_retriever.instantiate()
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise ValueError(f"Failed to instantiate base retriever: {e}") from e

        # Instantiate the LLM
        try:
            llm = self.llm_config.instantiate()
        except Exception as e:
            raise ValueError(f"Failed to instantiate LLM: {e}") from e

        # ``from_llm`` takes the model under the ``llm`` keyword and builds the
        # rephrasing chain itself; ``llm_chain`` is the constructor field name
        # and is not accepted by ``from_llm``.
        # NOTE(review): assumes ``llm_config.instantiate()`` returns an object
        # that can be piped after a prompt (an LLM/Runnable) -- confirm.
        kwargs = {
            "retriever": base_retriever,
            "llm": llm,
        }

        # Add custom prompt if provided
        if self.prompt_template:
            # Only the import can raise ImportError, so keep the guard tight.
            try:
                from langchain.prompts import PromptTemplate
            except ImportError as exc:
                raise ImportError(
                    "Custom prompt templates require langchain package. "
                    "Install with: pip install langchain"
                ) from exc

            kwargs["prompt"] = PromptTemplate(
                input_variables=["question"], template=self.prompt_template
            )

        return RePhraseQueryRetriever.from_llm(**kwargs)