Source code for haive.core.engine.retriever.providers.WikipediaRetrieverConfig
"""Wikipedia Retriever implementation for the Haive framework.from typing import AnyThis module provides a configuration class for the Wikipedia retriever, whichretrieves articles from Wikipedia based on search queries.The WikipediaRetriever works by:1. Taking a search query2. Searching Wikipedia for matching articles3. Returning article content as documentsThis retriever is particularly useful when:- Need access to encyclopedic knowledge- Building general knowledge applications- Combining with other retrievers for comprehensive coverage- Providing factual background informationThe implementation integrates with LangChain's WikipediaRetriever while providinga consistent Haive configuration interface."""fromtypingimportAnyfromlangchain_core.documentsimportDocumentfrompydanticimportFieldfromhaive.core.engine.retriever.retrieverimportBaseRetrieverConfigfromhaive.core.engine.retriever.typesimportRetrieverType
[docs]@BaseRetrieverConfig.register(RetrieverType.WIKIPEDIA)classWikipediaRetrieverConfig(BaseRetrieverConfig):"""Configuration for Wikipedia retriever in the Haive framework. This retriever searches Wikipedia for articles matching the query and returns their content as documents. Attributes: retriever_type (RetrieverType): The type of retriever (always WIKIPEDIA). top_k_results (int): Maximum number of articles to retrieve (default: 3). lang (str): Language code for Wikipedia (default: "en"). load_max_docs (int): Maximum number of documents to load (default: 100). load_all_available_meta (bool): Whether to load all available metadata (default: False). Examples: >>> from haive.core.engine.retriever import WikipediaRetrieverConfig >>> >>> # Create the wikipedia retriever config >>> config = WikipediaRetrieverConfig( ... name="wikipedia_retriever", ... top_k_results=5, ... lang="en" ... ) >>> >>> # Instantiate and use the retriever >>> retriever = config.instantiate() >>> docs = retriever.get_relevant_documents("artificial intelligence") """retriever_type:RetrieverType=Field(default=RetrieverType.WIKIPEDIA,description="The type of retriever")top_k_results:int=Field(default=3,ge=1,le=20,description="Maximum number of Wikipedia articles to retrieve",)lang:str=Field(default="en",description="Language code for Wikipedia (e.g., 'en', 'es', 'fr')")load_max_docs:int=Field(default=100,ge=1,description="Maximum number of documents to load")load_all_available_meta:bool=Field(default=False,description="Whether to load all available metadata")
[docs]defget_input_fields(self)->dict[str,tuple[type,Any]]:"""Return input field definitions for Wikipedia retriever."""return{"query":(str,Field(description="Search query for Wikipedia articles")),}
[docs]defget_output_fields(self)->dict[str,tuple[type,Any]]:"""Return output field definitions for Wikipedia retriever."""return{"documents":(list[Document],Field(default_factory=list,description="Wikipedia articles"),),}
[docs]definstantiate(self)->Any:"""Create a Wikipedia retriever from this configuration. Returns: WikipediaRetriever: Instantiated retriever ready for document retrieval. Raises: ImportError: If required packages are not available. """try:fromlangchain_community.retrieversimportWikipediaRetrieverexceptImportError:raiseImportError("WikipediaRetriever requires wikipedia package. Install with: pip install wikipedia")returnWikipediaRetriever(top_k_results=self.top_k_results,lang=self.lang,load_max_docs=self.load_max_docs,load_all_available_meta=self.load_all_available_meta,)