Source code for haive.core.engine.retriever.providers.KNNRetrieverConfig
"""K-Nearest Neighbors Retriever implementation for the Haive framework.

This module provides a configuration class for the KNN (K-Nearest Neighbors) retriever,
which uses the k-nearest neighbors algorithm for document retrieval based on vector
similarity. KNN finds the k most similar documents to a query by computing distances
in the embedding space.

The KNNRetriever works by:

1. Embedding documents and queries using a specified embedding model
2. Computing similarity/distance metrics between query and document embeddings
3. Finding the k nearest neighbors based on the distance metric
4. Returning the k most similar documents

This retriever is particularly useful when:

- Working with small to medium-sized document collections
- Needing simple but effective similarity-based retrieval
- Wanting interpretable distance-based ranking
- Building baseline vector retrieval systems
- Comparing against more complex vector databases

The implementation integrates with LangChain's KNNRetriever while providing
a consistent Haive configuration interface.
"""

from typing import Any

from langchain_core.documents import Document
from pydantic import Field

from haive.core.engine.retriever.retriever import BaseRetrieverConfig
from haive.core.engine.retriever.types import RetrieverType
@BaseRetrieverConfig.register(RetrieverType.KNN)
class KNNRetrieverConfig(BaseRetrieverConfig):
    """Configuration for K-Nearest Neighbors retriever in the Haive framework.

    This retriever uses the KNN algorithm to find the most similar documents
    based on vector embeddings and distance metrics.

    Attributes:
        retriever_type: The type of retriever (always KNN).
        documents: Documents to index for retrieval. Defaults to an empty
            list; ``instantiate`` rejects an empty list.
        k: Number of nearest neighbors to retrieve (default: 4, allowed
            range 1-100).
        distance_metric: Distance metric to use ("cosine", "euclidean",
            "manhattan", "hamming"). Defaults to "cosine".
        embedding_model: Embedding model to use for vectorization, or
            ``None`` (the default).
        algorithm: KNN algorithm ("auto", "ball_tree", "kd_tree", "brute").
            Defaults to "auto".

    Examples:
        >>> from haive.core.engine.retriever import KNNRetrieverConfig
        >>> from langchain_core.documents import Document
        >>>
        >>> # Create documents
        >>> docs = [
        ...     Document(page_content="Machine learning trains models on data"),
        ...     Document(page_content="Deep learning uses neural network architectures"),
        ...     Document(page_content="Natural language processing analyzes text patterns")
        ... ]
        >>>
        >>> # Create the KNN retriever config
        >>> config = KNNRetrieverConfig(
        ...     name="knn_retriever",
        ...     documents=docs,
        ...     k=2,
        ...     distance_metric="cosine",
        ...     embedding_model="sentence-transformers/all-MiniLM-L6-v2"
        ... )
        >>>
        >>> # Instantiate and use the retriever
        >>> retriever = config.instantiate()
        >>> docs = retriever.get_relevant_documents("machine learning training process")
    """

    retriever_type: RetrieverType = Field(
        default=RetrieverType.KNN, description="The type of retriever"
    )

    # Documents to index
    documents: list[Document] = Field(
        default_factory=list, description="Documents to index for KNN retrieval"
    )

    # Retrieval parameters
    k: int = Field(
        default=4, ge=1, le=100, description="Number of nearest neighbors to retrieve"
    )

    # KNN algorithm parameters
    # NOTE: these are plain strings; the listed values are documented in the
    # descriptions but are not enforced by validation in this class.
    distance_metric: str = Field(
        default="cosine",
        description="Distance metric: 'cosine', 'euclidean', 'manhattan', 'hamming'",
    )
    embedding_model: str | None = Field(
        default=None,
        description="Embedding model for vectorization (e.g., 'sentence-transformers/all-MiniLM-L6-v2')",
    )

    # Additional KNN parameters
    algorithm: str = Field(
        default="auto",
        description="KNN algorithm: 'auto', 'ball_tree', 'kd_tree', 'brute'",
    )

    def get_input_fields(self) -> dict[str, tuple[type, Any]]:
        """Return input field definitions for KNN retriever.

        Returns:
            A mapping with a single ``"query"`` entry whose value is a
            ``(str, Field(...))`` pair.
        """
        return {
            "query": (str, Field(description="Text query for KNN similarity search")),
        }

    def get_output_fields(self) -> dict[str, tuple[type, Any]]:
        """Return output field definitions for KNN retriever.

        Returns:
            A mapping with a single ``"documents"`` entry whose value is a
            ``(list[Document], Field(...))`` pair defaulting to an empty list.
        """
        return {
            "documents": (
                list[Document],
                Field(default_factory=list, description="K nearest neighbor documents"),
            ),
        }

    def instantiate(self) -> Any:
        """Create a KNN retriever from this configuration.

        Returns:
            KNNRetriever: The object returned by
            ``KNNRetriever.from_documents`` for the configured documents
            and parameters.

        Raises:
            ImportError: If ``langchain_community`` cannot be imported. The
                original import error is attached as ``__cause__``.
            ValueError: If documents list is empty.
        """
        # Imported lazily so this module can be loaded without the optional
        # langchain-community dependency installed.
        try:
            from langchain_community.retrievers import KNNRetriever
        except ImportError as err:
            # Chain the underlying error so the real cause (missing package
            # vs. a broken transitive import) stays visible in the traceback.
            raise ImportError(
                "KNNRetriever requires langchain-community package. "
                "Install with: pip install langchain-community"
            ) from err

        if not self.documents:
            raise ValueError(
                "KNNRetriever requires a non-empty list of documents. "
                "Provide documents in the configuration."
            )

        # Create KNN retriever with configuration
        # NOTE(review): upstream LangChain documents
        # ``KNNRetriever.from_documents(documents, embeddings, **kwargs)``;
        # confirm that the installed version accepts ``distance_metric``,
        # ``embedding_model`` and ``algorithm`` and does not require an
        # ``embeddings`` object here -- TODO verify against the pinned
        # langchain-community release.
        return KNNRetriever.from_documents(
            documents=self.documents,
            k=self.k,
            distance_metric=self.distance_metric,
            embedding_model=self.embedding_model,
            algorithm=self.algorithm,
        )