Source code for haive.core.engine.retriever.providers.BedrockRetrieverConfig
"""Amazon Bedrock Retriever implementation for the Haive framework.

This module provides a configuration class for the Amazon Bedrock retriever,
which uses AWS Bedrock's foundation models for retrieval tasks. Bedrock provides
access to foundation models from various providers (Anthropic, AI21, etc.) and
can be used for retrieval-augmented generation workflows.

The BedrockRetriever works by:

1. Connecting to the Amazon Bedrock service
2. Using foundation models for embedding generation
3. Performing semantic search using model-generated embeddings
4. Supporting various foundation model providers

This retriever is particularly useful when:

- Building RAG applications with AWS Bedrock
- You need access to multiple foundation model providers
- You want managed AI model infrastructure
- Building enterprise applications on AWS
- You need a consistent API across different model providers

The implementation integrates with LangChain's BedrockRetriever while
providing a consistent Haive configuration interface with secure AWS
credential management.
"""

from typing import Any

from langchain_core.documents import Document
from pydantic import Field, SecretStr

from haive.core.common.mixins.secure_config import SecureConfigMixin
from haive.core.engine.retriever.retriever import BaseRetrieverConfig
from haive.core.engine.retriever.types import RetrieverType
from haive.core.engine.vectorstore.vectorstore import VectorStoreConfig
@BaseRetrieverConfig.register(RetrieverType.BEDROCK)
class BedrockRetrieverConfig(SecureConfigMixin, BaseRetrieverConfig):
    """Configuration for Amazon Bedrock retriever in the Haive framework.

    This configuration targets RAG workflows that use AWS Bedrock foundation
    models for embedding generation, backed by a configured vector store.

    Attributes:
        retriever_type (RetrieverType): The type of retriever (always BEDROCK).
        vectorstore_config (VectorStoreConfig): Vector store for document storage.
        model_id (str): Bedrock foundation model ID for embeddings.
        region_name (str): AWS region name.
        api_key (Optional[SecretStr]): AWS access key ID
            (auto-resolved from AWS_ACCESS_KEY_ID).
        secret_key (Optional[SecretStr]): AWS secret access key
            (auto-resolved from AWS_SECRET_ACCESS_KEY).
        provider (str): Provider name used for credential resolution ("aws").
        k (int): Number of documents to retrieve (between 1 and 100).
        model_kwargs (Optional[dict[str, Any]]): Additional model parameters
            for Bedrock calls.
        endpoint_url (Optional[str]): Custom Bedrock endpoint URL
            (for VPC endpoints).

    Examples:
        >>> from haive.core.engine.retriever import BedrockRetrieverConfig
        >>> from haive.core.engine.vectorstore.providers.FAISSVectorStoreConfig import FAISSVectorStoreConfig
        >>>
        >>> # Configure vector store
        >>> vectorstore_config = FAISSVectorStoreConfig(
        ...     name="bedrock_faiss_store",
        ...     index_name="bedrock_index"
        ... )
        >>>
        >>> # Create the Bedrock retriever config
        >>> config = BedrockRetrieverConfig(
        ...     name="bedrock_retriever",
        ...     vectorstore_config=vectorstore_config,
        ...     model_id="amazon.titan-embed-text-v1",
        ...     region_name="us-east-1",
        ...     k=10
        ... )
        >>>
        >>> # Instantiate and use the retriever
        >>> retriever = config.instantiate()
        >>> docs = retriever.invoke("cloud computing best practices")
        >>>
        >>> # Example with a different embedding model
        >>> cohere_config = BedrockRetrieverConfig(
        ...     name="cohere_bedrock_retriever",
        ...     vectorstore_config=vectorstore_config,
        ...     model_id="cohere.embed-english-v3",
        ...     region_name="us-west-2"
        ... )
    """

    retriever_type: RetrieverType = Field(
        default=RetrieverType.BEDROCK, description="The type of retriever"
    )

    # Vector store configuration
    vectorstore_config: VectorStoreConfig = Field(
        ..., description="Vector store configuration for document storage"
    )

    # Bedrock model configuration
    model_id: str = Field(
        default="amazon.titan-embed-text-v1",
        description="Bedrock foundation model ID for embeddings",
    )
    region_name: str = Field(default="us-east-1", description="AWS region name")

    # API configuration with SecureConfigMixin
    api_key: SecretStr | None = Field(
        default=None,
        description="AWS access key ID (auto-resolved from AWS_ACCESS_KEY_ID)",
    )
    secret_key: SecretStr | None = Field(
        default=None,
        description="AWS secret access key (auto-resolved from AWS_SECRET_ACCESS_KEY)",
    )

    # Provider for SecureConfigMixin
    provider: str = Field(
        default="aws", description="Provider name for credential resolution"
    )

    # Search parameters
    k: int = Field(
        default=10, ge=1, le=100, description="Number of documents to retrieve"
    )

    # Bedrock specific parameters
    model_kwargs: dict[str, Any] | None = Field(
        default=None, description="Additional model parameters for Bedrock calls"
    )
    endpoint_url: str | None = Field(
        default=None, description="Custom Bedrock endpoint URL (for VPC endpoints)"
    )

    def get_input_fields(self) -> dict[str, tuple[type, Any]]:
        """Return input field definitions for Bedrock retriever.

        Returns:
            A mapping with a single ``"query"`` entry: a ``(str, Field)`` pair
            describing the search query.
        """
        return {
            "query": (
                str,
                Field(description="Search query for Bedrock-powered retrieval"),
            ),
        }

    def get_output_fields(self) -> dict[str, tuple[type, Any]]:
        """Return output field definitions for Bedrock retriever.

        Returns:
            A mapping with a single ``"documents"`` entry: a
            ``(list[Document], Field)`` pair whose default is an empty list.
        """
        return {
            "documents": (
                list[Document],
                Field(
                    default_factory=list,
                    description="Documents from Bedrock-powered search",
                ),
            ),
        }

    def instantiate(self) -> Any:
        """Create a retriever from this configuration.

        Builds a boto3 session and a ``bedrock-runtime`` client, constructs a
        ``BedrockEmbeddings`` object from them, instantiates the configured
        vector store and returns it wrapped as a retriever limited to ``k``
        results.

        Returns:
            The object returned by ``vectorstore.as_retriever(...)`` for the
            vector store built from ``vectorstore_config``.

        Raises:
            ImportError: If ``boto3`` or ``langchain-aws`` is not installed.
                The original import failure is chained as ``__cause__``.

        Any exception raised by boto3, ``BedrockEmbeddings`` or the vector
        store while they are being constructed propagates unchanged.
        """
        try:
            import boto3
            from langchain_aws.embeddings import BedrockEmbeddings
        except ImportError as err:
            # Chain the cause so the traceback shows which package is missing.
            raise ImportError(
                "BedrockRetriever requires langchain-aws and boto3 packages. "
                "Install with: pip install langchain-aws boto3"
            ) from err

        # Get AWS credentials using the SecureConfigMixin approach.
        # presumably get_api_key() returns the plain access key ID string;
        # verify against SecureConfigMixin.
        access_key = self.get_api_key()
        secret_key = self.secret_key.get_secret_value() if self.secret_key else None

        # Configure the AWS session. Explicit credentials are only passed when
        # BOTH halves are available; otherwise boto3 is left to resolve
        # credentials on its own.
        session_kwargs: dict[str, Any] = {"region_name": self.region_name}
        if access_key and secret_key:
            session_kwargs.update(
                {
                    "aws_access_key_id": access_key,
                    "aws_secret_access_key": secret_key,
                }
            )
        session = boto3.Session(**session_kwargs)

        # Create the Bedrock runtime client, optionally against a custom endpoint.
        bedrock_kwargs: dict[str, Any] = {"region_name": self.region_name}
        if self.endpoint_url:
            bedrock_kwargs["endpoint_url"] = self.endpoint_url
        bedrock_client = session.client("bedrock-runtime", **bedrock_kwargs)

        # Create the Bedrock embeddings object.
        embedding_kwargs: dict[str, Any] = {
            "client": bedrock_client,
            "model_id": self.model_id,
        }
        if self.model_kwargs:
            embedding_kwargs["model_kwargs"] = self.model_kwargs
        # NOTE(review): the embeddings object is constructed but never handed
        # to the vector store, so the returned retriever uses whatever
        # embedding the vector store config itself defines. The call is kept
        # for any construction-time validation it performs; wiring it into
        # vectorstore_config needs the VectorStoreConfig API - TODO confirm.
        BedrockEmbeddings(**embedding_kwargs)

        # Instantiate the vector store from its own configuration.
        vectorstore = self.vectorstore_config.instantiate()

        # Expose the vector store as a retriever returning at most k documents.
        search_kwargs = {"k": self.k}
        return vectorstore.as_retriever(search_kwargs=search_kwargs)