"""Base Embedding Models Module.
from typing import Any
This module provides the foundational abstractions for embedding models in the Haive framework.
It includes base classes and implementations for different embedding providers that transform
text into high-dimensional vector representations for use in semantic search, clustering,
and other NLP tasks.
Typical usage example:
Examples:
>>> from haive.core.models.embeddings.base import create_embeddings, HuggingFaceEmbeddingConfig
>>>
>>> # Create a HuggingFace embedding model configuration
>>> config = HuggingFaceEmbeddingConfig(
>>> model="sentence-transformers/all-MiniLM-L6-v2"
>>> )
>>>
>>> # Instantiate the embeddings model
>>> embeddings = create_embeddings(config)
>>>
>>> # Use the model to embed documents or queries
>>> doc_embeddings = embeddings.embed_documents(["Text to embed"])
"""
import os
from typing import Any
# Mock torch to avoid slow initialization during documentation builds
[docs]
class MockTorch:
"""Mock torch module for documentation builds."""
class cuda:
@staticmethod
def is_available():
return False
class Tensor:
def __init__(self, *args, **kwargs):
pass
def to(self, *args, **kwargs):
return self
def cpu(self):
return self
def cuda(self):
return self
def numpy(self):
return []
@staticmethod
def tensor(*args, **kwargs):
return MockTorch.Tensor()
@staticmethod
def zeros(*args, **kwargs):
return MockTorch.Tensor()
@staticmethod
def ones(*args, **kwargs):
return MockTorch.Tensor()
@staticmethod
def device(*args, **kwargs):
return "cpu"
torch = MockTorch()
# Mock VertexAI to avoid slow Google Cloud imports during documentation builds
class VertexAIEmbeddings:
"""Mock VertexAI embeddings to avoid slow imports."""
def __init__(self, *args, **kwargs):
pass
try:
from dotenv import load_dotenv
except ImportError:
def load_dotenv(*args, **kwargs): pass
try:
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_community.embeddings.anyscale import AnyscaleEmbeddings
from langchain_community.embeddings.bedrock import BedrockEmbeddings
except ImportError:
# Fallback classes for documentation builds
class CacheBackedEmbeddings: pass
class LocalFileStore: pass
class AnyscaleEmbeddings: pass
class BedrockEmbeddings: pass
try:
from langchain_community.embeddings.cloudflare_workersai import (
CloudflareWorkersAIEmbeddings,
)
from langchain_community.embeddings.cohere import CohereEmbeddings
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.embeddings.jina import JinaEmbeddings
from langchain_community.embeddings.llamacpp import LlamaCppEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.embeddings.sentence_transformer import (
SentenceTransformerEmbeddings,
)
from langchain_community.embeddings.voyageai import VoyageEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
except ImportError:
# Fallback classes for documentation builds
class CloudflareWorkersAIEmbeddings: pass
class CohereEmbeddings: pass
class FastEmbedEmbeddings: pass
class JinaEmbeddings: pass
class LlamaCppEmbeddings: pass
class OllamaEmbeddings: pass
class SentenceTransformerEmbeddings: pass
class VoyageEmbeddings: pass
[docs]
class VertexAIEmbeddings: pass
class HuggingFaceEmbeddings: pass
class AzureOpenAIEmbeddings: pass
class OpenAIEmbeddings: pass
from pydantic import BaseModel, Field, SecretStr, ValidationInfo, field_validator
from haive.core.config.constants import EMBEDDINGS_CACHE_DIR
from haive.core.models.embeddings.provider_types import EmbeddingProvider
# Load environment variables from .env file if present
load_dotenv(".env")
[docs]
class SecureConfigMixin:
"""Mixin for securely handling API keys from environment variables.
This mixin provides methods for securely resolving API keys from environment
variables or explicitly provided values, with appropriate fallbacks.
"""
[docs]
@field_validator("api_key", mode="after")
@classmethod
def resolve_api_key(cls, v, info: ValidationInfo) -> Any:
"""Resolve API key from provided value or environment variables.
Args:
v: The provided API key value
info: ValidationInfo containing field data
Returns:
SecretStr: The resolved API key as a SecretStr
"""
if v and v.get_secret_value().strip():
return v
provider = info.data.get("provider")
if not provider:
return SecretStr("")
env_map = {
"azure": "AZURE_OPENAI_API_KEY",
"huggingface": "HUGGING_FACE_API_KEY",
"openai": "OPENAI_API_KEY",
"cohere": "COHERE_API_KEY",
"jina": "JINA_API_KEY",
"vertexai": "GOOGLE_API_KEY",
"bedrock": "AWS_ACCESS_KEY_ID",
"cloudflare": "CLOUDFLARE_API_TOKEN",
"voyageai": "VOYAGE_API_KEY",
"anyscale": "ANYSCALE_API_KEY",
"novita": "NOVITA_API_KEY",
}
env_key = env_map.get(provider.value.lower())
if env_key:
key = os.getenv(env_key, "")
if key.strip():
return SecretStr(key)
return SecretStr("")
[docs]
class BaseEmbeddingConfig(BaseModel, SecureConfigMixin):
"""Base configuration for embedding models.
This abstract base class defines the common interface for all embedding model
configurations, ensuring consistent instantiation patterns across providers.
Attributes:
provider: The embedding provider (e.g., Azure, HuggingFace)
model: The specific model identifier or name
api_key: The API key for the provider (if required)
"""
provider: EmbeddingProvider
model: str
api_key: SecretStr = Field(
default=SecretStr(""), description="API key for the embedding provider"
)
[docs]
def instantiate(self, **kwargs) -> Any:
"""Instantiate the embedding model with the configuration.
Args:
**kwargs: Additional keyword arguments to pass to the model constructor
Returns:
Any: The instantiated embedding model
Raises:
NotImplementedError: Must be implemented by subclasses
"""
raise NotImplementedError
[docs]
class AzureEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Azure OpenAI embedding models.
This class configures embedding models from Azure OpenAI services,
supporting environment variable resolution for credentials.
Attributes:
provider: Set to EmbeddingProvider.AZURE
model: The Azure deployment name for the embedding model
api_version: The Azure OpenAI API version to use
api_base: The Azure endpoint URL
api_type: The API type (typically "azure")
"""
provider: EmbeddingProvider = EmbeddingProvider.AZURE
api_version: str = Field(
default_factory=lambda: os.getenv(
"AZURE_OPENAI_API_VERSION", "2024-02-15-preview"
),
description="Azure API version",
)
api_base: str = Field(
default_factory=lambda: os.getenv("AZURE_OPENAI_API_BASE", ""),
description="Azure API endpoint",
)
api_type: str = Field(
default_factory=lambda: os.getenv("AZURE_OPENAI_API_TYPE", "azure"),
description="Azure API type",
)
[docs]
def instantiate(self, **kwargs) -> AzureOpenAIEmbeddings:
"""Instantiate an Azure OpenAI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to AzureOpenAIEmbeddings
Returns:
AzureOpenAIEmbeddings: The instantiated embedding model
"""
return AzureOpenAIEmbeddings(
model=self.model,
api_key=self.get_api_key(),
api_version=self.api_version,
azure_endpoint=self.api_base,
**kwargs,
)
[docs]
def get_api_key(self) -> str:
"""Get the API key as a string.
Returns:
str: The API key
"""
return self.api_key.get_secret_value()
[docs]
class HuggingFaceEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for HuggingFace embedding models.
This class configures embedding models from HuggingFace's model hub,
with support for local caching and hardware acceleration.
Attributes:
provider: Set to EmbeddingProvider.HUGGINGFACE
model: The HuggingFace model ID (defaults to all-MiniLM-L6-v2)
model_kwargs: Additional keyword arguments for model instantiation
encode_kwargs: Additional keyword arguments for encoding
query_encode_kwargs: Additional keyword arguments for query encoding
multi_process: Whether to use multi-processing for encoding
cache_folder: Where to cache the model files
show_progress: Whether to show progress bars
use_cache: Whether to use embedding caching
"""
provider: EmbeddingProvider = EmbeddingProvider.HUGGINGFACE
model_kwargs: dict[str, Any] | None = Field(
default_factory=lambda: {
"device": "cuda" if torch.cuda.is_available() else "cpu"
},
description="Additional keyword arguments for model instantiation",
)
model: str = Field(
default="sentence-transformers/all-MiniLM-L6-v2",
description="HuggingFace model ID",
)
encode_kwargs: dict[str, Any] | None = Field(
default_factory=dict, description="Additional keyword arguments for encoding"
)
query_encode_kwargs: dict[str, Any] | None = Field(
default_factory=dict,
description="Additional keyword arguments for query encoding",
)
multi_process: bool = Field(
default=False, description="Whether to use multi-processing for encoding"
)
cache_folder: str | None = Field(
default=str(EMBEDDINGS_CACHE_DIR), description="Where to cache the model files"
)
show_progress: bool = Field(
default=False, description="Whether to show progress bars"
)
use_cache: bool = Field(
default=True, description="Whether to use embedding caching"
)
[docs]
def instantiate(self, **kwargs) -> HuggingFaceEmbeddings:
"""Instantiate a HuggingFace embedding model.
This method includes error handling and GPU memory cleanup
in case of initialization failures.
Args:
**kwargs: Additional keyword arguments to pass to HuggingFaceEmbeddings
Returns:
HuggingFaceEmbeddings: The instantiated embedding model
Raises:
Exception: If model instantiation fails after cleanup attempt
"""
try:
embedder = HuggingFaceEmbeddings(
model_name=self.model,
model_kwargs=self.model_kwargs,
encode_kwargs=self.encode_kwargs,
multi_process=self.multi_process,
cache_folder=self.cache_folder,
show_progress=self.show_progress,
**kwargs,
)
if self.use_cache:
store = LocalFileStore(self.cache_folder)
return CacheBackedEmbeddings.from_bytes_store(
embedder,
document_embedding_cache=store,
query_embedding_cache=True,
namespace=self.model,
)
return embedder
except Exception:
# If first attempt fails, try cleaning up GPU memory and retry
try:
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
embedder = HuggingFaceEmbeddings(
model_name=self.model,
model_kwargs=self.model_kwargs,
encode_kwargs=self.encode_kwargs,
multi_process=self.multi_process,
cache_folder=self.cache_folder,
show_progress=self.show_progress,
**kwargs,
)
if self.use_cache:
store = LocalFileStore(self.cache_folder)
return CacheBackedEmbeddings.from_bytes_store(
embedder,
document_embedding_cache=store,
query_embedding_cache=True,
namespace=self.model,
)
return embedder
except Exception:
raise
[docs]
class OpenAIEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for OpenAI embedding models.
This class configures embedding models from OpenAI services,
supporting multiple model types and configurations.
Attributes:
provider: Set to EmbeddingProvider.OPENAI
model: The OpenAI model name for embeddings (defaults to text-embedding-3-small)
dimensions: Output dimensions for the embedding vectors
show_progress_bar: Whether to show progress bars during embedding
chunk_size: Batch size for embedding operations
"""
provider: EmbeddingProvider = EmbeddingProvider.OPENAI
model: str = Field(
default="text-embedding-3-small", description="OpenAI embedding model name"
)
dimensions: int | None = Field(
default=None, description="Output dimensions for the embedding vectors"
)
show_progress_bar: bool = Field(
default=False, description="Whether to show progress bars during embedding"
)
chunk_size: int = Field(
default=1000, description="Batch size for embedding operations"
)
[docs]
def instantiate(self, **kwargs) -> OpenAIEmbeddings:
"""Instantiate an OpenAI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to OpenAIEmbeddings
Returns:
OpenAIEmbeddings: The instantiated embedding model
"""
embedding_kwargs = {
"model": self.model,
"openai_api_key": self.api_key.get_secret_value(),
"chunk_size": self.chunk_size,
"show_progress_bar": self.show_progress_bar,
}
if self.dimensions:
embedding_kwargs["dimensions"] = self.dimensions
return OpenAIEmbeddings(**embedding_kwargs, **kwargs)
[docs]
class CohereEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Cohere embedding models.
This class configures embedding models from Cohere services.
Attributes:
provider: Set to EmbeddingProvider.COHERE
model: The Cohere model name for embeddings (defaults to embed-english-v3.0)
input_type: Type of input to be embedded (defaults to search_document)
"""
provider: EmbeddingProvider = EmbeddingProvider.COHERE
model: str = Field(
default="embed-english-v3.0", description="Cohere embedding model name"
)
input_type: str = Field(
default="search_document", description="Type of input to be embedded"
)
[docs]
def instantiate(self, **kwargs) -> CohereEmbeddings:
"""Instantiate a Cohere embedding model.
Args:
**kwargs: Additional keyword arguments to pass to CohereEmbeddings
Returns:
CohereEmbeddings: The instantiated embedding model
"""
return CohereEmbeddings(
model=self.model,
cohere_api_key=self.api_key.get_secret_value(),
input_type=self.input_type,
**kwargs,
)
[docs]
class OllamaEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Ollama embedding models.
This class configures embedding models from Ollama, which runs
locally and doesn't require an API key.
Attributes:
provider: Set to EmbeddingProvider.OLLAMA
model: The Ollama model name (defaults to nomic-embed-text)
base_url: The base URL for the Ollama server
"""
provider: EmbeddingProvider = EmbeddingProvider.OLLAMA
model: str = Field(default="nomic-embed-text", description="Ollama model name")
base_url: str = Field(
default="http://localhost:11434", description="Base URL for the Ollama server"
)
[docs]
def instantiate(self, **kwargs) -> OllamaEmbeddings:
"""Instantiate an Ollama embedding model.
Args:
**kwargs: Additional keyword arguments to pass to OllamaEmbeddings
Returns:
OllamaEmbeddings: The instantiated embedding model
"""
return OllamaEmbeddings(
model=self.model,
base_url=self.base_url,
**kwargs,
)
[docs]
class FastEmbedEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for FastEmbed embedding models.
This class configures FastEmbed models, which are lightweight and efficient
embeddings that can run on CPU.
Attributes:
provider: Set to EmbeddingProvider.FASTEMBED
model: The model name (defaults to BAAI/bge-small-en-v1.5)
max_length: Maximum sequence length
cache_folder: Where to cache the model files
use_cache: Whether to use embedding caching
"""
provider: EmbeddingProvider = EmbeddingProvider.FASTEMBED
model: str = Field(
default="BAAI/bge-small-en-v1.5", description="FastEmbed model name"
)
max_length: int = Field(default=512, description="Maximum sequence length")
cache_folder: str | None = Field(
default=str(EMBEDDINGS_CACHE_DIR), description="Where to cache the model files"
)
use_cache: bool = Field(
default=True, description="Whether to use embedding caching"
)
[docs]
def instantiate(self, **kwargs) -> FastEmbedEmbeddings:
"""Instantiate a FastEmbed embedding model.
Args:
**kwargs: Additional keyword arguments to pass to FastEmbedEmbeddings
Returns:
FastEmbedEmbeddings: The instantiated embedding model
"""
embedder = FastEmbedEmbeddings(
model_name=self.model,
max_length=self.max_length,
cache_dir=self.cache_folder,
**kwargs,
)
if self.use_cache:
store = LocalFileStore(self.cache_folder)
return CacheBackedEmbeddings.from_bytes_store(
embedder,
document_embedding_cache=store,
query_embedding_cache=True,
namespace=self.model,
)
return embedder
[docs]
class JinaEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Jina AI embedding models.
This class configures embedding models from Jina AI.
Attributes:
provider: Set to EmbeddingProvider.JINA
model: The model name (defaults to jina-embeddings-v2-base-en)
"""
provider: EmbeddingProvider = EmbeddingProvider.JINA
model: str = Field(
default="jina-embeddings-v2-base-en", description="Jina AI model name"
)
[docs]
def instantiate(self, **kwargs) -> JinaEmbeddings:
"""Instantiate a Jina AI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to JinaEmbeddings
Returns:
JinaEmbeddings: The instantiated embedding model
"""
return JinaEmbeddings(
model_name=self.model,
jina_api_key=self.api_key.get_secret_value(),
**kwargs,
)
[docs]
class VertexAIEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Google Vertex AI embedding models.
This class configures embedding models from Google Vertex AI.
Attributes:
provider: Set to EmbeddingProvider.VERTEXAI
model: The model name (defaults to textembedding-gecko@latest)
project: Google Cloud project ID
location: Google Cloud region
"""
provider: EmbeddingProvider = EmbeddingProvider.VERTEXAI
model: str = Field(
default="textembedding-gecko@latest", description="Vertex AI model name"
)
project: str | None = Field(
default_factory=lambda: os.getenv("GOOGLE_CLOUD_PROJECT", ""),
description="Google Cloud project ID",
)
location: str = Field(default="us-central1", description="Google Cloud region")
[docs]
def instantiate(self, **kwargs) -> VertexAIEmbeddings:
"""Instantiate a Google Vertex AI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to VertexAIEmbeddings
Returns:
VertexAIEmbeddings: The instantiated embedding model
"""
return VertexAIEmbeddings(
model_name=self.model,
project=self.project,
location=self.location,
**kwargs,
)
[docs]
class BedrockEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for AWS Bedrock embedding models.
This class configures embedding models from AWS Bedrock service.
Attributes:
provider: Set to EmbeddingProvider.BEDROCK
model: The model ID (defaults to amazon.titan-embed-text-v1)
region: AWS region
credentials_profile_name: AWS credentials profile name
"""
provider: EmbeddingProvider = EmbeddingProvider.BEDROCK
model: str = Field(
default="amazon.titan-embed-text-v1", description="AWS Bedrock model ID"
)
region: str = Field(
default_factory=lambda: os.getenv("AWS_REGION", "us-east-1"),
description="AWS region",
)
credentials_profile_name: str | None = Field(
default=None, description="AWS credentials profile name"
)
[docs]
def instantiate(self, **kwargs) -> BedrockEmbeddings:
"""Instantiate an AWS Bedrock embedding model.
Args:
**kwargs: Additional keyword arguments to pass to BedrockEmbeddings
Returns:
BedrockEmbeddings: The instantiated embedding model
"""
config = {
"model_id": self.model,
"region_name": self.region,
}
if self.credentials_profile_name:
config["credentials_profile_name"] = self.credentials_profile_name
return BedrockEmbeddings(**config, **kwargs)
[docs]
class CloudflareEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Cloudflare Workers AI embedding models.
This class configures embedding models from Cloudflare Workers AI.
Attributes:
provider: Set to EmbeddingProvider.CLOUDFLARE
model: The model name (defaults to @cf/baai/bge-small-en-v1.5)
account_id: Cloudflare account ID
"""
provider: EmbeddingProvider = EmbeddingProvider.CLOUDFLARE
model: str = Field(
default="@cf/baai/bge-small-en-v1.5",
description="Cloudflare Workers AI model name",
)
account_id: str = Field(
default_factory=lambda: os.getenv("CLOUDFLARE_ACCOUNT_ID", ""),
description="Cloudflare account ID",
)
[docs]
def instantiate(self, **kwargs) -> CloudflareWorkersAIEmbeddings:
"""Instantiate a Cloudflare Workers AI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to CloudflareWorkersAIEmbeddings
Returns:
CloudflareWorkersAIEmbeddings: The instantiated embedding model
"""
return CloudflareWorkersAIEmbeddings(
model_name=self.model,
account_id=self.account_id,
api_token=self.api_key.get_secret_value(),
**kwargs,
)
[docs]
class LlamaCppEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for LlamaCpp local embedding models.
This class configures embedding models using LlamaCpp for local execution.
Attributes:
provider: Set to EmbeddingProvider.LLAMACPP
model: Required model name parameter (for compatibility with BaseEmbeddingConfig)
model_path: Path to the model file
n_ctx: Context size for the model
n_batch: Batch size for inference
n_gpu_layers: Number of layers to offload to GPU
"""
provider: EmbeddingProvider = EmbeddingProvider.LLAMACPP
model: str = Field(
default="llama2", description="Model name (required for BaseEmbeddingConfig)"
)
model_path: str = Field(description="Path to the model file")
n_ctx: int = Field(default=2048, description="Context size for the model")
n_batch: int = Field(default=8, description="Batch size for inference")
n_gpu_layers: int = Field(
default=0, description="Number of layers to offload to GPU"
)
[docs]
def instantiate(self, **kwargs) -> LlamaCppEmbeddings:
"""Instantiate a LlamaCpp embedding model.
Args:
**kwargs: Additional keyword arguments to pass to LlamaCppEmbeddings
Returns:
LlamaCppEmbeddings: The instantiated embedding model
"""
return LlamaCppEmbeddings(
model_path=self.model_path,
n_ctx=self.n_ctx,
n_batch=self.n_batch,
n_gpu_layers=self.n_gpu_layers,
**kwargs,
)
[docs]
class VoyageAIEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Voyage AI embedding models.
This class configures embedding models from Voyage AI.
Attributes:
provider: Set to EmbeddingProvider.VOYAGEAI
model: The model name (defaults to voyage-2)
voyage_api_url: The API URL for Voyage AI
voyage_api_version: The API version for Voyage AI
"""
provider: EmbeddingProvider = EmbeddingProvider.VOYAGEAI
model: str = Field(default="voyage-2", description="Voyage AI model name")
voyage_api_url: str = Field(
default="https://api.voyageai.com/v1/embeddings",
description="Voyage AI API URL",
)
voyage_api_version: str = Field(
default="2023-11-06", description="Voyage AI API version"
)
[docs]
def instantiate(self, **kwargs) -> VoyageEmbeddings:
"""Instantiate a Voyage AI embedding model.
Args:
**kwargs: Additional keyword arguments to pass to VoyageEmbeddings
Returns:
VoyageEmbeddings: The instantiated embedding model
"""
return VoyageEmbeddings(
model_name=self.model,
voyage_api_key=self.api_key.get_secret_value(),
voyage_api_url=self.voyage_api_url,
voyage_api_version=self.voyage_api_version,
**kwargs,
)
[docs]
class AnyscaleEmbeddingConfig(BaseEmbeddingConfig):
"""Configuration for Anyscale embedding models.
This class configures embedding models from Anyscale.
Attributes:
provider: Set to EmbeddingProvider.ANYSCALE
model: The model name (defaults to thenlper/gte-large)
base_url: The base URL for the Anyscale API
"""
provider: EmbeddingProvider = EmbeddingProvider.ANYSCALE
model: str = Field(default="thenlper/gte-large", description="Anyscale model name")
base_url: str | None = Field(
default=None, description="Base URL for the Anyscale API"
)
[docs]
def instantiate(self, **kwargs) -> AnyscaleEmbeddings:
"""Instantiate an Anyscale embedding model.
Args:
**kwargs: Additional keyword arguments to pass to AnyscaleEmbeddings
Returns:
AnyscaleEmbeddings: The instantiated embedding model
"""
config = {
"model": self.model,
"anyscale_api_key": self.api_key.get_secret_value(),
}
if self.base_url:
config["anyscale_api_base"] = self.base_url
return AnyscaleEmbeddings(**config, **kwargs)
[docs]
def create_embeddings(config: BaseEmbeddingConfig) -> Any:
"""Factory function to create embedding models from a configuration.
This function simplifies the instantiation of embedding models by
delegating to the appropriate configuration class.
Args:
config: The embedding model configuration
Returns:
Any: The instantiated embedding model
Example:
Examples:
>>> config = HuggingFaceEmbeddingConfig(model="sentence-transformers/all-mpnet-base-v2")
>>> embeddings = create_embeddings(config)
"""
return config.instantiate()