Skip to content

cohere.embedders

A module for calling Cohere's Embeddings models.

BaseEmbedder

Bases: BaseModel, Generic[BaseEmbeddingT], ABC

The base class abstract interface for interacting with LLM embeddings.

Source code in mirascope/rag/embedders.py
class BaseEmbedder(BaseModel, Generic[BaseEmbeddingT], ABC):
    """The base class abstract interface for interacting with LLM embeddings.

    Subclasses implement `embed` and `embed_async`, each of which takes a list
    of input strings and returns a `BaseEmbeddingT` response.
    """

    # Class-level credentials/endpoint; both default to None.
    api_key: ClassVar[Optional[str]] = None
    base_url: ClassVar[Optional[str]] = None
    # Class-level parameters for the embedding call (default model shown below).
    embedding_params: ClassVar[BaseEmbeddingParams] = BaseEmbeddingParams(
        model="text-embedding-ada-002"
    )
    # The only per-instance field declared here; unset by default.
    # NOTE(review): presumably the embedding vector size -- confirm with callers.
    dimensions: Optional[int] = None
    # Class-level ops configuration; starts with no `llm_ops` and no
    # `client_wrappers`.
    configuration: ClassVar[BaseConfig] = BaseConfig(llm_ops=[], client_wrappers=[])
    # Provider identifier for this embedder class.
    _provider: ClassVar[str] = "base"

    @abstractmethod
    def embed(self, input: list[str]) -> BaseEmbeddingT:
        """Call the embedder with a list of input strings.

        Args:
            input: The strings to embed.

        Returns:
            The embedding response, typed as `BaseEmbeddingT`.
        """
        ...  # pragma: no cover

    @abstractmethod
    async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
        """Asynchronously call the embedder with a list of input strings.

        Args:
            input: The strings to embed.

        Returns:
            The embedding response, typed as `BaseEmbeddingT`.
        """
        ...  # pragma: no cover

embed(input) abstractmethod

Call the embedder with a list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
def embed(self, input: list[str]) -> BaseEmbeddingT:
    """Call the embedder with a list of input strings.

    Args:
        input: The strings to embed.

    Returns:
        The embedding response, typed as `BaseEmbeddingT`.
    """
    ...  # pragma: no cover

embed_async(input) abstractmethod async

Asynchronously call the embedder with a list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
    """Asynchronously call the embedder with a list of input strings.

    Args:
        input: The strings to embed.

    Returns:
        The embedding response, typed as `BaseEmbeddingT`.
    """
    ...  # pragma: no cover

CohereEmbedder

Bases: BaseEmbedder[CohereEmbeddingResponse]

Cohere Embedder

| model | max_dimensions |
| --- | --- |
| embed-english-v3.0 | 1024 |
| embed-multilingual-v3.0 | 1024 |
| embed-english-light-v3.0 | 384 |
| embed-multilingual-light-v3.0 | 384 |
| embed-english-v2.0 | 4096 |
| embed-english-light-v2.0 | 1024 |
| embed-multilingual-v2.0 | 768 |

Example:

import os
from mirascope.cohere import CohereEmbedder

# Provide the Cohere API key through the environment.
# NOTE(review): presumably the Cohere client reads CO_API_KEY itself, since no
# api_key is set on the embedder here -- confirm against the Cohere SDK.
os.environ["CO_API_KEY"] = "YOUR_COHERE_API_KEY"

# `embed` takes a list of strings, so even a single text is wrapped in a list.
cohere_embedder = CohereEmbedder()
response = cohere_embedder.embed(["your text to embed"])
print(response)
Source code in mirascope/cohere/embedders.py
class CohereEmbedder(BaseEmbedder[CohereEmbeddingResponse]):
    """Embedder that calls Cohere's `embed` endpoint.

    model                           max_dimensions
    embed-english-v3.0              1024
    embed-multilingual-v3.0         1024
    embed-english-light-v3.0        384
    embed-multilingual-light-v3.0   384
    embed-english-v2.0              4096
    embed-english-light-v2.0        1024
    embed-multilingual-v2.0         768

    Example:

    ```python
    import os
    from mirascope.cohere import CohereEmbedder

    os.environ["CO_API_KEY"] = "YOUR_COHERE_API_KEY"

    cohere_embedder = CohereEmbedder()
    response = cohere_embedder.embed(["your text to embed"])
    print(response)
    ```
    """

    dimensions: Optional[int] = 1024
    embedding_params: ClassVar[CohereEmbeddingParams] = CohereEmbeddingParams(
        model="embed-english-v3.0"
    )
    _provider: ClassVar[str] = "cohere"

    def embed(self, inputs: list[str]) -> CohereEmbeddingResponse:
        """Embed `inputs` with a synchronous Cohere client.

        Args:
            inputs: The texts to embed.

        Returns:
            A `CohereEmbeddingResponse` wrapping the raw response, the start and
            end timestamps (milliseconds), and the first configured embedding
            type (or `None` when none is configured).
        """
        client = get_wrapped_client(
            Client(api_key=self.api_key, base_url=self.base_url), self
        )
        # Only the first requested embedding type is recorded on the response.
        requested_types = self.embedding_params.embedding_types
        embedding_type = requested_types[0] if requested_types else None

        started_at_ms = datetime.datetime.now().timestamp() * 1000
        wrapped_embed = get_wrapped_call(
            client.embed, self, response_type=CohereEmbeddingResponse
        )
        raw_response = wrapped_embed(texts=inputs, **self.embedding_params.kwargs())
        finished_at_ms = datetime.datetime.now().timestamp() * 1000

        return CohereEmbeddingResponse(
            response=raw_response,
            start_time=started_at_ms,
            end_time=finished_at_ms,
            embedding_type=embedding_type,
        )

    async def embed_async(self, inputs: list[str]) -> CohereEmbeddingResponse:
        """Embed `inputs` with an asynchronous Cohere client.

        Args:
            inputs: The texts to embed.

        Returns:
            A `CohereEmbeddingResponse` wrapping the raw response, the start and
            end timestamps (milliseconds), and the first configured embedding
            type (or `None` when none is configured).
        """
        client = get_wrapped_async_client(
            AsyncClient(api_key=self.api_key, base_url=self.base_url), self
        )
        # Only the first requested embedding type is recorded on the response.
        requested_types = self.embedding_params.embedding_types
        embedding_type = requested_types[0] if requested_types else None

        started_at_ms = datetime.datetime.now().timestamp() * 1000
        wrapped_embed = get_wrapped_call(
            client.embed, self, is_async=True, response_type=CohereEmbeddingResponse
        )
        raw_response = await wrapped_embed(
            texts=inputs, **self.embedding_params.kwargs()
        )
        finished_at_ms = datetime.datetime.now().timestamp() * 1000

        return CohereEmbeddingResponse(
            response=raw_response,
            start_time=started_at_ms,
            end_time=finished_at_ms,
            embedding_type=embedding_type,
        )

    def __call__(
        self, input: list[str]
    ) -> Optional[Union[list[list[float]], list[list[int]]]]:
        """Embed `input` and return only the embedding vectors.

        Chroma expects parameter to be `input`.
        """
        return self.embed(input).embeddings

embed(inputs)

Call the embedder with multiple inputs

Source code in mirascope/cohere/embedders.py
def embed(self, inputs: list[str]) -> CohereEmbeddingResponse:
    """Embed `inputs` with a synchronous Cohere client.

    Args:
        inputs: The texts to embed.

    Returns:
        A `CohereEmbeddingResponse` wrapping the raw response, the start and
        end timestamps (milliseconds), and the first configured embedding type
        (or `None` when none is configured).
    """
    client = get_wrapped_client(
        Client(api_key=self.api_key, base_url=self.base_url), self
    )
    # Only the first requested embedding type is recorded on the response.
    requested_types = self.embedding_params.embedding_types
    embedding_type = requested_types[0] if requested_types else None

    started_at_ms = datetime.datetime.now().timestamp() * 1000
    wrapped_embed = get_wrapped_call(
        client.embed, self, response_type=CohereEmbeddingResponse
    )
    raw_response = wrapped_embed(texts=inputs, **self.embedding_params.kwargs())
    finished_at_ms = datetime.datetime.now().timestamp() * 1000

    return CohereEmbeddingResponse(
        response=raw_response,
        start_time=started_at_ms,
        end_time=finished_at_ms,
        embedding_type=embedding_type,
    )

embed_async(inputs) async

Asynchronously call the embedder with multiple inputs

Source code in mirascope/cohere/embedders.py
async def embed_async(self, inputs: list[str]) -> CohereEmbeddingResponse:
    """Embed `inputs` with an asynchronous Cohere client.

    Args:
        inputs: The texts to embed.

    Returns:
        A `CohereEmbeddingResponse` wrapping the raw response, the start and
        end timestamps (milliseconds), and the first configured embedding type
        (or `None` when none is configured).
    """
    client = get_wrapped_async_client(
        AsyncClient(api_key=self.api_key, base_url=self.base_url), self
    )
    # Only the first requested embedding type is recorded on the response.
    requested_types = self.embedding_params.embedding_types
    embedding_type = requested_types[0] if requested_types else None

    started_at_ms = datetime.datetime.now().timestamp() * 1000
    wrapped_embed = get_wrapped_call(
        client.embed, self, is_async=True, response_type=CohereEmbeddingResponse
    )
    raw_response = await wrapped_embed(
        texts=inputs, **self.embedding_params.kwargs()
    )
    finished_at_ms = datetime.datetime.now().timestamp() * 1000

    return CohereEmbeddingResponse(
        response=raw_response,
        start_time=started_at_ms,
        end_time=finished_at_ms,
        embedding_type=embedding_type,
    )

CohereEmbeddingResponse

Bases: BaseEmbeddingResponse[SkipValidation[EmbedResponse]]

A convenience wrapper around the Cohere EmbedResponse response.

Source code in mirascope/cohere/types.py
class CohereEmbeddingResponse(BaseEmbeddingResponse[SkipValidation[EmbedResponse]]):
    """A convenience wrapper around the Cohere `EmbedResponse` response."""

    # The embedding type to read back from a by-type response; `None` when no
    # type was requested.
    embedding_type: Optional[
        Literal["float", "int8", "uint8", "binary", "ubinary"]
    ] = None

    @property
    def embeddings(
        self,
    ) -> Optional[Union[list[list[float]], list[list[int]]]]:
        """Returns the embeddings

        For an `"embeddings_floats"` response the embeddings are returned
        directly. Otherwise the by-type embeddings are dumped to a dict and the
        entry matching `embedding_type` is returned, or `None` if there is no
        such entry.
        """
        if self.response.response_type == "embeddings_floats":
            return self.response.embeddings

        # TODO: Update to model_dump when Cohere updates to Pydantic v2
        embeddings_by_type: EmbedByTypeResponseEmbeddings = self.response.embeddings
        embedding_dict = embeddings_by_type.dict()

        requested = str(self.embedding_type)
        # The original code tried to remap "float" to "float_" but used `==`
        # (a no-op comparison) instead of `=`, so the remap never happened.
        # NOTE(review): whether the dumped key is "float_" (field name) or
        # "float" (alias) depends on how the SDK's `.dict()` serializes --
        # confirm against the installed Cohere SDK. Both are tried so the
        # lookup works either way.
        lookup_keys = ("float_", "float") if requested == "float" else (requested,)
        for key in lookup_keys:
            values = embedding_dict.get(key)
            if values is not None:
                return values
        return None

embeddings: Optional[Union[list[list[float]], list[list[int]]]] property

Returns the embeddings

get_wrapped_async_client(client, self)

Get a wrapped async client.

Source code in mirascope/base/ops_utils.py
def get_wrapped_async_client(client: T, self: Union[BaseCall, BaseEmbedder]) -> T:
    """Apply each configured client wrapper to an async client.

    Args:
        client: The async client to wrap.
        self: The call or embedder whose `configuration.client_wrappers`,
            `api_key`, `base_url` and `_provider` are consulted.

    Returns:
        The client after all wrappers have been applied, or `client` itself
        when no wrappers are configured.
    """
    wrappers = self.configuration.client_wrappers
    if not wrappers:
        return client

    wrapped = client
    for wrapper in wrappers:
        if wrapper == "langfuse":  # pragma: no cover
            from langfuse.openai import AsyncOpenAI as LangfuseAsyncOpenAI

            # Langfuse replaces the client rather than decorating it.
            wrapped = LangfuseAsyncOpenAI(
                api_key=self.api_key, base_url=self.base_url
            )
        elif wrapper == "logfire":  # pragma: no cover
            import logfire

            # Logfire instruments in place; only these two providers are handled.
            provider = self._provider
            if provider == "openai":
                logfire.instrument_openai(wrapped)  # type: ignore
            elif provider == "anthropic":
                logfire.instrument_anthropic(wrapped)  # type: ignore
        elif callable(wrapper):
            wrapped = wrapper(wrapped)
    return wrapped

get_wrapped_call(call, self, **kwargs)

Wrap a call with each configured llm_ops wrapper, if any exist.

Source code in mirascope/base/ops_utils.py
def get_wrapped_call(call: C, self: Union[BaseCall, BaseEmbedder], **kwargs) -> C:
    """Wrap `call` with every op listed in `self.configuration.llm_ops`.

    Args:
        call: The callable to wrap.
        self: The call or embedder whose `configuration.llm_ops` and
            `_provider` are consulted.
        **kwargs: Extra keyword arguments forwarded to each callable op.

    Returns:
        The wrapped callable, or `call` itself when no ops are configured.
    """
    ops = self.configuration.llm_ops
    if not ops:
        return call

    wrapped = call
    for llm_op in ops:
        if llm_op == "weave":  # pragma: no cover
            import weave

            wrapped = weave.op()(wrapped)
        elif callable(llm_op):
            # Callable ops receive the current callable, the provider name, and
            # any extra keyword arguments.
            wrapped = llm_op(wrapped, self._provider, **kwargs)
    return wrapped

get_wrapped_client(client, self)

Get a wrapped client.

Source code in mirascope/base/ops_utils.py
def get_wrapped_client(client: T, self: Union[BaseCall, BaseEmbedder]) -> T:
    """Apply each configured client wrapper to a synchronous client.

    Args:
        client: The client to wrap.
        self: The call or embedder whose `configuration.client_wrappers`,
            `api_key`, `base_url` and `_provider` are consulted.

    Returns:
        The client after all wrappers have been applied, or `client` itself
        when no wrappers are configured.
    """
    wrappers = self.configuration.client_wrappers
    if not wrappers:
        return client

    wrapped = client
    for wrapper in wrappers:  # pragma: no cover
        if wrapper == "langfuse":
            from langfuse.openai import OpenAI as LangfuseOpenAI

            # Langfuse replaces the client rather than decorating it.
            wrapped = LangfuseOpenAI(api_key=self.api_key, base_url=self.base_url)
        elif wrapper == "logfire":  # pragma: no cover
            import logfire

            # Logfire instruments in place; only these two providers are handled.
            provider = self._provider
            if provider == "openai":
                logfire.instrument_openai(wrapped)  # type: ignore
            elif provider == "anthropic":
                logfire.instrument_anthropic(wrapped)  # type: ignore
        elif callable(wrapper):
            wrapped = wrapper(wrapped)
    return wrapped