Skip to content

chroma.vectorstores

A module for calling Chroma's Client and Collection.

BaseVectorStore

Bases: BaseModel, Generic[BaseQueryResultsT], ABC

The abstract base class that defines the interface for interacting with vectorstores.

Source code in mirascope/rag/vectorstores.py
class BaseVectorStore(BaseModel, Generic[BaseQueryResultsT], ABC):
    """The abstract base class that defines the interface for vectorstores.

    Configuration lives in class-level attributes (`ClassVar`s), so a concrete
    store is configured by overriding them on a subclass. Subclasses must
    implement `retrieve` and `add`; `BaseQueryResultsT` is the type returned by
    `retrieve`.
    """

    # NOTE(review): presumably a provider credential; nothing in this class
    # reads it -- confirm against the concrete stores.
    api_key: ClassVar[Optional[str]] = None
    # Name of the index/collection the store operates on, if any.
    index_name: ClassVar[Optional[str]] = None
    # Chunker available to `add` implementations for splitting raw text.
    chunker: ClassVar[BaseChunker] = TextChunker(chunk_size=1000, chunk_overlap=200)
    # Embedder for the store; has no default, so subclasses must set it.
    embedder: ClassVar[BaseEmbedder]
    # Provider-specific parameters for the underlying index.
    vectorstore_params: ClassVar[BaseVectorStoreParams] = BaseVectorStoreParams()
    configuration: ClassVar[BaseConfig] = BaseConfig()
    # Identifier of the backing provider; overridden by concrete stores.
    _provider: ClassVar[str] = "base"

    @abstractmethod
    def retrieve(self, text: str, **kwargs: Any) -> BaseQueryResultsT:
        """Queries the vectorstore for the closest match.

        Args:
            text: The query text.
            **kwargs: Additional arguments for the concrete implementation.

        Returns:
            The provider-specific query results.
        """
        ...  # pragma: no cover

    @abstractmethod
    def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
        """Takes unstructured data and upserts it into the vectorstore.

        Args:
            text: Either a raw string or a list of `Document` objects.
            **kwargs: Additional arguments for the concrete implementation.
        """
        ...  # pragma: no cover

add(text, **kwargs) abstractmethod

Takes unstructured data and upserts it into the vectorstore.

Source code in mirascope/rag/vectorstores.py
@abstractmethod
def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
    """Takes unstructured data and upserts it into the vectorstore.

    Args:
        text: Either a raw string or a list of `Document` objects.
        **kwargs: Additional arguments for the concrete implementation.
    """
    ...  # pragma: no cover

retrieve(text, **kwargs) abstractmethod

Queries the vectorstore for the closest match.

Source code in mirascope/rag/vectorstores.py
@abstractmethod
def retrieve(self, text: str, **kwargs: Any) -> BaseQueryResultsT:
    """Queries the vectorstore for the closest match.

    Args:
        text: The query text.
        **kwargs: Additional arguments for the concrete implementation.

    Returns:
        The provider-specific query results.
    """
    ...  # pragma: no cover

ChromaSettings

Bases: BaseModel

Source code in mirascope/chroma/types.py
class ChromaSettings(BaseModel):
    """Settings used to construct a Chroma client.

    `mode` selects which kind of client is built; the remaining fields are the
    keyword arguments forwarded to it, filtered per mode by `kwargs()`.
    """

    # Which client to build: remote HTTP, on-disk persistent, or in-memory.
    mode: Literal["http", "persistent", "ephemeral"] = "persistent"
    # Only forwarded in "persistent" mode.
    path: str = "./chroma"
    # The next four fields are only forwarded in "http" mode.
    host: str = "localhost"
    port: int = 8000
    ssl: bool = False
    headers: Optional[dict[str, str]] = None
    # Forwarded in every mode (when not None).
    settings: Optional[Settings] = None
    tenant: str = DEFAULT_TENANT
    database: str = DEFAULT_DATABASE

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def kwargs(self) -> dict[str, Any]:
        """Returns the client settings for the current mode as keyword arguments.

        Fields that do not apply to the configured `mode` are dropped, as are
        fields whose value is `None`.

        Returns:
            A dictionary suitable for unpacking into the client constructor.

        Raises:
            ValueError: If `mode` is not "http", "persistent" or "ephemeral"
                (possible when the attribute is reassigned after validation).
        """
        network_fields = {"host", "port", "ssl", "headers"}
        excluded_by_mode = {
            "http": {"mode", "path"},
            "persistent": {"mode"} | network_fields,
            "ephemeral": {"mode", "path"} | network_fields,
        }
        try:
            exclude = excluded_by_mode[self.mode]
        except KeyError:
            # Fail loudly instead of hitting an unbound `exclude` below.
            raise ValueError(
                f"Unsupported Chroma client mode: {self.mode!r}"
            ) from None
        return {
            key: value
            for key, value in self.model_dump(exclude=exclude).items()
            if value is not None
        }

kwargs()

Returns the client settings that apply to the configured mode as a keyword-arguments dictionary.

Source code in mirascope/chroma/types.py
def kwargs(self) -> dict[str, Any]:
    """Returns the client settings for the current mode as keyword arguments.

    Fields that do not apply to the configured `mode` are dropped, as are
    fields whose value is `None`.

    Returns:
        A dictionary suitable for unpacking into the client constructor.

    Raises:
        ValueError: If `mode` is not "http", "persistent" or "ephemeral"
            (possible when the attribute is reassigned after validation).
    """
    network_fields = {"host", "port", "ssl", "headers"}
    excluded_by_mode = {
        "http": {"mode", "path"},
        "persistent": {"mode"} | network_fields,
        "ephemeral": {"mode", "path"} | network_fields,
    }
    try:
        exclude = excluded_by_mode[self.mode]
    except KeyError:
        # Fail loudly instead of hitting an unbound `exclude` below.
        raise ValueError(f"Unsupported Chroma client mode: {self.mode!r}") from None
    return {
        key: value
        for key, value in self.model_dump(exclude=exclude).items()
        if value is not None
    }

ChromaVectorStore

Bases: BaseVectorStore

A vectorstore for Chroma.

Example:

from mirascope.chroma import ChromaSettings, ChromaVectorStore
from mirascope.openai import OpenAIEmbedder
from mirascope.rag import TextChunker


# A store is configured by overriding class-level attributes on a subclass.
class MyStore(ChromaVectorStore):
    embedder = OpenAIEmbedder()
    chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    # Used as the name of the Chroma collection.
    index_name = "my-store-0001"
    # Defaults: mode="persistent", path="./chroma".
    client_settings = ChromaSettings()

my_store = MyStore()
# PATH_TO_FILE is a placeholder for the path of the text file to index.
with open(f"{PATH_TO_FILE}") as file:
    data = file.read()
    # A raw string is split by `chunker` before being upserted.
    my_store.add(data)
documents = my_store.retrieve("my question").documents
print(documents)
Source code in mirascope/chroma/vectorstores.py
class ChromaVectorStore(BaseVectorStore):
    """A vectorstore for Chroma.

    The client and collection are created lazily on first use and cached on
    the instance. Configure a store by overriding the class-level attributes
    (`embedder`, `chunker`, `index_name`, `client_settings`, ...).

    Example:

    ```python
    from mirascope.chroma import ChromaSettings, ChromaVectorStore
    from mirascope.openai import OpenAIEmbedder
    from mirascope.rag import TextChunker


    class MyStore(ChromaVectorStore):
        embedder = OpenAIEmbedder()
        chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
        index_name = "my-store-0001"
        client_settings = ChromaSettings()

    my_store = MyStore()
    with open(f"{PATH_TO_FILE}") as file:
        data = file.read()
        my_store.add(data)
    documents = my_store.retrieve("my question").documents
    print(documents)
    ```
    """

    vectorstore_params = ChromaParams(get_or_create=True)
    client_settings: ClassVar[ChromaSettings] = ChromaSettings(mode="persistent")
    _provider: ClassVar[str] = "chroma"

    def retrieve(
        self, text: Optional[Union[str, list[str]]] = None, **kwargs: Any
    ) -> ChromaQueryResult:
        """Queries the vectorstore for the closest match.

        Args:
            text: A single query string or a list of query strings. When
                omitted or empty, the query is driven entirely by `kwargs`.
            **kwargs: Additional arguments forwarded to the collection's
                `query` method.

        Returns:
            The raw query output validated into a `ChromaQueryResult`.
        """
        if text:
            if isinstance(text, str):
                # The collection is always queried with a list of texts.
                text = [text]
            query_result = self._index.query(query_texts=text, **kwargs)
        else:
            query_result = self._index.query(**kwargs)

        return ChromaQueryResult.model_validate(query_result)

    def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
        """Takes unstructured data and upserts it into the vectorstore.

        Args:
            text: A raw string, which is split into documents by `chunker`,
                or a list of ready-made `Document` objects.
            **kwargs: Additional arguments forwarded to the collection's
                `upsert` method.
        """
        documents: list[Document]
        if isinstance(text, str):
            chunk = self.chunker.chunk
            documents = chunk(text)
        else:
            documents = text

        return self._index.upsert(
            ids=[document.id for document in documents],
            documents=[document.text for document in documents],
            metadatas=[cast(Metadata, document.metadata) for document in documents],
            **kwargs,
        )

    ############################# PRIVATE PROPERTIES #################################

    @cached_property
    def _client(self) -> ClientAPI:
        """The Chroma client selected by `client_settings.mode`.

        Raises:
            ValueError: If the mode is not "persistent", "http" or "ephemeral".
        """
        client_factories = {
            "persistent": PersistentClient,
            "http": HttpClient,
            "ephemeral": EphemeralClient,
        }
        mode = self.client_settings.mode
        factory = client_factories.get(mode)
        if factory is None:
            # Previously this fell through and returned None, which only
            # surfaced later as an AttributeError when the collection was built.
            raise ValueError(f"Unsupported Chroma client mode: {mode!r}")
        return factory(**self.client_settings.kwargs())

    @cached_property
    def _index(self) -> Collection:
        """The Chroma collection backing this store.

        When `index_name` is set it overrides the `name` entry of
        `vectorstore_params`; `embedder` is passed as the embedding function.
        """
        vectorstore_params = self.vectorstore_params
        if self.index_name:
            # Copy rather than mutate: the params object is shared class state.
            vectorstore_params = self.vectorstore_params.model_copy(
                update={"name": self.index_name}
            )

        return self._client.create_collection(
            **vectorstore_params.kwargs(),
            embedding_function=self.embedder,  # type: ignore
        )

add(text, **kwargs)

Takes unstructured data and upserts it into the vectorstore.

Source code in mirascope/chroma/vectorstores.py
def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
    """Upserts documents into the collection, chunking raw text first.

    Args:
        text: A raw string, which is split into documents by `chunker`,
            or a list of ready-made `Document` objects.
        **kwargs: Additional arguments forwarded to the collection's
            `upsert` method.
    """
    documents: list[Document] = (
        self.chunker.chunk(text) if isinstance(text, str) else text
    )

    # Resolve the collection before building the payload, as the original did.
    upsert = self._index.upsert
    payload = {
        "ids": [doc.id for doc in documents],
        "documents": [doc.text for doc in documents],
        "metadatas": [cast(Metadata, doc.metadata) for doc in documents],
    }
    return upsert(**payload, **kwargs)

retrieve(text=None, **kwargs)

Queries the vectorstore for the closest match.

Source code in mirascope/chroma/vectorstores.py
def retrieve(
    self, text: Optional[Union[str, list[str]]] = None, **kwargs: Any
) -> ChromaQueryResult:
    """Runs a query against the collection and validates the result.

    Args:
        text: A single query string or a list of query strings. When omitted
            or empty, the query is driven entirely by `kwargs`.
        **kwargs: Additional arguments forwarded to the collection's
            `query` method.

    Returns:
        The raw query output validated into a `ChromaQueryResult`.
    """
    if not text:
        # No query text supplied: rely solely on the caller's kwargs.
        raw_result = self._index.query(**kwargs)
    else:
        # The collection is always queried with a list of texts.
        query_texts = [text] if isinstance(text, str) else text
        raw_result = self._index.query(query_texts=query_texts, **kwargs)

    return ChromaQueryResult.model_validate(raw_result)

Document

Bases: BaseModel

A document to be added to the vectorstore.

Source code in mirascope/rag/types.py
class Document(BaseModel):
    """A document to be added to the vectorstore."""

    # Identifier for the document; ChromaVectorStore.add passes it as the upsert id.
    id: str
    # The document's text content.
    text: str
    # Optional metadata stored alongside the document.
    metadata: Optional[dict[str, Any]] = None