Bases: BaseVectorStore
A vectorstore for Chroma.
Example:
from mirascope.chroma import ChromaSettings, ChromaVectorStore
from mirascope.openai import OpenAIEmbedder
from mirascope.rag import TextChunker


class MyStore(ChromaVectorStore):
    # Class-level configuration read by the vectorstore.
    embedder = OpenAIEmbedder()
    chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    index_name = "my-store-0001"
    client_settings = ChromaSettings()


my_store = MyStore()

# PATH_TO_FILE is a placeholder: point it at the text file you want to index.
with open(PATH_TO_FILE, encoding="utf-8") as file:
    data = file.read()

# A plain string is split into documents by the chunker before being upserted.
my_store.add(data)

documents = my_store.retrieve("my question").documents
print(documents)
Source code in mirascope/chroma/vectorstores.py
class ChromaVectorStore(BaseVectorStore):
    """A vectorstore for Chroma.

    Example:

    ```python
    from mirascope.chroma import ChromaSettings, ChromaVectorStore
    from mirascope.openai import OpenAIEmbedder
    from mirascope.rag import TextChunker


    class MyStore(ChromaVectorStore):
        embedder = OpenAIEmbedder()
        chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
        index_name = "my-store-0001"
        client_settings = ChromaSettings()


    my_store = MyStore()
    with open(PATH_TO_FILE, encoding="utf-8") as file:
        data = file.read()
    my_store.add(data)
    documents = my_store.retrieve("my question").documents
    print(documents)
    ```
    """

    # Default collection parameters; `get_or_create=True` is forwarded to the
    # client's `create_collection` call made in `_index`.
    vectorstore_params = ChromaParams(get_or_create=True)
    # Selects which Chroma client `_client` builds ("persistent" by default).
    client_settings: ClassVar[ChromaSettings] = ChromaSettings(mode="persistent")

    def retrieve(
        self, text: Optional[Union[str, list[str]]] = None, **kwargs: Any
    ) -> ChromaQueryResult:
        """Queries the vectorstore for the closest match.

        Args:
            text: A single query string or a list of query strings. When it is
                falsy (`None`, `""` or `[]`) no `query_texts` argument is sent
                and the query is driven entirely by `kwargs`.
            **kwargs: Extra keyword arguments forwarded to the collection's
                `query` method.

        Returns:
            The raw query response validated into a `ChromaQueryResult`.
        """
        if text:
            if isinstance(text, str):
                # The collection is queried with a list of texts.
                text = [text]
            query_result = self._index.query(query_texts=text, **kwargs)
        else:
            query_result = self._index.query(**kwargs)
        return ChromaQueryResult.model_validate(query_result)

    def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
        """Takes unstructured data and upserts it into the vectorstore.

        Args:
            text: Either a raw string, which is first split into documents with
                `self.chunker.chunk`, or a list of `Document` objects that is
                upserted as-is.
            **kwargs: Extra keyword arguments forwarded to the collection's
                `upsert` method.
        """
        documents: list[Document]
        if isinstance(text, str):
            chunk = self.chunker.chunk
            # Wrap the chunking call with weave when tracing is configured and
            # the chunker is not already a weave op.
            if self.vectorstore_params.weave and not isinstance(self.chunker, weave.Op):
                chunk = self.vectorstore_params.weave(
                    self.chunker.chunk
                )  # pragma: no cover
            documents = chunk(text)
        else:
            documents = text
        return self._index.upsert(
            ids=[document.id for document in documents],
            documents=[document.text for document in documents],
            **kwargs,
        )

    ############################# PRIVATE PROPERTIES #################################

    @cached_property
    def _client(self) -> ClientAPI:
        """Builds (once per instance) the Chroma client chosen by `client_settings.mode`.

        Raises:
            ValueError: If the configured mode is not one of "persistent",
                "http" or "ephemeral".
        """
        mode = self.client_settings.mode
        if mode == "persistent":
            return PersistentClient(**self.client_settings.kwargs())
        elif mode == "http":
            return HttpClient(**self.client_settings.kwargs())
        elif mode == "ephemeral":
            return EphemeralClient(**self.client_settings.kwargs())
        # Fail loudly instead of implicitly returning None, which would only
        # surface later as an AttributeError on the missing client.
        raise ValueError(
            f"Unsupported Chroma client mode: {mode!r} "
            "(expected 'persistent', 'http' or 'ephemeral')"
        )

    @cached_property
    def _index(self) -> Collection:
        """Creates (once per instance) the collection backing this store.

        When `index_name` is set it overrides the `name` in a copy of
        `vectorstore_params`; the shared class-level params are left untouched.
        """
        vectorstore_params = self.vectorstore_params
        if self.index_name:
            vectorstore_params = self.vectorstore_params.model_copy(
                update={"name": self.index_name}
            )
        create_collection = self._client.create_collection
        # Wrap collection creation with weave when tracing is configured.
        if self.vectorstore_params.weave is not None:
            create_collection = self.vectorstore_params.weave(
                self._client.create_collection
            )  # pragma: no cover
        return create_collection(
            **vectorstore_params.kwargs(),
            embedding_function=self.embedder,  # type: ignore
        )
|
add(text, **kwargs)
Takes unstructured data and upserts it into the vectorstore.
Source code in mirascope/chroma/vectorstores.py
def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
    """Upserts raw text or ready-made documents into the vectorstore.

    Args:
        text: A string, which is split into documents by `self.chunker.chunk`,
            or a list of `Document` objects that is stored as-is.
        **kwargs: Extra keyword arguments forwarded to the collection's
            `upsert` method.
    """
    if not isinstance(text, str):
        docs: list[Document] = text
    else:
        splitter = self.chunker.chunk
        tracer = self.vectorstore_params.weave
        # Only wrap with weave when tracing is configured and the chunker is
        # not already a weave op.
        if tracer and not isinstance(self.chunker, weave.Op):
            splitter = tracer(self.chunker.chunk)  # pragma: no cover
        docs = splitter(text)

    doc_ids = [doc.id for doc in docs]
    doc_texts = [doc.text for doc in docs]
    return self._index.upsert(ids=doc_ids, documents=doc_texts, **kwargs)
|
retrieve(text=None, **kwargs)
Queries the vectorstore for the closest match.
Source code in mirascope/chroma/vectorstores.py
def retrieve(
    self, text: Optional[Union[str, list[str]]] = None, **kwargs: Any
) -> ChromaQueryResult:
    """Queries the vectorstore for the closest match.

    Args:
        text: A single query string or a list of query strings. When it is
            falsy, no `query_texts` argument is sent and the query relies on
            `kwargs` alone.
        **kwargs: Extra keyword arguments forwarded to the collection's
            `query` method.

    Returns:
        The raw query response validated into a `ChromaQueryResult`.
    """
    if not text:
        raw_result = self._index.query(**kwargs)
    else:
        # A lone string is wrapped so the collection always receives a list.
        queries = [text] if isinstance(text, str) else text
        raw_result = self._index.query(query_texts=queries, **kwargs)
    return ChromaQueryResult.model_validate(raw_result)
|