Skip to content

rag.vectorstores

Vectorstores for the RAG module.

BaseChunker

Bases: BaseModel, ABC

Base class for chunkers.

Example:

import uuid

from mirascope.rag import BaseChunker, Document


class TextChunker(BaseChunker):
    """Splits text into fixed-size character windows that overlap."""

    chunk_size: int  # maximum number of characters per chunk
    chunk_overlap: int  # characters shared by consecutive chunks

    def chunk(self, text: str) -> list[Document]:
        """Returns the overlapping chunks of `text`, each with a fresh UUID id."""
        # A non-positive step would never advance `start` past the end of the text.
        step = self.chunk_size - self.chunk_overlap
        if step <= 0:
            raise ValueError("chunk_overlap must be smaller than chunk_size")
        chunks: list[Document] = []
        start: int = 0
        while start < len(text):
            end: int = min(start + self.chunk_size, len(text))
            chunks.append(Document(text=text[start:end], id=str(uuid.uuid4())))
            start += step
        return chunks
Source code in mirascope/rag/chunkers/base_chunker.py
class BaseChunker(BaseModel, ABC):
    """Base class for chunkers.

    Subclasses implement `chunk` to split a text into a list of `Document`s.

    Example:

    ```python
    import uuid

    from mirascope.rag import BaseChunker, Document


    class TextChunker(BaseChunker):
        chunk_size: int
        chunk_overlap: int

        def chunk(self, text: str) -> list[Document]:
            # A non-positive step would never advance past the end of the text.
            step = self.chunk_size - self.chunk_overlap
            if step <= 0:
                raise ValueError("chunk_overlap must be smaller than chunk_size")
            chunks: list[Document] = []
            start: int = 0
            while start < len(text):
                end: int = min(start + self.chunk_size, len(text))
                chunks.append(Document(text=text[start:end], id=str(uuid.uuid4())))
                start += step
            return chunks
    ```
    """

    @abstractmethod
    def chunk(self, text: str) -> list[Document]:
        """Chunks `text` into a list of Documents.

        Each Document contains an id, text, and optionally metadata.
        """
        ...  # pragma: no cover

chunk(text) abstractmethod

Returns a list of Documents, each containing an id, text, and optionally metadata.

Source code in mirascope/rag/chunkers/base_chunker.py
@abstractmethod
def chunk(self, text: str) -> list[Document]:
    """Chunks `text` into a list of Documents.

    Each Document contains an id, text, and optionally metadata.
    """
    ...  # pragma: no cover

BaseEmbedder

Bases: BaseModel, Generic[BaseEmbeddingT], ABC

The base class abstract interface for interacting with LLM embeddings.

Source code in mirascope/rag/embedders.py
class BaseEmbedder(BaseModel, Generic[BaseEmbeddingT], ABC):
    """The base class abstract interface for interacting with LLM embeddings.

    Subclasses implement `embed` and `embed_async`; both take a list of input
    strings and return the embedder's `BaseEmbeddingT` type.
    """

    # Class-level settings (`ClassVar`), not per-instance pydantic fields.
    api_key: ClassVar[Optional[str]] = None
    base_url: ClassVar[Optional[str]] = None
    embedding_params: ClassVar[BaseEmbeddingParams] = BaseEmbeddingParams(
        model="text-embedding-ada-002"
    )
    # The only per-instance field declared here.
    # NOTE(review): presumably the embedding vector size -- confirm against subclasses.
    dimensions: Optional[int] = None
    configuration: ClassVar[BaseConfig] = BaseConfig(llm_ops=[], client_wrappers=[])
    # NOTE(review): presumably overridden by concrete embedders -- confirm.
    _provider: ClassVar[str] = "base"

    @abstractmethod
    def embed(self, input: list[str]) -> BaseEmbeddingT:
        """Calls the embedder on the given list of input strings."""
        ...  # pragma: no cover

    @abstractmethod
    async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
        """Asynchronously calls the embedder on the given list of input strings."""
        ...  # pragma: no cover

embed(input) abstractmethod

Calls the embedder on the given list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
def embed(self, input: list[str]) -> BaseEmbeddingT:
    """Calls the embedder on the given list of input strings."""
    ...  # pragma: no cover

embed_async(input) abstractmethod async

Asynchronously calls the embedder on the given list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
    """Asynchronously calls the embedder on the given list of input strings."""
    ...  # pragma: no cover

BaseQueryResults

Bases: BaseModel

The results of a query.

Source code in mirascope/rag/types.py
class BaseQueryResults(BaseModel):
    """The results of a query.

    Declares no fields of its own.
    """

    # `extra="allow"` keeps fields that are not declared on the model, and
    # `arbitrary_types_allowed=True` permits field types that pydantic has no
    # built-in validation for.
    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

BaseVectorStore

Bases: BaseModel, Generic[BaseQueryResultsT], ABC

The base class abstract interface for interacting with vectorstores.

Source code in mirascope/rag/vectorstores.py
class BaseVectorStore(BaseModel, Generic[BaseQueryResultsT], ABC):
    """The base class abstract interface for interacting with vectorstores.

    Subclasses implement `retrieve` and `add`.
    """

    # Class-level settings (`ClassVar`), not per-instance pydantic fields.
    api_key: ClassVar[Optional[str]] = None
    index_name: ClassVar[Optional[str]] = None
    # Default chunker: chunk_size of 1000 with a chunk_overlap of 200.
    chunker: ClassVar[BaseChunker] = TextChunker(chunk_size=1000, chunk_overlap=200)
    # No default is assigned for the embedder here.
    embedder: ClassVar[BaseEmbedder]
    vectorstore_params: ClassVar[BaseVectorStoreParams] = BaseVectorStoreParams()
    configuration: ClassVar[BaseConfig] = BaseConfig()
    # NOTE(review): presumably overridden by concrete vectorstores -- confirm.
    _provider: ClassVar[str] = "base"

    @abstractmethod
    def retrieve(self, text: str, **kwargs: Any) -> BaseQueryResultsT:
        """Queries the vectorstore for the closest match to `text`."""
        ...  # pragma: no cover

    @abstractmethod
    def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
        """Takes unstructured data and upserts it into the vectorstore.

        `text` is either a raw string or a list of Documents.
        """
        ...  # pragma: no cover

add(text, **kwargs) abstractmethod

Takes unstructured data and upserts it into the vectorstore.

Source code in mirascope/rag/vectorstores.py
@abstractmethod
def add(self, text: Union[str, list[Document]], **kwargs: Any) -> None:
    """Takes unstructured data and upserts it into the vectorstore.

    `text` is either a raw string or a list of Documents.
    """
    ...  # pragma: no cover

retrieve(text, **kwargs) abstractmethod

Queries the vectorstore for the closest match.

Source code in mirascope/rag/vectorstores.py
@abstractmethod
def retrieve(self, text: str, **kwargs: Any) -> BaseQueryResultsT:
    """Queries the vectorstore for the closest match to `text`."""
    ...  # pragma: no cover

BaseVectorStoreParams

Bases: BaseModel

The parameters with which to make a vectorstore.

Source code in mirascope/rag/types.py
class BaseVectorStoreParams(BaseModel):
    """The parameters with which to make a vectorstore.

    Undeclared extra fields are accepted (`extra="allow"`), so subclasses and
    callers can carry additional settings.
    """

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def kwargs(
        self,
    ) -> dict[str, Any]:
        """Returns the dumped model as keyword arguments, omitting `None` values."""
        params: dict[str, Any] = {}
        for name, setting in self.model_dump().items():
            # Only `None` is dropped; other falsy values (0, "", False) are kept.
            if setting is None:
                continue
            params[name] = setting
        return params

kwargs()

Returns all parameters for the index as a keyword arguments dictionary.

Source code in mirascope/rag/types.py
def kwargs(
    self,
) -> dict[str, Any]:
    """Returns the dumped model as keyword arguments, omitting `None` values."""
    params: dict[str, Any] = {}
    for name, setting in self.model_dump().items():
        # Only `None` is dropped; other falsy values (0, "", False) are kept.
        if setting is None:
            continue
        params[name] = setting
    return params

Document

Bases: BaseModel

A document to be added to the vectorstore.

Source code in mirascope/rag/types.py
class Document(BaseModel):
    """A document to be added to the vectorstore."""

    id: str  # identifier; TextChunker fills this with a UUID4 string
    text: str  # the document's text content
    metadata: Optional[dict[str, Any]] = None  # optional; defaults to None

TextChunker

Bases: BaseChunker

A text chunker that splits a text into fixed-size chunks, with consecutive chunks overlapping by a set number of characters.

Example:

from mirascope.rag import TextChunker

# Chunks hold up to 1000 characters; consecutive chunks share 200 characters.
text_chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
# The sample text is shorter than chunk_size, so a single Document is returned.
chunks = text_chunker.chunk("This is a long text that I want to split into chunks.")
print(chunks)
Source code in mirascope/rag/chunkers/text_chunker.py
class TextChunker(BaseChunker):
    """A text chunker that splits a text into fixed-size, overlapping chunks.

    Attributes:
        chunk_size: The maximum number of characters in each chunk.
        chunk_overlap: The number of characters each chunk shares with the
            previous one. Must be smaller than `chunk_size`.

    Example:

    ```python
    from mirascope.rag import TextChunker

    text_chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    chunks = text_chunker.chunk("This is a long text that I want to split into chunks.")
    print(chunks)
    ```
    """

    chunk_size: int
    chunk_overlap: int

    def chunk(self, text: str) -> list[Document]:
        """Splits `text` into overlapping chunks.

        Args:
            text: The text to split.

        Returns:
            One Document per chunk, in order of appearance, each with a newly
            generated UUID4 string as its id. An empty text yields an empty list.
            The final chunk may be shorter than `chunk_size`.

        Raises:
            ValueError: If `chunk_overlap` is not smaller than `chunk_size`.
        """
        # Each window starts `step` characters after the previous one. With a
        # non-positive step `start` would never pass the end of the text and the
        # loop below would never terminate, so reject that configuration.
        step = self.chunk_size - self.chunk_overlap
        if step <= 0:
            raise ValueError(
                "chunk_overlap must be smaller than chunk_size "
                f"(got chunk_size={self.chunk_size}, "
                f"chunk_overlap={self.chunk_overlap})"
            )

        text_length = len(text)
        chunks: list[Document] = []
        start: int = 0
        while start < text_length:
            # Clamp the window so the last chunk stops at the end of the text.
            end: int = min(start + self.chunk_size, text_length)
            chunks.append(Document(text=text[start:end], id=str(uuid.uuid4())))
            start += step
        return chunks