Skip to content

rag

A module for interacting with Mirascope RAG.

BaseChunker

Bases: BaseModel, ABC

Base class for chunkers.

Example:

import uuid

from mirascope.rag import BaseChunker, Document


class TextChunker(BaseChunker):
    """Example chunker that splits text into fixed-size, overlapping windows."""

    chunk_size: int
    chunk_overlap: int

    def chunk(self, text: str) -> list[Document]:
        """Split ``text`` into Documents of at most ``chunk_size`` characters.

        Consecutive chunks start ``chunk_size - chunk_overlap`` characters apart,
        so ``chunk_overlap`` must be smaller than ``chunk_size`` for the loop to
        make progress.
        """
        chunks: list[Document] = []
        start: int = 0
        while start < len(text):
            end: int = min(start + self.chunk_size, len(text))
            # Each chunk is given a freshly generated UUID string as its id.
            chunks.append(Document(text=text[start:end], id=str(uuid.uuid4())))
            start += self.chunk_size - self.chunk_overlap
        return chunks
Source code in mirascope/rag/chunkers/base_chunker.py
class BaseChunker(BaseModel, ABC):
    """Base class for chunkers.

    Subclasses implement `chunk` to split a text into a list of `Document`s.

    Example:

    ```python
    import uuid

    from mirascope.rag import BaseChunker, Document


    class TextChunker(BaseChunker):
        chunk_size: int
        chunk_overlap: int

        def chunk(self, text: str) -> list[Document]:
            chunks: list[Document] = []
            start: int = 0
            while start < len(text):
                end: int = min(start + self.chunk_size, len(text))
                chunks.append(Document(text=text[start:end], id=str(uuid.uuid4())))
                start += self.chunk_size - self.chunk_overlap
            return chunks
    ```
    """

    @abstractmethod
    def chunk(self, text: str) -> list[Document]:
        """Returns a list of Documents, each containing an id, text, and optionally metadata."""
        ...  # pragma: no cover

chunk(text) abstractmethod

Returns a list of Documents, each containing an id, text, and optionally metadata.

Source code in mirascope/rag/chunkers/base_chunker.py
@abstractmethod
def chunk(self, text: str) -> list[Document]:
    """Returns a list of Documents, each containing an id, text, and optionally metadata."""
    ...  # pragma: no cover

BaseEmbedder

Bases: BaseModel, Generic[BaseEmbeddingT], ABC

The base class abstract interface for interacting with LLM embeddings.

Source code in mirascope/rag/embedders.py
class BaseEmbedder(BaseModel, Generic[BaseEmbeddingT], ABC):
    """The base class abstract interface for interacting with LLM embeddings.

    Concrete embedders implement `embed` and `embed_async`, each of which takes a
    list of input strings and returns a `BaseEmbeddingT`.
    """

    # Declared as ClassVar: these are class attributes, not per-instance fields.
    api_key: ClassVar[Optional[str]] = None
    base_url: ClassVar[Optional[str]] = None
    # Default embedding parameters; the default model is "text-embedding-ada-002".
    embedding_params: ClassVar[BaseEmbeddingParams] = BaseEmbeddingParams(
        model="text-embedding-ada-002"
    )
    # Instance field; unset (None) by default.
    # NOTE(review): presumably the size of the embedding vectors -- confirm
    # against the concrete embedders.
    dimensions: Optional[int] = None
    # Defaults to a config with no llm_ops and no client_wrappers.
    configuration: ClassVar[BaseConfig] = BaseConfig(llm_ops=[], client_wrappers=[])
    _provider: ClassVar[str] = "base"

    @abstractmethod
    def embed(self, input: list[str]) -> BaseEmbeddingT:
        """Calls the embedder with a list of input strings."""
        ...  # pragma: no cover

    @abstractmethod
    async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
        """Asynchronously calls the embedder with a list of input strings."""
        ...  # pragma: no cover

embed(input) abstractmethod

Calls the embedder with a list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
def embed(self, input: list[str]) -> BaseEmbeddingT:
    """Calls the embedder with a list of input strings."""
    ...  # pragma: no cover

embed_async(input) abstractmethod async

Asynchronously calls the embedder with a list of input strings.

Source code in mirascope/rag/embedders.py
@abstractmethod
async def embed_async(self, input: list[str]) -> BaseEmbeddingT:
    """Asynchronously calls the embedder with a list of input strings."""
    ...  # pragma: no cover

BaseEmbeddingParams

Bases: BaseModel

The parameters with which to make an embedding.

Source code in mirascope/rag/types.py
class BaseEmbeddingParams(BaseModel):
    """Parameters used when requesting an embedding.

    Only ``model`` is declared here; the model config allows extra fields.
    """

    model: str

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def kwargs(self) -> dict[str, Any]:
        """Return every parameter whose value is not ``None`` as a kwargs dict."""
        dumped = self.model_dump()
        return {name: dumped[name] for name in dumped if dumped[name] is not None}

kwargs()

Returns all parameters for the embedder as a keyword arguments dictionary.

Source code in mirascope/rag/types.py
def kwargs(self) -> dict[str, Any]:
    """Return every embedder parameter whose value is not ``None`` as a kwargs dict."""
    populated: dict[str, Any] = {}
    for name, setting in self.model_dump().items():
        # Only ``None`` is dropped; other falsy values such as 0 or "" are kept.
        if setting is None:
            continue
        populated[name] = setting
    return populated

BaseEmbeddingResponse

Bases: BaseModel, Generic[ResponseT], ABC

A base abstract interface for LLM embedding responses.

Attributes:

Name Type Description
response ResponseT

The original response object that this class wraps.

Source code in mirascope/rag/types.py
class BaseEmbeddingResponse(BaseModel, Generic[ResponseT], ABC):
    """A base abstract interface for LLM embedding responses.

    Attributes:
        response: The original response object that this class wraps.
        start_time: The start time of the embedding in ms.
        end_time: The end time of the embedding in ms.
    """

    response: ResponseT
    start_time: float  # The start time of the embedding in ms
    end_time: float  # The end time of the embedding in ms

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    @property
    @abstractmethod
    def embeddings(self) -> Optional[Union[list[list[float]], list[list[int]]]]:
        """Should return the embedding of the response.

        If there are multiple choices in a response, this method should select the 0th
        choice and return its embedding.
        """
        ...  # pragma: no cover

embeddings: Optional[Union[list[list[float]], list[list[int]]]] abstractmethod property

Should return the embedding of the response.

If there are multiple choices in a response, this method should select the 0th choice and return its embedding.

BaseQueryResults

Bases: BaseModel

The results of a query.

Source code in mirascope/rag/types.py
class BaseQueryResults(BaseModel):
    """The results of a query.

    This base model declares no fields of its own.
    """

    # Pydantic config: extra fields passed at construction are kept rather than
    # rejected, and arbitrary (non-pydantic) types may be used as field types.
    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

BaseVectorStoreParams

Bases: BaseModel

The parameters with which to make a vectorstore.

Source code in mirascope/rag/types.py
class BaseVectorStoreParams(BaseModel):
    """Parameters used when creating a vectorstore.

    No fields are declared here; the model config allows extra fields.
    """

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)

    def kwargs(self) -> dict[str, Any]:
        """Return every index parameter whose value is not ``None`` as a kwargs dict."""
        populated: dict[str, Any] = {}
        for name, setting in self.model_dump().items():
            if setting is not None:
                populated[name] = setting
        return populated

kwargs()

Returns all parameters for the index as a keyword arguments dictionary.

Source code in mirascope/rag/types.py
def kwargs(self) -> dict[str, Any]:
    """Return every index parameter whose value is not ``None`` as a kwargs dict."""
    dumped = self.model_dump()
    # Only ``None`` is dropped; other falsy values such as 0 or "" are kept.
    return {name: dumped[name] for name in dumped if dumped[name] is not None}

Document

Bases: BaseModel

A document to be added to the vectorstore.

Source code in mirascope/rag/types.py
class Document(BaseModel):
    """A document to be added to the vectorstore.

    Attributes:
        id: The identifier of the document.
        text: The text content of the document.
        metadata: Optional dictionary of additional data; defaults to None.
    """

    id: str
    text: str
    metadata: Optional[dict[str, Any]] = None

TextChunker

Bases: BaseChunker

A text chunker that splits a text into chunks of a certain size and overlaps.

Example:

from mirascope.rag import TextChunker

# Chunks are up to 1000 characters long; consecutive chunks share 200 characters.
text_chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
# The sample text is shorter than chunk_size, so a single Document comes back.
chunks = text_chunker.chunk("This is a long text that I want to split into chunks.")
print(chunks)
Source code in mirascope/rag/chunkers/text_chunker.py
class TextChunker(BaseChunker):
    """A text chunker that splits a text into fixed-size chunks that overlap.

    Chunks are at most ``chunk_size`` characters long, and consecutive chunks
    start ``chunk_size - chunk_overlap`` characters apart, so ``chunk_overlap``
    must be smaller than ``chunk_size``.

    Example:

    ```python
    from mirascope.rag import TextChunker

    text_chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    chunks = text_chunker.chunk("This is a long text that I want to split into chunks.")
    print(chunks)
    ```
    """

    chunk_size: int
    chunk_overlap: int

    def chunk(self, text: str) -> list[Document]:
        """Split ``text`` into overlapping chunks.

        Args:
            text: The text to split.

        Returns:
            One Document per chunk, ordered by position in ``text``. Each Document
            has a freshly generated UUID string as its id. Empty ``text`` yields
            an empty list.

        Raises:
            ValueError: If ``chunk_overlap`` is not smaller than ``chunk_size``.
        """
        step = self.chunk_size - self.chunk_overlap
        if step <= 0:
            # A zero or negative step would never advance past the start of the
            # text, so chunking could not terminate.
            raise ValueError(
                f"chunk_overlap ({self.chunk_overlap}) must be smaller than "
                f"chunk_size ({self.chunk_size})"
            )
        # Slicing clamps at the end of the text, so the final chunk may be shorter.
        return [
            Document(text=text[start : start + self.chunk_size], id=str(uuid.uuid4()))
            for start in range(0, len(text), step)
        ]