Skip to content

pinecone

A module for interacting with Pinecone vectorstores.

PineconeParams

Bases: BaseModel

The parameters for Pinecone create_index

Source code in mirascope/pinecone/types.py
class PineconeParams(BaseModel):
    """Index-level options passed to Pinecone's `create_index`."""

    # Name of the metric for the index; restricted to the three literals below.
    metric: Optional[Literal["cosine", "dotproduct", "euclidean"]] = "cosine"
    # Optional timeout; left out of `kwargs()` while it stays None.
    timeout: Optional[int] = None

    def kwargs(self) -> dict[str, Any]:
        """Collects every field that is set into a keyword arguments dictionary.

        Returns:
            The `model_dump()` of this model without the entries whose value
            is `None`.
        """
        populated: dict[str, Any] = {}
        for name, setting in self.model_dump().items():
            if setting is None:
                continue
            populated[name] = setting
        return populated

kwargs()

Returns all parameters for the index as a keyword arguments dictionary.

Source code in mirascope/pinecone/types.py
def kwargs(self) -> dict[str, Any]:
    """Collects every field that is set into a keyword arguments dictionary.

    Returns:
        The `model_dump()` of this model without the entries whose value is
        `None`. Other falsy values (0, "", empty containers) are kept.
    """
    populated: dict[str, Any] = {}
    for name, setting in self.model_dump().items():
        if setting is None:
            continue
        populated[name] = setting
    return populated

PineconePodParams

Bases: PineconeParams, PodSpec, BaseVectorStoreParams

The parameters for Pinecone create_index with pod spec and weave

Source code in mirascope/pinecone/types.py
class PineconePodParams(PineconeParams, PodSpec, BaseVectorStoreParams):
    """The parameters for Pinecone create_index with pod spec and weave"""

    def kwargs(self) -> dict[str, Any]:
        """Builds the keyword arguments dictionary for the index.

        Returns:
            The `PineconeParams` keyword arguments at the top level, plus a
            `"spec"` entry holding a copy of the `PodSpec` keyword arguments.
        """
        # One dump feeds both parent models; each is rebuilt from it so that
        # its own `kwargs()` only reports the fields it knows about.
        dumped = self.model_dump()
        spec_options = PodSpec(**dumped).kwargs()
        index_options = PineconeParams(**dumped).kwargs()
        combined = dict(index_options)
        combined["spec"] = dict(spec_options)
        return combined

kwargs()

Returns all parameters for the index as a keyword arguments dictionary.

Source code in mirascope/pinecone/types.py
def kwargs(self) -> dict[str, Any]:
    """Builds the keyword arguments dictionary for the index.

    Returns:
        The `PineconeParams` keyword arguments at the top level, plus a
        `"spec"` entry holding a copy of the `PodSpec` keyword arguments.
    """
    # One dump feeds both parent models; each is rebuilt from it so that its
    # own `kwargs()` only reports the fields it knows about.
    dumped = self.model_dump()
    spec_options = PodSpec(**dumped).kwargs()
    index_options = PineconeParams(**dumped).kwargs()
    combined = dict(index_options)
    combined["spec"] = dict(spec_options)
    return combined

PineconeQueryResult

Bases: BaseModel

The result of a Pinecone index query

Example:

from mirascope.pinecone import (
    PineconeServerlessParams,
    PineconeSettings,
    PineconeVectorStore,
)
from mirascope.openai import OpenAIEmbedder
from mirascope.rag import TextChunker


# NOTE: `settings` and `PATH_TO_FILE` are placeholders that your application
# is expected to define.
class MyStore(PineconeVectorStore):
    embedder = OpenAIEmbedder(dimensions=1536)
    chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    index_name = "my-store-0001"
    api_key = settings.pinecone_api_key
    client_settings = PineconeSettings()
    vectorstore_params = PineconeServerlessParams(
        cloud="aws",
        region="us-west-2",
    )

my_store = MyStore()
with open(f"{PATH_TO_FILE}") as file:
    data = file.read()
    my_store.add(data)
query_results = my_store.retrieve("my question")
#> PineconeQueryResult(ids=['0'], documents=['my answer'],
# scores=[0.9999999999999999], embeddings=[[0.0, 0.0, 0.0, ...]])
Source code in mirascope/pinecone/types.py
class PineconeQueryResult(BaseModel):
    """The result of a Pinecone index query

    All lists are parallel: position `i` in each one describes the same match.

    Example:

    ```python
    from mirascope.pinecone import (
        PineconeServerlessParams,
        PineconeSettings,
        PineconeVectorStore,
    )
    from mirascope.openai import OpenAIEmbedder
    from mirascope.rag import TextChunker


    class MyStore(PineconeVectorStore):
        embedder = OpenAIEmbedder(dimensions=1536)
        chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
        index_name = "my-store-0001"
        api_key = settings.pinecone_api_key
        client_settings = PineconeSettings()
        vectorstore_params = PineconeServerlessParams(
            cloud="aws",
            region="us-west-2",
        )

    my_store = MyStore()
    with open(f"{PATH_TO_FILE}") as file:
        data = file.read()
        my_store.add(data)
    query_results = my_store.retrieve("my question")
    #> PineconeQueryResult(ids=['0'], documents=['my answer'],
    # scores=[0.9999999999999999], embeddings=[[0.0, 0.0, 0.0, ...]])
    ```
    """

    # Ids of the matched vectors (the only required field).
    ids: list[str]
    # Text of each match, when available.
    documents: Optional[list[str]] = None
    # Score of each match, when available.
    scores: Optional[list[float]] = None
    # Vector values of each match, when available.
    embeddings: Optional[list[list[float]]] = None

PineconeServerlessParams

Bases: PineconeParams, ServerlessSpec, BaseVectorStoreParams

The parameters for Pinecone create_index with serverless spec and weave

Source code in mirascope/pinecone/types.py
class PineconeServerlessParams(PineconeParams, ServerlessSpec, BaseVectorStoreParams):
    """The parameters for Pinecone create_index with serverless spec and weave"""

    def kwargs(self) -> dict[str, Any]:
        """Builds the keyword arguments dictionary for the index.

        Returns:
            The `PineconeParams` keyword arguments at the top level, plus a
            `"spec"` entry holding a copy of the `ServerlessSpec` keyword
            arguments.
        """
        # One dump feeds both parent models; each is rebuilt from it so that
        # its own `kwargs()` only reports the fields it knows about.
        dumped = self.model_dump()
        spec_options = ServerlessSpec(**dumped).kwargs()
        index_options = PineconeParams(**dumped).kwargs()
        combined = dict(index_options)
        combined["spec"] = dict(spec_options)
        return combined

kwargs()

Returns all parameters for the index as a keyword arguments dictionary.

Source code in mirascope/pinecone/types.py
def kwargs(self) -> dict[str, Any]:
    """Builds the keyword arguments dictionary for the index.

    Returns:
        The `PineconeParams` keyword arguments at the top level, plus a
        `"spec"` entry holding a copy of the `ServerlessSpec` keyword
        arguments.
    """
    # One dump feeds both parent models; each is rebuilt from it so that its
    # own `kwargs()` only reports the fields it knows about.
    dumped = self.model_dump()
    spec_options = ServerlessSpec(**dumped).kwargs()
    index_options = PineconeParams(**dumped).kwargs()
    combined = dict(index_options)
    combined["spec"] = dict(spec_options)
    return combined

PineconeSettings

Bases: BaseModel

Settings for Pinecone instance

Source code in mirascope/pinecone/types.py
class PineconeSettings(BaseModel):
    """Settings for Pinecone instance"""

    api_key: Optional[str] = None
    host: Optional[str] = None
    proxy_url: Optional[str] = None
    proxy_headers: Optional[dict[str, str]] = None
    ssl_ca_certs: Optional[str] = None
    ssl_verify: Optional[bool] = None
    config: Optional[Config] = None
    additional_headers: Optional[dict[str, str]] = {}
    pool_threads: Optional[int] = 1
    index_api: Optional[ManageIndexesApi] = None

    # `Config` and `ManageIndexesApi` are used as field types above, so
    # arbitrary types are allowed on this model.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def kwargs(self) -> dict[str, Any]:
        """Collects every setting that is set into a keyword arguments dictionary.

        Returns:
            The `model_dump()` of this model without the entries whose value
            is `None`.
        """
        populated: dict[str, Any] = {}
        for name, setting in self.model_dump().items():
            if setting is None:
                continue
            populated[name] = setting
        return populated

kwargs()

Returns all client settings that are not None as a keyword arguments dictionary.

Source code in mirascope/pinecone/types.py
def kwargs(self) -> dict[str, Any]:
    """Collects every setting that is set into a keyword arguments dictionary.

    Returns:
        The `model_dump()` of this model without the entries whose value is
        `None`. Other falsy values (0, "", empty containers) are kept.
    """
    populated: dict[str, Any] = {}
    for name, setting in self.model_dump().items():
        if setting is None:
            continue
        populated[name] = setting
    return populated

PineconeVectorStore

Bases: BaseVectorStore

A vectorstore for Pinecone.

Example:

from mirascope.pinecone import (
    PineconeServerlessParams,
    PineconeSettings,
    PineconeVectorStore,
)
from mirascope.openai import OpenAIEmbedder
from mirascope.rag import TextChunker


# NOTE: `settings` and `PATH_TO_FILE` are placeholders that your application
# is expected to define.
class MyStore(PineconeVectorStore):
    embedder = OpenAIEmbedder(dimensions=1536)
    chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
    index_name = "my-store-0001"
    api_key = settings.pinecone_api_key
    client_settings = PineconeSettings()
    vectorstore_params = PineconeServerlessParams(
        cloud="aws",
        region="us-west-2",
    )

my_store = MyStore()
with open(f"{PATH_TO_FILE}") as file:
    data = file.read()
    my_store.add(data)
documents = my_store.retrieve("my question").documents
print(documents)
Source code in mirascope/pinecone/vectorstores.py
class PineconeVectorStore(BaseVectorStore):
    """A vectorstore for Pinecone.

    Example:

    ```python
    from mirascope.pinecone import (
        PineconeServerlessParams,
        PineconeSettings,
        PineconeVectorStore,
    )
    from mirascope.openai import OpenAIEmbedder
    from mirascope.rag import TextChunker


    class MyStore(PineconeVectorStore):
        embedder = OpenAIEmbedder(dimensions=1536)
        chunker = TextChunker(chunk_size=1000, chunk_overlap=200)
        index_name = "my-store-0001"
        api_key = settings.pinecone_api_key
        client_settings = PineconeSettings()
        vectorstore_params = PineconeServerlessParams(
            cloud="aws",
            region="us-west-2",
        )

    my_store = MyStore()
    with open(f"{PATH_TO_FILE}") as file:
        data = file.read()
        my_store.add(data)
    documents = my_store.retrieve("my question").documents
    print(documents)
    ```
    """

    # Optional hook called by `add` with the documents being stored. When it
    # is set, `add` does not copy the document text into the vector metadata.
    handle_add_text: Optional[Callable[[list[Document]], None]] = None
    # Optional hook used by `retrieve` to obtain a match's text from its vector
    # values instead of reading `metadata["text"]`.
    handle_retrieve_text: Optional[Callable[[list[float]], list[str]]] = None

    vectorstore_params: ClassVar[
        Union[PineconePodParams, PineconeServerlessParams]
    ] = PineconeServerlessParams(cloud="aws", region="us-east-1")
    client_settings: ClassVar[PineconeSettings] = PineconeSettings()
    _provider: ClassVar[str] = "pinecone"

    def retrieve(self, text: str, **kwargs: Any) -> PineconeQueryResult:
        """Queries the vectorstore for the closest matches to `text`.

        Args:
            text: The query text; it is embedded with `self.embedder`.
            **kwargs: Extra arguments forwarded to the index `query` call.
                They take precedence over the defaults `top_k=8`,
                `include_metadata=True` and `include_values=True`.

        Returns:
            A `PineconeQueryResult` with the ids, scores, documents and
            embeddings of the matches, in the order the index returned them.

        Raises:
            ValueError: If the embedder returns no embeddings.
        """
        text_embedding: BaseEmbeddingResponse = self.embedder.embed([text])
        if text_embedding.embeddings is None:
            raise ValueError("Embedding is None")
        query_kwargs = {
            "top_k": 8,
            "include_metadata": True,
            "include_values": True,
            **kwargs,
        }
        query_result: QueryResponse = self._index.query(
            vector=text_embedding.embeddings[0],
            **query_kwargs,
        )
        ids: list[str] = []
        scores: list[float] = []
        documents: list[str] = []
        embeddings: list[list[float]] = []
        for match in query_result.matches:
            ids.append(match.id)
            scores.append(match.score)
            # The text comes from the custom hook when one is configured,
            # otherwise from the "text" entry that `add` puts in the metadata.
            documents.append(
                self.handle_retrieve_text([match.values])[0]
                if self.handle_retrieve_text
                else match.metadata["text"]
            )
            embeddings.append(match.values)

        return PineconeQueryResult(
            ids=ids,
            scores=scores,
            documents=documents,
            embeddings=embeddings,
        )

    def add(
        self,
        text: Union[str, list[Document]],
        **kwargs: Any,
    ) -> None:
        """Takes unstructured data and upserts into vectorstore

        Args:
            text: Either raw text, which is split with `self.chunker`, or a
                list of already prepared documents.
            **kwargs: Extra arguments forwarded to the index `upsert` call.

        Returns:
            Whatever the index `upsert` call returns. The `None` annotation
            is kept unchanged for interface compatibility.

        Raises:
            ValueError: If the embedder returns no embeddings. This is checked
                before `handle_add_text` runs, so a failed embedding does not
                trigger the hook.
        """
        documents: list[Document] = (
            self.chunker.chunk(text) if isinstance(text, str) else text
        )
        embedding_response: BaseEmbeddingResponse = self.embedder.embed(
            [document.text for document in documents]
        )
        if embedding_response.embeddings is None:
            raise ValueError("Embedding is None")
        if self.handle_add_text:
            self.handle_add_text(documents)
        # With a custom hook the text is stored by the hook, not in metadata.
        store_text = not self.handle_add_text
        vectors = []
        # Documents and embeddings are paired by position.
        for document, embedding in zip(documents, embedding_response.embeddings):
            metadata = document.metadata or {}
            metadata_text = (
                {"text": document.text} if document.text and store_text else {}
            )
            vectors.append(
                {
                    "id": document.id,
                    "values": embedding,
                    "metadata": {**metadata, **metadata_text},
                }
            )
        return self._index.upsert(vectors, **kwargs)

    ############################# PRIVATE PROPERTIES #################################

    @cached_property
    def _client(self) -> Pinecone:
        """Builds the Pinecone client from `api_key` and `client_settings`.

        `client_settings.kwargs()` may itself contain an `api_key`; passing it
        alongside an explicit `api_key=` keyword would raise a `TypeError`, so
        the two are merged first. The store's own `api_key` wins when it is
        set, otherwise the one from `client_settings` is used.
        """
        client_kwargs = self.client_settings.kwargs()
        if self.api_key is not None or "api_key" not in client_kwargs:
            client_kwargs["api_key"] = self.api_key
        return Pinecone(**client_kwargs)

    @cached_property
    def _index(self) -> Index:
        """Returns the index named `index_name`, creating it when it is not listed."""
        if self.index_name not in self._client.list_indexes().names():
            self._client.create_index(
                name=self.index_name,
                dimension=self.embedder.dimensions,
                **self.vectorstore_params.kwargs(),
            )
        return self._client.Index(self.index_name)

add(text, **kwargs)

Takes unstructured data and upserts into vectorstore

Source code in mirascope/pinecone/vectorstores.py
def add(
    self,
    text: Union[str, list[Document]],
    **kwargs: Any,
) -> None:
    """Takes unstructured data and upserts into vectorstore

    Args:
        text: Either raw text, which is split with `self.chunker`, or a list
            of already prepared documents.
        **kwargs: Extra arguments forwarded to the index `upsert` call.

    Returns:
        Whatever the index `upsert` call returns. The `None` annotation is
        kept unchanged for interface compatibility.

    Raises:
        ValueError: If the embedder returns no embeddings. This is checked
            before `handle_add_text` runs, so a failed embedding does not
            trigger the hook.
    """
    documents: list[Document] = (
        self.chunker.chunk(text) if isinstance(text, str) else text
    )
    embedding_response: BaseEmbeddingResponse = self.embedder.embed(
        [document.text for document in documents]
    )
    if embedding_response.embeddings is None:
        raise ValueError("Embedding is None")
    if self.handle_add_text:
        self.handle_add_text(documents)
    # With a custom hook the text is stored by the hook, not in metadata.
    store_text = not self.handle_add_text
    vectors = []
    # Documents and embeddings are paired by position.
    for document, embedding in zip(documents, embedding_response.embeddings):
        metadata = document.metadata or {}
        metadata_text = (
            {"text": document.text} if document.text and store_text else {}
        )
        vectors.append(
            {
                "id": document.id,
                "values": embedding,
                "metadata": {**metadata, **metadata_text},
            }
        )
    return self._index.upsert(vectors, **kwargs)

retrieve(text, **kwargs)

Queries the vectorstore for closest match

Source code in mirascope/pinecone/vectorstores.py
def retrieve(self, text: str, **kwargs: Any) -> PineconeQueryResult:
    """Looks up the vectors closest to `text` in the index.

    Args:
        text: The query text; it is embedded with `self.embedder`.
        **kwargs: Extra arguments forwarded to the index `query` call. They
            take precedence over the defaults `top_k=8`,
            `include_metadata=True` and `include_values=True`.

    Returns:
        A `PineconeQueryResult` with the ids, scores, documents and embeddings
        of the matches, in the order the index returned them.

    Raises:
        ValueError: If the embedder returns no embeddings.
    """
    response: BaseEmbeddingResponse = self.embedder.embed([text])
    kwargs.setdefault("top_k", 8)
    if response.embeddings is None:
        raise ValueError("Embedding is None")

    # Caller-supplied options override the two include_* defaults.
    query_options = {"include_metadata": True, "include_values": True}
    query_options.update(kwargs)
    found: QueryResponse = self._index.query(
        vector=response.embeddings[0], **query_options
    )

    match_ids: list[str] = []
    match_scores: list[float] = []
    match_texts: list[str] = []
    match_vectors: list[list[float]] = []
    for hit in found.matches:
        match_ids.append(hit.id)
        match_scores.append(hit.score)
        # The text comes from the custom hook when one is configured,
        # otherwise from the "text" entry of the match metadata.
        if self.handle_retrieve_text:
            match_texts.append(self.handle_retrieve_text([hit.values])[0])
        else:
            match_texts.append(hit.metadata["text"])
        match_vectors.append(hit.values)

    return PineconeQueryResult(
        ids=match_ids,
        scores=match_scores,
        documents=match_texts,
        embeddings=match_vectors,
    )