Skip to content

polars-llm

API reference

diegoglozano/polars-llm

API reference¶

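`polars-llm` registers an `llm` namespace on every Polars expression. Import the package once and the namespace becomes available. The chat and embedding verbs expect the expression to resolve to a string column (the prompt); the `cosine` helper expects it to resolve to a vector column (`List(Float64)` or `Array(Float64, dim)`).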

import polars as pl
import polars_llm  # noqa: F401  — registers the `.llm` namespace

Chat verbs¶

Method	Provider	Mode
`openai`	OpenAI	sync
`aopenai`	OpenAI	async
`anthropic`	Anthropic	sync
`aanthropic`	Anthropic	async
`gemini`	Google Gemini	sync
`agemini`	Google Gemini	async

Chat verbs return a Utf8 column containing the model's response. When `schema=` is given, they return a struct column matching the supplied Pydantic model instead.

Embedding verbs¶

Method	Provider	Mode
`openai_embed`	OpenAI Embeddings	sync
`aopenai_embed`	OpenAI Embeddings	async
`gemini_embed`	Google Gemini	sync
`agemini_embed`	Google Gemini	async

Embedding verbs return a List(Float64) column by default. Pass `dim=N` to get an Array(Float64, N) column instead.

`polars_llm.Llm`¶

`polars_llm.llm.Llm` ¶

Expression namespace for calling LLMs and embedding models per row.

Source code in polars_llm/llm.py

@pl.api.register_expr_namespace("llm")
class Llm:
    """Polars expression namespace (``.llm``) for per-row LLM and embedding calls.

    The expression the namespace is accessed on supplies the prompt text for
    the chat and embedding verbs, and the vector column for :meth:`cosine`.
    """

    def __init__(self, prompt: pl.Expr) -> None:
        """Keep a reference to the expression this namespace wraps."""
        self._prompt = prompt

    # ---- input shaping ----
    def _input_struct(self, system: str | pl.Expr | None) -> pl.Expr:
        """Bundle the prompt and the system message into one struct expression.

        The struct has two fields, ``prompt`` and ``system``. ``system`` is a
        null ``Utf8`` literal when no system message is given, the caller's
        expression cast to ``Utf8`` when an expression is given, and a string
        literal otherwise.
        """
        if isinstance(system, pl.Expr):
            system_expr = system.cast(pl.Utf8)
        elif system is None:
            # Typed null so the struct field is Utf8 even without a message.
            system_expr = pl.lit(None, dtype=pl.Utf8)
        else:
            system_expr = pl.lit(str(system))
        return pl.struct(
            self._prompt.alias("prompt"),
            system_expr.alias("system"),
        )

    # ---- internal chat dispatch ----
    def _chat(
        self,
        chat: Any,
        *,
        system: str | pl.Expr | None,
        schema: Any | None,
        retries: int,
        backoff: float,
        cache: bool,
        with_metadata: bool,
        on_error: OnError,
    ) -> pl.Expr:
        """Build the expression that runs ``chat`` over each batch synchronously.

        When ``schema`` is given, the model is first wrapped with
        ``with_structured_output(schema)`` and the mapper is told the results
        are structured. ``retries``, ``backoff`` and ``cache`` are passed to
        ``chat_batch_sync``; ``with_metadata`` and ``on_error`` are passed to
        ``chat_map_batches``.
        """
        structured = schema is not None
        model = chat.with_structured_output(schema) if structured else chat

        # `rows` presumably holds (prompt, system) pairs — confirm against
        # chat_map_batches.
        def run_batch(rows: list[tuple[Any, Any]]) -> list[dict[str, Any]]:
            return chat_batch_sync(
                model,
                rows,
                retries=retries,
                backoff=backoff,
                cache=cache,
            )

        prompt_and_system = self._input_struct(system)
        return chat_map_batches(
            prompt_and_system,
            run_batch,
            with_metadata=with_metadata,
            on_error=on_error,
            structured=structured,
        )

    def _achat(
        self,
        chat: Any,
        *,
        system: str | pl.Expr | None,
        schema: Any | None,
        retries: int,
        backoff: float,
        max_concurrency: int | None,
        cache: bool,
        with_metadata: bool,
        on_error: OnError,
    ) -> pl.Expr:
        """Build the expression that runs ``chat`` over each batch asynchronously.

        When ``schema`` is given, the model is first wrapped with
        ``with_structured_output(schema)``. Each batch is handled by the
        ``chat_batch_async`` coroutine, which is driven to completion through
        ``_arun``. ``with_metadata`` and ``on_error`` are passed to
        ``chat_map_batches``.
        """
        structured = schema is not None
        model = chat.with_structured_output(schema) if structured else chat

        def run_batch(rows: list[tuple[Any, Any]]) -> list[dict[str, Any]]:
            pending = chat_batch_async(
                model,
                rows,
                retries=retries,
                backoff=backoff,
                max_concurrency=max_concurrency,
                cache=cache,
            )
            return _arun(pending)

        prompt_and_system = self._input_struct(system)
        return chat_map_batches(
            prompt_and_system,
            run_batch,
            with_metadata=with_metadata,
            on_error=on_error,
            structured=structured,
        )

    # ---- internal embed dispatch ----
    def _embed(
        self,
        embedder: Any,
        *,
        retries: int,
        backoff: float,
        cache: bool,
        chunk_size: int | None,
        dim: int | None,
        with_metadata: bool,
        on_error: OnError,
    ) -> pl.Expr:
        """Build the expression that embeds each batch of prompts synchronously.

        ``retries``, ``backoff``, ``cache`` and ``chunk_size`` are passed to
        ``embed_batch_sync``; ``with_metadata``, ``on_error`` and ``dim`` are
        passed to ``embed_map_batches``.
        """

        def run_batch(texts: list[Any]) -> list[dict[str, Any]]:
            batch_options = {
                "retries": retries,
                "backoff": backoff,
                "cache": cache,
                "chunk_size": chunk_size,
            }
            return embed_batch_sync(embedder, texts, **batch_options)

        return embed_map_batches(
            self._prompt,
            run_batch,
            with_metadata=with_metadata,
            on_error=on_error,
            dim=dim,
        )

    def _aembed(
        self,
        embedder: Any,
        *,
        retries: int,
        backoff: float,
        max_concurrency: int | None,
        cache: bool,
        chunk_size: int | None,
        dim: int | None,
        with_metadata: bool,
        on_error: OnError,
    ) -> pl.Expr:
        """Build the expression that embeds each batch of prompts asynchronously.

        Each batch is handled by the ``embed_batch_async`` coroutine, which is
        driven to completion through ``_arun``. ``with_metadata``,
        ``on_error`` and ``dim`` are passed to ``embed_map_batches``.
        """

        def run_batch(texts: list[Any]) -> list[dict[str, Any]]:
            pending = embed_batch_async(
                embedder,
                texts,
                retries=retries,
                backoff=backoff,
                max_concurrency=max_concurrency,
                cache=cache,
                chunk_size=chunk_size,
            )
            return _arun(pending)

        return embed_map_batches(
            self._prompt,
            run_batch,
            with_metadata=with_metadata,
            on_error=on_error,
            dim=dim,
        )

    # ============================================================
    # Public chat verbs
    # ============================================================

    # ---- OpenAI ----
    def openai(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run an OpenAI chat completion per row, sync.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options are forwarded unchanged to
        the sync chat dispatcher.
        """
        return self._chat(
            _make_chat("openai", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def aopenai(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        max_concurrency: int | None = None,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run OpenAI chat completions concurrently across the batch.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options, including
        ``max_concurrency``, are forwarded unchanged to the async chat
        dispatcher.
        """
        return self._achat(
            _make_chat("openai", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            max_concurrency=max_concurrency,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    # ---- Anthropic ----
    def anthropic(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run an Anthropic chat completion per row, sync.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options are forwarded unchanged to
        the sync chat dispatcher.
        """
        return self._chat(
            _make_chat("anthropic", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def aanthropic(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        max_concurrency: int | None = None,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run Anthropic chat completions concurrently across the batch.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options, including
        ``max_concurrency``, are forwarded unchanged to the async chat
        dispatcher.
        """
        return self._achat(
            _make_chat("anthropic", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            max_concurrency=max_concurrency,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    # ---- Gemini ----
    def gemini(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run a Gemini chat completion per row, sync.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options are forwarded unchanged to
        the sync chat dispatcher.
        """
        return self._chat(
            _make_chat("gemini", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def agemini(
        self,
        *,
        model: str | None = None,
        system: str | pl.Expr | None = None,
        schema: Any | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        max_concurrency: int | None = None,
        cache: bool = False,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Run Gemini chat completions concurrently across the batch.

        ``model``, ``client`` and ``**model_kwargs`` configure the chat model
        through ``_make_chat``. ``system`` may be a string (the same for every
        row) or an expression (evaluated per row). Pass ``schema=`` to request
        structured output. All remaining options, including
        ``max_concurrency``, are forwarded unchanged to the async chat
        dispatcher.
        """
        return self._achat(
            _make_chat("gemini", model, client, model_kwargs),
            system=system,
            schema=schema,
            retries=retries,
            backoff=backoff,
            max_concurrency=max_concurrency,
            cache=cache,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    # ============================================================
    # Public embed verbs
    # ============================================================

    def openai_embed(
        self,
        *,
        model: str | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        cache: bool = False,
        chunk_size: int | None = None,
        dim: int | None = None,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Compute OpenAI embeddings per row, sync.

        ``chunk_size=N`` groups ``N`` rows into one ``embed_documents`` call,
        which is cheaper and faster when embedding a whole corpus. ``dim=N``
        makes the result ``Array(Float64, N)`` rather than the default
        ``List(Float64)``; this catches dimension drift and suits vector
        libraries. ``model``, ``client`` and ``**model_kwargs`` configure the
        embedder through ``_make_embed``.
        """
        return self._embed(
            _make_embed("openai", model, client, model_kwargs),
            retries=retries,
            backoff=backoff,
            cache=cache,
            chunk_size=chunk_size,
            dim=dim,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def aopenai_embed(
        self,
        *,
        model: str | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        max_concurrency: int | None = None,
        cache: bool = False,
        chunk_size: int | None = None,
        dim: int | None = None,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Compute OpenAI embeddings concurrently across the batch.

        ``chunk_size=N`` groups ``N`` rows into each ``aembed_documents``
        call, and ``max_concurrency`` then limits how many chunk calls are in
        flight. ``dim=N`` makes the result ``Array(Float64, N)`` rather than
        ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs``
        configure the embedder through ``_make_embed``.
        """
        return self._aembed(
            _make_embed("openai", model, client, model_kwargs),
            retries=retries,
            backoff=backoff,
            max_concurrency=max_concurrency,
            cache=cache,
            chunk_size=chunk_size,
            dim=dim,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def gemini_embed(
        self,
        *,
        model: str | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        cache: bool = False,
        chunk_size: int | None = None,
        dim: int | None = None,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Compute Gemini embeddings per row, sync.

        ``chunk_size=N`` groups ``N`` rows into one ``embed_documents`` call.
        ``dim=N`` makes the result ``Array(Float64, N)`` rather than
        ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs``
        configure the embedder through ``_make_embed``.
        """
        return self._embed(
            _make_embed("gemini", model, client, model_kwargs),
            retries=retries,
            backoff=backoff,
            cache=cache,
            chunk_size=chunk_size,
            dim=dim,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    def agemini_embed(
        self,
        *,
        model: str | None = None,
        client: Any = None,
        retries: int = 0,
        backoff: float = 0.0,
        max_concurrency: int | None = None,
        cache: bool = False,
        chunk_size: int | None = None,
        dim: int | None = None,
        with_metadata: bool = False,
        on_error: OnError = "null",
        **model_kwargs: Any,
    ) -> pl.Expr:
        """Compute Gemini embeddings concurrently across the batch.

        ``chunk_size=N`` groups ``N`` rows into each ``aembed_documents``
        call, and ``max_concurrency`` then limits how many chunk calls are in
        flight. ``dim=N`` makes the result ``Array(Float64, N)`` rather than
        ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs``
        configure the embedder through ``_make_embed``.
        """
        return self._aembed(
            _make_embed("gemini", model, client, model_kwargs),
            retries=retries,
            backoff=backoff,
            max_concurrency=max_concurrency,
            cache=cache,
            chunk_size=chunk_size,
            dim=dim,
            with_metadata=with_metadata,
            on_error=on_error,
        )

    # ============================================================
    # Vector helpers (no provider call)
    # ============================================================

    def cosine(self, other: pl.Expr | pl.Series | list[float] | tuple[float, ...]) -> pl.Expr:
        """Return the cosine similarity of this vector column with ``other``.

        Both sides are cast to ``List(Float64)`` before any arithmetic, so
        ``Array(Float64, dim)`` and ``List(Float64)`` columns are handled the
        same way. ``other`` can be a ``pl.Expr`` such as
        ``pl.col("vector_b")``, a ``pl.Series``, or a plain Python list/tuple
        of floats, which is wrapped in a one-row literal and broadcast against
        every row. The result is a ``Float64`` expression.

        Everything is expressed as native Polars arithmetic, so no API call
        is made. A null vector on either side yields ``null``; an all-zero
        vector on either side yields ``NaN`` (0/0).

        Raises:
            TypeError: If ``other`` is none of the supported types.
        """
        vec_dtype = pl.List(pl.Float64)
        lhs = self._prompt.cast(vec_dtype)

        if isinstance(other, pl.Expr):
            rhs = other.cast(vec_dtype)
        elif isinstance(other, pl.Series):
            rhs = pl.lit(other).cast(vec_dtype)
        elif isinstance(other, (list, tuple)):
            # One-row list Series: the literal broadcasts to every row.
            single_row = pl.Series("", [list(other)], dtype=vec_dtype)
            rhs = pl.lit(single_row)
        else:
            raise TypeError(
                "polars-llm: `cosine` expects a pl.Expr, pl.Series, or list of floats; "
                f"got {type(other).__name__}",
            )

        dot_product = (lhs * rhs).list.sum()
        lhs_norm = (lhs * lhs).list.sum().sqrt()
        rhs_norm = (rhs * rhs).list.sum().sqrt()
        return dot_product / (lhs_norm * rhs_norm)

`openai(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run an OpenAI chat completion per row, sync.

Source code in polars_llm/llm.py

def openai(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run an OpenAI chat completion per row, sync.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the sync
    chat dispatcher along with every remaining option, all unchanged.
    """
    return self._chat(
        _make_chat("openai", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`aopenai(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, max_concurrency=None, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run OpenAI chat completions concurrently across the batch.

Source code in polars_llm/llm.py

def aopenai(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    max_concurrency: int | None = None,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run OpenAI chat completions concurrently across the batch.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the async
    chat dispatcher along with every remaining option, including
    ``max_concurrency``, all unchanged.
    """
    return self._achat(
        _make_chat("openai", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        max_concurrency=max_concurrency,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`anthropic(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run an Anthropic chat completion per row, sync.

Source code in polars_llm/llm.py

def anthropic(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run an Anthropic chat completion per row, sync.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the sync
    chat dispatcher along with every remaining option, all unchanged.
    """
    return self._chat(
        _make_chat("anthropic", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`aanthropic(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, max_concurrency=None, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run Anthropic chat completions concurrently across the batch.

Source code in polars_llm/llm.py

def aanthropic(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    max_concurrency: int | None = None,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run Anthropic chat completions concurrently across the batch.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the async
    chat dispatcher along with every remaining option, including
    ``max_concurrency``, all unchanged.
    """
    return self._achat(
        _make_chat("anthropic", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        max_concurrency=max_concurrency,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`gemini(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run a Gemini chat completion per row, sync.

Source code in polars_llm/llm.py

def gemini(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run a Gemini chat completion per row, sync.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the sync
    chat dispatcher along with every remaining option, all unchanged.
    """
    return self._chat(
        _make_chat("gemini", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`agemini(*, model=None, system=None, schema=None, client=None, retries=0, backoff=0.0, max_concurrency=None, cache=False, with_metadata=False, on_error='null', **model_kwargs)` ¶

Run Gemini chat completions concurrently across the batch.

Source code in polars_llm/llm.py

def agemini(
    self,
    *,
    model: str | None = None,
    system: str | pl.Expr | None = None,
    schema: Any | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    max_concurrency: int | None = None,
    cache: bool = False,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Run Gemini chat completions concurrently across the batch.

    ``model``, ``client`` and ``**model_kwargs`` configure the chat model
    through ``_make_chat``. ``system`` and ``schema`` are passed to the async
    chat dispatcher along with every remaining option, including
    ``max_concurrency``, all unchanged.
    """
    return self._achat(
        _make_chat("gemini", model, client, model_kwargs),
        system=system,
        schema=schema,
        retries=retries,
        backoff=backoff,
        max_concurrency=max_concurrency,
        cache=cache,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`openai_embed(*, model=None, client=None, retries=0, backoff=0.0, cache=False, chunk_size=None, dim=None, with_metadata=False, on_error='null', **model_kwargs)` ¶

Compute OpenAI embeddings per row, sync.

Pass chunk_size=N to batch N rows into a single embed_documents call (cheaper / faster for corpus-style embedding). Pass dim=N to return Array(Float64, N) instead of the default List(Float64) (catches dim drift, plays nicely with vector libs).

Source code in polars_llm/llm.py

def openai_embed(
    self,
    *,
    model: str | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    cache: bool = False,
    chunk_size: int | None = None,
    dim: int | None = None,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Compute OpenAI embeddings per row, sync.

    ``chunk_size=N`` groups ``N`` rows into one ``embed_documents`` call,
    which is cheaper and faster when embedding a whole corpus. ``dim=N``
    makes the result ``Array(Float64, N)`` rather than the default
    ``List(Float64)``; this catches dimension drift and suits vector
    libraries. ``model``, ``client`` and ``**model_kwargs`` configure the
    embedder through ``_make_embed``.
    """
    return self._embed(
        _make_embed("openai", model, client, model_kwargs),
        retries=retries,
        backoff=backoff,
        cache=cache,
        chunk_size=chunk_size,
        dim=dim,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`aopenai_embed(*, model=None, client=None, retries=0, backoff=0.0, max_concurrency=None, cache=False, chunk_size=None, dim=None, with_metadata=False, on_error='null', **model_kwargs)` ¶

Compute OpenAI embeddings concurrently across the batch.

Pass chunk_size=N to batch N rows per aembed_documents call; max_concurrency then caps in-flight chunk calls. Pass dim=N to return Array(Float64, N) instead of List(Float64).

Source code in polars_llm/llm.py

def aopenai_embed(
    self,
    *,
    model: str | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    max_concurrency: int | None = None,
    cache: bool = False,
    chunk_size: int | None = None,
    dim: int | None = None,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Compute OpenAI embeddings concurrently across the batch.

    ``chunk_size=N`` groups ``N`` rows into each ``aembed_documents`` call,
    and ``max_concurrency`` then limits how many chunk calls are in flight.
    ``dim=N`` makes the result ``Array(Float64, N)`` rather than
    ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs`` configure
    the embedder through ``_make_embed``.
    """
    return self._aembed(
        _make_embed("openai", model, client, model_kwargs),
        retries=retries,
        backoff=backoff,
        max_concurrency=max_concurrency,
        cache=cache,
        chunk_size=chunk_size,
        dim=dim,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`gemini_embed(*, model=None, client=None, retries=0, backoff=0.0, cache=False, chunk_size=None, dim=None, with_metadata=False, on_error='null', **model_kwargs)` ¶

Compute Gemini embeddings per row, sync.

Pass chunk_size=N to batch N rows into a single embed_documents call. Pass dim=N to return Array(Float64, N) instead of List(Float64).

Source code in polars_llm/llm.py

def gemini_embed(
    self,
    *,
    model: str | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    cache: bool = False,
    chunk_size: int | None = None,
    dim: int | None = None,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Compute Gemini embeddings per row, sync.

    ``chunk_size=N`` groups ``N`` rows into one ``embed_documents`` call.
    ``dim=N`` makes the result ``Array(Float64, N)`` rather than
    ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs`` configure
    the embedder through ``_make_embed``.
    """
    return self._embed(
        _make_embed("gemini", model, client, model_kwargs),
        retries=retries,
        backoff=backoff,
        cache=cache,
        chunk_size=chunk_size,
        dim=dim,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`agemini_embed(*, model=None, client=None, retries=0, backoff=0.0, max_concurrency=None, cache=False, chunk_size=None, dim=None, with_metadata=False, on_error='null', **model_kwargs)` ¶

Compute Gemini embeddings concurrently across the batch.

Pass chunk_size=N to batch N rows per aembed_documents call; max_concurrency then caps in-flight chunk calls. Pass dim=N to return Array(Float64, N) instead of List(Float64).

Source code in polars_llm/llm.py

def agemini_embed(
    self,
    *,
    model: str | None = None,
    client: Any = None,
    retries: int = 0,
    backoff: float = 0.0,
    max_concurrency: int | None = None,
    cache: bool = False,
    chunk_size: int | None = None,
    dim: int | None = None,
    with_metadata: bool = False,
    on_error: OnError = "null",
    **model_kwargs: Any,
) -> pl.Expr:
    """Compute Gemini embeddings concurrently across the batch.

    ``chunk_size=N`` groups ``N`` rows into each ``aembed_documents`` call,
    and ``max_concurrency`` then limits how many chunk calls are in flight.
    ``dim=N`` makes the result ``Array(Float64, N)`` rather than
    ``List(Float64)``. ``model``, ``client`` and ``**model_kwargs`` configure
    the embedder through ``_make_embed``.
    """
    return self._aembed(
        _make_embed("gemini", model, client, model_kwargs),
        retries=retries,
        backoff=backoff,
        max_concurrency=max_concurrency,
        cache=cache,
        chunk_size=chunk_size,
        dim=dim,
        with_metadata=with_metadata,
        on_error=on_error,
    )

`cosine(other)` ¶

Cosine similarity between this vector column and other.

Accepts both Array(Float64, dim) and List(Float64) inputs; they are cast to List internally so the math is uniform. other may be a pl.Expr (e.g. pl.col("vector_b")), a pl.Series, or a literal Python list/tuple of floats (broadcast against every row). Returns a Float64 expression.

Lowers to native Polars arithmetic — no API call is made. Rows where either vector is null produce null; rows where either vector is all-zero produce NaN (0/0).

Source code in polars_llm/llm.py

def cosine(self, other: pl.Expr | pl.Series | list[float] | tuple[float, ...]) -> pl.Expr:
    """Return the cosine similarity of this vector column with ``other``.

    Both sides are cast to ``List(Float64)`` before any arithmetic, so
    ``Array(Float64, dim)`` and ``List(Float64)`` columns are handled the
    same way. ``other`` can be a ``pl.Expr`` such as ``pl.col("vector_b")``,
    a ``pl.Series``, or a plain Python list/tuple of floats, which is wrapped
    in a one-row literal and broadcast against every row. The result is a
    ``Float64`` expression.

    Everything is expressed as native Polars arithmetic, so no API call is
    made. A null vector on either side yields ``null``; an all-zero vector on
    either side yields ``NaN`` (0/0).

    Raises:
        TypeError: If ``other`` is none of the supported types.
    """
    vec_dtype = pl.List(pl.Float64)
    lhs = self._prompt.cast(vec_dtype)

    if isinstance(other, pl.Expr):
        rhs = other.cast(vec_dtype)
    elif isinstance(other, pl.Series):
        rhs = pl.lit(other).cast(vec_dtype)
    elif isinstance(other, (list, tuple)):
        # One-row list Series: the literal broadcasts to every row.
        single_row = pl.Series("", [list(other)], dtype=vec_dtype)
        rhs = pl.lit(single_row)
    else:
        raise TypeError(
            "polars-llm: `cosine` expects a pl.Expr, pl.Series, or list of floats; "
            f"got {type(other).__name__}",
        )

    dot_product = (lhs * rhs).list.sum()
    lhs_norm = (lhs * lhs).list.sum().sqrt()
    rhs_norm = (rhs * rhs).list.sum().sqrt()
    return dot_product / (lhs_norm * rhs_norm)