API reference¶

polars-api registers an api namespace on every Polars expression. Import the package once and the namespace becomes available on any expression that resolves to a URL string.

import polars as pl
import polars_api  # noqa: F401  — registers the `.api` namespace

Methods¶

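Method	HTTP verb	Mode
`get`	GET	sync
`aget`	GET	async
`post`	POST	sync
`apost`	POST	async
`put`	PUT	sync
`aput`	PUT	async
`patch`	PATCH	sync
`apatch`	PATCH	async
`delete`	DELETE	sync
`head`	HEAD	sync
`request`	any (passed as `method`)	sync
`arequest`	any (passed as `method`)	async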

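By default, all methods return a pl.Expr of dtype Utf8 containing the response body for each row; rows whose request failed become null (with a warning) unless on_error says otherwise. Passing with_metadata=True returns a struct with per-row request metadata instead of the plain body. To parse JSON, pass an explicit schema to .str.json_decode(dtype) in an expression (recent Polars makes the dtype argument required), or call .str.json_decode() on the materialized Series to infer the schema. See Decoding JSON responses.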

Global configuration¶

Set request options once instead of passing them on every call. Anything left unset on a call falls back to the configured default, then the built-in default; explicit per-call arguments always take precedence.

import httpx
import polars_api

polars_api.set_defaults(client=httpx.Client(base_url="https://api.example.com"))

with polars_api.defaults(retries=3):
    ...  # `.api` calls in here retry up to 3 times

`polars_api.set_defaults(**options)` ¶

Register global default values for .api request options.

Any option accepted by get / post / aget / paginate etc. can be set once here and is then applied to every subsequent call that does not pass that argument explicitly. Explicit per-call arguments always take precedence. This is most useful for the client argument when talking to an authenticated API:

import httpx
import polars_api

polars_api.set_defaults(
    client=httpx.Client(
        base_url="https://api.example.com",
        headers={"Authorization": "Bearer ..."},
    ),
)
pl.col("path").api.get()  # uses the configured client automatically

Note that client is shared across the sync (httpx.Client) and async (aiohttp.ClientSession) paths, which require different client types. If you mix sync and async verbs, prefer setting client per call (or use the defaults context manager) so each path gets the right client.

Raises ValueError if an unknown option name is supplied.

Source code in polars_api/api.py

def set_defaults(**options: Any) -> None:
    """Register global default values for ``.api`` request options.

    Any option accepted by ``get`` / ``post`` / ``aget`` / ``paginate`` etc. can
    be set once here and is then applied to every subsequent call that does not
    pass that argument explicitly. Explicit per-call arguments always take
    precedence. This is most useful for the ``client`` argument when talking to
    an authenticated API:

    ```python
    import httpx
    import polars as pl
    import polars_api

    polars_api.set_defaults(
        client=httpx.Client(
            base_url="https://api.example.com",
            headers={"Authorization": "Bearer ..."},
        ),
    )
    pl.col("path").api.get()  # uses the configured client automatically
    ```

    Note that ``client`` is shared across the sync (``httpx.Client``) and async
    (``aiohttp.ClientSession``) paths, which require different client types. If
    you mix sync and async verbs, prefer setting ``client`` per call (or use the
    ``defaults`` context manager) so each path gets the right client.

    Values are merged into the existing defaults: options not named in this
    call keep whatever value they already had.

    Raises ``ValueError`` if an unknown option name is supplied.
    """
    # Validate before taking the lock so a bad name leaves the defaults untouched.
    _check_option_names(options, "set_defaults")
    with _DEFAULTS_LOCK:
        _DEFAULTS.update(options)

`polars_api.get_defaults()` ¶

Return a copy of the currently configured global defaults.

Source code in polars_api/api.py

def get_defaults() -> dict[str, Any]:
    """Return a copy of the currently configured global defaults.

    The returned dict is a shallow snapshot taken under the defaults lock;
    mutating it does not change the configured defaults.
    """
    with _DEFAULTS_LOCK:
        snapshot = dict(_DEFAULTS)
    return snapshot

`polars_api.reset_defaults(*options)` ¶

Clear configured global defaults.

With no arguments, clears all of them. Pass option names to clear only those:

polars_api.reset_defaults()           # clear everything
polars_api.reset_defaults("client")   # clear just `client`

Source code in polars_api/api.py

def reset_defaults(*options: str) -> None:
    """Clear configured global defaults.

    With no arguments, clears all of them. Pass option names to clear only
    those:

    ```python
    polars_api.reset_defaults()           # clear everything
    polars_api.reset_defaults("client")   # clear just `client`
    ```

    Names that are not currently configured are ignored silently; no
    validation of the names is performed here.
    """
    with _DEFAULTS_LOCK:
        if options:
            # Selective reset: drop only the named options, tolerating absent ones.
            for option_name in options:
                _DEFAULTS.pop(option_name, None)
        else:
            _DEFAULTS.clear()

`polars_api.defaults(**options)` ¶

Context manager that applies global defaults only within the block.

Restores the previous defaults on exit, so it composes with (and overrides) any values set via set_defaults:

with polars_api.defaults(client=session, retries=3):
    df.with_columns(pl.col("path").api.aget().alias("res"))

Raises ValueError if an unknown option name is supplied.

Source code in polars_api/api.py

@contextlib.contextmanager
def defaults(**options: Any) -> "Iterator[None]":
    """Context manager that applies global defaults only within the block.

    Restores the previous defaults on exit, so it composes with (and overrides)
    any values set via ``set_defaults``:

    ```python
    with polars_api.defaults(client=session, retries=3):
        df.with_columns(pl.col("path").api.aget().alias("res"))
    ```

    The defaults are restored from a snapshot taken on entry, so any change
    made to the defaults while the block is running is discarded on exit.

    Raises ``ValueError`` if an unknown option name is supplied.
    """
    _check_option_names(options, "defaults")

    # Snapshot and override in one critical section so the saved state is
    # exactly what the overrides were layered on.
    with _DEFAULTS_LOCK:
        snapshot = dict(_DEFAULTS)
        _DEFAULTS.update(options)

    try:
        yield
    finally:
        # Put the snapshot back wholesale, even if the block raised.
        with _DEFAULTS_LOCK:
            _DEFAULTS.clear()
            _DEFAULTS.update(snapshot)

`polars_api.Api`¶

`polars_api.api.Api` ¶

Source code in polars_api/api.py

@pl.api.register_expr_namespace("api")
class Api:
    def __init__(self, url: pl.Expr) -> None:
        """Store the expression this namespace is attached to.

        ``url`` is the expression that supplies the per-row request URL; it
        becomes the ``"url"`` field of the struct built by ``_input_struct``.
        """
        self._url = url

    @staticmethod
    def _send_sync(
        client: httpx.Client,
        method: str,
        url: str,
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
        on_request: Optional[RequestHook],
        on_response: Optional[ResponseHook],
    ) -> httpx.Response:
        """Build and send a single request through ``client``.

        ``on_request`` (if given) is called with the built request before it
        is sent; ``on_response`` (if given) is called with the response before
        it is returned. No retrying or error handling happens here: whatever
        ``client.send`` or a hook raises propagates to the caller.
        """
        prepared = client.build_request(
            method,
            url,
            **_build_request_kwargs(params, body, data, headers, timeout),
        )
        if on_request is not None:
            on_request(prepared)

        reply = client.send(prepared)
        if on_response is not None:
            on_response(reply)
        return reply

    @staticmethod
    def _build_aio_kwargs(
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
    ) -> dict[str, Any]:
        """Translate per-row request options into ``session.request`` keyword arguments.

        Only options that are not ``None`` produce a key in the result:

        * ``params``  -> ``"params"``: entries whose value is ``None`` are
          dropped and every remaining value is rendered as a string
          (booleans as ``"true"`` / ``"false"``).
        * ``headers`` -> ``"headers"``, passed through unchanged.
        * ``body``    -> ``"json"``.
        * ``data``    -> ``"data"``.
        * ``timeout`` -> ``"timeout"``, wrapped as
          ``aiohttp.ClientTimeout(total=timeout)``.
        """
        kwargs: dict[str, Any] = {}
        if params is not None:
            # aiohttp's params accept str/int/float values; coerce non-strings
            # so behaviour matches httpx.
            query: dict[str, str] = {}
            for name, value in params.items():
                if value is None:
                    continue
                if isinstance(value, bool):
                    # httpx renders booleans as "true"/"false"; a plain str()
                    # would send "True"/"False" and defeat the goal above.
                    query[name] = "true" if value else "false"
                elif isinstance(value, str):
                    query[name] = value
                else:
                    query[name] = str(value)
            kwargs["params"] = query
        if headers is not None:
            kwargs["headers"] = headers
        if body is not None:
            kwargs["json"] = body
        if data is not None:
            kwargs["data"] = data
        if timeout is not None:
            kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
        return kwargs

    @classmethod
    def _sync_one(
        cls,
        client: httpx.Client,
        method: str,
        url: str,
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        with_response_headers: bool,
        on_request: Optional[RequestHook],
        on_response: Optional[ResponseHook],
    ) -> dict[str, Any]:
        """Perform one logical request, retrying as configured, and return its result struct.

        A request is attempted up to ``retries + 1`` times. An attempt is
        retried when ``_send_sync`` raises ``httpx.HTTPError`` or when the
        response status is retryable according to ``_is_retryable_status``.
        Between attempts the method sleeps for the delay reported by
        ``_retry_after_seconds`` when there is one (status retries only),
        otherwise ``backoff * 2**attempt`` (no sleep when ``backoff <= 0``).

        The value returned is whatever ``_result_struct`` builds from the
        body text, status code, elapsed milliseconds, an error string
        (``None`` on success) and, when ``with_response_headers`` is set, the
        serialized response headers. A transport failure is reported with a
        ``None`` body and status ``0``.
        """
        tries = 0
        began = time.monotonic()
        while True:
            try_began = time.monotonic()
            try:
                response = cls._send_sync(
                    client,
                    method,
                    url,
                    params,
                    body,
                    data,
                    headers,
                    timeout,
                    on_request,
                    on_response,
                )
            except httpx.HTTPError as exc:
                if tries < retries:
                    pause = backoff * (2**tries) if backoff > 0 else 0.0
                    if pause > 0:
                        time.sleep(pause)
                    tries += 1
                    continue
                # NOTE(review): this elapsed time covers only the final attempt,
                # while the status-based returns below measure from the first
                # attempt — confirm the difference is intended.
                return _result_struct(
                    None,
                    0,
                    (time.monotonic() - try_began) * 1000,
                    f"{type(exc).__name__}: {exc}",
                    include_response_headers=with_response_headers,
                )
            else:
                status = response.status_code
                text = response.text
                resp_headers = _serialize_response_headers(response) if with_response_headers else None

                if response.is_success:
                    return _result_struct(
                        text,
                        status,
                        (time.monotonic() - began) * 1000,
                        None,
                        resp_headers,
                        include_response_headers=with_response_headers,
                    )

                if not (tries < retries and _is_retryable_status(status)):
                    # Non-retryable status, or no attempts left: report the failure.
                    return _result_struct(
                        text,
                        status,
                        (time.monotonic() - began) * 1000,
                        f"HTTP {status}",
                        resp_headers,
                        include_response_headers=with_response_headers,
                    )

                # A server-provided delay wins over exponential backoff.
                pause = _retry_after_seconds(response)
                if pause is None:
                    pause = backoff * (2**tries) if backoff > 0 else 0.0
                if pause > 0:
                    time.sleep(pause)
                tries += 1

    @classmethod
    async def _async_attempt(
        cls,
        session: aiohttp.ClientSession,
        method: str,
        url: str,
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
        attempt: int,
        retries: int,
        backoff: float,
        start: float,
        with_response_headers: bool,
        on_request: Optional[AsyncRequestHook],
        on_response: Optional[AsyncResponseHook],
    ) -> tuple[Optional[dict[str, Any]], Optional[float]]:
        """Run a single attempt of an async request.

        Returns a ``(result, wait)`` pair:

        * ``(result_struct, None)`` when the attempt is final, i.e. it
          succeeded (2xx), failed with a status that will not be retried, or
          raised on the last allowed attempt;
        * ``(None, wait)`` when the caller should sleep ``wait`` seconds and
          try again with ``attempt + 1``.

        ``on_request`` is called with ``(method, url, kwargs)`` before the
        request is issued and ``on_response`` with the response object before
        its body is read. If a hook returns an awaitable (for example because
        it is an ``async def``), the awaitable is awaited so the hook actually
        runs; plain callables behave as before.

        ``start`` is the monotonic timestamp of the first attempt and is used
        for the elapsed time of status-based results.
        """
        import inspect  # local import: only needed to detect awaitable hook results

        attempt_start = time.monotonic()
        kwargs = cls._build_aio_kwargs(params, body, data, headers, timeout)
        try:
            if on_request is not None:
                pending = on_request(method, url, kwargs)
                if inspect.isawaitable(pending):
                    await pending
            async with session.request(method, url, **kwargs) as response:
                if on_response is not None:
                    pending = on_response(response)
                    if inspect.isawaitable(pending):
                        await pending
                status = response.status
                text = await response.text()
                resp_headers = _serialize_aio_headers(response) if with_response_headers else None
                retry_after = _retry_after_seconds_aio(response)
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            if attempt < retries:
                wait = backoff * (2**attempt) if backoff > 0 else 0.0
                return None, wait
            # NOTE(review): elapsed here covers only this attempt, whereas the
            # status-based results below measure from ``start`` — confirm intent.
            elapsed_ms = (time.monotonic() - attempt_start) * 1000
            return _result_struct(
                None,
                0,
                elapsed_ms,
                f"{type(exc).__name__}: {exc}",
                include_response_headers=with_response_headers,
            ), None

        if 200 <= status < 300:
            elapsed_ms = (time.monotonic() - start) * 1000
            return _result_struct(
                text,
                status,
                elapsed_ms,
                None,
                resp_headers,
                include_response_headers=with_response_headers,
            ), None
        if attempt < retries and _is_retryable_status(status):
            # A server-provided delay takes priority over exponential backoff.
            wait = retry_after if retry_after is not None else (backoff * (2**attempt) if backoff > 0 else 0.0)
            return None, wait
        elapsed_ms = (time.monotonic() - start) * 1000
        return _result_struct(
            text,
            status,
            elapsed_ms,
            f"HTTP {status}",
            resp_headers,
            include_response_headers=with_response_headers,
        ), None

    @classmethod
    async def _async_one(
        cls,
        session: aiohttp.ClientSession,
        semaphore: Optional[asyncio.Semaphore],
        method: str,
        url: str,
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        with_response_headers: bool,
        on_request: Optional[AsyncRequestHook],
        on_response: Optional[AsyncResponseHook],
    ) -> dict[str, Any]:
        """Drive ``_async_attempt`` until it yields a final result struct.

        Each call to ``_async_attempt`` returns either a result (returned
        as-is) or a wait time, in which case this method sleeps for that long
        (when positive) and tries again with the next attempt number.

        When ``semaphore`` is given, the whole retry loop — including the
        sleeps between attempts — runs while holding it.
        """

        async def _run_with_retries() -> dict[str, Any]:
            started = time.monotonic()
            attempt_no = 0
            while True:
                outcome, delay = await cls._async_attempt(
                    session,
                    method,
                    url,
                    params,
                    body,
                    data,
                    headers,
                    timeout,
                    attempt_no,
                    retries,
                    backoff,
                    started,
                    with_response_headers,
                    on_request,
                    on_response,
                )
                if outcome is not None:
                    return outcome
                if delay and delay > 0:
                    await asyncio.sleep(delay)
                attempt_no += 1

        if semaphore is not None:
            async with semaphore:
                return await _run_with_retries()
        return await _run_with_retries()

    @classmethod
    def _send_sync_with_retries(
        cls,
        client: httpx.Client,
        method: str,
        url: str,
        params: Optional[dict[str, Any]],
        body: Optional[dict[str, Any]],
        data: Optional[dict[str, Any]],
        headers: Optional[dict[str, Any]],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        on_request: Optional[RequestHook],
        on_response: Optional[ResponseHook],
    ) -> Optional[httpx.Response]:
        """Like ``_send_sync`` but retries on failure.

        Returns the response only when it is successful
        (``response.is_success``). Returns ``None`` when the final attempt
        raised ``httpx.HTTPError``, or when it produced a non-success status
        that is not retried (status not retryable, or retries exhausted).

        Retries wait for the delay reported by ``_retry_after_seconds`` when
        there is one (status retries only), otherwise ``backoff * 2**attempt``
        (no sleep when ``backoff <= 0``).
        """
        tries = 0
        while True:
            try:
                response = cls._send_sync(
                    client,
                    method,
                    url,
                    params,
                    body,
                    data,
                    headers,
                    timeout,
                    on_request,
                    on_response,
                )
            except httpx.HTTPError:
                if not (tries < retries):
                    return None
                pause = backoff * (2**tries) if backoff > 0 else 0.0
            else:
                if response.is_success:
                    return response
                if not (tries < retries and _is_retryable_status(response.status_code)):
                    return None
                # A server-provided delay wins over exponential backoff.
                pause = _retry_after_seconds(response)
                if pause is None:
                    pause = backoff * (2**tries) if backoff > 0 else 0.0

            if pause > 0:
                time.sleep(pause)
            tries += 1

    @classmethod
    def _sync_batch(
        cls,
        method: str,
        rows: list[tuple[str, Any, Any, Any, Any]],
        *,
        client: Optional[httpx.Client],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        cache: bool,
        with_response_headers: bool,
        on_request: Optional[RequestHook],
        on_response: Optional[ResponseHook],
    ) -> list[dict[str, Any]]:
        """Run ``_sync_one`` for every row, in order, and collect the result structs.

        Each row is a ``(url, params, body, data, headers)`` tuple. When
        ``cache`` is true, rows that are identical (same method, URL and
        hashable form of the other fields) within this batch reuse the first
        row's result instead of issuing another request.

        If ``client`` is ``None`` a temporary ``httpx.Client`` is created for
        the batch and closed afterwards; a caller-supplied client is left open.
        """
        owns_client = client is None
        http: httpx.Client = httpx.Client() if owns_client else client  # type: ignore[assignment]
        slots: list[Optional[dict[str, Any]]] = [None] * len(rows)
        seen: dict[Any, dict[str, Any]] = {}
        try:
            for position, (url, params, body, data, headers) in enumerate(rows):
                dedupe_key = None
                if cache:
                    dedupe_key = (
                        method,
                        url,
                        _hashable(params),
                        _hashable(body),
                        _hashable(data),
                        _hashable(headers),
                    )
                    if dedupe_key in seen:
                        slots[position] = seen[dedupe_key]
                        continue

                outcome = cls._sync_one(
                    http,
                    method,
                    url,
                    params,
                    body,
                    data,
                    headers,
                    timeout,
                    retries,
                    backoff,
                    with_response_headers,
                    on_request,
                    on_response,
                )
                if cache and dedupe_key is not None:
                    seen[dedupe_key] = outcome
                slots[position] = outcome
        finally:
            # Only close what we opened ourselves.
            if owns_client:
                http.close()
        return [filled for filled in slots if filled is not None]

    @classmethod
    async def _async_many(
        cls,
        method: str,
        rows: list[tuple[str, Any, Any, Any, Any]],
        *,
        client: Optional[aiohttp.ClientSession],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        max_concurrency: Optional[int],
        cache: bool,
        with_response_headers: bool,
        on_request: Optional[AsyncRequestHook],
        on_response: Optional[AsyncResponseHook],
    ) -> list[dict[str, Any]]:
        """Issue the requests for all rows concurrently and return one result per row.

        Each row is a ``(url, params, body, data, headers)`` tuple. With
        ``cache`` enabled, identical rows share a single task and its result.
        A truthy ``max_concurrency`` bounds the number of tasks running at
        once via a semaphore. Results are returned in row order.

        If ``client`` is ``None`` a temporary ``aiohttp.ClientSession`` is
        created for the batch and closed afterwards; a caller-supplied session
        is left open.
        """
        semaphore = asyncio.Semaphore(max_concurrency) if max_concurrency else None

        # Map every row to a task slot; with caching, duplicate rows share a slot.
        slot_of_key: dict[Any, int] = {}
        task_rows: list[int] = []  # row index that each task slot is built from
        slot_for_row: list[int] = []  # row -> task slot
        for row_no, (url, params, body, data, headers) in enumerate(rows):
            if cache:
                key = (method, url, _hashable(params), _hashable(body), _hashable(data), _hashable(headers))
                if key in slot_of_key:
                    slot_for_row.append(slot_of_key[key])
                    continue
                slot_of_key[key] = len(task_rows)
            slot_for_row.append(len(task_rows))
            task_rows.append(row_no)

        own_session = client is None
        if own_session:
            # Size the connection pool to the requested concurrency so we don't
            # bottleneck on aiohttp's default pool limit (100). 0 == unlimited.
            sess = aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=max_concurrency or 0))
        else:
            sess = client  # type: ignore[assignment]
        try:
            pending = []
            for row_no in task_rows:
                row = rows[row_no]
                pending.append(
                    cls._async_one(
                        sess,
                        semaphore,
                        method,
                        row[0],
                        row[1],
                        row[2],
                        row[3],
                        row[4],
                        timeout,
                        retries,
                        backoff,
                        with_response_headers,
                        on_request,
                        on_response,
                    )
                )
            unique_results = await asyncio.gather(*pending)
        finally:
            if own_session:
                await sess.close()
        # Fan the unique results back out to every row that shares them.
        return [unique_results[slot] for slot in slot_for_row]

    @staticmethod
    def _results_to_series(
        results: list[dict[str, Any]],
        *,
        with_metadata: bool,
        with_response_headers: bool,
        on_error: OnError,
    ) -> pl.Series:
        """Convert per-row result structs into the Series handed back to Polars.

        With ``with_metadata`` the structs are returned as-is in a Series of
        the metadata dtype. Otherwise a ``Utf8`` Series of bodies is built and
        rows whose ``"error"`` is set are handled according to ``on_error``:

        * ``"raise"``  – raise ``RuntimeError`` for the first failing row;
        * ``"return"`` – keep the (coerced) body of the failing response;
        * anything else – emit null, and issue one summary warning for the batch.
        """
        if with_metadata:
            return pl.Series(results, dtype=_metadata_dtype(with_response_headers))

        bodies: list[Optional[str]] = []
        suppressed: list[str] = []
        for row in results:
            failure = row["error"]
            if failure is not None and on_error == "raise":
                raise RuntimeError(failure)
            if failure is None or on_error == "return":
                bodies.append(_coerce_body(row["body"]))
                continue
            # Error swallowed: null in the output, remembered for the warning.
            bodies.append(None)
            suppressed.append(failure)

        if suppressed:
            warnings.warn(
                f"polars-api: {len(suppressed)}/{len(results)} request(s) failed and "
                f"were replaced with null (first error: {suppressed[0]}). "
                "Pass with_metadata=True to inspect per-row errors, or "
                "on_error='raise'/'return' to change handling.",
                stacklevel=2,
            )
        return pl.Series(bodies, dtype=pl.Utf8)

    @staticmethod
    def _rows_from_struct(s: pl.Series) -> list[tuple[str, Any, Any, Any, Any]]:
        """Unpack the input struct Series into per-row request tuples.

        Reads the ``url``, ``params``, ``body``, ``data`` and ``headers``
        fields (in that order) and zips them into one tuple per row.
        """
        field_names = ("url", "params", "body", "data", "headers")
        columns = [s.struct.field(field_name).to_list() for field_name in field_names]
        return list(zip(*columns))

    def _build_headers_expr(
        self,
        headers: Optional[pl.Expr],
        auth: Optional[tuple[str, str]],
        bearer: Optional[Union[str, pl.Expr]],
        api_key: Optional[Union[str, pl.Expr]],
        api_key_header: str,
    ) -> Optional[pl.Expr]:
        """Combine user headers with the headers implied by the auth shortcuts.

        * ``auth`` (``(user, password)``) sets ``Authorization`` to the value
          built by ``_basic_auth_header``.
        * ``bearer`` sets ``Authorization`` to ``"Bearer " + token``; it is
          applied after ``auth`` and therefore replaces it when both are given.
        * ``api_key`` is stored under the header named by ``api_key_header``.

        ``bearer`` and ``api_key`` may be plain strings or expressions.

        Returns ``headers`` unchanged (possibly ``None``) when no shortcut is
        used, a new struct of the generated headers when ``headers`` is
        ``None``, and otherwise ``headers`` with the generated fields
        added/overwritten.
        """
        generated: dict[str, pl.Expr] = {}
        if auth is not None:
            username, secret = auth
            generated["Authorization"] = pl.lit(_basic_auth_header(username, secret))
        if bearer is not None:
            token = bearer if isinstance(bearer, pl.Expr) else pl.lit(bearer)
            generated["Authorization"] = pl.lit("Bearer ") + token.cast(pl.Utf8)
        if api_key is not None:
            key_value = api_key if isinstance(api_key, pl.Expr) else pl.lit(api_key)
            generated[api_key_header] = key_value.cast(pl.Utf8)

        if not generated:
            # Nothing to add: hand back the caller's headers (or None) untouched.
            return headers

        named = [expr.alias(header_name) for header_name, expr in generated.items()]
        if headers is not None:
            return headers.struct.with_fields(*named)
        return pl.struct(*named)

    def _input_struct(
        self,
        params: Optional[pl.Expr],
        body: Optional[pl.Expr],
        data: Optional[pl.Expr],
        headers: Optional[pl.Expr],
    ) -> pl.Expr:
        """Bundle the URL and the optional per-row inputs into one struct expression.

        The struct always has the fields ``url``, ``params``, ``body``,
        ``data`` and ``headers``; an input that was not supplied is filled
        with a null literal so the field is still present.
        """

        def _named(expr: Optional[pl.Expr], field_name: str) -> pl.Expr:
            chosen = pl.lit(None) if expr is None else expr
            return chosen.alias(field_name)

        return pl.struct(
            self._url.alias("url"),
            _named(params, "params"),
            _named(body, "body"),
            _named(data, "data"),
            _named(headers, "headers"),
        )

    def _sync_call(
        self,
        method: str,
        params: Optional[pl.Expr],
        body: Optional[pl.Expr],
        data: Optional[pl.Expr],
        headers: Optional[pl.Expr],
        *,
        client: Optional[httpx.Client],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        cache: bool,
        with_metadata: bool,
        with_response_headers: bool,
        on_error: OnError,
        on_request: Optional[RequestHook],
        on_response: Optional[ResponseHook],
    ) -> pl.Expr:
        """Build the expression that performs the synchronous requests.

        The inputs are packed with ``_input_struct`` and mapped batch-wise:
        each batch is unpacked into rows, sent through ``_sync_batch`` and
        converted back with ``_results_to_series``. The declared return dtype
        is the metadata dtype when ``with_metadata`` is set, else ``Utf8``.
        """
        if with_metadata:
            return_dtype = _metadata_dtype(with_response_headers)
        else:
            return_dtype = pl.Utf8

        def _fetch_batch(batch: pl.Series) -> pl.Series:
            return self._results_to_series(
                self._sync_batch(
                    method,
                    self._rows_from_struct(batch),
                    client=client,
                    timeout=timeout,
                    retries=retries,
                    backoff=backoff,
                    cache=cache,
                    with_response_headers=with_response_headers,
                    on_request=on_request,
                    on_response=on_response,
                ),
                with_metadata=with_metadata,
                with_response_headers=with_response_headers,
                on_error=on_error,
            )

        packed = self._input_struct(params, body, data, headers)
        return packed.map_batches(_fetch_batch, return_dtype=return_dtype)

    def _async_call(
        self,
        method: str,
        params: Optional[pl.Expr],
        body: Optional[pl.Expr],
        data: Optional[pl.Expr],
        headers: Optional[pl.Expr],
        *,
        client: Optional[aiohttp.ClientSession],
        timeout: Optional[float],
        retries: int,
        backoff: float,
        max_concurrency: Optional[int],
        cache: bool,
        with_metadata: bool,
        with_response_headers: bool,
        on_error: OnError,
        on_request: Optional[AsyncRequestHook],
        on_response: Optional[AsyncResponseHook],
    ) -> pl.Expr:
        """Build the expression that performs the asynchronous requests.

        The inputs are packed with ``_input_struct`` and mapped batch-wise:
        each batch is unpacked into rows, the ``_async_many`` coroutine for
        them is run to completion through ``_arun``, and the results are
        converted back with ``_results_to_series``. The declared return dtype
        is the metadata dtype when ``with_metadata`` is set, else ``Utf8``.
        """
        if with_metadata:
            return_dtype = _metadata_dtype(with_response_headers)
        else:
            return_dtype = pl.Utf8

        def _fetch_batch(batch: pl.Series) -> pl.Series:
            return self._results_to_series(
                _arun(
                    self._async_many(
                        method,
                        self._rows_from_struct(batch),
                        client=client,
                        timeout=timeout,
                        retries=retries,
                        backoff=backoff,
                        max_concurrency=max_concurrency,
                        cache=cache,
                        with_response_headers=with_response_headers,
                        on_request=on_request,
                        on_response=on_response,
                    )
                ),
                with_metadata=with_metadata,
                with_response_headers=with_response_headers,
                on_error=on_error,
            )

        packed = self._input_struct(params, body, data, headers)
        return packed.map_batches(_fetch_batch, return_dtype=return_dtype)

    # ---- Public API: full request() / arequest() entry points ----

    def request(
        self,
        method: str,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        *,
        data: Optional[pl.Expr] = None,
        headers: Any = UNSET,
        client: Any = UNSET,
        timeout: Any = UNSET,
        retries: Any = UNSET,
        backoff: Any = UNSET,
        cache: Any = UNSET,
        with_metadata: Any = UNSET,
        with_response_headers: Any = UNSET,
        on_error: Any = UNSET,
        on_request: Any = UNSET,
        on_response: Any = UNSET,
        auth: Any = UNSET,
        bearer: Any = UNSET,
        api_key: Any = UNSET,
        api_key_header: Any = UNSET,
    ) -> pl.Expr:
        """Issue a synchronous HTTP request per row.

        ``method`` is upper-cased before use. ``params``, ``body`` and
        ``data`` are per-row expressions passed through as given. Every other
        option is resolved through ``_resolve``, and the header-related ones
        (``headers``, ``auth``, ``bearer``, ``api_key``, ``api_key_header``)
        are merged into a single headers expression first.

        Any argument left unset falls back to the value registered via
        ``set_defaults`` (or the ``defaults`` context manager), then to the built-in default.
        """
        merged_headers = self._build_headers_expr(
            _resolve("headers", headers),
            _resolve("auth", auth),
            _resolve("bearer", bearer),
            _resolve("api_key", api_key),
            _resolve("api_key_header", api_key_header),
        )
        verb = method.upper()
        supplied = (
            ("client", client),
            ("timeout", timeout),
            ("retries", retries),
            ("backoff", backoff),
            ("cache", cache),
            ("with_metadata", with_metadata),
            ("with_response_headers", with_response_headers),
            ("on_error", on_error),
            ("on_request", on_request),
            ("on_response", on_response),
        )
        resolved = {option: _resolve(option, value) for option, value in supplied}
        return self._sync_call(verb, params, body, data, merged_headers, **resolved)

    def arequest(
        self,
        method: str,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        *,
        data: Optional[pl.Expr] = None,
        headers: Any = UNSET,
        client: Any = UNSET,
        timeout: Any = UNSET,
        retries: Any = UNSET,
        backoff: Any = UNSET,
        max_concurrency: Any = UNSET,
        cache: Any = UNSET,
        with_metadata: Any = UNSET,
        with_response_headers: Any = UNSET,
        on_error: Any = UNSET,
        on_request: Any = UNSET,
        on_response: Any = UNSET,
        auth: Any = UNSET,
        bearer: Any = UNSET,
        api_key: Any = UNSET,
        api_key_header: Any = UNSET,
    ) -> pl.Expr:
        """Issue concurrent asynchronous HTTP requests across the batch.

        ``method`` is upper-cased before use. ``params``, ``body`` and
        ``data`` are per-row expressions passed through as given. Every other
        option is resolved through ``_resolve``, and the header-related ones
        (``headers``, ``auth``, ``bearer``, ``api_key``, ``api_key_header``)
        are merged into a single headers expression first.

        Any argument left unset falls back to the value registered via
        ``set_defaults`` (or the ``defaults`` context manager), then to the built-in default.
        """
        merged_headers = self._build_headers_expr(
            _resolve("headers", headers),
            _resolve("auth", auth),
            _resolve("bearer", bearer),
            _resolve("api_key", api_key),
            _resolve("api_key_header", api_key_header),
        )
        verb = method.upper()
        supplied = (
            ("client", client),
            ("timeout", timeout),
            ("retries", retries),
            ("backoff", backoff),
            ("max_concurrency", max_concurrency),
            ("cache", cache),
            ("with_metadata", with_metadata),
            ("with_response_headers", with_response_headers),
            ("on_error", on_error),
            ("on_request", on_request),
            ("on_response", on_response),
        )
        resolved = {option: _resolve(option, value) for option, value in supplied}
        return self._async_call(verb, params, body, data, merged_headers, **resolved)

    # ---- Verb wrappers (sync) ----

    def get(self, params: Optional[pl.Expr] = None, timeout: Any = UNSET, **kwargs: Any) -> pl.Expr:
        """Issue a synchronous GET per row.

        Shorthand for ``request("GET", ...)`` without a body; any extra
        keyword arguments are forwarded to ``request`` unchanged.
        """
        return self.request("GET", params=params, body=None, timeout=timeout, **kwargs)

    def post(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Issue a synchronous POST per row.

        Shorthand for ``request("POST", ...)``; any extra keyword arguments
        are forwarded to ``request`` unchanged.
        """
        return self.request("POST", params=params, body=body, timeout=timeout, **kwargs)

    def put(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Issue a synchronous PUT per row.

        Shorthand for ``request("PUT", ...)``; any extra keyword arguments
        are forwarded to ``request`` unchanged.
        """
        return self.request("PUT", params=params, body=body, timeout=timeout, **kwargs)

    def patch(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send one blocking PATCH request for each row.

        Delegates to ``request`` with the verb fixed to ``"PATCH"``; ``params``,
        ``body``, ``timeout`` and any extra keyword arguments are passed
        through unchanged.
        """
        verb = "PATCH"
        return self.request(verb, params, body, timeout=timeout, **kwargs)

    def delete(
        self,
        params: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send one blocking DELETE request for each row.

        Delegates to ``request`` with the verb fixed to ``"DELETE"`` and no
        body; ``params``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        no_body = None  # DELETE is issued without a body expression
        return self.request("DELETE", params, no_body, timeout=timeout, **kwargs)

    def head(
        self,
        params: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send one blocking HEAD request for each row.

        Delegates to ``request`` with the verb fixed to ``"HEAD"`` and no body;
        ``params``, ``timeout`` and any extra keyword arguments are passed
        through unchanged.
        """
        no_body = None  # HEAD is issued without a body expression
        return self.request("HEAD", params, no_body, timeout=timeout, **kwargs)

    # ---- Verb wrappers (async) ----

    def aget(
        self,
        params: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous GET requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"GET"`` and no body;
        ``params``, ``timeout`` and any extra keyword arguments are passed
        through unchanged.
        """
        no_body = None  # GET is issued without a body expression
        return self.arequest("GET", params, no_body, timeout=timeout, **kwargs)

    def apost(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous POST requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"POST"``;
        ``params``, ``body``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        verb = "POST"
        return self.arequest(verb, params, body, timeout=timeout, **kwargs)

    def aput(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous PUT requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"PUT"``;
        ``params``, ``body``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        verb = "PUT"
        return self.arequest(verb, params, body, timeout=timeout, **kwargs)

    def apatch(
        self,
        params: Optional[pl.Expr] = None,
        body: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous PATCH requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"PATCH"``;
        ``params``, ``body``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        verb = "PATCH"
        return self.arequest(verb, params, body, timeout=timeout, **kwargs)

    def adelete(
        self,
        params: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous DELETE requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"DELETE"`` and no
        body; ``params``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        no_body = None  # DELETE is issued without a body expression
        return self.arequest("DELETE", params, no_body, timeout=timeout, **kwargs)

    def ahead(
        self,
        params: Optional[pl.Expr] = None,
        timeout: Any = UNSET,
        **kwargs: Any,
    ) -> pl.Expr:
        """Send asynchronous HEAD requests concurrently across the batch.

        Delegates to ``arequest`` with the verb fixed to ``"HEAD"`` and no
        body; ``params``, ``timeout`` and any extra keyword arguments are
        passed through unchanged.
        """
        no_body = None  # HEAD is issued without a body expression
        return self.arequest("HEAD", params, no_body, timeout=timeout, **kwargs)

    # ---- Pagination ----

    def paginate(
        self,
        params: Optional[pl.Expr] = None,
        *,
        method: str = "GET",
        max_pages: int = 10,
        next_url: Optional[NextUrl] = None,
        headers: Any = UNSET,
        client: Any = UNSET,
        timeout: Any = UNSET,
        retries: Any = UNSET,
        backoff: Any = UNSET,
        on_request: Any = UNSET,
        on_response: Any = UNSET,
        auth: Any = UNSET,
        bearer: Any = UNSET,
        api_key: Any = UNSET,
        api_key_header: Any = UNSET,
    ) -> pl.Expr:
        """Fetch up to ``max_pages`` pages per row using blocking requests.

        Each row's URL is requested first. The URL of the following page is
        taken from the response's ``Link`` header (``rel="next"``) unless a
        custom extractor is supplied, e.g. `next_url=lambda response: ...` to
        read it from a JSON field. Following stops when the extractor returns
        nothing, when a request produces no response, or once ``max_pages``
        pages have been collected. ``params`` are sent with the first request
        of each row only.

        The result is a column of List[Utf8] holding the response bodies
        gathered for each starting URL. Pipe it through
        `.list.eval(pl.element().str.json_decode(dtype))` and `.explode(...)`
        to flatten the pages back into the DataFrame (the `dtype` schema is
        required when decoding inside an expression).

        Any unset argument falls back to the value registered via
        ``set_defaults`` (or the ``defaults`` context manager), then to the
        built-in default.
        """
        resolved_client = _resolve("client", client)
        resolved_timeout = _resolve("timeout", timeout)
        max_retries = _resolve("retries", retries)
        retry_backoff = _resolve("backoff", backoff)
        request_hook = _resolve("on_request", on_request)
        response_hook = _resolve("on_response", on_response)
        header_expr = self._build_headers_expr(
            _resolve("headers", headers),
            _resolve("auth", auth),
            _resolve("bearer", bearer),
            _resolve("api_key", api_key),
            _resolve("api_key_header", api_key_header),
        )
        # Missing params/headers become null literals so the struct always has three fields.
        query_expr = params if params is not None else pl.lit(None)
        if header_expr is None:
            header_expr = pl.lit(None)

        def _next_from_link_header(resp: Any) -> Any:
            return _parse_link_next(resp.headers.get("link"))

        find_next = next_url if next_url else _next_from_link_header
        http_verb = method.upper()

        def _collect_pages(session: httpx.Client, start_url: str, query: Any, row_headers: Any) -> list[str]:
            pages: list[str] = []
            target, pending_query = start_url, query
            for _ in range(max_pages):
                reply = self._send_sync_with_retries(
                    session,
                    http_verb,
                    target,
                    pending_query,
                    None,
                    None,
                    row_headers,
                    resolved_timeout,
                    max_retries,
                    retry_backoff,
                    request_hook,
                    response_hook,
                )
                if reply is None:
                    break
                pages.append(reply.text)
                following = find_next(reply)
                if not following:
                    break
                # The next URL already encodes its own query string.
                target, pending_query = following, None
            return pages

        def _run_batch(batch: pl.Series) -> pl.Series:
            rows = zip(
                batch.struct.field("url").to_list(),
                batch.struct.field("params").to_list(),
                batch.struct.field("headers").to_list(),
            )
            # Only a client created here is closed here; a caller-supplied one is left open.
            if resolved_client is None:
                session, owned = httpx.Client(), True
            else:
                session, owned = resolved_client, False
            try:
                collected = [_collect_pages(session, u, q, h) for u, q, h in rows]
                return pl.Series(collected, dtype=pl.List(pl.Utf8))
            finally:
                if owned:
                    session.close()

        row_struct = pl.struct(
            self._url.alias("url"),
            query_expr.alias("params"),
            header_expr.alias("headers"),
        )
        return row_struct.map_batches(_run_batch, return_dtype=pl.List(pl.Utf8))

`request(method, params=None, body=None, *, data=None, headers=UNSET, client=UNSET, timeout=UNSET, retries=UNSET, backoff=UNSET, cache=UNSET, with_metadata=UNSET, with_response_headers=UNSET, on_error=UNSET, on_request=UNSET, on_response=UNSET, auth=UNSET, bearer=UNSET, api_key=UNSET, api_key_header=UNSET)` ¶

Issue a synchronous HTTP request per row.

Any argument left unset falls back to the value registered via `set_defaults` (or the `defaults` context manager), then to the built-in default.

Source code in polars_api/api.py

def request(
    self,
    method: str,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    *,
    data: Optional[pl.Expr] = None,
    headers: Any = UNSET,
    client: Any = UNSET,
    timeout: Any = UNSET,
    retries: Any = UNSET,
    backoff: Any = UNSET,
    cache: Any = UNSET,
    with_metadata: Any = UNSET,
    with_response_headers: Any = UNSET,
    on_error: Any = UNSET,
    on_request: Any = UNSET,
    on_response: Any = UNSET,
    auth: Any = UNSET,
    bearer: Any = UNSET,
    api_key: Any = UNSET,
    api_key_header: Any = UNSET,
) -> pl.Expr:
    """Send one blocking HTTP request for each row.

    ``method`` is upper-cased before dispatch. ``headers``, ``auth``,
    ``bearer``, ``api_key`` and ``api_key_header`` are combined into a single
    headers expression by ``_build_headers_expr``; the remaining options are
    forwarded to ``_sync_call`` as keyword arguments.

    Every option left unset falls back to the value registered via
    ``set_defaults`` (or the ``defaults`` context manager), then to the
    built-in default.
    """
    header_sources = (
        ("headers", headers),
        ("auth", auth),
        ("bearer", bearer),
        ("api_key", api_key),
        ("api_key_header", api_key_header),
    )
    merged_headers = self._build_headers_expr(
        *(_resolve(option, given) for option, given in header_sources)
    )
    verb = method.upper()
    # Option name -> value as passed by the caller (possibly the UNSET sentinel).
    call_options = {
        "client": client,
        "timeout": timeout,
        "retries": retries,
        "backoff": backoff,
        "cache": cache,
        "with_metadata": with_metadata,
        "with_response_headers": with_response_headers,
        "on_error": on_error,
        "on_request": on_request,
        "on_response": on_response,
    }
    resolved = {option: _resolve(option, given) for option, given in call_options.items()}
    return self._sync_call(verb, params, body, data, merged_headers, **resolved)

`arequest(method, params=None, body=None, *, data=None, headers=UNSET, client=UNSET, timeout=UNSET, retries=UNSET, backoff=UNSET, max_concurrency=UNSET, cache=UNSET, with_metadata=UNSET, with_response_headers=UNSET, on_error=UNSET, on_request=UNSET, on_response=UNSET, auth=UNSET, bearer=UNSET, api_key=UNSET, api_key_header=UNSET)` ¶

Issue concurrent asynchronous HTTP requests across the batch.

Any argument left unset falls back to the value registered via `set_defaults` (or the `defaults` context manager), then to the built-in default.

Source code in polars_api/api.py

def arequest(
    self,
    method: str,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    *,
    data: Optional[pl.Expr] = None,
    headers: Any = UNSET,
    client: Any = UNSET,
    timeout: Any = UNSET,
    retries: Any = UNSET,
    backoff: Any = UNSET,
    max_concurrency: Any = UNSET,
    cache: Any = UNSET,
    with_metadata: Any = UNSET,
    with_response_headers: Any = UNSET,
    on_error: Any = UNSET,
    on_request: Any = UNSET,
    on_response: Any = UNSET,
    auth: Any = UNSET,
    bearer: Any = UNSET,
    api_key: Any = UNSET,
    api_key_header: Any = UNSET,
) -> pl.Expr:
    """Send asynchronous HTTP requests concurrently across the batch.

    ``method`` is upper-cased before dispatch. ``headers``, ``auth``,
    ``bearer``, ``api_key`` and ``api_key_header`` are combined into a single
    headers expression by ``_build_headers_expr``; the remaining options,
    including ``max_concurrency``, are forwarded to ``_async_call`` as keyword
    arguments.

    Every option left unset falls back to the value registered via
    ``set_defaults`` (or the ``defaults`` context manager), then to the
    built-in default.
    """
    header_sources = (
        ("headers", headers),
        ("auth", auth),
        ("bearer", bearer),
        ("api_key", api_key),
        ("api_key_header", api_key_header),
    )
    merged_headers = self._build_headers_expr(
        *(_resolve(option, given) for option, given in header_sources)
    )
    verb = method.upper()
    # Option name -> value as passed by the caller (possibly the UNSET sentinel).
    call_options = {
        "client": client,
        "timeout": timeout,
        "retries": retries,
        "backoff": backoff,
        "max_concurrency": max_concurrency,
        "cache": cache,
        "with_metadata": with_metadata,
        "with_response_headers": with_response_headers,
        "on_error": on_error,
        "on_request": on_request,
        "on_response": on_response,
    }
    resolved = {option: _resolve(option, given) for option, given in call_options.items()}
    return self._async_call(verb, params, body, data, merged_headers, **resolved)

`get(params=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous GET per row.

Source code in polars_api/api.py

def get(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking GET request for each row.

    Delegates to ``request`` with the verb fixed to ``"GET"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # GET is issued without a body expression
    return self.request("GET", params, no_body, timeout=timeout, **kwargs)

`post(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous POST per row.

Source code in polars_api/api.py

def post(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking POST request for each row.

    Delegates to ``request`` with the verb fixed to ``"POST"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "POST"
    return self.request(verb, params, body, timeout=timeout, **kwargs)

`put(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous PUT per row.

Source code in polars_api/api.py

def put(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking PUT request for each row.

    Delegates to ``request`` with the verb fixed to ``"PUT"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "PUT"
    return self.request(verb, params, body, timeout=timeout, **kwargs)

`patch(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous PATCH per row.

Source code in polars_api/api.py

def patch(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking PATCH request for each row.

    Delegates to ``request`` with the verb fixed to ``"PATCH"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "PATCH"
    return self.request(verb, params, body, timeout=timeout, **kwargs)

`delete(params=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous DELETE per row.

Source code in polars_api/api.py

def delete(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking DELETE request for each row.

    Delegates to ``request`` with the verb fixed to ``"DELETE"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # DELETE is issued without a body expression
    return self.request("DELETE", params, no_body, timeout=timeout, **kwargs)

`head(params=None, timeout=UNSET, **kwargs)` ¶

Issue a synchronous HEAD per row.

Source code in polars_api/api.py

def head(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send one blocking HEAD request for each row.

    Delegates to ``request`` with the verb fixed to ``"HEAD"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # HEAD is issued without a body expression
    return self.request("HEAD", params, no_body, timeout=timeout, **kwargs)

`aget(params=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous GETs across the batch.

Source code in polars_api/api.py

def aget(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous GET requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"GET"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # GET is issued without a body expression
    return self.arequest("GET", params, no_body, timeout=timeout, **kwargs)

`apost(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous POSTs across the batch.

Source code in polars_api/api.py

def apost(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous POST requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"POST"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "POST"
    return self.arequest(verb, params, body, timeout=timeout, **kwargs)

`aput(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous PUTs across the batch.

Source code in polars_api/api.py

def aput(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous PUT requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"PUT"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "PUT"
    return self.arequest(verb, params, body, timeout=timeout, **kwargs)

`apatch(params=None, body=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous PATCHes across the batch.

Source code in polars_api/api.py

def apatch(
    self,
    params: Optional[pl.Expr] = None,
    body: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous PATCH requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"PATCH"``; ``params``,
    ``body``, ``timeout`` and any extra keyword arguments are passed through
    unchanged.
    """
    verb = "PATCH"
    return self.arequest(verb, params, body, timeout=timeout, **kwargs)

`adelete(params=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous DELETEs across the batch.

Source code in polars_api/api.py

def adelete(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous DELETE requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"DELETE"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # DELETE is issued without a body expression
    return self.arequest("DELETE", params, no_body, timeout=timeout, **kwargs)

`ahead(params=None, timeout=UNSET, **kwargs)` ¶

Issue concurrent asynchronous HEADs across the batch.

Source code in polars_api/api.py

def ahead(
    self,
    params: Optional[pl.Expr] = None,
    timeout: Any = UNSET,
    **kwargs: Any,
) -> pl.Expr:
    """Send asynchronous HEAD requests concurrently across the batch.

    Delegates to ``arequest`` with the verb fixed to ``"HEAD"`` and no body;
    ``params``, ``timeout`` and any extra keyword arguments are passed
    through unchanged.
    """
    no_body = None  # HEAD is issued without a body expression
    return self.arequest("HEAD", params, no_body, timeout=timeout, **kwargs)

`paginate(params=None, *, method='GET', max_pages=10, next_url=None, headers=UNSET, client=UNSET, timeout=UNSET, retries=UNSET, backoff=UNSET, on_request=UNSET, on_response=UNSET, auth=UNSET, bearer=UNSET, api_key=UNSET, api_key_header=UNSET)` ¶

Synchronously paginate per row, following Link: rel="next" by default.

Returns a column of `List[Utf8]` — one list of response bodies per starting URL. Pipe through `.list.eval(pl.element().str.json_decode(dtype))` and `.explode(...)` to flatten paginated rows back into the DataFrame (the `dtype` schema is required when decoding inside an expression).

Pass `next_url=lambda response: ...` to extract the next URL from a custom location (e.g. a JSON field) instead of the `Link` header.

Any unset argument falls back to the value registered via `set_defaults` (or the `defaults` context manager), then to the built-in default.

Source code in polars_api/api.py

def paginate(
    self,
    params: Optional[pl.Expr] = None,
    *,
    method: str = "GET",
    max_pages: int = 10,
    next_url: Optional[NextUrl] = None,
    headers: Any = UNSET,
    client: Any = UNSET,
    timeout: Any = UNSET,
    retries: Any = UNSET,
    backoff: Any = UNSET,
    on_request: Any = UNSET,
    on_response: Any = UNSET,
    auth: Any = UNSET,
    bearer: Any = UNSET,
    api_key: Any = UNSET,
    api_key_header: Any = UNSET,
) -> pl.Expr:
    """Fetch up to ``max_pages`` pages per row using blocking requests.

    Each row's URL is requested first. The URL of the following page is
    taken from the response's ``Link`` header (``rel="next"``) unless a
    custom extractor is supplied, e.g. `next_url=lambda response: ...` to
    read it from a JSON field. Following stops when the extractor returns
    nothing, when a request produces no response, or once ``max_pages``
    pages have been collected. ``params`` are sent with the first request of
    each row only.

    The result is a column of List[Utf8] holding the response bodies
    gathered for each starting URL. Pipe it through
    `.list.eval(pl.element().str.json_decode(dtype))` and `.explode(...)` to
    flatten the pages back into the DataFrame (the `dtype` schema is
    required when decoding inside an expression).

    Any unset argument falls back to the value registered via
    ``set_defaults`` (or the ``defaults`` context manager), then to the
    built-in default.
    """
    resolved_client = _resolve("client", client)
    resolved_timeout = _resolve("timeout", timeout)
    max_retries = _resolve("retries", retries)
    retry_backoff = _resolve("backoff", backoff)
    request_hook = _resolve("on_request", on_request)
    response_hook = _resolve("on_response", on_response)
    header_expr = self._build_headers_expr(
        _resolve("headers", headers),
        _resolve("auth", auth),
        _resolve("bearer", bearer),
        _resolve("api_key", api_key),
        _resolve("api_key_header", api_key_header),
    )
    # Missing params/headers become null literals so the struct always has three fields.
    query_expr = params if params is not None else pl.lit(None)
    if header_expr is None:
        header_expr = pl.lit(None)

    def _next_from_link_header(resp: Any) -> Any:
        return _parse_link_next(resp.headers.get("link"))

    find_next = next_url if next_url else _next_from_link_header
    http_verb = method.upper()

    def _collect_pages(session: httpx.Client, start_url: str, query: Any, row_headers: Any) -> list[str]:
        pages: list[str] = []
        target, pending_query = start_url, query
        for _ in range(max_pages):
            reply = self._send_sync_with_retries(
                session,
                http_verb,
                target,
                pending_query,
                None,
                None,
                row_headers,
                resolved_timeout,
                max_retries,
                retry_backoff,
                request_hook,
                response_hook,
            )
            if reply is None:
                break
            pages.append(reply.text)
            following = find_next(reply)
            if not following:
                break
            # The next URL already encodes its own query string.
            target, pending_query = following, None
        return pages

    def _run_batch(batch: pl.Series) -> pl.Series:
        rows = zip(
            batch.struct.field("url").to_list(),
            batch.struct.field("params").to_list(),
            batch.struct.field("headers").to_list(),
        )
        # Only a client created here is closed here; a caller-supplied one is left open.
        if resolved_client is None:
            session, owned = httpx.Client(), True
        else:
            session, owned = resolved_client, False
        try:
            collected = [_collect_pages(session, u, q, h) for u, q, h in rows]
            return pl.Series(collected, dtype=pl.List(pl.Utf8))
        finally:
            if owned:
                session.close()

    row_struct = pl.struct(
        self._url.alias("url"),
        query_expr.alias("params"),
        header_expr.alias("headers"),
    )
    return row_struct.map_batches(_run_batch, return_dtype=pl.List(pl.Utf8))

API reference¶

Methods¶

Global configuration¶

polars_api.set_defaults(**options) ¶

polars_api.get_defaults() ¶

polars_api.reset_defaults(*options) ¶

polars_api.defaults(**options) ¶

polars_api.Api¶

polars_api.api.Api ¶

get(params=None, timeout=UNSET, **kwargs) ¶

post(params=None, body=None, timeout=UNSET, **kwargs) ¶

put(params=None, body=None, timeout=UNSET, **kwargs) ¶

patch(params=None, body=None, timeout=UNSET, **kwargs) ¶

delete(params=None, timeout=UNSET, **kwargs) ¶

head(params=None, timeout=UNSET, **kwargs) ¶

aget(params=None, timeout=UNSET, **kwargs) ¶

apost(params=None, body=None, timeout=UNSET, **kwargs) ¶

aput(params=None, body=None, timeout=UNSET, **kwargs) ¶

apatch(params=None, body=None, timeout=UNSET, **kwargs) ¶

adelete(params=None, timeout=UNSET, **kwargs) ¶

ahead(params=None, timeout=UNSET, **kwargs) ¶

paginate(params=None, *, method='GET', max_pages=10, next_url=None, headers=UNSET, client=UNSET, timeout=UNSET, retries=UNSET, backoff=UNSET, on_request=UNSET, on_response=UNSET, auth=UNSET, bearer=UNSET, api_key=UNSET, api_key_header=UNSET) ¶

`polars_api.set_defaults(**options)` ¶

`polars_api.get_defaults()` ¶

`polars_api.reset_defaults(*options)` ¶

`polars_api.defaults(**options)` ¶

`polars_api.Api`¶

`polars_api.api.Api` ¶

`get(params=None, timeout=UNSET, **kwargs)` ¶

`post(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`put(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`patch(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`delete(params=None, timeout=UNSET, **kwargs)` ¶

`head(params=None, timeout=UNSET, **kwargs)` ¶

`aget(params=None, timeout=UNSET, **kwargs)` ¶

`apost(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`aput(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`apatch(params=None, body=None, timeout=UNSET, **kwargs)` ¶

`adelete(params=None, timeout=UNSET, **kwargs)` ¶

`ahead(params=None, timeout=UNSET, **kwargs)` ¶

`paginate(params=None, *, method='GET', max_pages=10, next_url=None, headers=UNSET, client=UNSET, timeout=UNSET, retries=UNSET, backoff=UNSET, on_request=UNSET, on_response=UNSET, auth=UNSET, bearer=UNSET, api_key=UNSET, api_key_header=UNSET)` ¶