Skip to content

API Reference

Auto-generated from source docstrings.


point_collocation

Top-level convenience imports:

import point_collocation as pc

pc.plan(...)     # build a matchup plan
pc.matchup(...)  # execute the plan

Core

plan

point_collocation.core.plan.plan

plan(points: PointsFrame, *, data_source: str = 'earthaccess', source_kwargs: dict[str, Any] | None = None, time_buffer: str | Timedelta | timedelta | int = '0h') -> Plan

Build a :class:Plan previewing which granules cover each point.

Parameters:

Name Type Description Default
points PointsFrame

DataFrame with at minimum lat, lon, and time (or date as an alias). If the column is named date and contains date-only values, the time-of-day is set to noon (12:00 UTC) for matching purposes.

required
data_source str

Data source to search. Currently only "earthaccess" is supported.

'earthaccess'
source_kwargs dict[str, Any] | None

Keyword arguments forwarded to earthaccess.search_data(). Must contain at least "short_name".

None
time_buffer str | Timedelta | timedelta | int

Extra temporal margin when matching a point to a granule. A point at time t matches a granule whose coverage is [begin, end] if begin - buffer ≤ t ≤ end + buffer. Accepts a :class:pandas.Timedelta, :class:datetime.timedelta, or a pandas-parseable string ("12H", "30min", …). Default is "0h" (exact overlap required).

'0h'

Returns:

Type Description
Plan

The planning object; inspect with :meth:Plan.summary and execute with :func:~point_collocation.matchup.

Raises:

Type Description
ValueError

If points is missing required columns, data_source is not recognised, or source_kwargs does not contain "short_name".

ImportError

If the earthaccess package is not installed.

Source code in point_collocation/core/plan.py
def plan(
    points: PointsFrame,
    *,
    data_source: str = "earthaccess",
    source_kwargs: dict[str, Any] | None = None,
    time_buffer: str | pd.Timedelta | datetime.timedelta | int = "0h",
) -> Plan:
    """Build a :class:`Plan` previewing which granules cover each point.

    Parameters
    ----------
    points:
        DataFrame with at minimum ``lat``, ``lon``, and ``time`` (or
        ``date`` as an alias).  If the column is named ``date`` and
        contains date-only values, the time-of-day is set to noon
        (12:00 UTC) for matching purposes.
    data_source:
        Data source to search.  Currently only ``"earthaccess"`` is
        supported.
    source_kwargs:
        Keyword arguments forwarded to ``earthaccess.search_data()``.
        Must contain at least ``"short_name"``.
    time_buffer:
        Extra temporal margin when matching a point to a granule.  A
        point at time *t* matches a granule whose coverage is
        ``[begin, end]`` if ``begin - buffer ≤ t ≤ end + buffer``.
        Accepts a :class:`pandas.Timedelta`, :class:`datetime.timedelta`,
        or a pandas-parseable string (``"12H"``, ``"30min"``, …).
        Default is ``"0h"`` (exact overlap required).

    Returns
    -------
    Plan
        The planning object; inspect with :meth:`Plan.summary` and
        execute with :func:`~point_collocation.matchup`.

    Raises
    ------
    ValueError
        If *points* is missing required columns, *data_source* is not
        recognised, or ``source_kwargs`` does not contain ``"short_name"``.
    ImportError
        If the ``earthaccess`` package is not installed.
    """
    if data_source != "earthaccess":
        raise ValueError(
            f"Unknown data_source {data_source!r}. "
            "Currently only 'earthaccess' is supported."
        )

    # Fail fast on the documented contract: the search cannot run without
    # a collection short_name, so reject before normalising any points.
    if not source_kwargs or "short_name" not in source_kwargs:
        raise ValueError(
            "source_kwargs must be a dict containing at least 'short_name', "
            "e.g. source_kwargs={'short_name': '<collection>'}."
        )

    points = _plan_normalise_time(points)
    _plan_validate_points(points)

    buffer = _parse_time_buffer(time_buffer)
    results, granule_metas = _search_earthaccess(points, source_kwargs=source_kwargs)
    point_granule_map = _match_points_to_granules(points, granule_metas, buffer)

    return Plan(
        points=points,
        results=results,
        granules=granule_metas,
        point_granule_map=point_granule_map,
        source_kwargs=dict(source_kwargs),
        time_buffer=buffer,
    )

matchup

point_collocation.core.engine.matchup

matchup(plan: 'Plan', *, geometry: str, variables: list[str] | None = None, open_method: str | None = None, spatial_method: str | None = None, open_dataset_kwargs: dict | None = None, silent: bool = False, batch_size: int = 10, save_dir: str | PathLike | None = None, granule_range: tuple[int, int] | None = None) -> pd.DataFrame

Extract variables from cloud-hosted granules at the given points.

Parameters:

Name Type Description Default
plan 'Plan'

A :class:~point_collocation.core.plan.Plan object previously built with :func:~point_collocation.plan. Data source and search parameters are taken from the plan. One output row is produced per (point, granule) pair; points with zero matching granules produce a single NaN row.

required
geometry str

Data geometry type. Must be "grid" (L3/gridded, 1-D lat/lon coordinates) or "swath" (L2/swath, 2-D lat/lon arrays). This is a required argument — no default is provided.

required
variables list[str] | None

Variable names to extract from each granule. When provided, overrides any variables stored on the plan. When omitted, falls back to plan.variables. If the resolved list is empty, the output will have no variable columns. Raises :exc:ValueError if a requested variable is not found in the opened dataset.

None
open_method str | None

How granules are opened. "dataset" opens each granule with xarray.open_dataset; "datatree-merge" opens with DataTree and merges groups into a flat dataset. Defaults to "dataset" when geometry="grid" and "datatree-merge" when geometry="swath".

None
spatial_method str | None

Method used for spatial matching. "nearest" uses ds.sel(..., method="nearest") and requires 1-D coordinates (gridded data). "xoak" uses the xoak package for nearest-neighbour matching on 2-D (irregular/swath) grids. Defaults to "nearest" when geometry="grid" and "xoak" when geometry="swath".

None
open_dataset_kwargs dict | None

Optional dictionary of keyword arguments forwarded to xarray.open_dataset for every granule opened during the run. chunks defaults to {} (lazy/dask loading) unless explicitly overridden. engine defaults to "h5netcdf" when no engine key is present in the dict.

None
silent bool

When False (default), a progress message is printed to stdout after every batch_size granules. Set to True to suppress all progress output.

False
batch_size int

Number of granules to process between progress reports (and between intermediate saves when save_dir is set). Defaults to 10.

10
save_dir str | PathLike | None

Directory in which intermediate results are saved as Parquet files after each batch of batch_size granules. The directory is created automatically if it does not exist. Each batch is saved as plan_<first>_<last>.parquet where first and last are the granule indices from the plan. When None (default), no intermediate files are written.

None
granule_range tuple[int, int] | None

Optional (start, end) tuple (both 1-based and inclusive) that restricts processing to a contiguous slice of the matched granules, ordered by granule index. For example, granule_range=(261, 620) resumes from granule 261 after a crash that completed granules 1–260. Progress messages continue to report absolute granule numbers (e.g. "granules 261-270 of 620 processed") so the output is directly comparable with messages from the original run. When None (default), all matched granules are processed.

None

Returns:

Type Description
DataFrame

One row per (point, granule) pair, including a granule_id column and one column per variable. Points with zero matching granules contribute a single NaN row.

Raises:

Type Description
ValueError

If geometry is not "grid" or "swath".

ValueError

If a requested variable is not present in an opened dataset.

ValueError

If geolocation variables cannot be detected unambiguously.

ValueError

If the geolocation array dimensionality does not match geometry.

ValueError

If granule_range is not a 2-tuple of positive integers with start <= end, or if either bound exceeds the number of matched granules in the plan.

ImportError

If spatial_method="xoak" and the xoak package is not installed.

Source code in point_collocation/core/engine.py
def _is_valid_granule_range(granule_range: object) -> bool:
    """Return True when *granule_range* is a (start, end) pair of positive ints with start <= end."""
    try:
        # Tuple-unpack rejects non-iterables and wrong lengths in one step,
        # without the TypeError that len()/indexing would raise on e.g. an int.
        start, end = granule_range  # type: ignore[misc]
    except (TypeError, ValueError):
        return False
    # bool is a subclass of int; reject it so (True, 2) is not silently accepted.
    if isinstance(start, bool) or isinstance(end, bool):
        return False
    return isinstance(start, int) and isinstance(end, int) and 1 <= start <= end


def matchup(
    plan: "Plan",
    *,
    geometry: str,
    variables: list[str] | None = None,
    open_method: str | None = None,
    spatial_method: str | None = None,
    open_dataset_kwargs: dict | None = None,
    silent: bool = False,
    batch_size: int = 10,
    save_dir: str | os.PathLike | None = None,
    granule_range: tuple[int, int] | None = None,
) -> pd.DataFrame:
    """Extract variables from cloud-hosted granules at the given points.

    Parameters
    ----------
    plan:
        A :class:`~point_collocation.core.plan.Plan` object previously
        built with :func:`~point_collocation.plan`.  Data source and
        search parameters are taken from the plan.  One output row is
        produced per (point, granule) pair; points with zero matching
        granules produce a single NaN row.
    geometry:
        Data geometry type.  Must be ``"grid"`` (L3/gridded, 1-D lat/lon
        coordinates) or ``"swath"`` (L2/swath, 2-D lat/lon arrays).
        This is a required argument — no default is provided.
    variables:
        Variable names to extract from each granule.  When provided,
        overrides any variables stored on the plan.  When omitted,
        falls back to ``plan.variables``.  If the resolved list is
        empty, the output will have no variable columns.
        Raises :exc:`ValueError` if a requested variable is not found
        in the opened dataset.
    open_method:
        How granules are opened.  ``"dataset"`` opens each granule with
        ``xarray.open_dataset``; ``"datatree-merge"`` opens with
        DataTree and merges groups into a flat dataset.  Defaults to
        ``"dataset"`` when ``geometry="grid"`` and ``"datatree-merge"``
        when ``geometry="swath"``.
    spatial_method:
        Method used for spatial matching.  ``"nearest"`` uses
        ``ds.sel(..., method="nearest")`` and requires 1-D coordinates
        (gridded data).  ``"xoak"`` uses the ``xoak`` package for
        nearest-neighbour matching on 2-D (irregular/swath) grids.
        Defaults to ``"nearest"`` when ``geometry="grid"`` and
        ``"xoak"`` when ``geometry="swath"``.
    open_dataset_kwargs:
        Optional dictionary of keyword arguments forwarded to
        ``xarray.open_dataset`` for every granule opened during the run.
        ``chunks`` defaults to ``{}`` (lazy/dask loading) unless
        explicitly overridden.  ``engine`` defaults to ``"h5netcdf"``
        when no ``engine`` key is present in the dict.
    silent:
        When ``False`` (default), a progress message is printed to
        stdout after every *batch_size* granules.  Set to ``True`` to
        suppress all progress output.
    batch_size:
        Number of granules to process between progress reports (and
        between intermediate saves when *save_dir* is set).  Defaults
        to ``10``.
    save_dir:
        Directory in which intermediate results are saved as Parquet
        files after each batch of *batch_size* granules.  The directory
        is created automatically if it does not exist.  Each batch is
        saved as ``plan_<first>_<last>.parquet`` where *first* and
        *last* are the granule indices from the plan.  When ``None``
        (default), no intermediate files are written.
    granule_range:
        Optional ``(start, end)`` tuple (both **1-based and inclusive**)
        that restricts processing to a contiguous slice of the matched
        granules, ordered by granule index.  For example,
        ``granule_range=(261, 620)`` resumes from granule 261 after a
        crash that completed granules 1–260.  Progress messages continue
        to report absolute granule numbers (e.g.
        "granules 261-270 of 620 processed") so the output is directly
        comparable with messages from the original run.  When ``None``
        (default), all matched granules are processed.

    Returns
    -------
    pandas.DataFrame
        One row per (point, granule) pair, including a ``granule_id``
        column and one column per variable.  Points with zero matching
        granules contribute a single NaN row.

    Raises
    ------
    ValueError
        If ``geometry`` is not ``"grid"`` or ``"swath"``.
    ValueError
        If a requested variable is not present in an opened dataset.
    ValueError
        If geolocation variables cannot be detected unambiguously.
    ValueError
        If the geolocation array dimensionality does not match *geometry*.
    ValueError
        If ``granule_range`` is not a 2-tuple of positive integers with
        ``start <= end``, or if either bound exceeds the number of matched
        granules in the plan.
    ImportError
        If ``spatial_method="xoak"`` and the ``xoak`` package is not
        installed.
    """
    if geometry not in _VALID_GEOMETRIES:
        raise ValueError(
            f"geometry={geometry!r} is not valid. "
            f"Must be one of {sorted(_VALID_GEOMETRIES)}."
        )

    # Validated via a helper so that malformed inputs (including
    # non-sequences) raise the documented ValueError, never a TypeError.
    if granule_range is not None and not _is_valid_granule_range(granule_range):
        raise ValueError(
            f"granule_range={granule_range!r} is not valid. "
            "Must be a (start, end) tuple of positive integers with start <= end, "
            "both 1-based and inclusive (e.g. granule_range=(261, 620))."
        )

    # Apply geometry-based defaults.
    if open_method is None:
        open_method = "dataset" if geometry == "grid" else "datatree-merge"
    if spatial_method is None:
        spatial_method = "nearest" if geometry == "grid" else "xoak"

    if open_method not in _VALID_OPEN_METHODS:
        raise ValueError(
            f"open_method={open_method!r} is not valid. "
            f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
        )
    if spatial_method not in _VALID_SPATIAL_METHODS:
        raise ValueError(
            f"spatial_method={spatial_method!r} is not valid. "
            f"Must be one of {sorted(_VALID_SPATIAL_METHODS)}."
        )

    # Validate xoak is importable before we start processing granules.
    if spatial_method == "xoak":
        try:
            from xoak.tree_adapters import SklearnKDTreeAdapter  # type: ignore[import-untyped]  # noqa: F401
        except ImportError as exc:
            raise ImportError(
                "The 'xoak' package (and scikit-learn) are required for spatial_method='xoak'. "
                "Install them with: pip install xoak scikit-learn"
            ) from exc

    # Explicit `variables` wins over the plan's stored defaults.
    effective_vars: list[str] = variables if variables is not None else plan.variables
    # `chunks={}` keeps granule loading lazy unless the caller overrides it.
    effective_kwargs = {"chunks": {}, **(open_dataset_kwargs or {})}
    return _execute_plan(
        plan,
        geometry=geometry,
        open_method=open_method,
        spatial_method=spatial_method,
        variables=effective_vars,
        silent=silent,
        batch_size=batch_size,
        save_dir=save_dir,
        granule_range=granule_range,
        **effective_kwargs,
    )

Plan

point_collocation.core.plan.Plan dataclass

A planned matchup: stores the point→granule mapping and search results.

Attributes:

Name Type Description
points DataFrame

Normalised points DataFrame (always has a time column).

results list[Any]

Original earthaccess result objects in search order. Passed directly to earthaccess.open() when executing the plan.

granules list[GranuleMeta]

:class:GranuleMeta for every unique granule returned by the search (parallel with results).

point_granule_map dict[Any, list[int]]

Maps each row index of points to a (possibly empty) list of indices into granules.

variables list[str]

Default variables to extract during :func:~point_collocation.matchup. Can be overridden by passing variables directly to :func:~point_collocation.matchup.

source_kwargs dict[str, Any]

earthaccess search kwargs used to build this plan.

time_buffer Timedelta

Temporal buffer that was applied when matching points to granules.

Source code in point_collocation/core/plan.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
@dataclass
class Plan:
    """A planned matchup: stores the point→granule mapping and search results.

    Attributes
    ----------
    points:
        Normalised points DataFrame (always has a ``time`` column).
    results:
        Original earthaccess result objects in search order.  Passed
        directly to ``earthaccess.open()`` when executing the plan.
    granules:
        :class:`GranuleMeta` for every unique granule returned by the
        search (parallel with *results*).
    point_granule_map:
        Maps each row index of *points* to a (possibly empty) list of
        indices into *granules*.
    variables:
        Default variables to extract during :func:`~point_collocation.matchup`.
        Can be overridden by passing ``variables`` directly to
        :func:`~point_collocation.matchup`.
    source_kwargs:
        earthaccess search kwargs used to build this plan.
    time_buffer:
        Temporal buffer that was applied when matching points to granules.
    """

    points: pd.DataFrame  # normalised points DataFrame; always has a "time" column
    results: list[Any]  # earthaccess result objects, in search order
    granules: list[GranuleMeta]  # per-granule metadata, parallel with `results`
    point_granule_map: dict[Any, list[int]]  # points row index -> indices into `granules`
    variables: list[str] = field(default_factory=list)  # default variables for matchup()
    source_kwargs: dict[str, Any] = field(default_factory=dict)  # earthaccess search kwargs used to build this plan
    time_buffer: pd.Timedelta = field(default_factory=lambda: pd.Timedelta(0))  # temporal buffer used when matching

    # ------------------------------------------------------------------
    # Indexing — plan[0] returns a result object; plan[0:10] returns a
    # subset Plan restricted to the sliced points.
    # ------------------------------------------------------------------

    def __getitem__(self, idx: int | slice) -> "Plan | Any":
        """Index into the plan.

        Parameters
        ----------
        idx:
            * **Integer** — returns the earthaccess result object at that
              position (``self.results[idx]``), so ``plan[0]`` remains
              usable with :meth:`open_dataset`.
            * **Slice** — returns a new :class:`Plan` restricted to the
              sliced points (``points.iloc[idx]``); ``granules``,
              ``results``, and ``point_granule_map`` are filtered and
              re-indexed to match.  Handy for dry-running part of a large
              plan::

                  res = pc.matchup(plan[0:10], geometry="grid", variables=["avw"])
        """
        if isinstance(idx, int):
            return self.results[idx]

        # --- Slice: build a reduced Plan around the selected points ---
        sliced_points = self.points.iloc[idx]
        kept_pt_indices = list(sliced_points.index)

        # Unique granule indices referenced by the kept points, ascending.
        kept_g_indices = sorted(
            {
                g_idx
                for pt_idx in kept_pt_indices
                for g_idx in self.point_granule_map.get(pt_idx, [])
            }
        )

        # Old granule index -> position in the reduced lists.
        remap = {old: new for new, old in enumerate(kept_g_indices)}

        # Rebuild granule metadata (result_index renumbered from 0) and
        # gather only the result objects those granules reference.
        reduced_granules: list[GranuleMeta] = []
        reduced_results: list[Any] = []
        for new_idx, old_idx in enumerate(kept_g_indices):
            meta = self.granules[old_idx]
            reduced_granules.append(
                GranuleMeta(
                    granule_id=meta.granule_id,
                    begin=meta.begin,
                    end=meta.end,
                    bbox=meta.bbox,
                    result_index=new_idx,
                )
            )
            reduced_results.append(self.results[meta.result_index])

        reduced_map: dict[Any, list[int]] = {
            pt_idx: [remap[g] for g in self.point_granule_map.get(pt_idx, [])]
            for pt_idx in kept_pt_indices
        }

        return Plan(
            points=sliced_points,
            results=reduced_results,
            granules=reduced_granules,
            point_granule_map=reduced_map,
            variables=list(self.variables),
            source_kwargs=dict(self.source_kwargs),
            time_buffer=self.time_buffer,
        )

    # ------------------------------------------------------------------
    # Dataset opening helpers
    # ------------------------------------------------------------------

    def open_dataset(
        self,
        result: Any,
        geometry: str | None = None,
        open_method: str | None = None,
        open_dataset_kwargs: dict[str, Any] | None = None,
    ) -> "xr.Dataset":
        """Open one granule result as an :class:`xarray.Dataset`.

        Parameters
        ----------
        result:
            A single earthaccess result object, typically ``plan[n]``.
        geometry:
            ``"grid"`` (L3/gridded) or ``"swath"`` (L2/swath).  Only used
            to pick the default *open_method* when that is not given.
        open_method:
            ``"dataset"`` opens with a plain ``xarray.open_dataset`` call
            (default for ``None``/``"grid"`` geometry).
            ``"datatree-merge"`` opens as a DataTree and flattens all
            groups into one dataset (default for ``"swath"``).
        open_dataset_kwargs:
            Extra keyword arguments for ``xarray.open_dataset`` /
            ``xarray.open_datatree``.  ``chunks`` defaults to ``{}``
            (lazy/dask loading) and ``engine`` to ``"h5netcdf"`` unless
            supplied.

        Returns
        -------
        xarray.Dataset
        """
        from point_collocation.core.engine import (
            _VALID_GEOMETRIES,
            _VALID_OPEN_METHODS,
            _merge_datatree,
            _open_datatree,
        )

        if geometry is not None and geometry not in _VALID_GEOMETRIES:
            raise ValueError(
                f"geometry={geometry!r} is not valid. "
                f"Must be one of {sorted(_VALID_GEOMETRIES)}."
            )

        if open_method is None:
            # Swath granules are grouped files; everything else opens flat.
            open_method = "datatree-merge" if geometry == "swath" else "dataset"
        if open_method not in _VALID_OPEN_METHODS:
            raise ValueError(
                f"open_method={open_method!r} is not valid. "
                f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
            )

        try:
            import earthaccess  # type: ignore[import-untyped]
        except ImportError as exc:
            raise ImportError(
                "The 'earthaccess' package is required. "
                "Install it with: pip install earthaccess"
            ) from exc

        import xarray as xr

        # Caller-supplied kwargs win; fill in the lazy-loading defaults.
        resolved_kwargs = dict(open_dataset_kwargs or {})
        resolved_kwargs.setdefault("chunks", {})
        resolved_kwargs.setdefault("engine", "h5netcdf")

        file_objs = earthaccess.open([result], pqdm_kwargs={"disable": True})
        if len(file_objs) != 1:
            raise RuntimeError(
                f"Expected 1 file object from earthaccess.open, got {len(file_objs)}."
            )
        file_obj = file_objs[0]

        if open_method != "datatree-merge":
            return xr.open_dataset(file_obj, **resolved_kwargs)  # type: ignore[arg-type]

        tree = _open_datatree(file_obj, resolved_kwargs)
        try:
            return _merge_datatree(tree)
        finally:
            # Close the tree even if merging raises.
            if hasattr(tree, "close"):
                tree.close()

    def open_mfdataset(
        self,
        results: "list[Any] | Plan",
        geometry: str | None = None,
        open_method: str | None = None,
        open_dataset_kwargs: dict[str, Any] | None = None,
    ) -> "xr.Dataset":
        """Open several granule results as one :class:`xarray.Dataset`.

        Parameters
        ----------
        results:
            Earthaccess result objects to open — a plain list, or a
            :class:`Plan` (e.g. ``plan[0:2]``), in which case its
            ``results`` attribute is used.
        geometry:
            ``"grid"`` (L3/gridded) or ``"swath"`` (L2/swath).  Only used
            to pick the default *open_method* when that is not given.
        open_method:
            ``"dataset"`` combines all files via ``xarray.open_mfdataset``
            (default for ``None``/``"grid"`` geometry).
            ``"datatree-merge"`` flattens each granule's DataTree into a
            dataset and concatenates the granules along a new ``granule``
            dimension (default for ``"swath"``).
        open_dataset_kwargs:
            Extra keyword arguments for ``xarray.open_mfdataset`` /
            ``xarray.open_datatree``.  ``chunks`` defaults to ``{}``
            (lazy/dask loading) and ``engine`` to ``"h5netcdf"`` unless
            supplied.

        Returns
        -------
        xarray.Dataset
        """
        from point_collocation.core.engine import (
            _VALID_GEOMETRIES,
            _VALID_OPEN_METHODS,
            _merge_datatree,
            _open_datatree,
        )

        if geometry is not None and geometry not in _VALID_GEOMETRIES:
            raise ValueError(
                f"geometry={geometry!r} is not valid. "
                f"Must be one of {sorted(_VALID_GEOMETRIES)}."
            )

        if open_method is None:
            # Swath granules are grouped files; everything else opens flat.
            open_method = "datatree-merge" if geometry == "swath" else "dataset"
        if open_method not in _VALID_OPEN_METHODS:
            raise ValueError(
                f"open_method={open_method!r} is not valid. "
                f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
            )

        try:
            import earthaccess  # type: ignore[import-untyped]
        except ImportError as exc:
            raise ImportError(
                "The 'earthaccess' package is required. "
                "Install it with: pip install earthaccess"
            ) from exc

        import xarray as xr

        # Caller-supplied kwargs win; fill in the lazy-loading defaults.
        resolved_kwargs = dict(open_dataset_kwargs or {})
        resolved_kwargs.setdefault("chunks", {})
        resolved_kwargs.setdefault("engine", "h5netcdf")

        to_open = results.results if isinstance(results, Plan) else list(results)
        file_objs = earthaccess.open(to_open, pqdm_kwargs={"disable": True})

        if open_method != "datatree-merge":
            return xr.open_mfdataset(file_objs, **resolved_kwargs)  # type: ignore[arg-type]

        # Flatten each granule's DataTree, then stack everything along a
        # fresh "granule" dimension.
        flattened: list[xr.Dataset] = []
        for file_obj in file_objs:
            tree = _open_datatree(file_obj, resolved_kwargs)
            try:
                flattened.append(_merge_datatree(tree))
            finally:
                # Close the tree even if merging raises.
                if hasattr(tree, "close"):
                    tree.close()
        if not flattened:
            return xr.Dataset()
        return xr.concat(flattened, dim="granule")

    # ------------------------------------------------------------------
    # Variable inspection
    # ------------------------------------------------------------------

    def show_variables(
        self,
        geometry: str,
        open_method: str | None = None,
        open_dataset_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Open the first granule and print its dimensions and variables.

        Uses :meth:`open_dataset` (or a DataTree for
        ``open_method="datatree-merge"``) to load the first result in the
        plan, then prints the dataset dimensions, data variable names, and
        geolocation detection results.  This lets users discover available
        variable names before running a full :func:`~point_collocation.matchup`.

        Parameters
        ----------
        geometry:
            Data geometry type.  Must be ``"grid"`` (L3/gridded, 1-D
            lat/lon coordinates) or ``"swath"`` (L2/swath, 2-D lat/lon
            arrays).  This is a required argument — no default is provided.
        open_method:
            How to open the granule.  ``"dataset"`` uses a plain
            ``xarray.open_dataset`` call.  ``"datatree-merge"`` opens as a
            DataTree, merges into a flat dataset, then prints the merged
            summary followed by group details at the end.  Defaults to
            ``"dataset"`` when ``geometry="grid"`` and ``"datatree-merge"``
            when ``geometry="swath"``.
        open_dataset_kwargs:
            Keyword arguments forwarded to ``xarray.open_dataset`` when
            opening the first granule.  Passed unchanged to
            :meth:`open_dataset`.

        Raises
        ------
        ValueError
            If *geometry* or *open_method* is not a recognised value, or
            the plan contains no granules.
        ImportError
            If the ``earthaccess`` package is not installed.
        RuntimeError
            If ``earthaccess.open`` does not return exactly one file object.
        """
        # Imported lazily so the engine module is only required when this
        # inspection method is actually used.
        from point_collocation.core.engine import (
            _GEOLOC_PAIRS,
            _VALID_GEOMETRIES,
            _VALID_OPEN_METHODS,
            _merge_datatree,
            _open_datatree,
        )

        if geometry not in _VALID_GEOMETRIES:
            raise ValueError(
                f"geometry={geometry!r} is not valid. "
                f"Must be one of {sorted(_VALID_GEOMETRIES)}."
            )

        # Default open method follows geometry (see docstring).
        if open_method is None:
            open_method = "dataset" if geometry == "grid" else "datatree-merge"

        if open_method not in _VALID_OPEN_METHODS:
            raise ValueError(
                f"open_method={open_method!r} is not valid. "
                f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
            )

        if not self.results:
            raise ValueError("No granules in plan — cannot show variables.")

        import xarray as xr

        # chunks={} requests lazy (dask-backed) loading unless the caller
        # explicitly overrides it; the backend engine defaults to "h5netcdf".
        kwargs: dict[str, Any] = {"chunks": {}, **(open_dataset_kwargs or {})}
        if "engine" not in kwargs:
            kwargs["engine"] = "h5netcdf"

        # earthaccess is an optional dependency; import only when needed.
        try:
            import earthaccess  # type: ignore[import-untyped]
        except ImportError as exc:
            raise ImportError(
                "The 'earthaccess' package is required. "
                "Install it with: pip install earthaccess"
            ) from exc

        # Only the first granule in the plan is opened for inspection.
        file_objs = earthaccess.open([self.results[0]], pqdm_kwargs={"disable": True})
        if len(file_objs) != 1:
            raise RuntimeError(
                f"Expected 1 file object from earthaccess.open, got {len(file_objs)}."
            )
        file_obj = file_objs[0]

        print(f"geometry     : {geometry!r}")
        print(f"open_method  : {open_method!r}")

        if open_method == "datatree-merge":
            # Open as DataTree and merge for the summary view.
            dt = _open_datatree(file_obj, kwargs)
            ds_flat = _merge_datatree(dt)

            # Print merged summary first.
            print(f"Dimensions : {dict(ds_flat.sizes)}")
            print(f"Variables  : {list(ds_flat.data_vars)}")
        else:
            ds_flat = xr.open_dataset(file_obj, **kwargs)  # type: ignore[arg-type]
            print(f"Dimensions : {dict(ds_flat.sizes)}")
            print(f"Variables  : {list(ds_flat.data_vars)}")

        # Geolocation detection results.  A pair counts as present whether
        # the names appear as coordinates or as plain data variables.
        found_pairs: list[tuple[str, str]] = []
        for lon_name, lat_name in _GEOLOC_PAIRS:
            has_lon = lon_name in ds_flat.coords or lon_name in ds_flat.data_vars
            has_lat = lat_name in ds_flat.coords or lat_name in ds_flat.data_vars
            if has_lon and has_lat:
                found_pairs.append((lon_name, lat_name))

        if len(found_pairs) == 0:
            # Suggest the other open method — it may surface different
            # (group-nested or flattened) variables.
            alt_open_method = "datatree-merge" if open_method == "dataset" else "dataset"
            alt = f"plan.show_variables(geometry={geometry!r}, open_method={alt_open_method!r})"
            print(
                f"\nGeolocation: NONE detected with open_method={open_method!r}. "
                f"Try {alt}."
            )
        elif len(found_pairs) == 1:
            lon_n, lat_n = found_pairs[0]
            lon_var = ds_flat.coords[lon_n] if lon_n in ds_flat.coords else ds_flat[lon_n]
            lat_var = ds_flat.coords[lat_n] if lat_n in ds_flat.coords else ds_flat[lat_n]
            print(
                f"\nGeolocation: ({lon_n!r}, {lat_n!r}) — "
                f"lon dims={tuple(lon_var.dims)}, lat dims={tuple(lat_var.dims)}"
            )
        else:
            print(f"\nGeolocation: ambiguous — detected pairs: {found_pairs}")

        # For datatree-merge, print group details at the end.
        if open_method == "datatree-merge":
            print("\nDataTree groups (detail):")
            try:
                # xarray DataTree API (>= 2024.x).
                for node in dt.subtree:  # type: ignore[union-attr]
                    path = node.path if hasattr(node, "path") else str(node.name)
                    ds_node = node.ds
                    if ds_node is not None:
                        dims_str = dict(ds_node.sizes)
                        vars_list = list(ds_node.data_vars)
                        print(f"  {path or '/'}")
                        print(f"    Dimensions : {dims_str}")
                        print(f"    Variables  : {vars_list}")
            except AttributeError:
                # datatree package API.
                for path, node in dt.items():  # type: ignore[union-attr]
                    ds_node = node.ds
                    if ds_node is not None:
                        dims_str = dict(ds_node.sizes)
                        vars_list = list(ds_node.data_vars)
                        print(f"  {path or '/'}")
                        print(f"    Dimensions : {dims_str}")
                        print(f"    Variables  : {vars_list}")

    # ------------------------------------------------------------------
    # Summary
    # ------------------------------------------------------------------

    def summary(self, n: int | None = None) -> None:
        """Print a human-readable summary of the plan.

        Parameters
        ----------
        n:
            Number of points to show in the per-point section.
            Defaults to ``min(5, len(self.points))``.
            ``0`` or negative values suppress the per-point section.
        """
        if n is None:
            n = min(5, len(self.points))
        elif n < 0:
            n = 0

        zero_match = sum(
            1 for g_list in self.point_granule_map.values() if len(g_list) == 0
        )
        multi_match = sum(
            1 for g_list in self.point_granule_map.values() if len(g_list) > 1
        )

        matched_granule_count = len(
            {g_idx for g_list in self.point_granule_map.values() for g_idx in g_list}
        )

        lines: list[str] = [
            f"Plan: {len(self.points)} points → {matched_granule_count} unique granule(s)",
            f"  Points with 0 matches : {zero_match}",
            f"  Points with >1 matches: {multi_match}",
            f"  Time buffer: {self.time_buffer}",
        ]

        n_show = min(n, len(self.points))
        if n_show > 0:
            lines.append("")
            lines.append(f"First {n_show} point(s):")
            for pt_idx, row in self.points.head(n_show).iterrows():
                g_indices = self.point_granule_map.get(pt_idx, [])
                lines.append(
                    f"  [{pt_idx}] lat={row['lat']:.4f}, lon={row['lon']:.4f}, "
                    f"time={row['time']}: {len(g_indices)} match(es)"
                )
                for g_idx in g_indices:
                    lines.append(f"    → {self.granules[g_idx].granule_id}")

        print("\n".join(lines))

summary

summary(n: int | None = None) -> None

Print a human-readable summary of the plan.

Parameters:

Name Type Description Default
n int | None

Number of points to show in the per-point section. Defaults to min(5, len(self.points)). 0 or negative values suppress the per-point section.

None
Source code in point_collocation/core/plan.py
def summary(self, n: int | None = None) -> None:
    """Print a human-readable summary of the plan.

    Parameters
    ----------
    n:
        Number of points to show in the per-point section.
        Defaults to ``min(5, len(self.points))``.
        ``0`` or negative values suppress the per-point section.
    """
    if n is None:
        n = min(5, len(self.points))
    elif n < 0:
        n = 0

    zero_match = sum(
        1 for g_list in self.point_granule_map.values() if len(g_list) == 0
    )
    multi_match = sum(
        1 for g_list in self.point_granule_map.values() if len(g_list) > 1
    )

    matched_granule_count = len(
        {g_idx for g_list in self.point_granule_map.values() for g_idx in g_list}
    )

    lines: list[str] = [
        f"Plan: {len(self.points)} points → {matched_granule_count} unique granule(s)",
        f"  Points with 0 matches : {zero_match}",
        f"  Points with >1 matches: {multi_match}",
        f"  Time buffer: {self.time_buffer}",
    ]

    n_show = min(n, len(self.points))
    if n_show > 0:
        lines.append("")
        lines.append(f"First {n_show} point(s):")
        for pt_idx, row in self.points.head(n_show).iterrows():
            g_indices = self.point_granule_map.get(pt_idx, [])
            lines.append(
                f"  [{pt_idx}] lat={row['lat']:.4f}, lon={row['lon']:.4f}, "
                f"time={row['time']}: {len(g_indices)} match(es)"
            )
            for g_idx in g_indices:
                lines.append(f"    → {self.granules[g_idx].granule_id}")

    print("\n".join(lines))

show_variables

show_variables(geometry: str, open_method: str | None = None, open_dataset_kwargs: dict[str, Any] | None = None) -> None

Open the first granule and print its dimensions and variables.

Uses :meth:open_dataset (or a DataTree for open_method="datatree-merge") to load the first result in the plan, then prints the dataset dimensions, data variable names, and geolocation detection results. This lets users discover available variable names before running a full :func:~point_collocation.matchup.

Parameters:

Name Type Description Default
geometry str

Data geometry type. Must be "grid" (L3/gridded, 1-D lat/lon coordinates) or "swath" (L2/swath, 2-D lat/lon arrays). This is a required argument — no default is provided.

required
open_method str | None

How to open the granule. "dataset" uses a plain xarray.open_dataset call. "datatree-merge" opens as a DataTree, merges into a flat dataset, then prints the merged summary followed by group details at the end. Defaults to "dataset" when geometry="grid" and "datatree-merge" when geometry="swath".

None
open_dataset_kwargs dict[str, Any] | None

Keyword arguments forwarded to xarray.open_dataset when opening the first granule. Passed unchanged to :meth:open_dataset.

None

Raises:

Type Description
ValueError

If the plan contains no granules.

Source code in point_collocation/core/plan.py
def show_variables(
    self,
    geometry: str,
    open_method: str | None = None,
    open_dataset_kwargs: dict[str, Any] | None = None,
) -> None:
    """Open the first granule and print its dimensions and variables.

    Uses :meth:`open_dataset` (or a DataTree for
    ``open_method="datatree-merge"``) to load the first result in the
    plan, then prints the dataset dimensions, data variable names, and
    geolocation detection results.  This lets users discover available
    variable names before running a full :func:`~point_collocation.matchup`.

    Parameters
    ----------
    geometry:
        Data geometry type.  Must be ``"grid"`` (L3/gridded, 1-D
        lat/lon coordinates) or ``"swath"`` (L2/swath, 2-D lat/lon
        arrays).  This is a required argument — no default is provided.
    open_method:
        How to open the granule.  ``"dataset"`` uses a plain
        ``xarray.open_dataset`` call.  ``"datatree-merge"`` opens as a
        DataTree, merges into a flat dataset, then prints the merged
        summary followed by group details at the end.  Defaults to
        ``"dataset"`` when ``geometry="grid"`` and ``"datatree-merge"``
        when ``geometry="swath"``.
    open_dataset_kwargs:
        Keyword arguments forwarded to ``xarray.open_dataset`` when
        opening the first granule.  Passed unchanged to
        :meth:`open_dataset`.

    Raises
    ------
    ValueError
        If *geometry* or *open_method* is not a recognised value, or
        the plan contains no granules.
    ImportError
        If the ``earthaccess`` package is not installed.
    RuntimeError
        If ``earthaccess.open`` does not return exactly one file object.
    """
    # Imported lazily so the engine module is only required when this
    # inspection method is actually used.
    from point_collocation.core.engine import (
        _GEOLOC_PAIRS,
        _VALID_GEOMETRIES,
        _VALID_OPEN_METHODS,
        _merge_datatree,
        _open_datatree,
    )

    if geometry not in _VALID_GEOMETRIES:
        raise ValueError(
            f"geometry={geometry!r} is not valid. "
            f"Must be one of {sorted(_VALID_GEOMETRIES)}."
        )

    # Default open method follows geometry (see docstring).
    if open_method is None:
        open_method = "dataset" if geometry == "grid" else "datatree-merge"

    if open_method not in _VALID_OPEN_METHODS:
        raise ValueError(
            f"open_method={open_method!r} is not valid. "
            f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
        )

    if not self.results:
        raise ValueError("No granules in plan — cannot show variables.")

    import xarray as xr

    # chunks={} requests lazy (dask-backed) loading unless the caller
    # explicitly overrides it; the backend engine defaults to "h5netcdf".
    kwargs: dict[str, Any] = {"chunks": {}, **(open_dataset_kwargs or {})}
    if "engine" not in kwargs:
        kwargs["engine"] = "h5netcdf"

    # earthaccess is an optional dependency; import only when needed.
    try:
        import earthaccess  # type: ignore[import-untyped]
    except ImportError as exc:
        raise ImportError(
            "The 'earthaccess' package is required. "
            "Install it with: pip install earthaccess"
        ) from exc

    # Only the first granule in the plan is opened for inspection.
    file_objs = earthaccess.open([self.results[0]], pqdm_kwargs={"disable": True})
    if len(file_objs) != 1:
        raise RuntimeError(
            f"Expected 1 file object from earthaccess.open, got {len(file_objs)}."
        )
    file_obj = file_objs[0]

    print(f"geometry     : {geometry!r}")
    print(f"open_method  : {open_method!r}")

    if open_method == "datatree-merge":
        # Open as DataTree and merge for the summary view.
        dt = _open_datatree(file_obj, kwargs)
        ds_flat = _merge_datatree(dt)

        # Print merged summary first.
        print(f"Dimensions : {dict(ds_flat.sizes)}")
        print(f"Variables  : {list(ds_flat.data_vars)}")
    else:
        ds_flat = xr.open_dataset(file_obj, **kwargs)  # type: ignore[arg-type]
        print(f"Dimensions : {dict(ds_flat.sizes)}")
        print(f"Variables  : {list(ds_flat.data_vars)}")

    # Geolocation detection results.  A pair counts as present whether
    # the names appear as coordinates or as plain data variables.
    found_pairs: list[tuple[str, str]] = []
    for lon_name, lat_name in _GEOLOC_PAIRS:
        has_lon = lon_name in ds_flat.coords or lon_name in ds_flat.data_vars
        has_lat = lat_name in ds_flat.coords or lat_name in ds_flat.data_vars
        if has_lon and has_lat:
            found_pairs.append((lon_name, lat_name))

    if len(found_pairs) == 0:
        # Suggest the other open method — it may surface different
        # (group-nested or flattened) variables.
        alt_open_method = "datatree-merge" if open_method == "dataset" else "dataset"
        alt = f"plan.show_variables(geometry={geometry!r}, open_method={alt_open_method!r})"
        print(
            f"\nGeolocation: NONE detected with open_method={open_method!r}. "
            f"Try {alt}."
        )
    elif len(found_pairs) == 1:
        lon_n, lat_n = found_pairs[0]
        lon_var = ds_flat.coords[lon_n] if lon_n in ds_flat.coords else ds_flat[lon_n]
        lat_var = ds_flat.coords[lat_n] if lat_n in ds_flat.coords else ds_flat[lat_n]
        print(
            f"\nGeolocation: ({lon_n!r}, {lat_n!r}) — "
            f"lon dims={tuple(lon_var.dims)}, lat dims={tuple(lat_var.dims)}"
        )
    else:
        print(f"\nGeolocation: ambiguous — detected pairs: {found_pairs}")

    # For datatree-merge, print group details at the end.
    if open_method == "datatree-merge":
        print("\nDataTree groups (detail):")
        try:
            # xarray DataTree API (>= 2024.x).
            for node in dt.subtree:  # type: ignore[union-attr]
                path = node.path if hasattr(node, "path") else str(node.name)
                ds_node = node.ds
                if ds_node is not None:
                    dims_str = dict(ds_node.sizes)
                    vars_list = list(ds_node.data_vars)
                    print(f"  {path or '/'}")
                    print(f"    Dimensions : {dims_str}")
                    print(f"    Variables  : {vars_list}")
        except AttributeError:
            # datatree package API.
            for path, node in dt.items():  # type: ignore[union-attr]
                ds_node = node.ds
                if ds_node is not None:
                    dims_str = dict(ds_node.sizes)
                    vars_list = list(ds_node.data_vars)
                    print(f"  {path or '/'}")
                    print(f"    Dimensions : {dims_str}")
                    print(f"    Variables  : {vars_list}")

open_dataset

open_dataset(result: Any, geometry: str | None = None, open_method: str | None = None, open_dataset_kwargs: dict[str, Any] | None = None) -> 'xr.Dataset'

Open a single granule result as an :class:xarray.Dataset.

Parameters:

Name Type Description Default
result Any

A single earthaccess result object, typically obtained via plan[n].

required
geometry str | None

Data geometry type. "grid" (L3/gridded) or "swath" (L2/swath). When provided, determines the default open_method if open_method is not given explicitly.

None
open_method str | None

How to open the granule. "dataset" uses a plain xarray.open_dataset call (the default when geometry is None or "grid"). "datatree-merge" opens as a DataTree and merges all groups into a flat dataset (the default when geometry is "swath").

None
open_dataset_kwargs dict[str, Any] | None

Keyword arguments forwarded to xarray.open_dataset or xarray.open_datatree. chunks defaults to {} (lazy/dask loading) unless explicitly overridden. engine defaults to "h5netcdf" when not specified.

None

Returns:

Type Description
Dataset
Source code in point_collocation/core/plan.py
def open_dataset(
    self,
    result: Any,
    geometry: str | None = None,
    open_method: str | None = None,
    open_dataset_kwargs: dict[str, Any] | None = None,
) -> "xr.Dataset":
    """Open a single granule result as an :class:`xarray.Dataset`.

    Parameters
    ----------
    result:
        A single earthaccess result object, typically obtained via
        ``plan[n]``.
    geometry:
        Data geometry type.  ``"grid"`` (L3/gridded) or ``"swath"``
        (L2/swath).  When provided, determines the default
        ``open_method`` if *open_method* is not given explicitly.
    open_method:
        How to open the granule.  ``"dataset"`` uses a plain
        ``xarray.open_dataset`` call (the default when *geometry* is
        ``None`` or ``"grid"``).  ``"datatree-merge"`` opens as a
        DataTree and merges all groups into a flat dataset (the
        default when *geometry* is ``"swath"``).
    open_dataset_kwargs:
        Keyword arguments forwarded to ``xarray.open_dataset`` or
        ``xarray.open_datatree``.  ``chunks`` defaults to ``{}``
        (lazy/dask loading) unless explicitly overridden.  ``engine``
        defaults to ``"h5netcdf"`` when not specified.

    Returns
    -------
    xarray.Dataset

    Raises
    ------
    ValueError
        If *geometry* or *open_method* is not a recognised value.
    ImportError
        If the ``earthaccess`` package is not installed.
    RuntimeError
        If ``earthaccess.open`` does not return exactly one file object.
    """
    from point_collocation.core.engine import (
        _VALID_GEOMETRIES,
        _VALID_OPEN_METHODS,
        _merge_datatree,
        _open_datatree,
    )

    if geometry is not None and geometry not in _VALID_GEOMETRIES:
        raise ValueError(
            f"geometry={geometry!r} is not valid. "
            f"Must be one of {sorted(_VALID_GEOMETRIES)}."
        )

    # Resolve open_method default from geometry.
    if open_method is None:
        open_method = "datatree-merge" if geometry == "swath" else "dataset"

    if open_method not in _VALID_OPEN_METHODS:
        raise ValueError(
            f"open_method={open_method!r} is not valid. "
            f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
        )

    # earthaccess is an optional dependency; import only when needed.
    try:
        import earthaccess  # type: ignore[import-untyped]
    except ImportError as exc:
        raise ImportError(
            "The 'earthaccess' package is required. "
            "Install it with: pip install earthaccess"
        ) from exc

    import xarray as xr

    # Lazy loading by default; caller-supplied kwargs win over defaults.
    kwargs = {"chunks": {}, **(open_dataset_kwargs or {})}
    if "engine" not in kwargs:
        kwargs["engine"] = "h5netcdf"

    file_objs = earthaccess.open([result], pqdm_kwargs={"disable": True})
    if len(file_objs) != 1:
        raise RuntimeError(
            f"Expected 1 file object from earthaccess.open, got {len(file_objs)}."
        )

    if open_method == "datatree-merge":
        dt = _open_datatree(file_objs[0], kwargs)
        try:
            return _merge_datatree(dt)
        finally:
            # NOTE(review): closing the DataTree right after merging assumes
            # the merged dataset no longer needs the underlying file handles —
            # confirm this holds for lazily-loaded (dask-backed) variables.
            if hasattr(dt, "close"):
                dt.close()

    return xr.open_dataset(file_objs[0], **kwargs)  # type: ignore[arg-type]

open_mfdataset

open_mfdataset(results: 'list[Any] | Plan', geometry: str | None = None, open_method: str | None = None, open_dataset_kwargs: dict[str, Any] | None = None) -> 'xr.Dataset'

Open multiple granule results as a single :class:xarray.Dataset.

Parameters:

Name Type Description Default
results 'list[Any] | Plan'

A list of earthaccess result objects, or a :class:Plan (e.g. plan[0:2]). When a :class:Plan is passed its results attribute is used.

required
geometry str | None

Data geometry type. "grid" (L3/gridded) or "swath" (L2/swath). When provided, determines the default open_method if open_method is not given explicitly.

None
open_method str | None

How to open each granule. "dataset" uses xarray.open_mfdataset across all file objects (the default when geometry is None or "grid"). "datatree-merge" opens each granule as a DataTree, merges its groups into a flat dataset, then concatenates all granules along a new granule dimension (the default when geometry is "swath").

None
open_dataset_kwargs dict[str, Any] | None

Keyword arguments forwarded to xarray.open_mfdataset or xarray.open_datatree. chunks defaults to {} (lazy/dask loading) unless explicitly overridden. engine defaults to "h5netcdf" when not specified.

None

Returns:

Type Description
Dataset
Source code in point_collocation/core/plan.py
def open_mfdataset(
    self,
    results: "list[Any] | Plan",
    geometry: str | None = None,
    open_method: str | None = None,
    open_dataset_kwargs: dict[str, Any] | None = None,
) -> "xr.Dataset":
    """Open multiple granule results as a single :class:`xarray.Dataset`.

    Parameters
    ----------
    results:
        A list of earthaccess result objects, or a :class:`Plan`
        (e.g. ``plan[0:2]``).  When a :class:`Plan` is passed its
        ``results`` attribute is used.
    geometry:
        Data geometry type.  ``"grid"`` (L3/gridded) or ``"swath"``
        (L2/swath).  When provided, determines the default
        ``open_method`` if *open_method* is not given explicitly.
    open_method:
        How to open each granule.  ``"dataset"`` uses
        ``xarray.open_mfdataset`` across all file objects (the default
        when *geometry* is ``None`` or ``"grid"``).
        ``"datatree-merge"`` opens each granule as a DataTree, merges
        its groups into a flat dataset, then concatenates all granules
        along a new ``granule`` dimension (the default when *geometry*
        is ``"swath"``).
    open_dataset_kwargs:
        Keyword arguments forwarded to ``xarray.open_mfdataset`` or
        ``xarray.open_datatree``.  ``chunks`` defaults to ``{}``
        (lazy/dask loading) unless explicitly overridden.  ``engine``
        defaults to ``"h5netcdf"`` when not specified.

    Returns
    -------
    xarray.Dataset

    Raises
    ------
    ValueError
        If *geometry* or *open_method* is not a recognised value.
    ImportError
        If the ``earthaccess`` package is not installed.
    """
    from point_collocation.core.engine import (
        _VALID_GEOMETRIES,
        _VALID_OPEN_METHODS,
        _merge_datatree,
        _open_datatree,
    )

    if geometry is not None and geometry not in _VALID_GEOMETRIES:
        raise ValueError(
            f"geometry={geometry!r} is not valid. "
            f"Must be one of {sorted(_VALID_GEOMETRIES)}."
        )

    # Resolve open_method default from geometry.
    if open_method is None:
        open_method = "datatree-merge" if geometry == "swath" else "dataset"

    if open_method not in _VALID_OPEN_METHODS:
        raise ValueError(
            f"open_method={open_method!r} is not valid. "
            f"Must be one of {sorted(_VALID_OPEN_METHODS)}."
        )

    # earthaccess is an optional dependency; import only when needed.
    try:
        import earthaccess  # type: ignore[import-untyped]
    except ImportError as exc:
        raise ImportError(
            "The 'earthaccess' package is required. "
            "Install it with: pip install earthaccess"
        ) from exc

    import xarray as xr

    # Lazy loading by default; caller-supplied kwargs win over defaults.
    kwargs = {"chunks": {}, **(open_dataset_kwargs or {})}
    if "engine" not in kwargs:
        kwargs["engine"] = "h5netcdf"

    # Accept either a Plan (use its results) or a plain list of results.
    result_list = results.results if isinstance(results, Plan) else list(results)
    file_objs = earthaccess.open(result_list, pqdm_kwargs={"disable": True})

    if open_method == "datatree-merge":
        # Open each granule as a DataTree, merge its groups, then
        # concatenate all granule datasets along a new "granule" dim.
        # NOTE(review): xr.concat assumes the per-granule merged datasets
        # have compatible dims/variables — confirm for mixed collections.
        merged_datasets: list[xr.Dataset] = []
        for file_obj in file_objs:
            dt = _open_datatree(file_obj, kwargs)
            try:
                merged_datasets.append(_merge_datatree(dt))
            finally:
                if hasattr(dt, "close"):
                    dt.close()
        if not merged_datasets:
            # No inputs → empty dataset rather than a concat error.
            return xr.Dataset()
        return xr.concat(merged_datasets, dim="granule")

    return xr.open_mfdataset(file_objs, **kwargs)  # type: ignore[arg-type]

IO / Adapters

point_collocation.adapters

Source adapters that normalise heterogeneous inputs into the SourceProtocol.

Built-in adapters

earthaccess : wraps file-like objects returned by earthaccess.open()

Future adapters (not yet implemented)

- stac : STAC item assets
- url : plain HTTPS URLs
- local : local file paths

SourceAdapter

Bases: ABC

Abstract base for source adapters.

Subclass this to add support for a new data source. The core engine only calls :meth:open_dataset; everything else is internal to the adapter.

Source code in point_collocation/adapters/base.py
class SourceAdapter(ABC):
    """Abstract base class for data-source adapters.

    Support for a new data source is added by subclassing.  The core
    engine invokes only :meth:`open_dataset`; any further machinery is
    an internal concern of the concrete adapter.
    """

    @abstractmethod
    def open_dataset(self, **kwargs: object) -> object:
        """Produce an ``xarray.Dataset`` for this source.

        Parameters
        ----------
        **kwargs:
            Forwarded verbatim to ``xarray.open_dataset``.
        """
        raise NotImplementedError  # pragma: no cover

open_dataset abstractmethod

open_dataset(**kwargs: object) -> object

Return an xarray.Dataset for this source.

Parameters:

Name Type Description Default
**kwargs object

Forwarded verbatim to xarray.open_dataset.

{}
Source code in point_collocation/adapters/base.py
@abstractmethod
def open_dataset(self, **kwargs: object) -> object:
    """Return an ``xarray.Dataset`` for this source.

    Concrete adapters must override this; it is the only hook the
    core engine calls on an adapter.

    Parameters
    ----------
    **kwargs:
        Forwarded verbatim to ``xarray.open_dataset``.
    """
    raise NotImplementedError  # pragma: no cover

point_collocation.core._granule

Helpers for working with individual granules (source files).

Responsibilities
  • Extract a human-readable identifier from an arbitrary source object.
  • Parse the temporal coverage (start/end date) from a NASA-style L3 granule filename.
Supported filename conventions

- YYYYDOY — single day (DOY = day-of-year, 001–366)
- YYYYDOY_YYYYDOY — multi-day range (e.g., 8-day composites, monthly)
- YYYYMMDD — single day in calendar format
- YYYYMMDD_YYYYMMDD — multi-day range in calendar format

The period keyword embedded in the filename (.DAY., .8D., .MO.) is used to infer the end date when only a start date is present.

Examples of supported filenames
  • PACE_OCI_2024070.L3m.DAY.RRS.Rrs_412.4km.nc
  • PACE_OCI_2024049_2024056.L3m.8D.CHL.chlor_a.9km.nc
  • AQUA_MODIS.20230601.L3m.DAY.SST.sst.4km.nc
  • AQUA_MODIS.20230601_20230630.L3m.MO.CHL.chlor_a.9km.nc

get_source_id

get_source_id(source: object) -> str

Return a human-readable identifier (basename) for source.

Tries, in order:

  1. pathlib.Path → path.name
  2. Plain str → os.path.basename(source)
  3. Object with a .path or .name string attribute
  4. str(source) as last resort
Source code in point_collocation/core/_granule.py
def get_source_id(source: object) -> str:
    """Best-effort human-readable identifier (basename) for *source*.

    Resolution order:

    1. ``pathlib.Path`` → its ``.name``
    2. Plain ``str`` → ``os.path.basename(source)``
    3. An object exposing a non-empty string ``.path`` or ``.name``
    4. ``str(source)`` as the final fallback
    """
    if isinstance(source, pathlib.Path):
        return source.name
    if isinstance(source, str):
        return os.path.basename(source)
    # Duck-typed file-like objects (e.g. fsspec handles) usually carry
    # a .path or .name string; use whichever appears first and is set.
    for candidate in (getattr(source, attr, None) for attr in ("path", "name")):
        if isinstance(candidate, str) and candidate:
            return os.path.basename(candidate)
    return str(source)

parse_temporal_range

parse_temporal_range(filename: str) -> tuple[pd.Timestamp, pd.Timestamp]

Return (start, end) timestamps for the granule named filename.

Only the basename of filename is examined.

Parameters:

Name Type Description Default
filename str

File path or basename.

required

Returns:

Type Description
tuple[Timestamp, Timestamp]

Inclusive start and end dates (time component is midnight UTC).

Raises:

Type Description
ValueError

If no recognisable date pattern is found in filename.

Source code in point_collocation/core/_granule.py
def parse_temporal_range(filename: str) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Parse the inclusive ``(start, end)`` date range encoded in *filename*.

    Only the basename of *filename* is examined.  Candidate patterns are
    tried in priority order: explicit date pairs first (day-of-year form,
    then calendar form), then single dates whose end date is inferred
    from the period keyword in the filename.

    Parameters
    ----------
    filename:
        File path or basename.

    Returns
    -------
    tuple[pandas.Timestamp, pandas.Timestamp]
        Inclusive start and end dates (time component is midnight UTC).

    Raises
    ------
    ValueError
        If no recognisable date pattern is found in *filename*.
    """
    basename = os.path.basename(filename)

    # (regex, strptime format, is-a-date-pair) tried in priority order.
    # Lookarounds keep a 7-digit DOY token from matching inside an
    # 8-digit calendar token and vice versa.
    attempts = (
        (r"(?<!\d)(\d{7})_(\d{7})(?!\d)", "%Y%j", True),
        (r"(?<!\d)(20\d{6})_(20\d{6})(?!\d)", "%Y%m%d", True),
        (r"(?<!\d)(\d{7})(?!\d)", "%Y%j", False),
        (r"(?<!\d)(20\d{6})(?!\d)", "%Y%m%d", False),
    )

    for pattern, fmt, is_pair in attempts:
        match = re.search(pattern, basename)
        if match is None:
            continue
        try:
            begin = datetime.strptime(match.group(1), fmt)
            finish = (
                datetime.strptime(match.group(2), fmt)
                if is_pair
                else _infer_end_date(begin, basename)
            )
            # Conversion stays inside the try: an unparseable or
            # out-of-range date falls through to the next pattern.
            return pd.Timestamp(begin), pd.Timestamp(finish)
        except ValueError:
            continue

    raise ValueError(
        f"Cannot parse temporal range from filename: {basename!r}"
    )