# Source code for gpm.visualization.quicklooks

# -----------------------------------------------------------------------------.
# MIT License

# Copyright (c) 2024 GPM-API developers
#
# This file is part of GPM-API.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# -----------------------------------------------------------------------------.
"""This module contains utility for generating quicklooks."""
import numpy as np
import xarray as xr

from gpm.utils.slices import get_indices_from_list_slices



[docs]
def create_quicklooks_dataset(list_ds, spacing=2, total_size=200, concat_dim="along_track"):
    """Concatenate multiple xarray.Dataset objects for quicklook plotting.

    The Datasets in `list_ds` are joined along the dimension `concat_dim`.
    After every Dataset except the last, a NaN-filled block of `spacing`
    entries is inserted to visually separate the events in a plot. After the
    last Dataset, whatever is missing to reach `total_size` is padded with NaNs.
    If the assembled Dataset is longer than `total_size`, only its first
    `total_size` entries are kept.

    A coordinate named "spacing_flag" is added to the result, indicating which
    indices in `concat_dim` are actual data (0) versus inserted NaNs (1).

    Parameters
    ----------
    list_ds : list of xarray.Dataset
        List of xarray Datasets to concatenate along the dimension `concat_dim`.
        All Datasets should share the same variables and dimension names,
        except for differences in the size of `concat_dim`.
        The first entry of the first Dataset is used as template for the
        NaN filler.
    spacing : int, optional
        Number of NaN entries to insert between consecutive Datasets.
        Defaults to 2.
    total_size : int, optional
        The desired total size (maximum length) along the `concat_dim`
        dimension in the final "quicklook" dataset. Defaults to 200.
    concat_dim : str, optional
        Name of the dimension along which to concatenate. Typically "along_track".

    Returns
    -------
    xarray.Dataset
        A new Dataset with at most `total_size` entries along `concat_dim`.
        Between real data segments there are NaNs for visual separation, and
        a coordinate "spacing_flag" (1 = NaN region, 0 = real data) is attached
        along `concat_dim`. The flag is created with ``np.zeros`` and is
        therefore stored as floating point.

    Raises
    ------
    IndexError
        If `list_ds` is empty, since the first Dataset is required to build
        the NaN filler.
    """
    # Build a NaN filler of size total_size: take the first entry of the first
    # dataset (keeping concat_dim) and repeat it total_size times.
    ds_template = list_ds[0].isel({concat_dim: [0]}, drop=False)
    ds_template = ds_template.isel({concat_dim: np.zeros(total_size, dtype=int)})
    # NOTE(review): full_like keeps the dtype of each variable; NaN is only
    # representable for floating-point variables - confirm inputs are float.
    ds_nan = xr.full_like(ds_template, fill_value=np.nan)

    # Create composite dataset
    # - Original datasets are interleaved by NaN blocks of size 'spacing'
    # - The last dataset is followed by the padding required to reach total_size
    list_ds_quicklook = []
    list_dummy_slices = []
    size = 0
    last_index = len(list_ds) - 1
    for i, ds in enumerate(list_ds):
        list_ds_quicklook.append(ds)
        size += len(ds[concat_dim])
        # The filler holds only total_size entries, so a separator is capped at
        # that length. This keeps the recorded slice identical to the block that
        # is really concatenated; the capped part lies beyond total_size and
        # would be truncated anyway.
        size_dummy = total_size - size if i == last_index else min(spacing, total_size)
        if size_dummy > 0:
            list_dummy_slices.append(slice(size, size + size_dummy))
            size += size_dummy
            list_ds_quicklook.append(ds_nan.isel({concat_dim: slice(0, size_dummy)}))

    # Combine slices together
    ds_quicklook = xr.concat(list_ds_quicklook, dim=concat_dim)
    spacing_flag = np.zeros(ds_quicklook[concat_dim].shape)
    # When no filler was inserted (the data alone reaches total_size) there is
    # nothing to flag, so the index lookup is skipped and the flag stays 0.
    if list_dummy_slices:
        spacing_flag[get_indices_from_list_slices(list_dummy_slices)] = 1
    ds_quicklook = ds_quicklook.assign_coords({"spacing_flag": (concat_dim, spacing_flag)})

    # Truncate if we exceeded total_size
    if ds_quicklook.sizes[concat_dim] > total_size:
        ds_quicklook = ds_quicklook.isel({concat_dim: slice(0, total_size)})
    return ds_quicklook




[docs]
def create_quicklooks_datasets(ds, list_slices, subplot_size=200, spacing=2, n_subplots=4, concat_dim="along_track"):
    """
    Build one quicklook dataset per subplot from regions of interest of ``ds``.

    The slices in ``list_slices`` are first distributed into groups with
    ``get_subplot_slices`` (slices longer than ``subplot_size`` are discarded there,
    and at most ``n_subplots`` groups are returned).
    For every group, the corresponding portions of ``ds`` are extracted along
    ``concat_dim`` and handed to ``create_quicklooks_dataset``, which concatenates them,
    separates them with ``spacing`` NaN entries and pads the remainder of the
    subplot with NaNs.

    Each resulting dataset carries a "spacing_flag" coordinate identifying the
    inserted NaN entries.

    Parameters
    ----------
    ds : xarray.Dataset
        The input dataset containing the ``concat_dim`` dimension.
    list_slices : list of slice
        Data segments of interest along the ``concat_dim`` dimension.
        Each slice must define integer ``start`` and ``stop`` values.
    subplot_size : int, optional
        Maximum length of each subplot (group of slices) along ``concat_dim``.
        Defaults to 200.
    spacing : int, optional
        Number of NaN points inserted between consecutive slices in a subplot. Defaults to 2.
    n_subplots : int, optional
        Maximum number of subplot groups to produce. Defaults to 4.
    concat_dim : str, optional
        Name of the dimension along which the slices are extracted and
        concatenated. Defaults to "along_track".

    Returns
    -------
    list of xarray.Dataset
        One dataset per group of slices (at most ``n_subplots``), each having:

        * Dimension ``concat_dim`` of size up to ``subplot_size``.
        * Slices concatenated with NaNs for spacing.
        * A "spacing_flag" coordinate along ``concat_dim`` (0 for real data, 1 for NaN spacing).

    """
    # Decide which slices end up together in each subplot
    grouped_slices = get_subplot_slices(
        list_slices=list_slices,
        subplot_size=subplot_size,
        spacing=spacing,
        n_subplots=n_subplots,
    )

    # Assemble the quicklook dataset of each subplot
    quicklooks = []
    for group in grouped_slices:
        segments = [ds.isel({concat_dim: slc}) for slc in group]
        quicklook = create_quicklooks_dataset(
            list_ds=segments,
            spacing=spacing,
            total_size=subplot_size,
            concat_dim=concat_dim,
        )
        quicklooks.append(quicklook)
    return quicklooks


    # # Create dummy dataset
    # ds_nan = xr.ones_like(ds.isel({"along_track": slice(0, subplot_size)}))*np.nan

    # # Create composite dataset for each subplot
    # # - Original dataset slices are interleaved by NaN dataset of size 'spacing'
    # list_subplots_datasets = []
    # for subplot_slices in list_subplot_slices:

    #     list_ds_subplot = []
    #     list_dummy_slices = []
    #     size = 0
    #     n_slices = len(subplot_slices)
    #     for i in range(0, n_slices):
    #         # Add slice to list
    #         slc = subplot_slices[i]
    #         list_ds_subplot.append(ds.isel({"along_track":slc}))
    #         size += slc.stop - slc.start
    #         # Insert NaN data between slices
    #         if i == n_slices - 1:
    #             size_dummy = subplot_size - size
    #         else:
    #             size_dummy = spacing
    #         if size_dummy > 0:
    #             list_dummy_slices.append(slice(size, size + size_dummy))
    #             size += size_dummy
    #             list_ds_subplot.append(ds_nan.isel({"along_track": slice(0, size_dummy)}))
    #     # Combine slices together
    #     subplot_dataset = xr.concat(list_ds_subplot, dim="along_track")
    #     spacing_flag = np.zeros(subplot_dataset["along_track"].shape)
    #     spacing_flag[get_indices_from_list_slices(list_dummy_slices)] = 1
    #     subplot_dataset = subplot_dataset.assign_coords({"spacing_flag": ("along_track", spacing_flag)})
    #     # Add subplot dataset to the subplots list
    #     list_subplots_datasets.append(subplot_dataset)
    # return list_subplots_datasets



[docs]
def get_subplot_slices(list_slices, subplot_size=100, spacing=2, n_subplots=4):
    """
    Distribute slices over subplots of bounded size.

    Slices are processed in the given order. Each accepted slice adds its own
    length plus ``spacing`` to the running length of the current group. When the
    running length plus the length of the next slice would exceed ``subplot_size``,
    the current group is closed and the slice opens a new group.
    Slices longer than ``subplot_size`` are skipped.

    Parameters
    ----------
    list_slices : list of slice
        Slice objects along an xarray object dimension.
        The length of a slice is computed as ``stop - start``, so both must be integers.
    subplot_size : int, optional
        Maximum length (slices plus the spacing between them) of a subplot.
        Defaults to 100.
    spacing : int, optional
        Length reserved after each slice when checking whether the next
        slice still fits in the current subplot. Defaults to 2.
    n_subplots : int, optional
        Maximum number of groups to return. Defaults to 4.

    Returns
    -------
    list of list of slice
        The groups of slices, one sub-list per subplot, in input order.
        Only the first ``n_subplots`` groups are returned.
        An empty list is returned if no slice is retained.
    """
    # TODO:
    # - Split a slice in two when it crosses the border of a subplot
    #   (running length + length > subplot_size)
    # - Deal with slices longer than subplot_size (currently discarded)

    groups = []
    open_group = []
    used_length = 0

    for slc in list_slices:
        length = slc.stop - slc.start
        # A slice that cannot fit in any subplot is dropped
        if length > subplot_size:
            continue
        # No room left in the open group: close it and start from scratch
        if used_length + length > subplot_size:
            groups.append(open_group)
            open_group = []
            used_length = 0
        open_group.append(slc)
        used_length += length + spacing

    # The group still open at the end holds the remaining slices
    if open_group:
        groups.append(open_group)

    # Discard the groups beyond the allowed number of subplots
    return groups[:n_subplots]