# -----------------------------------------------------------------------------.
# MIT License
# Copyright (c) 2024 GPM-API developers
#
# This file is part of GPM-API.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------.
"""This module contains functions for subsetting and aligning GPM ORBIT Datasets."""
import numpy as np
from xarray.core.utils import either_dict_or_kwargs
[docs]
def is_1d_non_dimensional_coord(xr_obj, coord):
"""Checks if a coordinate is a 1d, non-dimensional coordinate."""
if coord not in xr_obj.coords:
return False
if xr_obj[coord].ndim != 1:
return False
is_1d_dim_coord = xr_obj[coord].dims[0] == coord
return not is_1d_dim_coord
def _get_dim_of_1d_non_dimensional_coord(xr_obj, coord):
"""Get the dimension of a 1D non-dimension coordinate."""
if not is_1d_non_dimensional_coord(xr_obj, coord):
raise ValueError(f"'{coord}' is not a 1D non-dimensional coordinate.")
dim = xr_obj[coord].dims[0]
return dim
def _get_dim_isel_on_non_dim_coord_from_isel(xr_obj, coord, isel_indices):
"""Get dimension and isel_indices related to a 1D non-dimension coordinate.
Parameters
----------
xr_obj : (xr.Dataset, xr.DataArray)
A xarray object.
coord : str
Name of the coordinate wishing to subset with .sel
isel_indices : (str, int, float, list, np.array)
Coordinate indices wishing to be selected.
Returns
-------
dim : str
Dimension related to the 1D non-dimension coordinate.
isel_indices : (int, list, slice)
Indices for index-based selection.
"""
dim = _get_dim_of_1d_non_dimensional_coord(xr_obj, coord)
return dim, isel_indices
def _get_dim_isel_indices_from_isel_indices(xr_obj, key, indices):
"""Return the dimension and isel_indices related to the dimension position indices of a coordinate."""
# Non-dimensional coordinate case
if key not in xr_obj.dims:
key, indices = _get_dim_isel_on_non_dim_coord_from_isel(xr_obj, coord=key, isel_indices=indices)
return key, indices
def _get_isel_indices_from_sel_indices(xr_obj, coord, sel_indices):
"""Get isel_indices corresponding to sel_indices."""
da_coord = xr_obj[coord]
dim = da_coord.dims[0]
da_coord = da_coord.assign_coords({"isel_indices": (dim, np.arange(0, da_coord.size))})
da_subset = da_coord.swap_dims({dim: coord}).sel({coord: sel_indices})
isel_indices = da_subset["isel_indices"].data
return isel_indices
def _get_dim_isel_on_non_dim_coord_from_sel(xr_obj, coord, sel_indices):
"""
Return the dimension and isel_indices related to a 1D non-dimension coordinate.
Parameters
----------
xr_obj : (xr.Dataset, xr.DataArray)
A xarray object.
coord : str
Name of the coordinate wishing to subset with .sel
sel_indices : (str, int, float, list, np.array)
Coordinate values wishing to be selected.
Returns
-------
dim : str
Dimension related to the 1D non-dimension coordinate.
isel_indices : np.ndarray
Indices for index-based selection.
"""
dim = _get_dim_of_1d_non_dimensional_coord(xr_obj, coord)
isel_indices = _get_isel_indices_from_sel_indices(xr_obj, coord=coord, sel_indices=sel_indices)
return dim, isel_indices
def _get_dim_isel_indices_from_sel_indices(xr_obj, key, indices):
"""Return the dimension and isel_indices related to values of a coordinate."""
# Dimension case
if key in xr_obj.dims:
if key not in xr_obj.coords:
raise ValueError(f"Can not subset with gpm.sel the dimension '{key}' if it is not also a coordinate.")
isel_indices = _get_isel_indices_from_sel_indices(xr_obj, coord=key, sel_indices=indices)
# Non-dimensional coordinate case
else:
key, isel_indices = _get_dim_isel_on_non_dim_coord_from_sel(xr_obj, coord=key, sel_indices=indices)
return key, isel_indices
def _get_dim_isel_indices_function(func):
func_dict = {
"sel": _get_dim_isel_indices_from_sel_indices,
"isel": _get_dim_isel_indices_from_isel_indices,
}
return func_dict[func]
def _subset(xr_obj, indexers=None, func="isel", drop=False, **indexers_kwargs):
"""Perform selection with isel or isel."""
# Retrieve indexers
indexers = either_dict_or_kwargs(indexers, indexers_kwargs, func)
# Get function returning isel_indices
get_dim_isel_indices = _get_dim_isel_indices_function(func)
# Define isel_dict
isel_dict = {}
for key, indices in indexers.items():
key, isel_indices = get_dim_isel_indices(xr_obj, key=key, indices=indices)
if key in isel_dict:
raise ValueError(f"Multiple indexers point to the '{key}' dimension.")
isel_dict[key] = isel_indices
# Subset and update area
xr_obj = xr_obj.isel(isel_dict, drop=drop)
return xr_obj
[docs]
def isel(xr_obj, indexers=None, drop=False, **indexers_kwargs):
"""Perform index-based dimension selection."""
return _subset(xr_obj, indexers=indexers, func="isel", drop=drop, **indexers_kwargs)
[docs]
def sel(xr_obj, indexers=None, drop=False, **indexers_kwargs):
"""Perform value-based coordinate selection.
Slices are treated as inclusive of both the start and stop values, unlike normal Python indexing.
The gpm `sel` method is empowered to:
- slice by gpm-id strings !
- slice by any xarray coordinate value !
You can use string shortcuts for datetime coordinates (e.g., '2000-01' to select all values in January 2000).
"""
return _subset(xr_obj, indexers=indexers, func="sel", drop=drop, **indexers_kwargs)
####------------------------------------------------------------------------------------------------------------------.
#### Alignment
def _check_coord_exist(xr_obj, coord):
if coord not in xr_obj.coords:
msg = f"The xarray objects does not have the '{coord}' coordinate. Impossible to align."
raise ValueError(msg)
def _split_gpm_id_key(s):
prefix, suffix = s.split("-")
return (prefix, int(suffix))
def _align_spatial_coord(coord, *args):
"""
Align GPM / GPM-GEO xarray objects along a coordinate.
Parameters
----------
coords: str
Coordinate name.
args : list
A list of GPM / GPM-GEO xr.Dataset or xr.DataArray.
Returns
-------
list_aligned : list
A list of aligned GPM / GPM-GEO xr.Dataset or xr.DataArray.
"""
from functools import reduce
list_xr_obj = args
# Check the coordinate is always available
_ = [_check_coord_exist(xr_obj, coord) for xr_obj in list_xr_obj]
# Retrieve list of coordinate values
list_id = [xr_obj[coord].data for xr_obj in list_xr_obj]
# Retrieve intersection of coordinates values
# - np.atleast_1d ensure that the dimension is not dropped if only 1 value
# - np.intersect1d returns the sorted array of common unique elements
# - PROBLEM: for 'gpm_id': '36083-999' comes after '36083-4483'. So ad-hoc reorder
sel_indices = np.atleast_1d(reduce(np.intersect1d, list_id))
if len(sel_indices) == 0:
raise ValueError(f"No common {coord}.")
# Reorder if gpm_id
if coord == "gpm_id":
sel_indices = np.array(sorted(sel_indices, key=_split_gpm_id_key))
# Subset datasets
list_aligned = []
for xr_obj in list_xr_obj:
xr_obj = sel(xr_obj, {coord: sel_indices})
list_aligned.append(xr_obj)
return list_aligned
[docs]
def align_along_track(*args):
"""
Align GPM / GPM-GEO xarray objects in the along-track direction.
Parameters
----------
args : list
A list of GPM / GPM-GEO xr.Dataset or xr.DataArray.
Returns
-------
list_aligned : list
A list of aligned GPM / GPM-GEO xr.Dataset or xr.DataArray.
"""
return _align_spatial_coord("gpm_id", *args)
[docs]
def align_cross_track(*args):
"""
Align GPM / GPM-GEO xarray objects in the cross-track direction.
Parameters
----------
args : list
A list of GPM / GPM-GEO xr.Dataset or xr.DataArray.
Returns
-------
list_aligned : list
A list of aligned GPM / GPM-GEO xr.Dataset or xr.DataArray.
"""
return _align_spatial_coord("gpm_cross_track_id", *args)