Source code for gpm.dataset.datatree

# -----------------------------------------------------------------------------.
# MIT License

# Copyright (c) 2024 GPM-API developers
#
# This file is part of GPM-API.

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# -----------------------------------------------------------------------------.
"""This module contains functions to read a GPM granule into a DataTree object."""
import os

import datatree
import xarray as xr

import gpm
from gpm.dataset.attrs import decode_string
from gpm.dataset.dimensions import _rename_datatree_dimensions

# TODO:
# --> open datatrees and concat datatrees
# --> create datatree with option "flattened_scan_modes"
# --> gpm.open_granule(datatree=False)  # or if multiple scan_modes provided
# --> gpm.open_dataset(datatree=False)  # or if multiple scan_modes provided


[docs] def open_datatree(filepath, chunks={}, decode_cf=False, use_api_defaults=True): """Open HDF5 in datatree object. - chunks={} --> Lazy map to dask.array --> Wait for https://github.com/pydata/xarray/pull/7948 --> Maybe need to implement "auto" option manually that defaults to full shape" - chunks="auto" --> datatree fails. Can not estimate size of object dtype ! - chunks=None --> lazy map to numpy.array """ try: dt = datatree.open_datatree(filepath, engine="netcdf4", chunks=chunks, decode_cf=decode_cf) check_non_empty_granule(dt, filepath) except Exception as e: check_valid_granule(filepath) raise ValueError(e) # Assign dimension names return _rename_datatree_dimensions(dt, use_api_defaults=use_api_defaults)
[docs] def check_non_empty_granule(dt, filepath): """Check that the datatree (or dataset) is not empty.""" attrs = dt.attrs attrs = decode_string(attrs["FileHeader"]) is_empty_granule = attrs["EmptyGranule"] != "NOT_EMPTY" if is_empty_granule: raise ValueError(f"{filepath} is an EMPTY granule !")
[docs] def check_valid_granule(filepath): """Raise an explanatory error if the GPM granule is not readable.""" # Check the file exists if not os.path.exists(filepath): raise FileNotFoundError(f"The filepath {filepath} does not exist.") # Identify the cause of the error if xarray can't open the file try: with xr.open_dataset(filepath, engine="netcdf4", group="") as ds: check_non_empty_granule(ds, filepath) except Exception as e: if "an EMPTY granule" in str(e): raise e _identify_error(e, filepath)
def _identify_error(e, filepath): """Identify error when opening HDF file.""" error_str = str(e) if "[Errno -101] NetCDF: HDF error" in error_str: info = "" if gpm.config.get("remove_corrupted_files"): # default False info = " and is being removed" os.remove(filepath) msg = f"The file {filepath} is corrupted{info}. It must be redownload." raise ValueError(msg) if "[Errno -51] NetCDF: Unknown file format" in error_str: msg = f"The GPM-API is not currently able to read the file format of {filepath}. Report the issue please." raise ValueError(msg) if "lock" in error_str: msg = "Unfortunately, HDF locking is occurring." msg += "Export the environment variable HDF5_USE_FILE_LOCKING = 'FALSE' into your environment (i.e. in the .bashrc).\n" # noqa msg += f"The error is: '{error_str}'." raise ValueError(msg) msg = f"The following file is corrupted. Error is {e}. Redownload the file." raise ValueError(msg)