# -----------------------------------------------------------------------------.
# MIT License
# Copyright (c) 2024 GPM-API developers
#
# This file is part of GPM-API.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------.
"""This module contains functions to parse GPM granule attributes."""
import ast
import numpy as np
from gpm.io.checks import get_current_utc_time
# Global attribute names that get_granule_attrs keeps when they are present.
# The "##" comments name the metadata group each attribute is listed under.
# NOTE(review): "static" presumably means the value does not change between
# granules of the same product - confirm.
STATIC_GLOBAL_ATTRS = (
    ## FileHeader
    "DOI",
    "DOIauthority",
    "AlgorithmID",
    "AlgorithmVersion",
    "ProductVersion",
    "SatelliteName",
    "InstrumentName",
    "ProcessingSystem",  # "PPS" or "JAXA"
    # EmptyGranule is not listed (granule discarded if empty)
    ## FileInfo
    "DataFormatVersion",
    "MetadataVersion",
    ## JaxaInfo
    "ProcessingMode",  # "STD", "NRT"
    ## SwathHeader
    "ScanType",
    ## GSMaPInfo
    "AlgorithmName",
    ## GprofInfo
    "Satellite",
    "Sensor",
    ## DPRKuInfo, DPRKaInfo
    "eqvWavelength",
)
# Global attribute names that get_granule_attrs keeps when they are present.
# NOTE(review): presumably these are only meaningful for a single granule
# (e.g. the file name) - confirm how callers treat them when granules are
# concatenated.
GRANULE_ONLY_GLOBAL_ATTRS = (
    ## FileHeader
    "FileName",
    ## Navigation Record
    "EphemerisFileName",
    "AttitudeFileName",
    ## JaxaInfo
    "TotalQualityCode",
    "DielectricFactorKa",
    "DielectricFactorKu",
)
# Global attribute names that get_granule_attrs keeps when they are present.
# NOTE(review): "dynamic" presumably means the value differs from granule to
# granule - confirm.
DYNAMIC_GLOBAL_ATTRS = (
    "MissingData",  # number of missing scans
    "NumberOfRainPixelsFS",
    "NumberOfRainPixelsHS",
)
# TODO: read these attribute name tuples from a config YAML file.
def _is_str_list(s):
"""Check if the string start and end with brackets.
Return a boolean indicating if the string can be converted to a list.
"""
if s.startswith("[") and s.endswith("]"):
try:
ast.literal_eval(s)
return True
except ValueError:
return False
else:
return False
def _isfloat(s):
"""Return a boolean indicating if the string can be converted to float."""
try:
float(s)
return True
except ValueError:
return False
def _isinteger(s):
    """Return a boolean indicating if the string represents an integer value.

    The string must be convertible to float and the resulting float must have
    no fractional part (so ``"3"`` and ``"3.0"`` both qualify).
    """
    return _isfloat(s) and float(s).is_integer()
def _remove_multiple_spaces(string):
"""Remove consecutive spaces in a string."""
return " ".join(string.split())
def _parse_attr_string(s):
    """Parse an attribute string value.

    Non-string inputs are returned unchanged. For a string, whitespace is
    first normalized with ``_remove_multiple_spaces`` and then the first
    matching rule applies:

    1. a bracketed literal (``"[...]"``) is evaluated into a list;
    2. a string containing ``,`` is split on ``,`` into a list of strings;
    3. an integer-valued number is converted to ``int``;
    4. any other number is converted to ``float``;
    5. otherwise the normalized string is returned.

    Parameters
    ----------
    s : object
        Attribute value to parse.

    Returns
    -------
    str, list, int, float or the input object
        The parsed value.
    """
    if not isinstance(s, str):
        return s
    # Keep a single space between words. This also turns newlines into
    # spaces (str.split() treats them as whitespace), so no separate
    # newline-splitting step is needed afterwards.
    s = _remove_multiple_spaces(s)
    # If multiple stuffs between brackets [ ], convert to list
    if _is_str_list(s):
        return ast.literal_eval(s)
    # If there is still a comma in the string, convert it into a list
    if "," in s:
        return s.split(",")
    # If the string can be a number, convert it
    if _isinteger(s):
        try:
            # Direct conversion keeps full precision for long digit strings.
            return int(s)
        except ValueError:
            # Notations such as '0.0000' or '1e3' are rejected by int().
            return int(float(s))
    if _isfloat(s):
        return float(s)
    return s
def decode_string(string):
    r"""Decode a GPM attribute string.

    Tabs and trailing newlines are removed first. If the cleaned string
    contains ``=``, it is split on ``;`` into ``<key>=<value>`` entries and a
    dictionary is returned: newlines are removed from each key and each value
    is parsed with ``_parse_attr_string``. Otherwise the whole string is
    parsed with ``_parse_attr_string``.

    Parameters
    ----------
    string : str
        Attribute string. Expected entry format: ``<key>=<value>;\n``.

    Returns
    -------
    dict or str, list, int, float
        Dictionary of parsed values if ``=`` is present, otherwise the parsed
        value.

    Raises
    ------
    ValueError
        If the string contains ``=`` but a non-blank entry between ``;``
        separators does not.
    """
    # Clean the string
    string = string.replace("\t", "").rstrip("\n")
    # Without '=' there are no key/value pairs: parse as a single value
    if "=" not in string:
        return _parse_attr_string(string)
    decoded = {}
    for entry in string.split(";"):
        # Skip empty and whitespace-only entries (e.g. what follows the last
        # ';'), which carry no key/value pair and cannot be unpacked.
        if not entry.strip():
            continue
        key, value = entry.split("=", 1)
        decoded[key.replace("\n", "")] = _parse_attr_string(value)
    return decoded
def decode_attrs(attrs):
    """Decode GPM nested dictionary attributes from a xarray object.

    Each value is decoded exactly once with ``decode_string``; values holding
    ``<key>=<value>`` entries therefore become nested dictionaries.

    Parameters
    ----------
    attrs : dict
        Mapping of attribute name to attribute string.

    Returns
    -------
    dict
        New dictionary with the same keys and the decoded values.
    """
    return {name: decode_string(value) for name, value in attrs.items()}
def _has_nested_dictionary(attrs):
"""Check if the dictionary has nested dictionaries."""
return np.any([isinstance(v, dict) for v in attrs.values()])
def get_granule_attrs(dt):
    """Get granule global attributes.

    The attributes of ``dt`` are decoded with ``decode_attrs``, flattened
    into a single dictionary and restricted to the names listed in
    ``GRANULE_ONLY_GLOBAL_ATTRS``, ``DYNAMIC_GLOBAL_ATTRS`` and
    ``STATIC_GLOBAL_ATTRS``.

    Parameters
    ----------
    dt : object
        Object exposing the granule global attributes as ``dt.attrs``.

    Returns
    -------
    dict
        The retained attributes that are present in ``dt.attrs``.
    """
    # Retrieve attributes dictionary (per group)
    nested_attrs = decode_attrs(dt.attrs)
    # Flatten attributes (without group): the content of each group
    # dictionary is merged, any other value is kept under its own name.
    attrs = {}
    for name, value in nested_attrs.items():
        if isinstance(value, dict):
            attrs.update(value)
        else:
            attrs[name] = value
    # Subset only required attributes
    valid_keys = GRANULE_ONLY_GLOBAL_ATTRS + DYNAMIC_GLOBAL_ATTRS + STATIC_GLOBAL_ATTRS
    return {key: attrs[key] for key in valid_keys if key in attrs}
def add_history(ds):
    """Add the history attribute to the xarray.Dataset.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset whose ``attrs["history"]`` entry is set (in place).

    Returns
    -------
    xarray.Dataset
        The same ``ds`` object that was passed in.
    """
    timestamp = get_current_utc_time().strftime("%Y-%m-%d %H:%M:%S")
    ds.attrs["history"] = f"Created by ghiggi/gpm_api software on {timestamp}"
    return ds