Source code for gpm.bucket.filters
import numpy as np
import polars as pl
import pyproj
from gpm.bucket.dataframe import (
df_add_column,
df_get_column,
df_select_valid_rows,
)
[docs]
def get_geodesic_distance_from_point(lons, lats, lon, lat):
    """Compute the geodesic distance between a set of points and a reference point.

    Parameters
    ----------
    lons, lats : array-like
        Longitudes and latitudes of the points, in degrees.
    lon, lat : float
        Longitude and latitude of the reference point, in degrees.

    Returns
    -------
    distance
        Distance of each point from the reference point, computed on the
        WGS84 ellipsoid by ``pyproj.Geod.inv`` (meters, per the pyproj
        documentation).
    """
    lons = np.asanyarray(lons)
    lats = np.asanyarray(lats)
    # Replicate the reference point so it pairs element-wise with the inputs
    ref_lons = np.ones(lons.shape) * lon
    ref_lats = np.ones(lats.shape) * lat
    wgs84 = pyproj.Geod(ellps="WGS84")
    # Geod.inv yields (forward azimuth, back azimuth, distance): keep the distance
    inverse_solution = wgs84.inv(lons, lats, ref_lons, ref_lats, radians=False)
    return inverse_solution[2]
[docs]
def filter_around_point(df, lon, lat, distance):
    """Keep only the rows located within a given distance from a point.

    A ``"distance"`` column holding the geodesic distance of every row from
    the point is added to the dataframe before the rows are selected.

    Parameters
    ----------
    df
        Dataframe with ``"lon"`` and ``"lat"`` columns.
    lon, lat : float
        Longitude and latitude of the reference point.
    distance : float
        Maximum distance (inclusive) from the reference point, in the units
        returned by ``get_geodesic_distance_from_point``
        (presumably meters - confirm against that function).

    Returns
    -------
    df
        Dataframe with the added ``"distance"`` column, restricted to the
        rows within ``distance`` of the point.
    """
    # Background on computing point distances on dataframe columns:
    # https://stackoverflow.com/questions/76262681/i-need-to-create-a-column-with-the-distance-between-two-coordinates-in-polars
    # Extract the coordinates and measure their distance from the point
    point_distances = get_geodesic_distance_from_point(
        lons=df_get_column(df, column="lon"),
        lats=df_get_column(df, column="lat"),
        lon=lon,
        lat=lat,
    )
    # Rows at exactly `distance` are retained
    within_radius = point_distances <= distance
    # Attach the distance to every row, then drop the rows that are too far
    df_with_distance = df_add_column(df, column="distance", values=point_distances)
    return df_select_valid_rows(df_with_distance, valid_rows=within_radius)
[docs]
def filter_by_extent(df, extent, x="lon", y="lat"):
    """Keep only the rows whose coordinates fall inside a rectangular extent.

    Parameters
    ----------
    df
        A polars ``DataFrame``/``LazyFrame``; any other object is handled
        as a pandas dataframe.
    extent : sequence
        Bounds indexed as ``[x_min, x_max, y_min, y_max]``.
        All bounds are inclusive.
    x, y : str
        Names of the columns holding the x and y coordinates.
        Defaults are ``"lon"`` and ``"lat"``.

    Returns
    -------
    df
        The filtered dataframe (same kind of object as the input).
    """
    # polars: every predicate passed to `filter` must hold for a row to be kept
    if isinstance(df, (pl.DataFrame, pl.LazyFrame)):
        x_col = pl.col(x)
        y_col = pl.col(y)
        return df.filter(
            x_col >= extent[0],
            x_col <= extent[1],
            y_col >= extent[2],
            y_col <= extent[3],
        )
    # pandas: combine the per-axis boolean masks and select by label
    inside_x = (df[x] >= extent[0]) & (df[x] <= extent[1])
    inside_y = (df[y] >= extent[2]) & (df[y] <= extent[3])
    return df.loc[inside_x & inside_y]
[docs]
def apply_spatial_filters(df, filters=None):
    """Apply the requested spatial filters to a dataframe.

    Parameters
    ----------
    df
        Dataframe with ``"lon"`` and ``"lat"`` columns.
    filters : dict, optional
        Spatial filters to apply. Recognized keys:

        - ``"extent"``: value forwarded to ``filter_by_extent``.
        - ``"point_radius"``: a ``(lon, lat, distance)`` triple forwarded
          to ``filter_around_point``.

        Other keys are ignored. If ``None`` (the default), the dataframe
        is returned unchanged.

    Returns
    -------
    df
        The filtered dataframe.
    """
    # No filters requested: nothing to do
    if filters is None:
        return df
    # The extent filter runs first, the point-radius filter second
    if "extent" in filters:
        df = filter_by_extent(df, extent=filters["extent"], x="lon", y="lat")
    if "point_radius" in filters:
        center_lon, center_lat, radius = filters["point_radius"]
        df = filter_around_point(df, lon=center_lon, lat=center_lat, distance=radius)
    return df