# -----------------------------------------------------------------------------.
# MIT License
# Copyright (c) 2024 GPM-API developers
#
# This file is part of GPM-API.
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------.
"""This module contains plotting functions for exploratory data visualization."""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
def _bxp_stats_from_dataframe(df_stats, label=None):
    """Convert per-box summary statistics into the list of dicts expected by ``Axes.bxp``."""

    def column(name):
        return df_stats[name].to_numpy(dtype=float)

    q25, q75 = column("q25"), column("q75")
    vmin, vmax = column("min"), column("max")
    # Use the provided IQR when available, otherwise derive it from the quartiles
    iqr = column("iqr") if "iqr" in df_stats else q75 - q25
    # Whiskers reach 1.5 * IQR beyond the box, but never past the observed extremes:
    # the lower whisker is clipped from below by 'min', the upper one from above by 'max'.
    whislo = np.maximum(q25 - 1.5 * iqr, vmin)
    whishi = np.minimum(q75 + 1.5 * iqr, vmax)

    rows = zip(
        q25.tolist(),
        column("median").tolist(),
        q75.tolist(),
        whislo.tolist(),
        whishi.tolist(),
        vmin.tolist(),
        vmax.tolist(),
        column("mean").tolist(),
    )
    box_data = [
        {
            "q1": q1,
            "med": med,
            "q3": q3,
            "whislo": lo,
            "whishi": hi,
            # Only the extremes are known, so they are the only fliers that can be drawn
            "fliers": [flier_lo, flier_hi],
            "mean": mean,
        }
        for q1, med, q3, lo, hi, flier_lo, flier_hi, mean in rows
    ]
    if label is not None:
        # Read the labels column-wise so that each label keeps the dtype of its own column
        for box_dict, text in zip(box_data, df_stats[label]):
            box_dict["label"] = str(text)
    return box_data


def plot_boxplot(
    df_stats,
    ax=None,
    label=None,
    showfliers=False,
    showwhisker=False,
    showmeans=False,
    positions=None,
    widths=0.6,
    add_median_points=False,
    add_median_line=False,
    median_points_kwargs=None,
    median_line_kwargs=None,
    boxprops=None,
    whiskerprops=None,
    medianprops=None,
    **kwargs,
):
    """
    Draw a box and whisker plot from pre-computed statistics.

    The box extends from the first quartile *q25* to the third
    quartile *q75* of the data, with a line at the median (*median*).
    The whiskers extend from *whislo* to *whishi*, which are computed as
    ``max(q25 - 1.5 * iqr, min)`` and ``min(q75 + 1.5 * iqr, max)``.
    The fliers of each box are its *min* and *max* values.
    See https://en.wikipedia.org/wiki/Box_plot for reference.

    .. code-block:: none

            whislo    q25    med    q75    whishi
                       |-----:-----|
        o     |--------|     :     |--------|     o
                       |-----:-----|
       flier                                    flier

    .. note::
        This is a low-level drawing function for when you already
        have the statistical parameters. If you want a boxplot based
        on a dataset, use `matplotlib.axes.Axes.boxplot` instead.

    Parameters
    ----------
    df_stats : pandas.DataFrame
        One row per box. The dataframe is sorted by index before plotting
        (the input dataframe is not modified).
        If the index is a `pandas.DatetimeIndex` and ``positions`` is not
        specified, the boxes are placed at the corresponding dates.

        Required columns are:

        - 'mean'
        - 'median'
        - 'q25'
        - 'q75'
        - 'min'
        - 'max'

        Optional columns are:

        - 'iqr': computed as ``q75 - q25`` if not available.
    ax : matplotlib.axes.Axes, optional
        The axes on which to draw. If ``None``, a new figure and axes are created.
    label : str, optional
        Column of the dataframe to be used as tick label for the boxplot.
    showfliers : bool, optional
        Whether to draw the fliers. The default is False.
    showwhisker : bool, optional
        Whether to draw the whiskers. If False (the default), the whiskers
        are made fully transparent.
    showmeans : bool, optional
        Whether to draw the mean value. The default is False.
    positions : array-like, optional
        The positions of the boxes, in the order of the sorted dataframe index.
        If not specified, the dates of a DatetimeIndex are used; for any other
        index the boxes are placed at ``0, 1, ..., len(df_stats) - 1``.
    widths : float or array-like, optional
        The widths of the boxes. The default is 0.6.
    add_median_points : bool, optional
        If True, scatter the median values on top of the boxes. The default is False.
    add_median_line : bool, optional
        If True, draw a line connecting the median values. The default is False.
    median_points_kwargs : dict, optional
        Keyword arguments passed to ``ax.scatter`` when ``add_median_points=True``.
    median_line_kwargs : dict, optional
        Keyword arguments passed to ``ax.plot`` when ``add_median_line=True``.
    boxprops, whiskerprops, medianprops : dict, optional
        Artist properties for the boxes, whiskers and medians.
        ``medianprops`` defaults to ``{"color": "black"}``.
        The dictionaries provided by the caller are not modified.
    **kwargs
        Additional keyword arguments passed to `matplotlib.axes.Axes.bxp`
        (e.g. ``patch_artist``, ``showcaps``, ``capprops``, ``flierprops``,
        ``meanprops``, ``manage_ticks``, ``zorder``).

    Returns
    -------
    dict
        The value returned by `matplotlib.axes.Axes.bxp`: a dictionary mapping
        each component of the boxplot to the list of artists created.

    """
    # Ensure sorted index (sort_index returns a new object, the input is left untouched)
    df_stats = df_stats.sort_index()

    # Define default properties
    medianprops = {"color": "black"} if medianprops is None else medianprops
    # Work on a copy: the alpha override below must not leak into the caller's dict
    whiskerprops = {} if whiskerprops is None else dict(whiskerprops)
    if not showwhisker:
        whiskerprops["alpha"] = 0

    # Prepare data for bxp
    box_data = _bxp_stats_from_dataframe(df_stats, label=label)

    # Define positions
    if positions is None:
        if isinstance(df_stats.index, pd.DatetimeIndex):
            positions = date2num(np.asarray(df_stats.index))
        else:
            positions = range(len(df_stats))

    # Create the boxplot with bxp
    if ax is None:
        _, ax = plt.subplots()
    bplot = ax.bxp(
        box_data,
        positions=positions,
        widths=widths,
        showmeans=showmeans,
        showfliers=showfliers,
        boxprops=boxprops,
        medianprops=medianprops,
        whiskerprops=whiskerprops,
        **kwargs,
    )

    # Add median points
    if add_median_points:
        median_points_kwargs = {} if median_points_kwargs is None else median_points_kwargs
        ax.scatter(positions, df_stats["median"], **median_points_kwargs)

    # Add line between median points
    if add_median_line:
        median_line_kwargs = {} if median_line_kwargs is None else median_line_kwargs
        ax.plot(positions, df_stats["median"], **median_line_kwargs)

    # Restrict the y limits to the boxes when neither whiskers nor fliers are displayed
    if not showwhisker and not showfliers:
        ax.set_ylim(np.nanmin(df_stats["q25"]), np.nanmax(df_stats["q75"]))
    return bplot