Source code for nwm_region_mgr.parreg.plot_outputs

"""Functions for generating specific/interim plots from parameter regionalization.

plot_outputs.py

Functions:
    - plot_missing_attr_counts: Plots the number of catchments with missing values for each selected attribute.
    - plot_donor_spatial_map: Plots the spatial distribution of donors within a VPU and its buffer zone.

"""

import logging
from pathlib import Path
from typing import Union

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.lines import Line2D
from shapely.geometry import MultiPolygon, Point, Polygon

from nwm_region_mgr.parreg import config_schema as cs
from nwm_region_mgr.utils import read_table

logger = logging.getLogger(__name__)


[docs] def plot_missing_attr_counts( config: cs.Config, vpu: str, df_attrs_all: pd.DataFrame ) -> None: """Plot the number of catchments with missing values for each selected attribute. Args: config : cs.Config Configuration object containing settings for the regionalization. vpu : str The VPU (Vector Processing Unit) identifier. df_attrs_all : pd.DataFrame DataFrame containing all attributes for the catchments. """ cols = [] for dataset in config.general.attr_dataset_list: cols = cols + [ dataset + "_" + x for x in getattr(config.attr_datasets, dataset).attr_list ] missing_cols = set(cols) - set(df_attrs_all.columns) cols1 = [col for col in cols if col not in missing_cols] if missing_cols: logger.warning( f"Missing columns in final attribute dataframe df_attrs_all: {missing_cols}." ) logger.info("Please check the configuration.") # Compute missing counts missing_counts = df_attrs_all[cols1].isnull().sum() total = len(df_attrs_all) # Plot fig, ax1 = plt.subplots(figsize=(10, 5)) ax1.bar( missing_counts.index, missing_counts.values, color="skyblue", edgecolor="black" ) ax1.set_ylabel("Missing Count") ax1.set_title( f"Number (and %) of Catchments with Missing Values for Each Selected Attribute (VPU {vpu})" ) # Rotate x-labels for readability plt.xticks(rotation=90, ha="center") # Add secondary axis for percentage ax2 = ax1.twinx() ax2.set_ylim(ax1.get_ylim()[0], ax1.get_ylim()[1]) ax2.set_ylabel("Missing (%)") # Set percent ticks aligned with counts yticks = ax1.get_yticks() ax2.set_yticks(yticks) ax2.set_yticklabels([f"{y / total * 100:.1f}%" for y in yticks]) # Save figure out = getattr(config.output, "attr_data_final", None) if out is None: msg = "Output configuration for 'attr_data_final' is not defined. Skipping plot saving." logger.warning(msg) return outfile = Path( out.path, f"plots/bar_attr_missing_count_{config.general.domain}_vpu{vpu}.png", ) outfile.parent.mkdir(parents=True, exist_ok=True) fig.savefig(outfile, bbox_inches="tight") logger.info(f"Missing attribute counts plot saved to {outfile}") plt.close(fig)
[docs] def plot_donor_spatial_map( config: cs.Config, vpu: str, donor_basins: list, final_donor_basins: list, gdf_buffered: Union[Polygon, MultiPolygon], combined_geom: Union[Polygon, MultiPolygon], donors_only: bool = False, ) -> None: """Plot the spatial distribution of donors within a VPU and its buffer zone. Args: config : cs.Config Configuration object containing settings for the regionalization. vpu : str The VPU (Vector Processing Unit) identifier. donor_basins : list List of initial donor basin identifiers. final_donor_basins : list List of final donor basin identifiers. gdf_buffered : gpd.GeoDataFrame GeoDataFrame of the buffered VPU polygon. combined_geom : gpd.GeoDataFrame GeoDataFrame of the combined geometry of the VPU. donors_only: bool Whether to plot donor basins only (i.e., omit receivers) """ # read in lat/lon of all donors gage_id_name = getattr(config.general.id_col, "gage", "gage_id") donors_all = read_table(config.general.donor_gage_file, dtype={gage_id_name: str}) # filter donors based on the donor_basins list (qualified donors) donors = donors_all[donors_all[gage_id_name].isin(donor_basins)] donor_gdf = gpd.GeoDataFrame( donors, geometry=[Point(xy) for xy in zip(donors["longitude"], donors["latitude"])], crs="EPSG:4326", ) # Project to meters for accurate distance calculations donor_gdf = donor_gdf.to_crs(epsg=3857) # visualize the donors selected fig, ax = plt.subplots(figsize=(8, 5)) # Plot base layer, use combined_geom if ring is empty (buffer is zero) ring = gdf_buffered.difference(combined_geom) if ring.is_empty: base_geom = combined_geom else: base_geom = ring ring_gdf = gpd.GeoDataFrame(geometry=[base_geom], crs=donor_gdf.crs) ring_gdf.plot( ax=ax, color="lightgray", edgecolor="black", ) # donor and non-donor calibration basins in the buffered VPU non_donor_gdf = donor_gdf[~donor_gdf["gage_id"].isin(final_donor_basins)] final_donor_gdf = donor_gdf[donor_gdf["gage_id"].isin(final_donor_basins)] # plot calibration basins in the buffered VPU if not donors_only: non_donor_gdf.plot(ax=ax, color="red", markersize=20, marker="x") final_donor_gdf.plot( ax=ax, edgecolor="red", facecolor="none", markersize=20, marker="o" ) # donors and non-donors in the original VPU donors_vpu = final_donor_gdf[final_donor_gdf.geometry.within(combined_geom)] non_donors_vpu = non_donor_gdf[non_donor_gdf.geometry.within(combined_geom)] donors_vpu.plot( ax=ax, edgecolor="blue", facecolor="none", markersize=20, marker="o" ) if not donors_only: non_donors_vpu.plot(ax=ax, color="blue", markersize=20, marker="x") # Create custom legend handles leg_donor_vpu = Line2D( [0], [0], marker="o", color="w", label=f"Donors within VPU ({len(donors_vpu)})", markeredgecolor="blue", markerfacecolor="none", markersize=6, ) leg_donor_buffer = Line2D( [0], [0], marker="o", color="w", label=f"Donors in buffer zone ({len(final_donor_gdf) - len(donors_vpu)})", markeredgecolor="red", markerfacecolor="none", markersize=6, ) leg_non_donor_vpu = Line2D( [0], [0], marker="x", color="blue", label=f"non-donors within VPU ({len(non_donors_vpu)})", markersize=6, linestyle="None", ) leg_non_donor_buffer = Line2D( [0], [0], marker="x", color="red", label=f"non-donors in buffer zone ({len(non_donor_gdf) - len(non_donors_vpu)})", markersize=6, linestyle="None", ) leg_buffer_zone = Line2D( [0], [0], marker="s", color="black", label=f"Buffer zone ({round(config.donor.buffer_km)} km)", markerfacecolor="lightgray", markersize=6, linestyle="None", ) if ring.is_empty: if not donors_only: legend_elements = [ leg_donor_vpu, leg_non_donor_vpu, ] else: legend_elements = [leg_donor_vpu] else: if not donors_only: legend_elements = [ leg_donor_vpu, leg_donor_buffer, leg_non_donor_vpu, leg_non_donor_buffer, leg_buffer_zone, ] else: legend_elements = [ leg_donor_vpu, leg_donor_buffer, leg_buffer_zone, ] # Add legend ax.legend(handles=legend_elements, loc="center left", bbox_to_anchor=(1, 0.5)) # title plt.title(f"Donor basins available for VPU {vpu} regionalization") # tidy up plot ax.set_axis_off() plt.tight_layout() # save the figure out = getattr(config.output, "pairs", None) if out is None: msg = "Output configuration for 'pairs' is not defined. Skipping plot saving." logger.warning(msg) return outfile = Path( out.path, f"plots/map_donors_{config.general.domain}_vpu{vpu}.png", ) outfile.parent.mkdir(parents=True, exist_ok=True) plt.savefig(outfile, bbox_inches="tight") logger.info(f"Donor spatial map saved to {outfile}") plt.close(fig)