gdptools example without the OPeNDAP catalog - gridMET data#

import logging
import warnings

import hvplot.pandas
import hvplot.xarray
import numpy as np
import pandas as pd
import xarray as xr
from pynhd import NLDI, WaterData
from pygeohydro import watershed

from gdptools import AggGen, UserCatData, WeightGen

warnings.filterwarnings("ignore")

Create GeoDataFrame of Delaware River Basin HUC12s#

In this cell we use clients to several https://api.water.usgs.gov tools:

More examples of using the HyRiver pynhd package can be found here

# Delaware River Basin HUC12s
# --------------------------------
wbd = watershed.WBD("huc4")
delaware_basin = wbd.byids(field="huc4", fids="0204")
huc12_basins = WaterData("wbd12").bygeom(delaware_basin.iloc[0].geometry)
huc12_basins = huc12_basins[huc12_basins["huc12"].str.startswith(("020401", "020402"))]
huc12_basins.head()
geometry tnmid metasourceid sourcedatadesc sourceoriginator sourcefeatureid loaddate gnis_id areaacres areasqkm states huc12 name hutype humod tohuc noncontributingareaacres noncontributingareasqkm globalid
0 MULTIPOLYGON (((-75.02421 39.44397, -75.02353 ... {001055CD-7B5E-4F96-ABB1-22923FD2B454} NaN None None None 2013-01-18T07:08:41Z None 16178.10 65.47 NJ 020402060505 White Marsh Run-Maurice River S NM 020402060507 0 0 {A70A97FA-E29C-11E2-8094-0021280458E6}
1 MULTIPOLYGON (((-74.62113 41.00565, -74.62139 ... {009DA440-393A-46A0-BBA3-C38139823414} {ED602145-9201-4827-9CE1-05D252484579} None None None 2017-10-03T20:11:06Z None 29107.93 117.80 NJ 020401050502 Lubbers Run-Musconetcong River S NM 020401050503 0 0 {B830B4F8-E29C-11E2-8094-0021280458E6}
2 MULTIPOLYGON (((-75.75805 40.65996, -75.75783 ... {00F73634-FA00-464F-93F0-F0BCCB464567} {ED602145-9201-4827-9CE1-05D252484579} None None None 2017-10-03T20:10:58Z None 35229.84 142.57 PA 020402030305 Upper Maiden Creek S NM 020402030307 0 0 {BAF5E77A-E29C-11E2-8094-0021280458E6}
3 MULTIPOLYGON (((-75.13279 39.88601, -75.13439 ... {017B8A58-E959-4600-B212-8EB921F0F9F7} {E9F5C988-2313-440E-A05E-C5871E2773A6} None None None 2017-10-03T20:11:04Z None 24920.90 100.85 NJ,PA 020402020507 Woodbury Creek-Delaware River S TF 020402020607 0 0 {B122D6B7-E29C-11E2-8094-0021280458E6}
4 MULTIPOLYGON (((-74.92817 40.55256, -74.92833 ... {01B4598B-0623-407F-9133-3040D99A8D1A} {ED602145-9201-4827-9CE1-05D252484579} None None None 2017-10-03T20:10:52Z None 15086.80 61.05 NJ 020401050905 Lockatong Creek S NM 020401050908 0 0 {A6AB04BE-E29C-11E2-8094-0021280458E6}
from holoviews.element.tiles import EsriTerrain

drb = huc12_basins.hvplot(
    geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
    xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
    title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
)
EsriTerrain() * drb

Load gridMET OPeNDAP endpoint with xarray#

  • Note currently the gridMET data require ‘#fillmismatch’ appended to the end of the OPeNDAP endpoint. For example

gridmet_URL = [
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmx_1979_CurrentYear_CONUS.nc#fillmismatch',
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmn_1979_CurrentYear_CONUS.nc#fillmismatch'
]
data = xr.open_mfdataset(gridmet_URL)
data
<xarray.Dataset> Size: 224GB
Dimensions:                    (day: 17273, lat: 585, lon: 1386, crs: 1)
Coordinates:
  * day                        (day) datetime64[ns] 138kB 1979-01-01 ... 2026...
  * lat                        (lat) float64 5kB 49.4 49.36 ... 25.11 25.07
  * lon                        (lon) float64 11kB -124.8 -124.7 ... -67.1 -67.06
  * crs                        (crs) float32 4B 3.0
Data variables:
    daily_maximum_temperature  (day, lat, lon) float64 112GB dask.array<chunksize=(17273, 585, 1386), meta=np.ndarray>
    daily_minimum_temperature  (day, lat, lon) float64 112GB dask.array<chunksize=(17273, 585, 1386), meta=np.ndarray>
Attributes: (12/19)
    geospatial_bounds_crs:      EPSG:4326
    Conventions:                CF-1.0
    geospatial_bounds:          POLYGON((-124.7666666333333 49.40000000000000...
    geospatial_lat_min:         25.066666666666666
    geospatial_lat_max:         49.40000000000000
    geospatial_lon_min:         -124.7666666333333
    ...                         ...
    date:                       03 March 2026
    note1:                      The projection information for this file is: ...
    note2:                      Citation: Abatzoglou, J.T., 2013, Development...
    note3:                      Data in slices after last_permanent_slice (1-...
    note4:                      Data in slices after last_provisional_slice (...
    note5:                      Days correspond approximately to calendar day...

Calculate weights#

  • To determine an area-weighted statistic for each HUC12 polygon, the area of all grid cells (a Shapely Polygon object representing each cell of the grid).

  • The weight of each cell contributing to a HUC12 polygon is the cell-intersected-area / HUC12 polygon-area.

  • Not all all data readable by xarray are made the same. Therefor, when using non-OPeNDAP catalog endpoints, the user must supply some metadata about the xarray data and GeoDataFrame data.

Enable logging#

gdptools uses Python’s standard logging module. By default it emits no output. Configuring the gdptools logger before running weight generation and aggregation lets you monitor progress, timing, and data dimensions throughout the workflow. Here we set gdptools to INFO while keeping third-party libraries at WARNING to avoid noisy output from rasterio, fiona, etc.

# Configure logging: gdptools at INFO, everything else at WARNING
logging.basicConfig(
    level=logging.WARNING,
    format="%(name)s | %(levelname)s | %(message)s",
    force=True,
)
logging.getLogger("gdptools").setLevel(logging.INFO)
data['crs'].attrs.get('spatial_ref')
'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'
# metadata for calculating weights

data_crs = data['crs'].attrs.get('spatial_ref')
x_coord = "lon"
y_coord = "lat"
t_coord = "day"
sdate = "1979-01-01"
edate = "1979-01-07"
var = ["daily_maximum_temperature", "daily_minimum_temperature"]
shp_crs = 4326
shp_poly_idx = "huc12"
wght_gen_crs = 6931

user_data = UserCatData(
    source_ds=data,
    source_crs=data_crs,
    source_x_coord=x_coord,
    source_y_coord=y_coord,
    source_t_coord=t_coord,
    source_var=var,
    target_gdf=huc12_basins,
    target_crs=shp_crs,
    target_id=shp_poly_idx,
    source_time_period=[sdate, edate]
)

wght_gen = WeightGen(
    user_data=user_data,
    method="serial",
    output_file='example_weights.csv',
    weight_gen_crs=6931,
)

wdf = wght_gen.calculate_weights()

To illustrate the format of the saved weights file it’s opened here, however in the next step we use the returned datafame.

pd.read_csv('example_weights.csv')
huc12 i j wght
0 20402060505 74 35 0.081887
1 20402060505 77 35 0.002041
2 20402060505 76 35 0.168929
3 20402060505 75 35 0.201884
4 20402060505 76 34 0.119210
... ... ... ... ...
5677 20401050103 37 36 0.032199
5678 20401050103 36 37 0.184037
5679 20401050103 35 37 0.100861
5680 20401050103 34 37 0.000047
5681 20401050103 36 36 0.023088

5682 rows × 4 columns

Run the grid-to-polygon area weighted average interpolation using the calculated weights.#

  • Future releases will add additional area-weighted statistics. Currently only the mean is returned.

  • If more that one variable were available in the dataset then the run_weights function could be embedded in a loop that indexes each variable.

run_weights Output#

Run weighs returns:

  • ngdf is the GeoDataFrame passed to run_weights. huc12_basins maybe reordered when running weights so the re-ordered GeoDataFrame is returned.

  • vals are the area-weighted interpolated values ordered by the ngdf.index

agg_gen = AggGen(
    user_data=user_data,
    stat_method="masked_mean",
    agg_engine="serial",
    agg_writer="csv",
    weights='example_weights.csv',
    out_path='.',
    file_prefix="test_gm_nc"
)
ngdf, ds_out = agg_gen.calculate_agg()
ds_out
<xarray.Dataset> Size: 58kB
Dimensions:                    (time: 7, huc12: 427)
Coordinates:
  * time                       (time) datetime64[ns] 56B 1979-01-01 ... 1979-...
  * huc12                      (huc12) object 3kB '020401010101' ... '0204020...
    lat                        (huc12) float64 3kB 42.39 42.38 ... 38.76 38.83
    lon                        (huc12) float64 3kB -74.62 -74.71 ... -75.18
Data variables:
    daily_maximum_temperature  (time, huc12) float64 24kB 283.3 283.8 ... 283.8
    crs                        float64 8B 1.0
    daily_minimum_temperature  (time, huc12) float64 24kB 276.9 277.3 ... 273.5
Attributes:
    Conventions:  CF-1.8
    featureType:  timeSeries
    history:      2026_04_18_03_42_45 Original file created by gdptools packa...
pd.read_csv('test_gm_nc.csv', index_col=0)
time varname units 020401010101 020401010102 020401010103 020401010104 020401010105 020401010106 020401010201 ... 020402070503 020402070504 020402070505 020402070506 020402070507 020402070601 020402070602 020402070603 020402070604 020402070605
index
0 1979-01-01 daily_maximum_temperature K 283.342101 283.789520 283.891782 284.313867 284.279681 283.724576 283.688328 ... 288.614109 287.102319 287.257388 287.192510 285.911595 288.647165 287.673752 287.576502 286.573011 285.854688
1 1979-01-02 daily_maximum_temperature K 282.645539 282.961788 283.040125 283.368031 283.418329 282.748207 282.790915 ... 287.766527 285.970048 286.572958 286.722795 285.626663 286.617455 286.196586 286.075713 285.564972 285.071556
2 1979-01-03 daily_maximum_temperature K 270.354667 270.317264 270.102145 270.433602 270.454707 270.254940 270.760374 ... 278.432881 278.402124 278.703507 278.566303 278.221036 278.464381 278.479914 278.553679 278.529936 278.169785
3 1979-01-04 daily_maximum_temperature K 263.918458 263.961688 263.736135 264.144980 264.093689 264.068521 264.586014 ... 274.790052 274.646453 275.191191 275.202789 274.948669 274.324795 274.502752 274.522852 274.589280 274.395581
4 1979-01-05 daily_maximum_temperature K 264.065839 264.056051 263.842020 264.165891 264.218206 264.084139 264.706366 ... 277.148031 277.046858 277.584110 277.602286 277.298743 276.941864 276.956516 277.059530 277.263571 277.058642
5 1979-01-06 daily_maximum_temperature K 269.963731 270.232074 270.220195 270.616740 270.654793 270.416690 270.891040 ... 278.232881 278.396659 278.907483 278.801530 278.666241 278.123928 278.283779 278.408128 278.699587 278.547546
6 1979-01-07 daily_maximum_temperature K 275.403016 275.938433 276.217032 276.513022 276.740472 275.976119 276.418405 ... 284.271129 284.194942 284.334873 284.167058 283.541393 284.590986 284.436977 284.424597 284.175313 283.795570
0 1979-01-01 daily_minimum_temperature K 276.875121 277.317797 277.205682 277.407442 277.398712 277.034636 276.912089 ... 281.526809 280.340967 280.397078 280.529055 280.087390 280.910730 280.530568 280.421870 280.030056 280.079895
1 1979-01-02 daily_minimum_temperature K 269.256667 269.413816 269.070310 269.239835 269.257574 269.118803 269.312846 ... 274.801765 275.134578 274.757241 274.653814 274.893253 275.437823 275.296745 275.423789 275.366362 275.487308
2 1979-01-03 daily_minimum_temperature K 257.501491 257.836032 257.340293 257.918269 257.588494 258.058366 258.241108 ... 263.393736 263.564219 263.443779 263.438990 263.702673 263.170775 263.485903 263.498352 263.640248 263.869741
3 1979-01-04 daily_minimum_temperature K 258.095835 258.408270 257.959467 258.499743 258.186048 258.647050 258.789096 ... 263.532182 263.871596 263.858109 263.911718 264.327853 263.159602 263.670639 263.696368 264.030124 264.398075
4 1979-01-05 daily_minimum_temperature K 259.297268 259.592666 259.187715 259.580379 259.406128 259.667843 259.853056 ... 268.417116 268.802196 269.038191 269.138270 269.711928 267.564882 268.407991 268.464200 269.103907 269.629261
5 1979-01-06 daily_minimum_temperature K 261.074570 261.397559 261.168801 261.430951 261.427480 261.406271 261.782945 ... 271.789742 272.279462 272.127984 272.111718 272.457381 271.686187 272.149811 272.132619 272.379940 272.600686
6 1979-01-07 daily_minimum_temperature K 262.672844 262.951875 262.715915 262.921207 263.001742 262.820974 263.269528 ... 272.644310 273.099901 273.012330 272.954145 273.317758 272.646589 272.972866 273.022227 273.226546 273.487308

14 rows × 430 columns

aggdata = agg_gen.agg_data
ds_ss = aggdata.get("daily_maximum_temperature").da

Plot the gridded data and interpolated data#

var = "daily_maximum_temperature"
time_step = "1979-01-07"
tmin = np.nanmin(ds_ss.sel(day=time_step).values)
tmax = np.nanmax(ds_ss.sel(day=time_step).values)
print(tmin,tmax)
273.1 286.1
ds_out[var].shape
(7, 427)
ngdf[var] = ds_out[var].sel(time=time_step)
gridmet_agg = ngdf.to_crs(4326).hvplot(
    c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="HUC12 area-weighted average", aspect='equal'
)
gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
    'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="Gridmet daily max temperature", aspect='equal'
)
(EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg) + (EsriTerrain() * gridmet_raw * gridmet_agg)