gdptools example without the OPeNDAP catalog - gridMET data#
import logging
import warnings
import hvplot.pandas
import hvplot.xarray
import numpy as np
import pandas as pd
import xarray as xr
from pynhd import NLDI, WaterData
from pygeohydro import watershed
from gdptools import AggGen, UserCatData, WeightGen
warnings.filterwarnings("ignore")
Create GeoDataFrame of Delaware River Basin HUC12s#
In this cell we use clients to several https://api.water.usgs.gov tools:
Tool: USGS NLDI Client: pynhd.NLDI
Tool: USGS GeoServer Client: pynhd.WaterData
More examples of using the HyRiver pynhd package can be found here
# Delaware River Basin HUC12s
# --------------------------------
wbd = watershed.WBD("huc4")
delaware_basin = wbd.byids(field="huc4", fids="0204")
huc12_basins = WaterData("wbd12").bygeom(delaware_basin.iloc[0].geometry)
huc12_basins = huc12_basins[huc12_basins["huc12"].str.startswith(("020401", "020402"))]
huc12_basins.head()
| geometry | tnmid | metasourceid | sourcedatadesc | sourceoriginator | sourcefeatureid | loaddate | gnis_id | areaacres | areasqkm | states | huc12 | name | hutype | humod | tohuc | noncontributingareaacres | noncontributingareasqkm | globalid | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | MULTIPOLYGON (((-75.02421 39.44397, -75.02353 ... | {001055CD-7B5E-4F96-ABB1-22923FD2B454} | NaN | None | None | None | 2013-01-18T07:08:41Z | None | 16178.10 | 65.47 | NJ | 020402060505 | White Marsh Run-Maurice River | S | NM | 020402060507 | 0 | 0 | {A70A97FA-E29C-11E2-8094-0021280458E6} |
| 1 | MULTIPOLYGON (((-74.62113 41.00565, -74.62139 ... | {009DA440-393A-46A0-BBA3-C38139823414} | {ED602145-9201-4827-9CE1-05D252484579} | None | None | None | 2017-10-03T20:11:06Z | None | 29107.93 | 117.80 | NJ | 020401050502 | Lubbers Run-Musconetcong River | S | NM | 020401050503 | 0 | 0 | {B830B4F8-E29C-11E2-8094-0021280458E6} |
| 2 | MULTIPOLYGON (((-75.75805 40.65996, -75.75783 ... | {00F73634-FA00-464F-93F0-F0BCCB464567} | {ED602145-9201-4827-9CE1-05D252484579} | None | None | None | 2017-10-03T20:10:58Z | None | 35229.84 | 142.57 | PA | 020402030305 | Upper Maiden Creek | S | NM | 020402030307 | 0 | 0 | {BAF5E77A-E29C-11E2-8094-0021280458E6} |
| 3 | MULTIPOLYGON (((-75.13279 39.88601, -75.13439 ... | {017B8A58-E959-4600-B212-8EB921F0F9F7} | {E9F5C988-2313-440E-A05E-C5871E2773A6} | None | None | None | 2017-10-03T20:11:04Z | None | 24920.90 | 100.85 | NJ,PA | 020402020507 | Woodbury Creek-Delaware River | S | TF | 020402020607 | 0 | 0 | {B122D6B7-E29C-11E2-8094-0021280458E6} |
| 4 | MULTIPOLYGON (((-74.92817 40.55256, -74.92833 ... | {01B4598B-0623-407F-9133-3040D99A8D1A} | {ED602145-9201-4827-9CE1-05D252484579} | None | None | None | 2017-10-03T20:10:52Z | None | 15086.80 | 61.05 | NJ | 020401050905 | Lockatong Creek | S | NM | 020401050908 | 0 | 0 | {A6AB04BE-E29C-11E2-8094-0021280458E6} |
from holoviews.element.tiles import EsriTerrain
drb = huc12_basins.hvplot(
geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
)
EsriTerrain() * drb
Load gridMET OPeNDAP endpoint with xarray#
Note currently the gridMET data require ‘#fillmismatch’ appended to the end of the OPeNDAP endpoint. For example
gridmet_URL = [
'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmx_1979_CurrentYear_CONUS.nc#fillmismatch',
'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmn_1979_CurrentYear_CONUS.nc#fillmismatch'
]
data = xr.open_mfdataset(gridmet_URL)
data
<xarray.Dataset> Size: 224GB
Dimensions: (day: 17272, lat: 585, lon: 1386, crs: 1)
Coordinates:
* day (day) datetime64[ns] 138kB 1979-01-01 ... 2026...
* lat (lat) float64 5kB 49.4 49.36 ... 25.11 25.07
* lon (lon) float64 11kB -124.8 -124.7 ... -67.1 -67.06
* crs (crs) float32 4B 3.0
Data variables:
daily_maximum_temperature (day, lat, lon) float64 112GB dask.array<chunksize=(17272, 585, 1386), meta=np.ndarray>
daily_minimum_temperature (day, lat, lon) float64 112GB dask.array<chunksize=(17272, 585, 1386), meta=np.ndarray>
Attributes: (12/19)
geospatial_bounds_crs: EPSG:4326
Conventions: CF-1.0
geospatial_bounds: POLYGON((-124.7666666333333 49.40000000000000...
geospatial_lat_min: 25.066666666666666
geospatial_lat_max: 49.40000000000000
geospatial_lon_min: -124.7666666333333
... ...
date: 03 March 2026
note1: The projection information for this file is: ...
note2: Citation: Abatzoglou, J.T., 2013, Development...
note3: Data in slices after last_permanent_slice (1-...
note4: Data in slices after last_provisional_slice (...
note5: Days correspond approximately to calendar day...Calculate weights#
To determine an area-weighted statistic for each HUC12 polygon, the area of all grid cells (a Shapely Polygon object representing each cell of the grid).
The weight of each cell contributing to a HUC12 polygon is the cell-intersected-area / HUC12 polygon-area.
Not all all data readable by xarray are made the same. Therefor, when using non-OPeNDAP catalog endpoints, the user must supply some metadata about the xarray data and GeoDataFrame data.
Enable logging#
gdptools uses Python’s standard logging module. By default it emits no
output. Configuring the gdptools logger before running weight generation and
aggregation lets you monitor progress, timing, and data dimensions throughout
the workflow. Here we set gdptools to INFO while keeping third-party
libraries at WARNING to avoid noisy output from rasterio, fiona, etc.
# Configure logging: gdptools at INFO, everything else at WARNING
logging.basicConfig(
level=logging.WARNING,
format="%(name)s | %(levelname)s | %(message)s",
force=True,
)
logging.getLogger("gdptools").setLevel(logging.INFO)
data['crs'].attrs.get('spatial_ref')
'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]]'
# metadata for calculating weights
data_crs = data['crs'].attrs.get('spatial_ref')
x_coord = "lon"
y_coord = "lat"
t_coord = "day"
sdate = "1979-01-01"
edate = "1979-01-07"
var = ["daily_maximum_temperature", "daily_minimum_temperature"]
shp_crs = 4326
shp_poly_idx = "huc12"
wght_gen_crs = 6931
user_data = UserCatData(
source_ds=data,
source_crs=data_crs,
source_x_coord=x_coord,
source_y_coord=y_coord,
source_t_coord=t_coord,
source_var=var,
target_gdf=huc12_basins,
target_crs=shp_crs,
target_id=shp_poly_idx,
source_time_period=[sdate, edate]
)
wght_gen = WeightGen(
user_data=user_data,
method="serial",
output_file='example_weights.csv',
weight_gen_crs=6931,
)
wdf = wght_gen.calculate_weights()
To illustrate the format of the saved weights file it’s opened here, however in the next step we use the returned datafame.
pd.read_csv('example_weights.csv')
| huc12 | i | j | wght | |
|---|---|---|---|---|
| 0 | 20402060505 | 74 | 35 | 0.081887 |
| 1 | 20402060505 | 77 | 35 | 0.002041 |
| 2 | 20402060505 | 76 | 35 | 0.168929 |
| 3 | 20402060505 | 75 | 35 | 0.201884 |
| 4 | 20402060505 | 76 | 34 | 0.119210 |
| ... | ... | ... | ... | ... |
| 5677 | 20401050103 | 37 | 36 | 0.032199 |
| 5678 | 20401050103 | 36 | 37 | 0.184037 |
| 5679 | 20401050103 | 35 | 37 | 0.100861 |
| 5680 | 20401050103 | 34 | 37 | 0.000047 |
| 5681 | 20401050103 | 36 | 36 | 0.023088 |
5682 rows × 4 columns
Run the grid-to-polygon area weighted average interpolation using the calculated weights.#
Future releases will add additional area-weighted statistics. Currently only the mean is returned.
If more that one variable were available in the dataset then the run_weights function could be embedded in a loop that indexes each variable.
run_weights Output#
Run weighs returns:
ngdf is the GeoDataFrame passed to run_weights. huc12_basins maybe reordered when running weights so the re-ordered GeoDataFrame is returned.
vals are the area-weighted interpolated values ordered by the ngdf.index
agg_gen = AggGen(
user_data=user_data,
stat_method="masked_mean",
agg_engine="serial",
agg_writer="csv",
weights='example_weights.csv',
out_path='.',
file_prefix="test_gm_nc"
)
ngdf, ds_out = agg_gen.calculate_agg()
ds_out
<xarray.Dataset> Size: 58kB
Dimensions: (time: 7, huc12: 427)
Coordinates:
* time (time) datetime64[ns] 56B 1979-01-01 ... 1979-...
* huc12 (huc12) object 3kB '020401010101' ... '0204020...
lat (huc12) float64 3kB 42.39 42.38 ... 38.76 38.83
lon (huc12) float64 3kB -74.62 -74.71 ... -75.18
Data variables:
daily_maximum_temperature (time, huc12) float64 24kB 283.3 283.8 ... 283.8
crs float64 8B 1.0
daily_minimum_temperature (time, huc12) float64 24kB 276.9 277.3 ... 273.5
Attributes:
Conventions: CF-1.8
featureType: timeSeries
history: 2026_04_17_18_26_49 Original file created by gdptools packa...pd.read_csv('test_gm_nc.csv', index_col=0)
| time | varname | units | 020401010101 | 020401010102 | 020401010103 | 020401010104 | 020401010105 | 020401010106 | 020401010201 | ... | 020402070503 | 020402070504 | 020402070505 | 020402070506 | 020402070507 | 020402070601 | 020402070602 | 020402070603 | 020402070604 | 020402070605 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| index | |||||||||||||||||||||
| 0 | 1979-01-01 | daily_maximum_temperature | K | 283.342101 | 283.789520 | 283.891782 | 284.313867 | 284.279681 | 283.724576 | 283.688328 | ... | 288.614109 | 287.102319 | 287.257388 | 287.192510 | 285.911595 | 288.647165 | 287.673752 | 287.576502 | 286.573011 | 285.854688 |
| 1 | 1979-01-02 | daily_maximum_temperature | K | 282.645539 | 282.961788 | 283.040125 | 283.368031 | 283.418329 | 282.748207 | 282.790915 | ... | 287.766527 | 285.970048 | 286.572958 | 286.722795 | 285.626663 | 286.617455 | 286.196586 | 286.075713 | 285.564972 | 285.071556 |
| 2 | 1979-01-03 | daily_maximum_temperature | K | 270.354667 | 270.317264 | 270.102145 | 270.433602 | 270.454707 | 270.254940 | 270.760374 | ... | 278.432881 | 278.402124 | 278.703507 | 278.566303 | 278.221036 | 278.464381 | 278.479914 | 278.553679 | 278.529936 | 278.169785 |
| 3 | 1979-01-04 | daily_maximum_temperature | K | 263.918458 | 263.961688 | 263.736135 | 264.144980 | 264.093689 | 264.068521 | 264.586014 | ... | 274.790052 | 274.646453 | 275.191191 | 275.202789 | 274.948669 | 274.324795 | 274.502752 | 274.522852 | 274.589280 | 274.395581 |
| 4 | 1979-01-05 | daily_maximum_temperature | K | 264.065839 | 264.056051 | 263.842020 | 264.165891 | 264.218206 | 264.084139 | 264.706366 | ... | 277.148031 | 277.046858 | 277.584110 | 277.602286 | 277.298743 | 276.941864 | 276.956516 | 277.059530 | 277.263571 | 277.058642 |
| 5 | 1979-01-06 | daily_maximum_temperature | K | 269.963731 | 270.232074 | 270.220195 | 270.616740 | 270.654793 | 270.416690 | 270.891040 | ... | 278.232881 | 278.396659 | 278.907483 | 278.801530 | 278.666241 | 278.123928 | 278.283779 | 278.408128 | 278.699587 | 278.547546 |
| 6 | 1979-01-07 | daily_maximum_temperature | K | 275.403016 | 275.938433 | 276.217032 | 276.513022 | 276.740472 | 275.976119 | 276.418405 | ... | 284.271129 | 284.194942 | 284.334873 | 284.167058 | 283.541393 | 284.590986 | 284.436977 | 284.424597 | 284.175313 | 283.795570 |
| 0 | 1979-01-01 | daily_minimum_temperature | K | 276.875121 | 277.317797 | 277.205682 | 277.407442 | 277.398712 | 277.034636 | 276.912089 | ... | 281.526809 | 280.340967 | 280.397078 | 280.529055 | 280.087390 | 280.910730 | 280.530568 | 280.421870 | 280.030056 | 280.079895 |
| 1 | 1979-01-02 | daily_minimum_temperature | K | 269.256667 | 269.413816 | 269.070310 | 269.239835 | 269.257574 | 269.118803 | 269.312846 | ... | 274.801765 | 275.134578 | 274.757241 | 274.653814 | 274.893253 | 275.437823 | 275.296745 | 275.423789 | 275.366362 | 275.487308 |
| 2 | 1979-01-03 | daily_minimum_temperature | K | 257.501491 | 257.836032 | 257.340293 | 257.918269 | 257.588494 | 258.058366 | 258.241108 | ... | 263.393736 | 263.564219 | 263.443779 | 263.438990 | 263.702673 | 263.170775 | 263.485903 | 263.498352 | 263.640248 | 263.869741 |
| 3 | 1979-01-04 | daily_minimum_temperature | K | 258.095835 | 258.408270 | 257.959467 | 258.499743 | 258.186048 | 258.647050 | 258.789096 | ... | 263.532182 | 263.871596 | 263.858109 | 263.911718 | 264.327853 | 263.159602 | 263.670639 | 263.696368 | 264.030124 | 264.398075 |
| 4 | 1979-01-05 | daily_minimum_temperature | K | 259.297268 | 259.592666 | 259.187715 | 259.580379 | 259.406128 | 259.667843 | 259.853056 | ... | 268.417116 | 268.802196 | 269.038191 | 269.138270 | 269.711928 | 267.564882 | 268.407991 | 268.464200 | 269.103907 | 269.629261 |
| 5 | 1979-01-06 | daily_minimum_temperature | K | 261.074570 | 261.397559 | 261.168801 | 261.430951 | 261.427480 | 261.406271 | 261.782945 | ... | 271.789742 | 272.279462 | 272.127984 | 272.111718 | 272.457381 | 271.686187 | 272.149811 | 272.132619 | 272.379940 | 272.600686 |
| 6 | 1979-01-07 | daily_minimum_temperature | K | 262.672844 | 262.951875 | 262.715915 | 262.921207 | 263.001742 | 262.820974 | 263.269528 | ... | 272.644310 | 273.099901 | 273.012330 | 272.954145 | 273.317758 | 272.646589 | 272.972866 | 273.022227 | 273.226546 | 273.487308 |
14 rows × 430 columns
aggdata = agg_gen.agg_data
ds_ss = aggdata.get("daily_maximum_temperature").da
Plot the gridded data and interpolated data#
var = "daily_maximum_temperature"
time_step = "1979-01-07"
tmin = np.nanmin(ds_ss.sel(day=time_step).values)
tmax = np.nanmax(ds_ss.sel(day=time_step).values)
print(tmin,tmax)
273.1 286.1
ds_out[var].shape
(7, 427)
ngdf[var] = ds_out[var].sel(time=time_step)
gridmet_agg = ngdf.to_crs(4326).hvplot(
c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
title="HUC12 area-weighted average", aspect='equal'
)
gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
title="Gridmet daily max temperature", aspect='equal'
)
(EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg) + (EsriTerrain() * gridmet_raw * gridmet_agg)