gdptools example without the OPeNDAP catalog - gridMET data#

import warnings

import xarray as xr
import pandas as pd
import numpy as np
import pyproj
from pynhd import NLDI
from pynhd import WaterData
import hvplot.xarray
import hvplot.pandas

from gdptools import WeightGen
from gdptools import AggGen
from gdptools import UserCatData

warnings.filterwarnings("ignore")

Create GeoDataFrame of Delaware River Basin HUC12s#

In this cell we use clients to several https://labs.waterdata.ugsgs.gov tools:

More examples of using the HyRiver pynhd package can be found here

# USGS gage 01482100 Delaware River at Del Mem Bridge at Wilmington De
gage_id = "01482100"
nldi = NLDI()
del_basins = nldi.get_basins(gage_id)
huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])
---------------------------------------------------------------------------
MissingInputError                         Traceback (most recent call last)
Cell In[2], line 5
      3 nldi = NLDI()
      4 del_basins = nldi.get_basins(gage_id)
----> 5 huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pynhd/pynhd.py:676, in WaterData.bygeom(self, geometry, geo_crs, xy, predicate, sort_attr)
    634 def bygeom(
    635     self,
    636     geometry: Polygon | MultiPolygon,
   (...)
    640     sort_attr: str | None = None,
    641 ) -> gpd.GeoDataFrame:
    642     """Get features within a geometry.
    643 
    644     Parameters
   (...)
    674         The requested features in the given geometry.
    675     """
--> 676     resp = self.wfs.getfeature_bygeom(
    677         geometry, geo_crs, always_xy=not xy, predicate=predicate.upper(), sort_attr=sort_attr
    678     )
    679     resp = cast("list[dict[str, Any]]", resp)
    680     return self._to_geodf(resp)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:689, in WFS.getfeature_bygeom(self, geometry, geo_crs, always_xy, predicate, sort_attr)
    686 if predicate.upper() not in valid_predicates:
    687     raise InputValueError("predicate", valid_predicates)
--> 689 return self.getfeature_byfilter(
    690     f"{predicate.upper()}({geom_name}, {g_wkt})", method="POST", sort_attr=sort_attr
    691 )

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:790, in WFS.getfeature_byfilter(self, cql_filter, method, sort_attr)
    787 except (IndexError, ValueError) as ex:
    788     raise ServiceError(resp[0], self.url) from ex
--> 790 payloads = [
    791     {
    792         "service": "wfs",
    793         "version": self.version,
    794         "outputFormat": self.outformat,
    795         "request": "GetFeature",
    796         "typeName": self.layer,
    797         "srsName": self.crs_str,
    798         "cql_filter": cql_filter,
    799         **self.sort_params(sort_attr, nfeatures, i),
    800     }
    801     for i in range(0, nfeatures, self.max_nrecords)
    802 ]
    803 if method == "GET":
    804     return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:799, in <listcomp>(.0)
    787 except (IndexError, ValueError) as ex:
    788     raise ServiceError(resp[0], self.url) from ex
    790 payloads = [
    791     {
    792         "service": "wfs",
    793         "version": self.version,
    794         "outputFormat": self.outformat,
    795         "request": "GetFeature",
    796         "typeName": self.layer,
    797         "srsName": self.crs_str,
    798         "cql_filter": cql_filter,
--> 799         **self.sort_params(sort_attr, nfeatures, i),
    800     }
    801     for i in range(0, nfeatures, self.max_nrecords)
    802 ]
    803 if method == "GET":
    804     return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/core.py:626, in WFSBase.sort_params(self, sort_attr, nfeatures, start_index)
    624         msg += " Please set the sort_attr manually. Available columns are:\n"
    625         msg += ", ".join(list(valid_attrs))
--> 626         raise MissingInputError(msg)
    628 if valid_attrs and sort_attr not in valid_attrs:
    629     raise InputValueError("sort_attr", list(valid_attrs))

MissingInputError: sort_attr is None and no id column found in the schema. Please set the sort_attr manually. Available columns are:
huc12, tohuc, areaacres, areasqkm, hutype, humod, states, noncontrib
from holoviews.element.tiles import EsriTerrain
drb = huc12_basins.hvplot(
    geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
    xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
    title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
)
EsriTerrain() * drb
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 2
      1 from holoviews.element.tiles import EsriTerrain
----> 2 drb = huc12_basins.hvplot(
      3     geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
      4     xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
      5     title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
      6 )
      7 EsriTerrain() * drb

NameError: name 'huc12_basins' is not defined

Load gridMET OPeNDAP endpoint with xarray#

  • Note currently the gridMET data require ‘#fillmismatch’ appended to the end of the OPeNDAP endpoint. For example

gridmet_URL = [
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmx_1979_CurrentYear_CONUS.nc#fillmismatch',
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmn_1979_CurrentYear_CONUS.nc#fillmismatch'
]
data = xr.open_mfdataset(gridmet_URL)
data
<xarray.Dataset>
Dimensions:                    (lat: 585, crs: 1, lon: 1386, day: 16465)
Coordinates:
  * lat                        (lat) float64 49.4 49.36 49.32 ... 25.11 25.07
  * crs                        (crs) float32 3.0
  * lon                        (lon) float64 -124.8 -124.7 ... -67.1 -67.06
  * day                        (day) datetime64[ns] 1979-01-01 ... 2024-01-29
Data variables:
    daily_maximum_temperature  (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray>
    daily_minimum_temperature  (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray>
Attributes: (12/19)
    geospatial_bounds_crs:      EPSG:4326
    Conventions:                CF-1.0
    geospatial_bounds:          POLYGON((-124.7666666333333 49.40000000000000...
    geospatial_lat_min:         25.066666666666666
    geospatial_lat_max:         49.40000000000000
    geospatial_lon_min:         -124.7666666333333
    ...                         ...
    date:                       30 January 2024
    note1:                      The projection information for this file is: ...
    note2:                      Citation: Abatzoglou, J.T., 2013, Development...
    note3:                      Data in slices after last_permanent_slice (1-...
    note4:                      Data in slices after last_provisional_slice (...
    note5:                      Days correspond approximately to calendar day...

Calculate weights#

  • To determine an area-weighted statistic for each HUC12 polygon, the area of all grid cells (a Shapely Polygon object representing each cell of the grid).

  • The weight of each cell contributing to a HUC12 polygon is the cell-intersected-area / HUC12 polygon-area.

  • Not all all data readable by xarray are made the same. Therefor, when using non-OPeNDAP catalog endpoints, the user must supply some metadata about the xarray data and GeoDataFrame data.

# metadata for calculating weights

data_crs = 4326
x_coord = "lon"
y_coord = "lat"
t_coord = "day"
sdate = "1979-01-01"
edate = "1979-01-07"
var = ["daily_maximum_temperature", "daily_minimum_temperature"]
shp_crs = 4326
shp_poly_idx = "huc12"
wght_gen_crs = 6931

user_data = UserCatData(
    ds=data,
    proj_ds=data_crs,
    x_coord=x_coord,
    y_coord=y_coord,
    t_coord=t_coord,
    var=var,
    f_feature=huc12_basins,
    proj_feature=shp_crs,
    id_feature=shp_poly_idx,
    period=[sdate, edate]
)

wght_gen = WeightGen(
    user_data=user_data,
    method="serial",
    output_file='example_weights.csv',
    weight_gen_crs=6931,
)

wdf = wght_gen.calculate_weights()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 21
     11 shp_poly_idx = "huc12"
     12 wght_gen_crs = 6931
     14 user_data = UserCatData(
     15     ds=data,
     16     proj_ds=data_crs,
     17     x_coord=x_coord,
     18     y_coord=y_coord,
     19     t_coord=t_coord,
     20     var=var,
---> 21     f_feature=huc12_basins,
     22     proj_feature=shp_crs,
     23     id_feature=shp_poly_idx,
     24     period=[sdate, edate]
     25 )
     27 wght_gen = WeightGen(
     28     user_data=user_data,
     29     method="serial",
     30     output_file='example_weights.csv',
     31     weight_gen_crs=6931,
     32 )
     34 wdf = wght_gen.calculate_weights()

NameError: name 'huc12_basins' is not defined

To illustrate the format of the saved weights file it’s opened here, however in the next step we use the returned datafame.

pd.read_csv('example_weights.csv')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[6], line 1
----> 1 pd.read_csv('example_weights.csv')

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
   1011 kwds_defaults = _refine_defaults_read(
   1012     dialect,
   1013     delimiter,
   (...)
   1020     dtype_backend=dtype_backend,
   1021 )
   1022 kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
    615 _validate_names(kwds.get("names", None))
    617 # Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
    620 if chunksize or iterator:
    621     return parser

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
   1615     self.options["has_index_names"] = kwds["has_index_names"]
   1617 self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
   1876     if "b" not in mode:
   1877         mode += "b"
-> 1878 self.handles = get_handle(
   1879     f,
   1880     mode,
   1881     encoding=self.options.get("encoding", None),
   1882     compression=self.options.get("compression", None),
   1883     memory_map=self.options.get("memory_map", False),
   1884     is_text=is_text,
   1885     errors=self.options.get("encoding_errors", "strict"),
   1886     storage_options=self.options.get("storage_options", None),
   1887 )
   1888 assert self.handles is not None
   1889 f = self.handles.handle

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    868 elif isinstance(handle, str):
    869     # Check whether the filename is to be opened in binary mode.
    870     # Binary mode does not support 'encoding' and 'newline'.
    871     if ioargs.encoding and "b" not in ioargs.mode:
    872         # Encoding
--> 873         handle = open(
    874             handle,
    875             ioargs.mode,
    876             encoding=ioargs.encoding,
    877             errors=errors,
    878             newline="",
    879         )
    880     else:
    881         # Binary mode
    882         handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'example_weights.csv'

Run the grid-to-polygon area weighted average interpolation using the calculated weights.#

  • Future releases will add additional area-weighted statistics. Currently only the mean is returned.

  • If more that one variable were available in the dataset then the run_weights function could be embedded in a loop that indexes each variable.

run_weights Output#

Run weighs returns:

  • ngdf is the GeoDataFrame passed to run_weights. huc12_basins maybe reordered when running weights so the re-ordered GeoDataFrame is returned.

  • vals are the area-weighted interpolated values ordered by the ngdf.index

agg_gen = AggGen(
    user_data=user_data,
    stat_method="masked_mean",
    agg_engine="serial",
    agg_writer="csv",
    weights='example_weights.csv',
    out_path='.',
    file_prefix="test_gm_nc"
)
ngdf, ds_out = agg_gen.calculate_agg()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 2
      1 agg_gen = AggGen(
----> 2     user_data=user_data,
      3     stat_method="masked_mean",
      4     agg_engine="serial",
      5     agg_writer="csv",
      6     weights='example_weights.csv',
      7     out_path='.',
      8     file_prefix="test_gm_nc"
      9 )
     10 ngdf, ds_out = agg_gen.calculate_agg()

NameError: name 'user_data' is not defined
ds_out
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 ds_out

NameError: name 'ds_out' is not defined
pd.read_csv('test_gm_nc.csv', index_col=0)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[9], line 1
----> 1 pd.read_csv('test_gm_nc.csv', index_col=0)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
   1011 kwds_defaults = _refine_defaults_read(
   1012     dialect,
   1013     delimiter,
   (...)
   1020     dtype_backend=dtype_backend,
   1021 )
   1022 kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
    615 _validate_names(kwds.get("names", None))
    617 # Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
    620 if chunksize or iterator:
    621     return parser

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
   1615     self.options["has_index_names"] = kwds["has_index_names"]
   1617 self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
   1876     if "b" not in mode:
   1877         mode += "b"
-> 1878 self.handles = get_handle(
   1879     f,
   1880     mode,
   1881     encoding=self.options.get("encoding", None),
   1882     compression=self.options.get("compression", None),
   1883     memory_map=self.options.get("memory_map", False),
   1884     is_text=is_text,
   1885     errors=self.options.get("encoding_errors", "strict"),
   1886     storage_options=self.options.get("storage_options", None),
   1887 )
   1888 assert self.handles is not None
   1889 f = self.handles.handle

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    868 elif isinstance(handle, str):
    869     # Check whether the filename is to be opened in binary mode.
    870     # Binary mode does not support 'encoding' and 'newline'.
    871     if ioargs.encoding and "b" not in ioargs.mode:
    872         # Encoding
--> 873         handle = open(
    874             handle,
    875             ioargs.mode,
    876             encoding=ioargs.encoding,
    877             errors=errors,
    878             newline="",
    879         )
    880     else:
    881         # Binary mode
    882         handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'test_gm_nc.csv'
aggdata = agg_gen.agg_data
ds_ss = aggdata.get("daily_maximum_temperature").da
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 1
----> 1 aggdata = agg_gen.agg_data
      2 ds_ss = aggdata.get("daily_maximum_temperature").da

NameError: name 'agg_gen' is not defined

Plot the gridded data and interpolated data#

var = "daily_maximum_temperature"
time_step = "1979-01-07"
tmin = np.nanmin(ds_ss.sel(day=time_step).values)
tmax = np.nanmax(ds_ss.sel(day=time_step).values)
print(tmin,tmax)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[11], line 3
      1 var = "daily_maximum_temperature"
      2 time_step = "1979-01-07"
----> 3 tmin = np.nanmin(ds_ss.sel(day=time_step).values)
      4 tmax = np.nanmax(ds_ss.sel(day=time_step).values)
      5 print(tmin,tmax)

NameError: name 'ds_ss' is not defined
ds_out[var].shape
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 1
----> 1 ds_out[var].shape

NameError: name 'ds_out' is not defined
ngdf[var] = ds_out[var].sel(time=time_step)
gridmet_agg = ngdf.to_crs(4326).hvplot(
    c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="HUC12 area-weighted average", aspect='equal'
)
gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
    'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="Gridmet daily max temperature", aspect='equal'
)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 ngdf[var] = ds_out[var].sel(time=time_step)
      2 gridmet_agg = ngdf.to_crs(4326).hvplot(
      3     c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
      4     xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
      5     title="HUC12 area-weighted average", aspect='equal'
      6 )
      7 gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
      8     'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
      9     xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
     10     title="Gridmet daily max temperature", aspect='equal'
     11 )

NameError: name 'ds_out' is not defined
(EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[14], line 1
----> 1 (EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)

NameError: name 'gridmet_raw' is not defined
(EsriTerrain() * gridmet_raw * gridmet_agg)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[15], line 1
----> 1 (EsriTerrain() * gridmet_raw * gridmet_agg)

NameError: name 'gridmet_raw' is not defined