gdptools example without the OPeNDAP catalog - gridMET data

gdptools example without the OPeNDAP catalog - gridMET data#

import warnings

import xarray as xr
import pandas as pd
import numpy as np
import pyproj
from pynhd import NLDI
from pynhd import WaterData
import hvplot.xarray
import hvplot.pandas

from gdptools import WeightGen
from gdptools import AggGen
from gdptools import UserCatData

warnings.filterwarnings("ignore")

Create GeoDataFrame of Delaware River Basin HUC12s#

In this cell we use clients to several https://labs.waterdata.ugsgs.gov tools:

Tool: USGS NLDI Client: pynhd.NLDI
Tool: USGS GeoServer Client: pynhd.WaterData

More examples of using the HyRiver pynhd package can be found here

# USGS gage 01482100 Delaware River at Del Mem Bridge at Wilmington De
gage_id = "01482100"
nldi = NLDI()
del_basins = nldi.get_basins(gage_id)
huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])

---------------------------------------------------------------------------
MissingInputError                         Traceback (most recent call last)
Cell In[2], line 5
nldi = NLDI()
del_basins = nldi.get_basins(gage_id)
----> 5 huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pynhd/pynhd.py:676, in WaterData.bygeom(self, geometry, geo_crs, xy, predicate, sort_attr)
def bygeom(
   self,
   geometry: Polygon | MultiPolygon,
   (...)
   sort_attr: str | None = None,
) -> gpd.GeoDataFrame:
   """Get features within a geometry.

   Parameters
   (...)
       The requested features in the given geometry.
   """
--> 676     resp = self.wfs.getfeature_bygeom(
       geometry, geo_crs, always_xy=not xy, predicate=predicate.upper(), sort_attr=sort_attr
   )
   resp = cast("list[dict[str, Any]]", resp)
   return self._to_geodf(resp)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:689, in WFS.getfeature_bygeom(self, geometry, geo_crs, always_xy, predicate, sort_attr)
if predicate.upper() not in valid_predicates:
   raise InputValueError("predicate", valid_predicates)
--> 689 return self.getfeature_byfilter(
   f"{predicate.upper()}({geom_name}, {g_wkt})", method="POST", sort_attr=sort_attr
)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:790, in WFS.getfeature_byfilter(self, cql_filter, method, sort_attr)
except (IndexError, ValueError) as ex:
   raise ServiceError(resp[0], self.url) from ex
--> 790 payloads = [
   {
       "service": "wfs",
       "version": self.version,
       "outputFormat": self.outformat,
       "request": "GetFeature",
       "typeName": self.layer,
       "srsName": self.crs_str,
       "cql_filter": cql_filter,
       **self.sort_params(sort_attr, nfeatures, i),
   }
   for i in range(0, nfeatures, self.max_nrecords)
]
if method == "GET":
   return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:799, in <listcomp>(.0)
except (IndexError, ValueError) as ex:
   raise ServiceError(resp[0], self.url) from ex
payloads = [
   {
       "service": "wfs",
       "version": self.version,
       "outputFormat": self.outformat,
       "request": "GetFeature",
       "typeName": self.layer,
       "srsName": self.crs_str,
       "cql_filter": cql_filter,
--> 799         **self.sort_params(sort_attr, nfeatures, i),
   }
   for i in range(0, nfeatures, self.max_nrecords)
]
if method == "GET":
   return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/core.py:626, in WFSBase.sort_params(self, sort_attr, nfeatures, start_index)
       msg += " Please set the sort_attr manually. Available columns are:\n"
       msg += ", ".join(list(valid_attrs))
--> 626         raise MissingInputError(msg)
if valid_attrs and sort_attr not in valid_attrs:
   raise InputValueError("sort_attr", list(valid_attrs))

MissingInputError: sort_attr is None and no id column found in the schema. Please set the sort_attr manually. Available columns are:
huc12, tohuc, areaacres, areasqkm, hutype, humod, states, noncontrib

from holoviews.element.tiles import EsriTerrain
drb = huc12_basins.hvplot(
    geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
    xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
    title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
)
EsriTerrain() * drb

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[3], line 2
      1 from holoviews.element.tiles import EsriTerrain
----> 2 drb = huc12_basins.hvplot(
      3     geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
      4     xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
      5     title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
      6 )
      7 EsriTerrain() * drb

NameError: name 'huc12_basins' is not defined

Load gridMET OPeNDAP endpoint with xarray#

Note currently the gridMET data require ‘#fillmismatch’ appended to the end of the OPeNDAP endpoint. For example

gridmet_URL = [
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmx_1979_CurrentYear_CONUS.nc#fillmismatch',
    'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmn_1979_CurrentYear_CONUS.nc#fillmismatch'
]
data = xr.open_mfdataset(gridmet_URL)
data

<xarray.Dataset>
Dimensions:                    (lat: 585, crs: 1, lon: 1386, day: 16465)
Coordinates:
  * lat                        (lat) float64 49.4 49.36 49.32 ... 25.11 25.07
  * crs                        (crs) float32 3.0
  * lon                        (lon) float64 -124.8 -124.7 ... -67.1 -67.06
  * day                        (day) datetime64[ns] 1979-01-01 ... 2024-01-29
Data variables:
    daily_maximum_temperature  (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray>
    daily_minimum_temperature  (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray>
Attributes: (12/19)
    geospatial_bounds_crs:      EPSG:4326
    Conventions:                CF-1.0
    geospatial_bounds:          POLYGON((-124.7666666333333 49.40000000000000...
    geospatial_lat_min:         25.066666666666666
    geospatial_lat_max:         49.40000000000000
    geospatial_lon_min:         -124.7666666333333
    ...                         ...
    date:                       30 January 2024
    note1:                      The projection information for this file is: ...
    note2:                      Citation: Abatzoglou, J.T., 2013, Development...
    note3:                      Data in slices after last_permanent_slice (1-...
    note4:                      Data in slices after last_provisional_slice (...
    note5:                      Days correspond approximately to calendar day...

Calculate weights#

To determine an area-weighted statistic for each HUC12 polygon, the area of all grid cells (a Shapely Polygon object representing each cell of the grid).
The weight of each cell contributing to a HUC12 polygon is the cell-intersected-area / HUC12 polygon-area.
Not all all data readable by xarray are made the same. Therefor, when using non-OPeNDAP catalog endpoints, the user must supply some metadata about the xarray data and GeoDataFrame data.

# metadata for calculating weights

data_crs = 4326
x_coord = "lon"
y_coord = "lat"
t_coord = "day"
sdate = "1979-01-01"
edate = "1979-01-07"
var = ["daily_maximum_temperature", "daily_minimum_temperature"]
shp_crs = 4326
shp_poly_idx = "huc12"
wght_gen_crs = 6931

user_data = UserCatData(
    ds=data,
    proj_ds=data_crs,
    x_coord=x_coord,
    y_coord=y_coord,
    t_coord=t_coord,
    var=var,
    f_feature=huc12_basins,
    proj_feature=shp_crs,
    id_feature=shp_poly_idx,
    period=[sdate, edate]
)

wght_gen = WeightGen(
    user_data=user_data,
    method="serial",
    output_file='example_weights.csv',
    weight_gen_crs=6931,
)

wdf = wght_gen.calculate_weights()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[5], line 21
shp_poly_idx = "huc12"
wght_gen_crs = 6931
user_data = UserCatData(
   ds=data,
   proj_ds=data_crs,
   x_coord=x_coord,
   y_coord=y_coord,
   t_coord=t_coord,
   var=var,
---> 21     f_feature=huc12_basins,
   proj_feature=shp_crs,
   id_feature=shp_poly_idx,
   period=[sdate, edate]
)
wght_gen = WeightGen(
   user_data=user_data,
   method="serial",
   output_file='example_weights.csv',
   weight_gen_crs=6931,
)
wdf = wght_gen.calculate_weights()

NameError: name 'huc12_basins' is not defined

To illustrate the format of the saved weights file it’s opened here, however in the next step we use the returned datafame.

pd.read_csv('example_weights.csv')

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[6], line 1
----> 1 pd.read_csv('example_weights.csv')

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
kwds_defaults = _refine_defaults_read(
   dialect,
   delimiter,
   (...)
   dtype_backend=dtype_backend,
)
kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
_validate_names(kwds.get("names", None))
# Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
if chunksize or iterator:
   return parser

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
   self.options["has_index_names"] = kwds["has_index_names"]
self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
   if "b" not in mode:
       mode += "b"
-> 1878 self.handles = get_handle(
   f,
   mode,
   encoding=self.options.get("encoding", None),
   compression=self.options.get("compression", None),
   memory_map=self.options.get("memory_map", False),
   is_text=is_text,
   errors=self.options.get("encoding_errors", "strict"),
   storage_options=self.options.get("storage_options", None),
)
assert self.handles is not None
f = self.handles.handle

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
elif isinstance(handle, str):
   # Check whether the filename is to be opened in binary mode.
   # Binary mode does not support 'encoding' and 'newline'.
   if ioargs.encoding and "b" not in ioargs.mode:
       # Encoding
--> 873         handle = open(
           handle,
           ioargs.mode,
           encoding=ioargs.encoding,
           errors=errors,
           newline="",
       )
   else:
       # Binary mode
       handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'example_weights.csv'

Run the grid-to-polygon area weighted average interpolation using the calculated weights.#

Future releases will add additional area-weighted statistics. Currently only the mean is returned.
If more that one variable were available in the dataset then the run_weights function could be embedded in a loop that indexes each variable.

run_weights Output#

Run weighs returns:

ngdf is the GeoDataFrame passed to run_weights. huc12_basins maybe reordered when running weights so the re-ordered GeoDataFrame is returned.
vals are the area-weighted interpolated values ordered by the ngdf.index

agg_gen = AggGen(
    user_data=user_data,
    stat_method="masked_mean",
    agg_engine="serial",
    agg_writer="csv",
    weights='example_weights.csv',
    out_path='.',
    file_prefix="test_gm_nc"
)
ngdf, ds_out = agg_gen.calculate_agg()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[7], line 2
      1 agg_gen = AggGen(
----> 2     user_data=user_data,
      3     stat_method="masked_mean",
      4     agg_engine="serial",
      5     agg_writer="csv",
      6     weights='example_weights.csv',
      7     out_path='.',
      8     file_prefix="test_gm_nc"
      9 )
     10 ngdf, ds_out = agg_gen.calculate_agg()

NameError: name 'user_data' is not defined

ds_out

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 ds_out

NameError: name 'ds_out' is not defined

pd.read_csv('test_gm_nc.csv', index_col=0)

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[9], line 1
----> 1 pd.read_csv('test_gm_nc.csv', index_col=0)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
kwds_defaults = _refine_defaults_read(
   dialect,
   delimiter,
   (...)
   dtype_backend=dtype_backend,
)
kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
_validate_names(kwds.get("names", None))
# Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
if chunksize or iterator:
   return parser

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
   self.options["has_index_names"] = kwds["has_index_names"]
self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
   if "b" not in mode:
       mode += "b"
-> 1878 self.handles = get_handle(
   f,
   mode,
   encoding=self.options.get("encoding", None),
   compression=self.options.get("compression", None),
   memory_map=self.options.get("memory_map", False),
   is_text=is_text,
   errors=self.options.get("encoding_errors", "strict"),
   storage_options=self.options.get("storage_options", None),
)
assert self.handles is not None
f = self.handles.handle

File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
elif isinstance(handle, str):
   # Check whether the filename is to be opened in binary mode.
   # Binary mode does not support 'encoding' and 'newline'.
   if ioargs.encoding and "b" not in ioargs.mode:
       # Encoding
--> 873         handle = open(
           handle,
           ioargs.mode,
           encoding=ioargs.encoding,
           errors=errors,
           newline="",
       )
   else:
       # Binary mode
       handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'test_gm_nc.csv'

aggdata = agg_gen.agg_data
ds_ss = aggdata.get("daily_maximum_temperature").da

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[10], line 1
----> 1 aggdata = agg_gen.agg_data
      2 ds_ss = aggdata.get("daily_maximum_temperature").da

NameError: name 'agg_gen' is not defined

Plot the gridded data and interpolated data#

var = "daily_maximum_temperature"
time_step = "1979-01-07"
tmin = np.nanmin(ds_ss.sel(day=time_step).values)
tmax = np.nanmax(ds_ss.sel(day=time_step).values)
print(tmin,tmax)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[11], line 3
      1 var = "daily_maximum_temperature"
      2 time_step = "1979-01-07"
----> 3 tmin = np.nanmin(ds_ss.sel(day=time_step).values)
      4 tmax = np.nanmax(ds_ss.sel(day=time_step).values)
      5 print(tmin,tmax)

NameError: name 'ds_ss' is not defined

ds_out[var].shape

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[12], line 1
----> 1 ds_out[var].shape

NameError: name 'ds_out' is not defined

ngdf[var] = ds_out[var].sel(time=time_step)
gridmet_agg = ngdf.to_crs(4326).hvplot(
    c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="HUC12 area-weighted average", aspect='equal'
)
gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
    'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
    xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
    title="Gridmet daily max temperature", aspect='equal'
)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[13], line 1
----> 1 ngdf[var] = ds_out[var].sel(time=time_step)
      2 gridmet_agg = ngdf.to_crs(4326).hvplot(
      3     c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
      4     xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
      5     title="HUC12 area-weighted average", aspect='equal'
      6 )
      7 gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
      8     'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
      9     xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
     10     title="Gridmet daily max temperature", aspect='equal'
     11 )

NameError: name 'ds_out' is not defined

(EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[14], line 1
----> 1 (EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)

NameError: name 'gridmet_raw' is not defined

(EsriTerrain() * gridmet_raw * gridmet_agg)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[15], line 1
----> 1 (EsriTerrain() * gridmet_raw * gridmet_agg)

NameError: name 'gridmet_raw' is not defined