gdptools example without the OPeNDAP catalog - gridMET data#
import warnings
import xarray as xr
import pandas as pd
import numpy as np
import pyproj
from pynhd import NLDI
from pynhd import WaterData
import hvplot.xarray
import hvplot.pandas
from gdptools import WeightGen
from gdptools import AggGen
from gdptools import UserCatData
warnings.filterwarnings("ignore")
Create GeoDataFrame of Delaware River Basin HUC12s#
In this cell we use clients to several https://labs.waterdata.ugsgs.gov tools:
Tool: USGS NLDI Client: pynhd.NLDI
Tool: USGS GeoServer Client: pynhd.WaterData
More examples of using the HyRiver pynhd package can be found here
# USGS gage 01482100 Delaware River at Del Mem Bridge at Wilmington De
gage_id = "01482100"
nldi = NLDI()
del_basins = nldi.get_basins(gage_id)
huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])
---------------------------------------------------------------------------
MissingInputError Traceback (most recent call last)
Cell In[2], line 5
3 nldi = NLDI()
4 del_basins = nldi.get_basins(gage_id)
----> 5 huc12_basins = WaterData("huc12").bygeom(del_basins.geometry[0])
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pynhd/pynhd.py:676, in WaterData.bygeom(self, geometry, geo_crs, xy, predicate, sort_attr)
634 def bygeom(
635 self,
636 geometry: Polygon | MultiPolygon,
(...)
640 sort_attr: str | None = None,
641 ) -> gpd.GeoDataFrame:
642 """Get features within a geometry.
643
644 Parameters
(...)
674 The requested features in the given geometry.
675 """
--> 676 resp = self.wfs.getfeature_bygeom(
677 geometry, geo_crs, always_xy=not xy, predicate=predicate.upper(), sort_attr=sort_attr
678 )
679 resp = cast("list[dict[str, Any]]", resp)
680 return self._to_geodf(resp)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:689, in WFS.getfeature_bygeom(self, geometry, geo_crs, always_xy, predicate, sort_attr)
686 if predicate.upper() not in valid_predicates:
687 raise InputValueError("predicate", valid_predicates)
--> 689 return self.getfeature_byfilter(
690 f"{predicate.upper()}({geom_name}, {g_wkt})", method="POST", sort_attr=sort_attr
691 )
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:790, in WFS.getfeature_byfilter(self, cql_filter, method, sort_attr)
787 except (IndexError, ValueError) as ex:
788 raise ServiceError(resp[0], self.url) from ex
--> 790 payloads = [
791 {
792 "service": "wfs",
793 "version": self.version,
794 "outputFormat": self.outformat,
795 "request": "GetFeature",
796 "typeName": self.layer,
797 "srsName": self.crs_str,
798 "cql_filter": cql_filter,
799 **self.sort_params(sort_attr, nfeatures, i),
800 }
801 for i in range(0, nfeatures, self.max_nrecords)
802 ]
803 if method == "GET":
804 return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/pygeoogc.py:799, in <listcomp>(.0)
787 except (IndexError, ValueError) as ex:
788 raise ServiceError(resp[0], self.url) from ex
790 payloads = [
791 {
792 "service": "wfs",
793 "version": self.version,
794 "outputFormat": self.outformat,
795 "request": "GetFeature",
796 "typeName": self.layer,
797 "srsName": self.crs_str,
798 "cql_filter": cql_filter,
--> 799 **self.sort_params(sort_attr, nfeatures, i),
800 }
801 for i in range(0, nfeatures, self.max_nrecords)
802 ]
803 if method == "GET":
804 return self.retrieve([self.url] * len(payloads), [{"params": p} for p in payloads])
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pygeoogc/core.py:626, in WFSBase.sort_params(self, sort_attr, nfeatures, start_index)
624 msg += " Please set the sort_attr manually. Available columns are:\n"
625 msg += ", ".join(list(valid_attrs))
--> 626 raise MissingInputError(msg)
628 if valid_attrs and sort_attr not in valid_attrs:
629 raise InputValueError("sort_attr", list(valid_attrs))
MissingInputError: sort_attr is None and no id column found in the schema. Please set the sort_attr manually. Available columns are:
huc12, tohuc, areaacres, areasqkm, hutype, humod, states, noncontrib
from holoviews.element.tiles import EsriTerrain
drb = huc12_basins.hvplot(
geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
)
EsriTerrain() * drb
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 2
1 from holoviews.element.tiles import EsriTerrain
----> 2 drb = huc12_basins.hvplot(
3 geo=True, coastline='50m', alpha=0.2, c='r', frame_width=300,
4 xlabel="longitude", ylabel="latitude", clabel="monthly water evaporation (mm)",
5 title="Delaware River HUC12 basins", xlim=(-76.8, -73.8), aspect='equal'
6 )
7 EsriTerrain() * drb
NameError: name 'huc12_basins' is not defined
Load gridMET OPeNDAP endpoint with xarray#
Note currently the gridMET data require ‘#fillmismatch’ appended to the end of the OPeNDAP endpoint. For example
gridmet_URL = [
'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmx_1979_CurrentYear_CONUS.nc#fillmismatch',
'http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_met_tmmn_1979_CurrentYear_CONUS.nc#fillmismatch'
]
data = xr.open_mfdataset(gridmet_URL)
data
<xarray.Dataset> Dimensions: (lat: 585, crs: 1, lon: 1386, day: 16465) Coordinates: * lat (lat) float64 49.4 49.36 49.32 ... 25.11 25.07 * crs (crs) float32 3.0 * lon (lon) float64 -124.8 -124.7 ... -67.1 -67.06 * day (day) datetime64[ns] 1979-01-01 ... 2024-01-29 Data variables: daily_maximum_temperature (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray> daily_minimum_temperature (day, lat, lon) float32 dask.array<chunksize=(16465, 585, 1386), meta=np.ndarray> Attributes: (12/19) geospatial_bounds_crs: EPSG:4326 Conventions: CF-1.0 geospatial_bounds: POLYGON((-124.7666666333333 49.40000000000000... geospatial_lat_min: 25.066666666666666 geospatial_lat_max: 49.40000000000000 geospatial_lon_min: -124.7666666333333 ... ... date: 30 January 2024 note1: The projection information for this file is: ... note2: Citation: Abatzoglou, J.T., 2013, Development... note3: Data in slices after last_permanent_slice (1-... note4: Data in slices after last_provisional_slice (... note5: Days correspond approximately to calendar day...
Calculate weights#
To determine an area-weighted statistic for each HUC12 polygon, the area of all grid cells (a Shapely Polygon object representing each cell of the grid).
The weight of each cell contributing to a HUC12 polygon is the cell-intersected-area / HUC12 polygon-area.
Not all all data readable by xarray are made the same. Therefor, when using non-OPeNDAP catalog endpoints, the user must supply some metadata about the xarray data and GeoDataFrame data.
# metadata for calculating weights
data_crs = 4326
x_coord = "lon"
y_coord = "lat"
t_coord = "day"
sdate = "1979-01-01"
edate = "1979-01-07"
var = ["daily_maximum_temperature", "daily_minimum_temperature"]
shp_crs = 4326
shp_poly_idx = "huc12"
wght_gen_crs = 6931
user_data = UserCatData(
ds=data,
proj_ds=data_crs,
x_coord=x_coord,
y_coord=y_coord,
t_coord=t_coord,
var=var,
f_feature=huc12_basins,
proj_feature=shp_crs,
id_feature=shp_poly_idx,
period=[sdate, edate]
)
wght_gen = WeightGen(
user_data=user_data,
method="serial",
output_file='example_weights.csv',
weight_gen_crs=6931,
)
wdf = wght_gen.calculate_weights()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 21
11 shp_poly_idx = "huc12"
12 wght_gen_crs = 6931
14 user_data = UserCatData(
15 ds=data,
16 proj_ds=data_crs,
17 x_coord=x_coord,
18 y_coord=y_coord,
19 t_coord=t_coord,
20 var=var,
---> 21 f_feature=huc12_basins,
22 proj_feature=shp_crs,
23 id_feature=shp_poly_idx,
24 period=[sdate, edate]
25 )
27 wght_gen = WeightGen(
28 user_data=user_data,
29 method="serial",
30 output_file='example_weights.csv',
31 weight_gen_crs=6931,
32 )
34 wdf = wght_gen.calculate_weights()
NameError: name 'huc12_basins' is not defined
To illustrate the format of the saved weights file it’s opened here, however in the next step we use the returned datafame.
pd.read_csv('example_weights.csv')
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[6], line 1
----> 1 pd.read_csv('example_weights.csv')
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
1011 kwds_defaults = _refine_defaults_read(
1012 dialect,
1013 delimiter,
(...)
1020 dtype_backend=dtype_backend,
1021 )
1022 kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
615 _validate_names(kwds.get("names", None))
617 # Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
620 if chunksize or iterator:
621 return parser
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
1615 self.options["has_index_names"] = kwds["has_index_names"]
1617 self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
1876 if "b" not in mode:
1877 mode += "b"
-> 1878 self.handles = get_handle(
1879 f,
1880 mode,
1881 encoding=self.options.get("encoding", None),
1882 compression=self.options.get("compression", None),
1883 memory_map=self.options.get("memory_map", False),
1884 is_text=is_text,
1885 errors=self.options.get("encoding_errors", "strict"),
1886 storage_options=self.options.get("storage_options", None),
1887 )
1888 assert self.handles is not None
1889 f = self.handles.handle
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
868 elif isinstance(handle, str):
869 # Check whether the filename is to be opened in binary mode.
870 # Binary mode does not support 'encoding' and 'newline'.
871 if ioargs.encoding and "b" not in ioargs.mode:
872 # Encoding
--> 873 handle = open(
874 handle,
875 ioargs.mode,
876 encoding=ioargs.encoding,
877 errors=errors,
878 newline="",
879 )
880 else:
881 # Binary mode
882 handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'example_weights.csv'
Run the grid-to-polygon area weighted average interpolation using the calculated weights.#
Future releases will add additional area-weighted statistics. Currently only the mean is returned.
If more that one variable were available in the dataset then the run_weights function could be embedded in a loop that indexes each variable.
run_weights Output#
Run weighs returns:
ngdf is the GeoDataFrame passed to run_weights. huc12_basins maybe reordered when running weights so the re-ordered GeoDataFrame is returned.
vals are the area-weighted interpolated values ordered by the ngdf.index
agg_gen = AggGen(
user_data=user_data,
stat_method="masked_mean",
agg_engine="serial",
agg_writer="csv",
weights='example_weights.csv',
out_path='.',
file_prefix="test_gm_nc"
)
ngdf, ds_out = agg_gen.calculate_agg()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 2
1 agg_gen = AggGen(
----> 2 user_data=user_data,
3 stat_method="masked_mean",
4 agg_engine="serial",
5 agg_writer="csv",
6 weights='example_weights.csv',
7 out_path='.',
8 file_prefix="test_gm_nc"
9 )
10 ngdf, ds_out = agg_gen.calculate_agg()
NameError: name 'user_data' is not defined
ds_out
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 1
----> 1 ds_out
NameError: name 'ds_out' is not defined
pd.read_csv('test_gm_nc.csv', index_col=0)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[9], line 1
----> 1 pd.read_csv('test_gm_nc.csv', index_col=0)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
1011 kwds_defaults = _refine_defaults_read(
1012 dialect,
1013 delimiter,
(...)
1020 dtype_backend=dtype_backend,
1021 )
1022 kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
615 _validate_names(kwds.get("names", None))
617 # Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
620 if chunksize or iterator:
621 return parser
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
1615 self.options["has_index_names"] = kwds["has_index_names"]
1617 self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
1876 if "b" not in mode:
1877 mode += "b"
-> 1878 self.handles = get_handle(
1879 f,
1880 mode,
1881 encoding=self.options.get("encoding", None),
1882 compression=self.options.get("compression", None),
1883 memory_map=self.options.get("memory_map", False),
1884 is_text=is_text,
1885 errors=self.options.get("encoding_errors", "strict"),
1886 storage_options=self.options.get("storage_options", None),
1887 )
1888 assert self.handles is not None
1889 f = self.handles.handle
File ~/checkouts/readthedocs.org/user_builds/gdptools/conda/latest/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
868 elif isinstance(handle, str):
869 # Check whether the filename is to be opened in binary mode.
870 # Binary mode does not support 'encoding' and 'newline'.
871 if ioargs.encoding and "b" not in ioargs.mode:
872 # Encoding
--> 873 handle = open(
874 handle,
875 ioargs.mode,
876 encoding=ioargs.encoding,
877 errors=errors,
878 newline="",
879 )
880 else:
881 # Binary mode
882 handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'test_gm_nc.csv'
aggdata = agg_gen.agg_data
ds_ss = aggdata.get("daily_maximum_temperature").da
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[10], line 1
----> 1 aggdata = agg_gen.agg_data
2 ds_ss = aggdata.get("daily_maximum_temperature").da
NameError: name 'agg_gen' is not defined
Plot the gridded data and interpolated data#
var = "daily_maximum_temperature"
time_step = "1979-01-07"
tmin = np.nanmin(ds_ss.sel(day=time_step).values)
tmax = np.nanmax(ds_ss.sel(day=time_step).values)
print(tmin,tmax)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 3
1 var = "daily_maximum_temperature"
2 time_step = "1979-01-07"
----> 3 tmin = np.nanmin(ds_ss.sel(day=time_step).values)
4 tmax = np.nanmax(ds_ss.sel(day=time_step).values)
5 print(tmin,tmax)
NameError: name 'ds_ss' is not defined
ds_out[var].shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[12], line 1
----> 1 ds_out[var].shape
NameError: name 'ds_out' is not defined
ngdf[var] = ds_out[var].sel(time=time_step)
gridmet_agg = ngdf.to_crs(4326).hvplot(
c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
title="HUC12 area-weighted average", aspect='equal'
)
gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
title="Gridmet daily max temperature", aspect='equal'
)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[13], line 1
----> 1 ngdf[var] = ds_out[var].sel(time=time_step)
2 gridmet_agg = ngdf.to_crs(4326).hvplot(
3 c='daily_maximum_temperature', geo=True, coastline='50m', frame_width=300, alpha=1, cmap='viridis', clim=(tmin, tmax),
4 xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
5 title="HUC12 area-weighted average", aspect='equal'
6 )
7 gridmet_raw = ds_ss.isel(day=6).hvplot.quadmesh(
8 'lon', 'lat', 'daily_maximum_temperature', cmap='viridis', alpha=1, grid=True, geo=True, coastline='50m', frame_width=300, clim=(tmin, tmax),
9 xlabel="longitude", ylabel="latitude", clabel="daily_maximum_temperature (K)", xlim=(-76.8, -73.8),
10 title="Gridmet daily max temperature", aspect='equal'
11 )
NameError: name 'ds_out' is not defined
(EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[14], line 1
----> 1 (EsriTerrain() * gridmet_raw) + (EsriTerrain() * gridmet_agg)
NameError: name 'gridmet_raw' is not defined
(EsriTerrain() * gridmet_raw * gridmet_agg)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[15], line 1
----> 1 (EsriTerrain() * gridmet_raw * gridmet_agg)
NameError: name 'gridmet_raw' is not defined