Source code for zen3geo.datapipes.geopandas

"""
DataPipes for :doc:`geopandas <geopandas:index>`.
"""
from typing import Any, Dict, Iterator, Optional, Union

try:
    import geopandas as gpd
except ImportError:
    gpd = None
import xarray as xr
from torchdata.datapipes import functional_datapipe
from torchdata.datapipes.iter import IterDataPipe


[docs] @functional_datapipe("clip_vector_with_rectangle") class GeoPandasRectangleClipperIterDataPipe(IterDataPipe): """ Takes vector :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame` geometries and clips them with the rectangular extent of an :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` grid to yield tuples of spatially subsetted :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame` vectors and the correponding :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` raster object used as the clip mask (functional name: ``clip_vector_with_rectangle``). Uses the rectangular clip algorithm of :py:func:`geopandas.clip`, with the bounding box rectangle (minx, miny, maxx, maxy) derived from input raster mask's bounding box extent. Note ---- If the input vector's coordinate reference system (``crs``) is different to the raster mask's coordinate reference system (``rio.crs``), the vector will be reprojected using :py:meth:`geopandas.GeoDataFrame.to_crs` to match the raster's coordinate reference system. Parameters ---------- source_datapipe : IterDataPipe[geopandas.GeoDataFrame] A DataPipe that contains :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame` vector geometries with a :py:attr:`.crs <geopandas.GeoDataFrame.crs>` property. mask_datapipe : IterDataPipe[xarray.DataArray] A DataPipe that contains :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` objects with a :py:attr:`.rio.crs <rioxarray.rioxarray.XRasterBase.crs>` property and :py:meth:`.rio.bounds <rioxarray.rioxarray.XRasterBase.bounds>` method. kwargs : Optional Extra keyword arguments to pass to :py:func:`geopandas.clip`. Yields ------ paired_obj : Tuple[geopandas.GeoDataFrame, xarray.DataArray] A tuple consisting of the spatially subsetted :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame` vector, and the corresponding :py:class:`xarray.DataArray` or :py:class:`xarray.Dataset` raster used as the clip mask. Raises ------ ModuleNotFoundError If ``geopandas`` is not installed. See :doc:`install instructions for geopandas <geopandas:getting_started/install>` (e.g. via ``pip install geopandas``) before using this class. NotImplementedError If the length of the vector ``source_datapipe`` is not 1. Currently, all of the vector geometries have to be merged into a single :py:class:`geopandas.GeoSeries` or :py:class:`geopandas.GeoDataFrame`. Refer to the section on Appending under geopandas' :doc:`geopandas:docs/user_guide/mergingdata` docs. Example ------- >>> import pytest >>> import rioxarray >>> gpd = pytest.importorskip("geopandas") ... >>> from torchdata.datapipes.iter import IterableWrapper >>> from zen3geo.datapipes import GeoPandasRectangleClipper ... >>> # Read in a vector polygon data source >>> geodataframe = gpd.read_file( ... filename="https://github.com/geopandas/geopandas/raw/v0.11.1/geopandas/tests/data/overlay/polys/df1.geojson", ... ) >>> assert geodataframe.crs == "EPSG:4326" # latitude/longitude coords >>> dp_vector = IterableWrapper(iterable=[geodataframe]) ... >>> # Get list of raster grids to cut up the vector polygon later >>> dataarray = rioxarray.open_rasterio( ... filename="https://github.com/rasterio/rasterio/raw/1.3.2/tests/data/world.byte.tif" ... ) >>> assert dataarray.rio.crs == "EPSG:4326" # latitude/longitude coords >>> dp_raster = IterableWrapper( ... iterable=[ ... dataarray.sel(x=slice(0, 2)), # longitude 0 to 2 degrees ... dataarray.sel(x=slice(2, 4)), # longitude 2 to 4 degrees ... ] ... ) ... >>> # Clip vector point geometries based on raster masks >>> dp_clipped = dp_vector.clip_vector_with_rectangle( ... mask_datapipe=dp_raster ... ) ... >>> # Loop or iterate over the DataPipe stream >>> it = iter(dp_clipped) >>> geodataframe0, raster0 = next(it) >>> geodataframe0 col1 geometry 0 1 POLYGON ((0.00000 0.00000, 0.00000 2.00000, 2.... >>> raster0 <xarray.DataArray (band: 1, y: 1200, x: 16)> array([[[0, 0, ..., 0, 0], [0, 0, ..., 0, 0], ..., [1, 1, ..., 1, 1], [1, 1, ..., 1, 1]]], dtype=uint8) Coordinates: * band (band) int64 1 * x (x) float64 0.0625 0.1875 0.3125 0.4375 ... 1.688 1.812 1.938 * y (y) float64 74.94 74.81 74.69 74.56 ... -74.69 -74.81 -74.94 spatial_ref int64 0 ... >>> geodataframe1, raster1 = next(it) >>> geodataframe1 col1 geometry 1 2 POLYGON ((2.00000 2.00000, 2.00000 4.00000, 4.... """ def __init__( self, source_datapipe: IterDataPipe, mask_datapipe: IterDataPipe[Union[xr.DataArray, xr.Dataset]], **kwargs: Optional[Dict[str, Any]], ) -> None: if gpd is None: raise ModuleNotFoundError( "Package `geopandas` is required to be installed to use this datapipe. " "Please use `pip install geopandas` or " "`conda install -c conda-forge geopandas` " "to install the package" ) self.source_datapipe: IterDataPipe = source_datapipe self.mask_datapipe: IterDataPipe[xr.DataArray] = mask_datapipe self.kwargs = kwargs len_vector_datapipe: int = len(self.source_datapipe) if len_vector_datapipe != 1: raise NotImplementedError( f"The vector datapipe's length can only be (1) for now, but got " f"({len_vector_datapipe}) instead. Consider merging your vector data " f"into a single `geopandas.GeoSeries` or `geopandas.GeoDataFrame`, " f"e.g. using `geodataframe0.append(geodataframe2)`." ) def __iter__(self) -> Iterator: geodataframe = list(self.source_datapipe).pop() for raster in self.mask_datapipe: mask = raster.rio.bounds() try: assert geodataframe.crs == raster.rio.crs _geodataframe = geodataframe except AssertionError: _geodataframe = geodataframe.to_crs(crs=raster.rio.crs) clipped_geodataframe = _geodataframe.clip(mask=mask, **self.kwargs) yield clipped_geodataframe, raster def __len__(self) -> int: return len(self.mask_datapipe)