Copied xpublish examples from nextgen-ioos-2023 #261

Open: wants to merge 13 commits into `main`
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -7,6 +7,7 @@ graft xpublish
prune docs
prune tests
prune notebooks
prune examples
prune *.egg-info

global-exclude *.nc
4 changes: 4 additions & 0 deletions codecov.yml
@@ -21,3 +21,7 @@ coverage:
default:
threshold: 0%
if_not_found: success

ignore:
- "examples"
- "docs"
63 changes: 63 additions & 0 deletions examples/README.md
@@ -0,0 +1,63 @@
# Xpublish Exploring Setup

## Using Mamba

In many cases, conda times out while trying to create this environment. If that happens, install [Micromamba](https://mamba.readthedocs.io/en/latest/micromamba-installation.html) first and then run these commands instead:

```bash
micromamba env create -f environment.yaml
micromamba activate xpublish-exploring
```

## Using conda

> Note: This method may time out

Create the environment and activate:

```bash
conda env create -f environment.yaml
conda activate xpublish-exploring
```

## Starting the Server

Run `python demo.py`; on success you should see output similar to:

`Uvicorn running on http://0.0.0.0:9000 (Press CTRL+C to quit)`

After seeing this message, you can then navigate to this address in your browser:

http://localhost:9000/docs

Here you should see the interactive API documentation, where you can inspect and test the endpoints that Xpublish provides.

# Xpublish Tutorial

## Exploring available datasets

Navigate to http://localhost:9000/datasets to see a list of the datasets available on your Xpublish host. These are defined in the `TutorialDataset` plugin class in `demo.py`.

Inspect one of those datasets by appending the dataset name to the URL. For example, http://localhost:9000/datasets/air_temperature/

This returns an xarray model view of that dataset.

## Calculating the mean value

Xpublish's plugin system allows custom operations to be run against the data on the server. The example `mean.py` defines a custom plugin that calculates the mean of a specified variable. The power of this ecosystem is that any dataset served by Xpublish is interoperable with any sensible plugin.

As an end user, you choose which variable to average using the URL pattern `/datasets/[dataset]/[variable]/mean`. For example, http://localhost:9000/datasets/air_temperature/air/mean
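Conceptually, the endpoint reduces one variable to a single number on the server. A minimal pure-Python sketch of that logic (the dict below is a hypothetical stand-in for a real xarray dataset; the actual plugin code lives in `mean.py`):

```python
import math

# Hypothetical stand-in for an xarray Dataset: variable name -> values
dataset = {'air': [241.2, 242.5, float('nan')], 'time': [0.0, 1.0, 2.0]}


def get_mean(var_name: str, dataset: dict):
    """Mirror the /{var}/mean endpoint: an error for unknown variables,
    the string 'NaN' for not-a-number results."""
    if var_name not in dataset:
        raise KeyError(f"Variable '{var_name}' not found in dataset")
    mean = sum(dataset[var_name]) / len(dataset[var_name])
    if math.isnan(mean):
        return 'NaN'
    return float(mean)
```

Calling `get_mean('time', dataset)` returns `1.0`, while a variable containing NaN propagates to the string `'NaN'`, matching the endpoint's behavior.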

## Working with Custom Bounding Boxes

Another custom plugin is defined in `lme.py`. You can access this plugin at http://localhost:9000/lme/

The plugin defines several regions of interest (not actual LMEs, but several areas of interest from GMRI). Each key, or `lme_ID`, references a bounding box that can be used to subset any of the datasets, assuming the region and dataset overlap.

This plugin returns a subset of the data based on the bounding box defined for each LME. The URL structure is `/datasets/[dataset]/lme/[lme_ID]`, for example, http://localhost:9000/datasets/air_temperature/lme/EC/

> Note: This currently returns "NaN" when the region and the dataset don't overlap. This is something we can work on as part of this breakout session.
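The NaN behavior follows from the geometry: if a dataset's coordinates never fall inside the region's bounding box, the subset is empty and any reduction over it yields NaN. A pure-Python sketch of the membership test (bounding boxes are `[west, east, south, north]`, as in `lme.py`; the sample points are made up):

```python
# East Coast bounding box from lme.py: [west, east, south, north]
EC_BBOX = [-81.75, -65.375, 25.000, 45.125]


def in_bbox(lat: float, lon: float, bbox) -> bool:
    """True if the point falls inside the bounding box."""
    west, east, south, north = bbox
    return south <= lat <= north and west <= lon <= east


# A Gulf of Maine point overlaps the East Coast box; an equatorial one does not
points = [(43.7, -69.9), (0.0, -30.0)]
overlap = [p for p in points if in_bbox(*p, EC_BBOX)]
```

An empty `overlap` is the pure-Python analogue of the empty subset that produces "NaN" on the server.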

## Combining the Plugins

Plugins can be combined by chaining their URL paths. For example, to find the mean over a specific region, append the variable and mean calculation to the subset URL: http://localhost:9000/datasets/air_temperature/lme/EC/air/mean
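Since each plugin contributes a path segment, the combined request is just string composition. A sketch (the helper name is our own, not part of Xpublish):

```python
def mean_over_region_url(base: str, dataset: str, lme_id: str, var: str) -> str:
    # Region subset first, then the variable and the mean operation
    return f'{base}/datasets/{dataset}/lme/{lme_id}/{var}/mean'


url = mean_over_region_url('http://localhost:9000', 'air_temperature', 'EC', 'air')
```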
57 changes: 57 additions & 0 deletions examples/demo.py
@@ -0,0 +1,57 @@
import cf_xarray  # noqa: F401 - registers the .cf accessor used below
import rioxarray  # noqa: F401 - registers the .rio accessor used below
import xarray as xr
from requests import HTTPError

from xpublish import Plugin, Rest, hookimpl


class TutorialDataset(Plugin):
"""Demonstrates how to create a plugin to load a dataset for demo purposes.

This uses the default xarray tutorial datasets.
"""

name: str = 'xarray-tutorial-datasets'

@hookimpl
def get_datasets(self):
"""Returns a list of available datasets.

This function returns a list of the available datasets that can be loaded using the xarray.tutorial.file_formats module.
"""
return list(xr.tutorial.file_formats)

@hookimpl
def get_dataset(self, dataset_id: str):
"""Retrieves a dataset from the xarray tutorial dataset by the given dataset ID.

Args:
dataset_id (str): The ID of the dataset to retrieve.

Returns:
xarray.Dataset: The retrieved dataset, or None if the dataset could not be loaded.
"""
try:
ds = xr.tutorial.open_dataset(dataset_id)
if ds.cf.coords['longitude'].dims[0] == 'longitude':
ds = ds.assign_coords(longitude=(((ds.longitude + 180) % 360) - 180)).sortby(
'longitude'
)
            # TODO: this assumes a regular lat/lon grid; for now we write
            # CRS 4326 so the data can be visualized with rioxarray
ds = ds.rio.write_crs(4326)
return ds
except HTTPError:
return None


rest = Rest({})
rest.register_plugin(TutorialDataset())

### For this tutorial, the following lines activate the additional plugins; comment them out to run only the base server:

from lme import LmeSubsetPlugin
from mean import MeanPlugin

rest.register_plugin(MeanPlugin())
rest.register_plugin(LmeSubsetPlugin())

rest.serve()
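The longitude normalization in `get_dataset` above converts 0-360 longitudes to the -180 to 180 convention before sorting. Isolated as a pure function, the arithmetic is:

```python
def wrap_longitude(lon: float) -> float:
    """Map a longitude from the 0..360 convention to -180..180."""
    return ((lon + 180) % 360) - 180
```

For example, 200 maps to -160, while values already in range, like 90, are unchanged.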
25 changes: 25 additions & 0 deletions examples/environment.yaml
@@ -0,0 +1,25 @@
name: xpublish-exploring
channels:
- conda-forge
dependencies:
- python=3.11
- xarray-datatree
- xpublish>=0.3.1
- xpublish-edr
- xpublish-opendap
- dask
- distributed
- netcdf4
- zarr
- s3fs
- fsspec
- cf_xarray
- kerchunk
- h5py
- intake-xarray
- h5netcdf
- pydap
- h5pyd
- regionmask
- ipykernel
- pooch
  - rioxarray # used by demo.py via the .rio accessor
107 changes: 107 additions & 0 deletions examples/lme.py
@@ -0,0 +1,107 @@
from typing import Sequence

from fastapi import APIRouter

from xpublish import Dependencies, Plugin, hookimpl

regions = {
'GB': {'bbox': [-69.873, -65.918, 40.280, 42.204], 'name': 'Georges Bank'},
'GOM': {'bbox': [-70.975, -65.375, 40.375, 45.125], 'name': 'Gulf Of Maine'},
'MAB': {
'bbox': [-77.036, -70.005, 35.389, 41.640],
'name': 'MidAtlantic Bight',
},
'NESHELF': {
'bbox': [-77.45, -66.35, 34.50, 44.50],
'name': 'North East Shelf',
},
'SS': {'bbox': [-66.775, -65.566, 41.689, 45.011], 'name': 'Scotian Shelf'},
'EC': {'bbox': [-81.75, -65.375, 25.000, 45.125], 'name': 'East Coast'},
'NEC': {'bbox': [-81.45, -63.30, 28.70, 44.80], 'name': 'Northeast Coast'},
}

DEFAULT_TAGS = ['lme', 'large marine ecosystem', 'subset']


class LmeSubsetPlugin(Plugin):
"""The LmeSubsetPlugin class is a FastAPI plugin that provides an API for retrieving information about Large Marine Ecosystems (LMEs) and generating datasets for specific LME regions.

The plugin defines two routers:
- The `app_router` provides a GET endpoint at `/lme` that returns a dictionary of LME names and their IDs.
- The `dataset_router` provides a GET endpoint at `/lme/{region_id}` that takes a dataset ID and a region ID, and returns a subset of the dataset for the specified region.

The `get_region_dataset` function is used to generate the dataset subset by slicing the dataset along the latitude dimension based on the bounding box of the specified region.
"""

name: str = 'lme-subset-plugin'

app_router_prefix: str = '/lme'
app_router_tags: Sequence[str] = DEFAULT_TAGS

dataset_router_prefix: str = '/lme'
dataset_router_tags: Sequence[str] = DEFAULT_TAGS

@hookimpl
def app_router(self):
"""Provides an API router for retrieving a list of LME regions.

The `app_router` function returns an instance of `APIRouter` with the following configuration:
- Prefix: The value of `self.app_router_prefix`
- Tags: A list of values from `self.app_router_tags`

The router includes a single GET endpoint at the root path ("/") that returns a dictionary mapping region keys to their names.
"""
router = APIRouter(prefix=self.app_router_prefix, tags=list(self.app_router_tags))

@router.get('/')
def get_lme_regions():
return {key: value['name'] for key, value in regions.items()}

return router

@hookimpl
def dataset_router(self, deps: Dependencies):
"""Defines a dataset router that allows accessing datasets for specific regions.

The `dataset_router` function creates a FastAPI router that provides an endpoint for retrieving a dataset for a specific region. The region is identified by its `region_id`, and the dataset is identified by its `dataset_id`.

The function uses the `Dependencies` object to access the dataset and perform the necessary slicing operations to extract the data for the specified region. The `get_region_dataset` function is defined within the `dataset_router` function and is responsible for the actual data retrieval and slicing.

The router is then populated with the necessary routes and returned for inclusion in the main application.
"""
router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags))

def get_region_dataset(dataset_id: str, region_id: str):
region = regions[region_id]
bbox = region['bbox']

            # bbox is [west, east, south, north]; air_temperature's latitudes
            # are descending, so the slice runs north-to-south
            lat_slice = slice(bbox[3], bbox[2])
            # TODO: longitude subsetting is skipped for now:
            # lon_slice = slice(bbox[0], bbox[1])

dataset = deps.dataset(dataset_id)

sliced = dataset.cf.sel(latitude=lat_slice)

return sliced

region_deps = Dependencies(
dataset_ids=deps.dataset_ids,
dataset=get_region_dataset,
cache=deps.cache,
plugins=deps.plugins,
plugin_manager=deps.plugin_manager,
)

all_plugins = list(deps.plugin_manager().get_plugins())
this_plugin = [p for p in all_plugins if p.name == self.name]

for new_router in deps.plugin_manager().subset_hook_caller(
'dataset_router', remove_plugins=this_plugin
)(deps=region_deps):
router.include_router(new_router, prefix='/{region_id}')

return router
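One subtlety in `get_region_dataset` above: label-based slicing must be ordered the same way as the coordinate, and the `air_temperature` tutorial dataset stores latitude descending, hence `slice(bbox[3], bbox[2])`. A small sketch of a helper (our own invention, not part of xpublish) that picks the bounds order from the data instead of hard-coding it:

```python
def ordered_bounds(coord, lo: float, hi: float):
    """Return slice bounds matching the coordinate's sort order:
    descending coordinates need (north, south), ascending (south, north)."""
    descending = coord[0] > coord[-1]
    return (hi, lo) if descending else (lo, hi)
```

With descending latitudes like `[75.0, 50.0, 25.0]` this yields `(45.125, 25.0)` for the East Coast box, matching the hard-coded slice in the plugin.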
46 changes: 46 additions & 0 deletions examples/mean.py
@@ -0,0 +1,46 @@
from typing import Sequence

from fastapi import APIRouter, Depends, HTTPException

from xpublish import Dependencies, Plugin, hookimpl


class MeanPlugin(Plugin):
"""Provides a plugin that adds a dataset router for computing the mean of variables in a dataset.

The `MeanPlugin` class defines the following:
- `name`: The name of the plugin, set to 'mean'.
- `dataset_router_prefix`: The prefix for the dataset router, set to an empty string.
- `dataset_router_tags`: The tags for the dataset router, set to ['mean'].

The `dataset_router` method creates an APIRouter with the defined prefix and tags, and adds a GET endpoint for computing the mean of a variable in the dataset. If the variable is not found in the dataset, an HTTPException is raised with a 404 status code.
"""

name: str = 'mean'

dataset_router_prefix: str = ''
dataset_router_tags: Sequence[str] = ['mean']

@hookimpl
def dataset_router(self, deps: Dependencies):
"""Provides a route to retrieve the mean value of a variable in a dataset.

Args:
deps (Dependencies): The dependencies for plugin routers
"""
router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags))

@router.get('/{var_name}/mean')
def get_mean(var_name: str, dataset=Depends(deps.dataset)):
if var_name not in dataset.variables:
raise HTTPException(
status_code=404,
detail=f"Variable '{var_name}' not found in dataset",
)

mean = dataset[var_name].mean()
if mean.isnull():
return 'NaN'
return float(mean)

return router