diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 35d4a02..4d2037e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,20 @@ repos: - id: check-yaml - id: double-quote-string-fixer + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.16 + hooks: + - id: mdformat + additional_dependencies: + - mdformat-myst + + - repo: https://github.com/adamchainz/blacken-docs + rev: "1.13.0" + hooks: + - id: blacken-docs + additional_dependencies: + - black==23.1.0 + - repo: https://github.com/psf/black rev: 23.3.0 hooks: @@ -33,3 +47,5 @@ repos: hooks: - id: prettier language_version: system + exclude_types: + - markdown # managed by mdformat diff --git a/README.rst b/README.rst index 42e3013..d66b8f6 100644 --- a/README.rst +++ b/README.rst @@ -37,10 +37,10 @@ Here is an example of directly accessing the data from within Python: from fsspec.implementations.http import HTTPFileSystem fs = HTTPFileSystem() - http_map = fs.get_mapper('http://0.0.0.0:9000') + http_map = fs.get_mapper("http://0.0.0.0:9000") # open as a zarr group - zg = zarr.open_consolidated(http_map, mode='r') + zg = zarr.open_consolidated(http_map, mode="r") # or open as another Xarray Dataset ds = xr.open_zarr(http_map, consolidated=True) diff --git a/docs/requirements.txt b/docs/requirements.txt index 79a8a4e..9cc8242 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,3 +3,5 @@ sphinx-autosummary-accessors pydata-sphinx-theme sphinx-autodoc-typehints autodoc_pydantic +myst-nb +sphinx-design diff --git a/docs/source/api.rst b/docs/source/api.md similarity index 57% rename from docs/source/api.rst rename to docs/source/api.md index cbb6170..39bc904 100644 --- a/docs/source/api.rst +++ b/docs/source/api.md @@ -1,17 +1,17 @@ +```{eval-rst} .. 
currentmodule:: xpublish +``` -############# -API reference -############# +# API reference -Top-level Rest class -==================== +## Top-level Rest class -The :class:`~xpublish.Rest` class can be used for publishing a -:class:`xarray.Dataset` object or a collection of Dataset objects. +The {class}`~xpublish.Rest` class can be used for publishing a +{class}`xarray.Dataset` object or a collection of Dataset objects. The main interfaces to Xpublish that many users may use. +```{eval-rst} .. autosummary:: :toctree: generated/ @@ -22,11 +22,13 @@ The main interfaces to Xpublish that many users may use. Rest.serve Rest.register_plugin Rest.dependencies +``` There are also a handful of methods that are more likely to be used when subclassing `xpublish.Rest` to modify functionality, or are used by plugin dependencies. +```{eval-rst} .. autosummary:: :toctree: generated/ @@ -37,78 +39,92 @@ by plugin dependencies. Rest.init_cache_kwargs Rest.init_app_kwargs Rest.plugin_routers +``` -There is also a specialized version of :class:`xpublish.Rest` for use +There is also a specialized version of {class}`xpublish.Rest` for use when only a single dataset is being served, instead of a collection of datasets. +```{eval-rst} .. autosummary:: :toctree: generated/ SingleDatasetRest SingleDatasetRest.setup_datasets +``` -For serving a single dataset the :class:`~xpublish.SingleDatasetRest` is used instead. +For serving a single dataset the {class}`~xpublish.SingleDatasetRest` is used instead. +```{eval-rst} .. autosummary:: :toctree: generated/ SingleDatasetRest +``` -Dataset.rest (xarray accessor) -============================== +## Dataset.rest (xarray accessor) -This accessor extends :py:class:`xarray.Dataset` with the same interface than -:class:`~xpublish.SingleDatasetRest`. It is a convenient method for publishing one single +This accessor extends {py:class}`xarray.Dataset` with the same interface than +{class}`~xpublish.SingleDatasetRest`. 
It is a convenient method for publishing one single dataset. Proper use of this accessor should be like: -.. code-block:: python - - >>> import xarray as xr # first import xarray - >>> import xpublish # import xpublish (the dataset 'rest' accessor is registered) - >>> ds = xr.Dataset() # create or load an xarray Dataset - >>> ds.rest(...) # call the 'rest' accessor on the dataset - >>> ds.rest. # access to the methods and properties listed below +``` +>>> import xarray as xr # first import xarray +>>> import xpublish # import xpublish (the dataset 'rest' accessor is registered) +>>> ds = xr.Dataset() # create or load an xarray Dataset +>>> ds.rest(...) # call the 'rest' accessor on the dataset +>>> ds.rest. # access to the methods and properties listed below +``` +```{eval-rst} .. currentmodule:: xarray +``` **Calling the accessor** +```{eval-rst} .. autosummary:: :toctree: generated/ :template: autosummary/accessor_callable.rst Dataset.rest +``` **Properties** +```{eval-rst} .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst Dataset.rest.app Dataset.rest.cache +``` **Methods** +```{eval-rst} .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst Dataset.rest.serve +``` -FastAPI dependencies -==================== +## FastAPI dependencies -The functions below are defined in module ``xpublish.dependencies`` and can -be used as `FastAPI dependencies `_ +The functions below are defined in module `xpublish.dependencies` and can +be used as [FastAPI dependencies](https://fastapi.tiangolo.com/tutorial/dependencies) when creating custom API endpoints directly. -When creating routers with plugins, instead use ``xpublish.Dependency`` that will be -passed in to the ``Plugin.app_router`` or ``Plugin.dataset_router`` method. +When creating routers with plugins, instead use `xpublish.Dependency` that will be +passed in to the `Plugin.app_router` or `Plugin.dataset_router` method. +```{eval-rst} .. 
currentmodule:: xpublish.dependencies +``` +```{eval-rst} .. autosummary:: :toctree: generated/ @@ -119,13 +135,17 @@ passed in to the ``Plugin.app_router`` or ``Plugin.dataset_router`` method. get_zmetadata get_plugins get_plugin_manager +``` -Plugins +## Plugins -Plugins are inherit from the :class:`~xpublish.Plugin` class, and implement various hooks. +Plugins are inherit from the {class}`~xpublish.Plugin` class, and implement various hooks. +```{eval-rst} .. currentmodule:: xpublish +``` +```{eval-rst} .. autosummary:: :toctree: generated/ @@ -137,3 +157,4 @@ Plugins are inherit from the :class:`~xpublish.Plugin` class, and implement vari plugins.manage.find_default_plugins plugins.manage.load_default_plugins plugins.manage.configure_plugins +``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 2448c1a..dda64d5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -46,11 +46,16 @@ 'sphinxcontrib.autodoc_pydantic', 'sphinx_autosummary_accessors', 'sphinx_autodoc_typehints', + 'sphinx_design', + 'myst_parser', ] +myst_enable_extensions = [] +myst_heading_anchors = 6 + extlinks = { - 'issue': ('https://github.com/xarray-contrib/xpublish/issues/%s', 'GH#'), - 'pr': ('https://github.com/xarray-contrib/xpublish/pull/%s', 'GH#'), + 'issue': ('https://github.com/xpublish-community/xpublish/issues/%s', 'GH#'), + 'pr': ('https://github.com/xpublish-community/xpublish/pull/%s', 'GH#'), } # Add any paths that contain templates here, relative to this directory. 
templates_path = ['_templates', sphinx_autosummary_accessors.templates_path] @@ -140,7 +145,7 @@ 'icon_links': [ { 'name': 'GitHub', - 'url': 'https://github.com/xarray-contrib/xpublish', # required + 'url': 'https://github.com/xpublish-community/xpublish', # required 'icon': 'fa-brands fa-square-github', 'type': 'fontawesome', } @@ -149,7 +154,7 @@ } html_context = { - 'github_user': 'xarray-contrib', + 'github_user': 'xpublish-community', 'github_repo': 'xpublish', 'github_version': 'main', 'doc_path': 'docs/source/', diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 3a381c5..8418b75 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -16,7 +16,7 @@ Feature requests and feedback Do you like Xpublish? Share some love on Twitter or in your blog posts! We'd also like to hear about your propositions and suggestions. Feel free to -`submit them as issues `_ and: +`submit them as issues `_ and: * Explain in detail how they should work. * Keep the scope as narrow as possible. This will make it easier to implement. @@ -26,7 +26,7 @@ We'd also like to hear about your propositions and suggestions. Feel free to Report bugs ----------- -Report bugs for Xpublish in the `issue tracker `_. +Report bugs for Xpublish in the `issue tracker `_. If you are reporting a bug, please include: @@ -45,7 +45,7 @@ fix the bug itself. Fix bugs -------- -Look through the `GitHub issues for bugs `_. +Look through the `GitHub issues for bugs `_. Talk to developers to find out how you can fix specific bugs. @@ -81,7 +81,7 @@ Preparing Pull Requests ----------------------- #. Fork the - `xpublish GitHub repository `__. It's + `xpublish GitHub repository `__. It's fine to use ``xpublish`` as your fork repository name because it will live under your user. 
@@ -134,5 +134,5 @@ Preparing Pull Requests head-fork: YOUR_GITHUB_USERNAME/xpublish compare: your-branch-name - base-fork: xarray-contrib/xpublish + base-fork: xpublish-community/xpublish base: main diff --git a/docs/source/ecosystem/index.md b/docs/source/ecosystem/index.md new file mode 100644 index 0000000..14d22ac --- /dev/null +++ b/docs/source/ecosystem/index.md @@ -0,0 +1,31 @@ +# Ecosystem + +Xpublish's ecosystem is made up of plugins and servers, and the folks who build and run them. + +## Connect + +We have two main venues for discussing Xpublish and its ecosystem, Github Discussions and Slack. + +### Github Discussions + +For longer form discussions, we can be found in [Github Discussions](https://github.com/xpublish-community/community/discussions?discussions_q=). + +### Slack + +Xpublish has a channel (`#xpublish`) on [ESIP](https://www.esipfed.org/)'s (Earth Science Information Partners) Slack. (Insert Rich's justification here about ESIP being the biggest umbrella that he can find) [Join here](https://join.slack.com/t/esip-all/shared_invite/zt-1omjufm9z-iH8Gf7gmmsm2SiS5Xh6BlQ) + +## Server distributions + +- [XREDS](https://github.com/asascience-open/xreds) from RPS +- [Xpublish Host](https://github.com/axiom-data-science/xpublish-host) from Axiom Data Science + +## Plugins + +- [OGC EDR](https://github.com/xpublish-community/xpublish-edr/) +- [OpenDAP](https://github.com/xpublish-community/xpublish-opendap/) +- [WMS](https://github.com/xpublish-community/xpublish-wms) +- [Intake](https://github.com/axiom-data-science/xpublish-intake) + +## Experiments + +We have a separate Github organization ([xpublish-experiments](https://github.com/xpublish-experiments)) for things that you've tried and want to share with the community, but don't necessarily want to maintain. 
diff --git a/docs/source/getting-started/index.md b/docs/source/getting-started/index.md new file mode 100644 index 0000000..eb452dc --- /dev/null +++ b/docs/source/getting-started/index.md @@ -0,0 +1,13 @@ +# Getting started + +The getting started guide aims to get you using Xpublish productively as quickly as possible. +It is designed as an entry point for new users, and it provides an introduction to Xpublish’s main concepts. + +```{toctree} +--- +hidden: +--- +why-xpublish +installation +tutorial/index +``` diff --git a/docs/source/installation.rst b/docs/source/getting-started/installation.rst similarity index 100% rename from docs/source/installation.rst rename to docs/source/getting-started/installation.rst diff --git a/docs/source/getting-started/tutorial/dataset-provider-plugin.md b/docs/source/getting-started/tutorial/dataset-provider-plugin.md new file mode 100644 index 0000000..203663e --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-provider-plugin.md @@ -0,0 +1,14 @@ +# Building a dataset provider plugin + +So far, we've been eagerly loading datasets for Xpublish to serve, but this tends not to scale well between memory needs and slow startup. Xpublish plugins can also be __Dataset Providers__ and handle loading of datasets on request. + +This also allows organizations to quickly be able to adapt Xpublish to work in their own environment, rather than needing Xpublish to explicitly support it. 
+ +```{literalinclude} dataset-provider-plugin.py +``` + +With this plugin, Xpublish can serve the same datasets as we explicitly defined and loaded in [serving multiple datasets](./serving-multiple-datasets.md), as well as any others supported by [`xr.tutorial`](https://github.com/pydata/xarray/blob/main/xarray/tutorial.py) + +```{note} +For more details on building dataset provider plugins, please see the [plugin user guide](../../user-guide/plugins.md#dataset-provider-plugins) +``` diff --git a/docs/source/getting-started/tutorial/dataset-provider-plugin.py b/docs/source/getting-started/tutorial/dataset-provider-plugin.py new file mode 100644 index 0000000..1b3f9d0 --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-provider-plugin.py @@ -0,0 +1,24 @@ +import xarray as xr +from requests import HTTPError + +from xpublish import Plugin, Rest, hookimpl + + +class TutorialDataset(Plugin): + name = 'xarray-tutorial-dataset' + + @hookimpl + def get_datasets(self): + return list(xr.tutorial.file_formats) + + @hookimpl + def get_dataset(self, dataset_id: str): + try: + return xr.tutorial.open_dataset(dataset_id) + except HTTPError: + return None + + +rest = Rest({}) +rest.register_plugin(TutorialDataset()) +rest.serve() diff --git a/docs/source/getting-started/tutorial/dataset-router-plugin.md b/docs/source/getting-started/tutorial/dataset-router-plugin.md new file mode 100644 index 0000000..50de6df --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-router-plugin.md @@ -0,0 +1,90 @@ +# Creating a dataset router plugin + +Starting with the [dataset router we built](./dataset-router.md), we can transform it into a plugin. + +Xpublish supports several different types of plugins, so we will build a [dataset router plugin](../../user-guide/plugins.md#dataset-router-plugins). 
+ +```{literalinclude} dataset-router.py +--- +lines: 12-23 +caption: Existing router +--- +``` + +```{literalinclude} dataset-router-plugin.py +``` + +When a plugin is defined it tends to be a bit longer than the router as defined directly, as some of those elements are what provides users the ability to configure the plugin. +Other parts are necessary for Xpublish to be able to find and appropriately load the plugin. + +## Building blocks of a plugin + +### Subclassing + +A plugin starts by inheriting from the {py:class}`xpublish.plugins.hooks.Plugin` (exposed as `xpublish.Plugin`), and defining a name that the system should know it by. +`xpublish.Plugin` itself is a subclass of [`pydantic.BaseModel`](https://docs.pydantic.dev/latest/usage/models/) which allows quick configuration. + +```{literalinclude} dataset-router-plugin.py +--- +lines: 4-11 +emphasize-lines: 4-5 +--- +``` + +### Configurable attributes + +Next the attributes are defined that a user or admin may wish to override. + +```{literalinclude} dataset-router-plugin.py +--- +lines: 7-14 +emphasize-lines: 4-5 +--- +``` + +### Extension hooks + +Then the plugin needs to tell Xpublish what it can do. + +It does it with the `@hookimpl` decorator and specifically named methods, +in this case `dataset_router`. + +These methods can take a set of arguments that Xpublish has defined (further explored in the [plugin user guide](../../user-guide/plugins.md) and [API docs](../../api.md)). + +```{literalinclude} dataset-router-plugin.py +--- +lines: 7-18 +emphasize-lines: 7-8 +--- +``` + +### Building the router + +The router can now be transformed. +Most of it stays the same, though dependencies now instead use the ones passed to the method, and the router should be initialized with the prefix and tags. + +```{literalinclude} dataset-router-plugin.py +--- +lines: 7-27 +emphasize-lines: 9-19, 21 +--- +``` + +Additionally the router needs to be returned from the method, so that Xpublish can access it. 
+ +### Registering the plugin + +While the [entry points system](../../user-guide/plugins.md#entry-points) can be used for sharing plugins with others, for plugins that aren't going to be distributed, they can be registered directly. + +```{literalinclude} dataset-router-plugin.py +--- +lines: 30-37 +emphasize-lines: 7 +--- +``` + +Now the same routes are available on your server, and it's possible to share your plugin with other Xpublish users. + +```{note} +For more details see the [plugin user guide](../../user-guide/plugins.md#dataset-router-plugins) +``` diff --git a/docs/source/getting-started/tutorial/dataset-router-plugin.py b/docs/source/getting-started/tutorial/dataset-router-plugin.py new file mode 100644 index 0000000..557f90a --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-router-plugin.py @@ -0,0 +1,37 @@ +import xarray as xr +from fastapi import APIRouter, Depends, HTTPException + +from xpublish import Dependencies, Plugin, SingleDatasetRest, hookimpl + + +class MeanPlugin(Plugin): + name = 'mean' + + dataset_router_prefix = '' + dataset_router_tags = ['mean'] + + @hookimpl + def dataset_router(self, deps: Dependencies): + router = APIRouter(prefix=self.dataset_router_prefix, tags=list(self.dataset_router_tags)) + + @router.get('/{var_name}/mean') + def get_mean(var_name: str, dataset=Depends(deps.dataset)): + if var_name not in dataset.variables: + raise HTTPException( + status_code=404, + detail=f"Variable '{var_name}' not found in dataset", + ) + + return float(dataset[var_name].mean()) + + return router + + +ds = xr.tutorial.open_dataset( + 'air_temperature', + chunks=dict(lat=5, lon=5), +) + +rest = SingleDatasetRest(ds) +rest.register_plugin(MeanPlugin()) +rest.serve() diff --git a/docs/source/getting-started/tutorial/dataset-router.md b/docs/source/getting-started/tutorial/dataset-router.md new file mode 100644 index 0000000..c2849e8 --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-router.md @@ -0,0 +1,69 @@ +# 
Extending Xpublish with a dataset router + +It is also possible to create custom API routes and serve them via Xpublish. In +the example below, we create a minimal application to get the mean value of a +given variable in the published dataset: + +```{literalinclude} dataset-router.py +``` + +Taking the dataset loaded above in this tutorial, this application should behave +like this: + +- `/air/mean` returns a floating number +- `/not_a_variable/mean` returns a 404 HTTP error + +## Building blocks of new routes + +Adding a new route for a dataset starts with creating a [FastAPI `APIRouter`](https://fastapi.tiangolo.com/tutorial/bigger-applications/#apirouter), which we have done here with `myrouter = APIRouter()`. + +```{literalinclude} dataset-router.py +--- +lines: 6-15 +emphasize-lines: 6 +--- +``` + +Next we define our route using a decorator for the type of request, in this case `@myrouter.get()`. +Within the decorator we specify the path we want the route to respond to. +If we want it to [respond to parameters](https://fastapi.tiangolo.com/tutorial/path-params/) in the path, we can enclose those with curly brackets and they will be passed to our route function. +Here we have specified that we want a path parameter of `var_name` to be passed to the function, and the requests should respond to `{var_name}/mean`. + +```{literalinclude} dataset-router.py +--- +lines: 11-19 +emphasize-lines: 4 +--- +``` + +Following the decorator, we have our function to respond to the route. +It takes in the path parameters, and some other arguments. + +```{literalinclude} dataset-router.py +--- +lines: 14-22 +emphasize-lines: 2-6 +--- +``` + +The {func}`~xpublish.dependencies.get_dataset` function in the example above is +a [FastAPI dependency](https://fastapi.tiangolo.com/tutorial/dependencies/) that is used to access the dataset object being served by +the application, either from inside a FastAPI path operation decorated function +or from another FastAPI dependency. 
Note that `get_dataset` can only be used +as a function argument (FastAPI has other ways to reuse a dependency, but those +are not supported in this case). + +Xpublish also provides a {func}`~xpublish.dependencies.get_cache` dependency +function to get/put any useful key-value pair from/into the cache that is +created along with a running instance of the application. + +To use our route, we then need to tell Xpublish about it, by passing it into `ds.rest`. + +```{literalinclude} dataset-router.py +--- +lines: 14-24 +emphasize-lines: 9,11 +--- +``` + +Finally we can serve our new route along with the other routes that Xpublish understands. diff --git a/docs/source/getting-started/tutorial/dataset-router.py b/docs/source/getting-started/tutorial/dataset-router.py new file mode 100644 index 0000000..4da0153 --- /dev/null +++ b/docs/source/getting-started/tutorial/dataset-router.py @@ -0,0 +1,24 @@ +import xarray as xr +from fastapi import APIRouter, Depends, HTTPException + +from xpublish.dependencies import get_dataset + +ds = xr.tutorial.open_dataset( + 'air_temperature', + chunks=dict(lat=5, lon=5), +) + +myrouter = APIRouter() + + +@myrouter.get('/{var_name}/mean') +def get_mean(var_name: str, dataset: xr.Dataset = Depends(get_dataset)): + if var_name not in dataset.variables: + raise HTTPException(status_code=404, detail=f"Variable '{var_name}' not found in dataset") + + return float(dataset[var_name].mean()) + + +rest = ds.rest(routers=[myrouter]) + +rest.serve() diff --git a/docs/source/getting-started/tutorial/index.md b/docs/source/getting-started/tutorial/index.md new file mode 100644 index 0000000..d1cfef6 --- /dev/null +++ b/docs/source/getting-started/tutorial/index.md @@ -0,0 +1,17 @@ +# Tutorial + +The Xpublish tutorial is designed for an experienced Xarray user (but not a server administrator) to be able to start at the beginning and build layers of understanding of how Xpublish and its ecosystem work together. 
+ +If you are interested in developing plugins or administering servers, you may want to jump ahead to [using plugins](./using-plugins.md) or look to [deployment](../../user-guide/deployment/index.md) in the user guide. + +```{toctree} +--- +hidden: +--- +introduction +dataset-router +serving-multiple-datasets +using-plugins +dataset-router-plugin +dataset-provider-plugin +``` diff --git a/docs/source/getting-started/tutorial/introduction.md b/docs/source/getting-started/tutorial/introduction.md new file mode 100644 index 0000000..aad513d --- /dev/null +++ b/docs/source/getting-started/tutorial/introduction.md @@ -0,0 +1,119 @@ +# Introduction + +If you've read through [Why Xpublish](../why-xpublish.md) you'll know that Xpublish is a foundational building block for data servers. The real trick behind Xpublish is that it builds upon the Xarray datasets that the Python data community is used to. + +To introduce new users to Xpublish, quickly serve a single dataset, and to allow for quick development, Xpublish includes an [Xarray accessor](https://docs.xarray.dev/en/stable/internals/extending-xarray.html). 
+ +## Server-Side + +To begin, import Xpublish and open an Xarray {class}`~xarray.Dataset`: + +```python +import xarray as xr +import xpublish + +ds = xr.tutorial.open_dataset( + "air_temperature", + chunks=dict(lat=5, lon=5), +) +``` + +To publish the dataset, use the +{class}`~xpublish.SingleDatasetRest` class: + +```python +rest = xpublish.SingleDatasetRest(ds) +``` + +Alternatively, you might want to use the {attr}`xarray.Dataset.rest` accessor +for more convenience: + +```python +ds.rest +``` + +Optional customization of the underlying [FastAPI application](https://fastapi.tiangolo.com) or the server-side [cache](https://github.com/dask/cachey) is possible, e.g., + +```python +ds.rest( + app_kws=dict( + title="My Dataset", + description="Dataset Description", + openapi_url="/dataset.JSON", + ), + cache_kws=dict(available_bytes=1e9), +) +``` + +Serving the dataset then requires calling the +{meth}`~xpublish.Rest.serve` method on the {class}`~xpublish.Rest` instance or +the {attr}`xarray.Dataset.rest` accessor: + +```python +rest.serve() + +# or + +ds.rest.serve() +``` + +{meth}`~xpublish.Rest.serve` passes any keyword arguments on to +{func}`uvicorn.run` (see [Uvicorn docs]). + +### Default API routes + +By default, the FastAPI application created with Xpublish provides the following +endpoints to get some information about the published dataset: + +- `/`: returns xarray's HTML repr. +- `/keys`: returns a list of variable keys, i.e., those returned by {attr}`xarray.Dataset.variables`. +- `/info`: returns a JSON dictionary summary of a Dataset variables and attributes, similar to {meth}`xarray.Dataset.info`. +- `/dict`: returns a JSON dictionary of the full dataset. +- `/versions`: returns JSON dictionary of the versions of Python, Xarray and related libraries on the server side, similar to {func}`xarray.show_versions`. 
+ +The application also provides data access through a [Zarr] compatible API with the +following endpoints: + +- `/zarr/.zmetadata`: returns a JSON dictionary representing the consolidated Zarr metadata. +- `/zarr/{var}/{key}`: returns a single chunk of an array. + +### API Docs + +Thanks to FastAPI and [Swagger UI], automatically generated +interactive documentation is available at the `/docs` URL. + +This path can be overridden by setting the `docs_url` key in the `app_kws` +dictionary argument when initializing the rest accessor. + +## Client-Side + +By default, datasets served by Xpublish can be opened by any Zarr client +that implements an HTTPStore. In Python, this can be done with `fsspec`: + +```python +import zarr +from fsspec.implementations.http import HTTPFileSystem + +fs = HTTPFileSystem() + +# The URL 'http://0.0.0.0:9000/zarr/' here serves one dataset +http_map = fs.get_mapper("http://0.0.0.0:9000/zarr/") + +# open as a zarr group +zg = zarr.open_consolidated(http_map, mode="r") + +# or open as another Xarray Dataset +ds = xr.open_zarr(http_map, consolidated=True) +``` + +Xpublish's endpoints can also be queried programmatically. For example: + +```python +import requests + +response = requests.get("http://0.0.0.0:9000/info").json() +``` + +[swagger ui]: https://github.com/swagger-api/swagger-ui +[uvicorn docs]: https://www.uvicorn.org/deployment/#running-programmatically +[zarr]: https://zarr.readthedocs.io/en/stable/ diff --git a/docs/source/getting-started/tutorial/serving-multiple-datasets.md b/docs/source/getting-started/tutorial/serving-multiple-datasets.md new file mode 100644 index 0000000..717f3d3 --- /dev/null +++ b/docs/source/getting-started/tutorial/serving-multiple-datasets.md @@ -0,0 +1,40 @@ +# Serving multiple datasets + +Xpublish also lets you serve multiple datasets via one FastAPI application. 
You +provide a mapping (dictionary) when creating a +{class}`~xpublish.Rest` instance, e.g., + +```python +ds = xr.tutorial.open_dataset("air_temperature") +ds2 = xr.tutorial.open_dataset("rasm") + +rest_collection = xpublish.Rest({"air_temperature": ds, "rasm": ds2}) + +rest_collection.serve() +``` + +When multiple datasets are given, all dataset-specific API endpoint URLs have +the `/datasets/{dataset_id}` prefix. For example: + +- `/datasets/rasm/info` returns information about the `rasm` dataset +- `/datasets/invalid_dataset_id/info` returns a 404 HTTP error + +The application also has one more API endpoint: + +- `/datasets`: returns the list of the ids (keys) of all published datasets + +Note that custom routes work for multiple datasets as well as for a single +dataset. No code change is required. Taking the previous example, + +```python +rest_collection = xpublish.Rest( + {"air_temperature": ds, "rasm": ds2}, routers=[myrouter] +) + +rest_collection.serve() +``` + +The following URLs should return expected results: + +- `/datasets/air_temperature/air/mean` +- `/datasets/rasm/Tair/mean` diff --git a/docs/source/getting-started/tutorial/using-plugins.md b/docs/source/getting-started/tutorial/using-plugins.md new file mode 100644 index 0000000..0b5ace3 --- /dev/null +++ b/docs/source/getting-started/tutorial/using-plugins.md @@ -0,0 +1,29 @@ +# Using Plugins + +Much of the power of Xpublish comes from its [ecosystem](../../ecosystem/index) of plugins, which can quickly extend Xpublish with new capabilities. + +```{note} +For more details of the plugin system see the [plugin user guide](../../user-guide/plugins.md) +``` + +## What types of plugins are there? + +Xpublish supports a few different categories of plugins, namely: + +- Dataset routers +- Dataset providers +- App routers + +Other types of plugins are possible as plugins can implement new ways to extend Xpublish that other plugins can build upon. + +## How do I setup plugins? 
+ +Most Xpublish plugins use the [Python entry points system](../../user-guide/plugins.md#entry-points) and have reasonable defaults set, which allows them to register themselves with Xpublish, and start responding to requests as soon as they are installed. + +That makes new Xpublish functionality a `pip` or `conda install` away! + +```{warning} +For the server admins that just started worrying about new functionality being injected into their servers, the entire plugin loading process can be explicitly managed. + +See the [plugin user guide](../../user-guide/plugins.md#entry-points) or [deployment guide](../../user-guide/deployment/index.md) for more details. +``` diff --git a/docs/source/getting-started/why-xpublish.md b/docs/source/getting-started/why-xpublish.md new file mode 100644 index 0000000..ce3ef00 --- /dev/null +++ b/docs/source/getting-started/why-xpublish.md @@ -0,0 +1,35 @@ +# Why Xpublish + +Xarray provides an intuitive API on top of a foundational data model, labeled arrays and datasets. +This API and data model have formed the basis for a large and growing ecosystem of tools. + +Xpublish stands on the shoulders of Xarray and the greater PyData ecosystem enabling both new and old users, interactions, and clients. +Xpublish does this by using Xarray datasets as the core data interchange format within the server, and surrounding that with an ecosystem of plugins. + +```{warning} Hold on to your hats, we're about to say Xpublish a lot +

via Gfycat

+``` + +## An extendable core + +`xpublish` (the library) on its own is designed to be relatively small and lightweight. It mainly defines plugin extension points, based around the internal exchange of Xarray datasets. It also defines a standard way to configure plugins, and how to load them. + +It additionally provides an Xarray dataset accessor that allows for quickly serving a dataset, and a nice introduction path for creating new dataset based routers. + +## A collection of plugins + +Xpublish really starts coming into its own with the plugin ecosystem. + +Because `xpublish` the library has a relatively small API surface, but depends on familiar Xarray datasets, it becomes much easier to quickly develop large classes of plugins. Additionally by keeping most of the internet and storage facing elements of Xpublish out of the `xpublish` library, plugins can develop independently and at their own rate. + +## An ecosystem of servers + +Eventually, for many users they may never know that they are using Xpublish. Instead it will be the foundational building block of a family of data servers. Different communities may have different desires out of their servers and thus combine Xpublish with different sets of plugins. + +A 'neurological community Xpublish server' may look very different from the needs of a 'meteorological community Xpublish server' but they may include some of the same plugins. Each community may distribute their servers in different ways with different ways of configuring them. + +An additional power of Xpublish is for the server admins. When a community decides on a specific 'Xpublish server distribution', but the distribution's opinions of how data should be stored don't match the environment, the admin can add or replace the distribution's data provider plugins to adapt to their own infrastructure. 
+ +```{admonition} For more background +Check out [Alex's manifesto](https://github.com/xpublish-community/xpublish/discussions/139) that kicked off this phase of development. +``` diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 0000000..2c7432d --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,123 @@ +# Xpublish + +**Useful links:** [Installation](getting-started/installation) | [Source Repository](https://github.com/xpublish-community/xpublish/) | [Issue Tracker](https://github.com/xpublish-community/xpublish/issues) | [Q&A Support](https://github.com/xpublish-community/xpublish/discussions/categories/q-a?discussions_q=category%3AQ%26A+) | [Slack Channel](./ecosystem/index.md#slack) + +## Xpublish is + +````{grid} 3 + +```{grid-item-card} A quick way to serve a single Xarray dataset over HTTP using FastAPI +:link: getting-started/tutorial/introduction + +Get started with `ds.rest.serve()` to explore serving data with Xpublish +``` + +```{grid-item-card} An extendable core of a dataset server +:link: getting-started/why-xpublish + +By building a server based around Xarray datasets, we can build on top of the rapid progress of Xarray and the greater PyData ecosystem. +``` + +```{grid-item-card} A community and ecosystem of plugins, servers, and their builders and users +:link: ecosystem/index + +Explore the [Xpublish ecosystem](./ecosystem/index.md). +``` + +```` + +## I want to + +- [Quickly serve a single dataset for my own use](getting-started/tutorial/introduction) +- Serve a collection of datasets with a pre-configured server +- [Build plugins to serve datasets in new ways](getting-started/tutorial/dataset-router-plugin) +- [Connect to a new source of datasets](getting-started/tutorial/dataset-provider-plugin) +- [Discuss Xpublish with others](ecosystem/index.md#connect) + +````{grid} 1 1 2 2 +--- +gutter: 2 +--- +```{grid-item-card} Getting started +:link: getting-started/index +:link-type: doc + +New to _Xpublish_?
Check out the getting started guides. They contain an introduction +to _Xpublish's_ main concepts. +``` + +```{grid-item-card} User guide +:link: user-guide/index +:link-type: doc + +The user guide contains in-depth information on the key concepts of Xpublish +with useful background information and explanation. +``` + +```{grid-item-card} API Reference +:link: api +:link-type: doc + +The reference guide contains a detailed description of the Xpublish API. +The reference describes how the methods work and which parameters can be used. +It assumes that you have an understanding of the key concepts. +``` + +```{grid-item-card} Developer guide +:link: contributing +:link-type: doc + +Saw a typo in the documentation? Want to improve existing functionalities? +The contributing guidelines will guide you through the process of improving Xpublish. +``` + +```` + +### Xpublish lets you easily publish Xarray Datasets via a REST API. + +_You can run a short example application in a live session here:_ [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/xpublish-community/xpublish/master) + +On the server-side, one or more datasets can be published using the +{class}`xpublish.Rest` class or the {attr}`xarray.Dataset.rest` accessor, e.g., + +```python +ds.rest.serve(host="0.0.0.0", port=9000) +``` + +Those datasets can be accessed from various kinds of client applications, e.g., +from within Python using Zarr and fsspec. + +```python +import xarray as xr +import zarr +from fsspec.implementations.http import HTTPFileSystem + +fs = HTTPFileSystem() +http_map = fs.get_mapper("http://0.0.0.0:9000/zarr/") + +# open as a zarr group +zg = zarr.open_consolidated(http_map, mode="r") + +# or open as another Xarray Dataset +ds = xr.open_zarr(http_map, consolidated=True) +``` + +Or to explore other access methods, open [http://0.0.0.0:9000/docs](http://0.0.0.0:9000/docs) in a browser.
+ +```{toctree} +--- +caption: Documentation Contents +hidden: true +maxdepth: 2 +--- +getting-started/index +user-guide/index +api +ecosystem/index +Contributing +``` + +## Feedback + +If you encounter any errors or problems with **Xpublish**, please open an issue +on [GitHub](http://github.com/xpublish-community/xpublish), or ask questions in [Github Discussions](https://github.com/xpublish-community/xpublish/discussions/categories/q-a?discussions_q=category%3AQ%26A+) or on our [Slack Channel](./ecosystem/index.md#slack). diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 2675af7..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,79 +0,0 @@ -======== -Xpublish -======== - -**Xpublish lets you easily publish Xarray Datasets via a REST API.** - -*You can run a short example application in a live session here:* |Binder| - -.. |Binder| image:: https://mybinder.org/badge_logo.svg - :target: https://mybinder.org/v2/gh/xarray-contrib/xpublish/master - -On the server-side, one or more datasets can be published using the -:class:`xpublish.Rest` class or the :attr:`xarray.Dataset.rest` accessor, e.g., - -.. code-block:: python - - ds.rest.serve(host="0.0.0.0", port=9000) - -Those datasets can be accessed from various kinds of client applications, e.g., -from within Python using Zarr and fsspec. - -.. code-block:: python - - import xarray as xr - import zarr - from fsspec.implementations.http import HTTPFileSystem - - fs = HTTPFileSystem() - http_map = fs.get_mapper('http://0.0.0.0:9000') - - # open as a zarr group - zg = zarr.open_consolidated(http_map, mode='r') - - # or open as another Xarray Dataset - ds = xr.open_zarr(http_map, consolidated=True) - -Why? -~~~~ - -Xpublish lets you serve, share and publish Xarray Datasets via a web -application. - -The data and/or metadata in the Xarray Datasets can be exposed in various forms -through pluggable REST API endpoints. 
Efficient, on-demand delivery of large -datasets may be enabled with Dask on the server-side. - -We are exploring applications of Xpublish that include: - -* publish on-demand or derived data products -* turning xarray objects into streaming services (e.g. OPeNDAP) - -How? -~~~~ - -Under the hood, Xpublish is using a web app (FastAPI and Uvicorn) that is -exposing a REST-like API with builtin and/or user-defined endpoints. - -For example, Xpublish provides by default a minimal Zarr compatible REST-like -API with the following endpoints: - -* ``/.zmetadata``: returns Zarr-formatted metadata keys as json strings. -* ``/var/0.0.0``: returns a variable data chunk as a binary string. - -.. toctree:: - :maxdepth: 2 - :caption: Documentation Contents - :hidden: - - installation - tutorial - plugins - api - contributing - -Feedback --------- - -If you encounter any errors or problems with **Xpublish**, please open an issue -on `GitHub `_. diff --git a/docs/source/plugins.rst b/docs/source/plugins.rst deleted file mode 100644 index 1293b76..0000000 --- a/docs/source/plugins.rst +++ /dev/null @@ -1,329 +0,0 @@ -======= -Plugins -======= - -While :py:class:`fastapi.APIRouter` can get you started building new endpoints -for datasets quickly, the real extendability of Xpublish comes from it's plugin system. - -By using a plugin system, Xpublish becomes incredibly adaptable, and hopefully -easier to develop for also. Individual plugins and their functionality can -evolve independently, there are clear boundaries between types of functionality, -which allows easier reasoning about code. - -There are a few main varieties of plugins that Xpublish supports, but those -provide a lot of flexibility, and can enable whole new categories of plugins -and functionality. - -* `Dataset router `_ -* `App router `_ -* `Dataset provider `_ -* `Hook spec `_ - -Plugins work by implementing specific methods to support a variety of usage, -and marking the implementations with a decorator. 
A plugin can also implement -methods for multiple varieties, which may be useful for things like dynamic -data providers. - -.. warning:: - - Plugins are new to Xpublish, so we're learning how everything works best together. - - If you have any questions, please ask in `Github Discussions - `_ - (and feel free to tag ``@abkfenris`` for help with the plugin system). - -------------- -Functionality -------------- - -Plugins are built as `Pydantic models `_ -and descend from :py:class:`xpublish.plugins.hooks.Plugin`. -This allows there to be a common way of configuring plugins and their functionality. - -.. code-block:: python - :emphasize-lines: 5 - - from xpublish import Plugin - - - class HelloWorldPlugin(Plugin): - name = "hello_world" - - -At the minimum, a plugin needs to specify a ``name`` attribute. - -Marking implementation methods ------------------------------- - -We'll go deeper into the specific methods below, what they have in common is that any -method that a plugin is hoping to expose to the rest of Xpublish needs to be marked -with a ``@hookimpl`` decorator. - -.. code-block:: python - :emphasize-lines: 7 - - from xpublish import Plugin, hookimpl - from fastapi import APIRouter - - class HelloWorldPlugin(Plugin): - name = "hello_world" - - @hookimpl - def app_router(self): - router = APIRouter() - - @router.get("/hello") - def get_hello(): - return "world" - - return router - -For the plugin system, Xpublish is using `pluggy `_. -Pluggy was developed to support `pytest `_, -but it now is used by several other projects including `Tox `_, -`Datasette `_, -and `Conda `_, among others. - -Pluggy implements plugins as a system of hooks, each one is a distinct way for Xpublish -to communicate with plugins. -Each hook has both reference specifications, and plugin provided implementations. - -Most of the specifications are provided by Xpublish and are methods on -:py:class:`xpublish.plugins.hooks.PluginSpec` that are marked with ``@hookspec``. 
- -Plugins can then re-implement these methods with all or a subset of the arguments, -which are then marked with ``@hookimpl`` -to tell Pluggy to make them accessible to Xpublish (and other plugins). - -.. note:: - - Over time Xpublish will most likely end up expanding the number of arugments passed - into most hook methods. - - Currently we're starting with a minimum set of arguments as we can always expand, - but currently it is much harder to reduce the number of arguments. - - If there is a new argument that you would like your plugin hooks to have, - please raise an `issue `_ - to discuss including it in a future version. - -In the specification, Xpublish defines if it's supposed to get responses from all -implementations (:py:meth:`xpublish.plugins.hooks.PluginSpec.get_dataset_ids`), -or the first non-``None`` response (:py:meth:`xpublish.plugins.hooks.PluginSpec.get_dataset`). - -Pluggy also provides a lot more advanced functionality that we aren't going to go -into at this point, but could allow for creative things like dataset middleware. - - -Loading Local Plugins ---------------------- - -For plugins that you are not distributing, they can either be loaded directly via the -:py:class:`xpublish.Rest` initializer, or they can use -:py:meth:`xpublish.Rest.register_plugin` to load afterwards. - -.. code-block:: python - - from xpublish import Rest - - rest = Rest(datasets, plugins={"hello-world": HelloWorldPlugin()}) - -.. code-block:: python - - from xpublish import Rest - - rest = Rest(datasets) - rest.register_plugin(HelloWorldPlugin()) - -.. caution:: - - When plugins are provided directly to the :py:class:`xpublish.Rest` initializer - as keyword arguments, it prevents Xpublish from automatically loading other plugins - that are installed. - - For more details of the automatic plugin loading system, - see `entry points `_ below. 
- -Entry Points ------------- - -When you install a plugin library, the library takes advantage of the -`entry point system `_. - -This allows :py:class:`xpublish.Rest` to automatically find and use plugins. -It only does this if plugins **are not** provided as an keyword argument. - -:py:class:`xpublish.Rest` uses :py:func:`plugins.manage.load_default_plugins` to -load plugins from entry points. -It can be used directly and be set to disable specific plugins from being loaded, -or :py:func:`plugins.manage.find_default_plugins` and :py:func:`plugins.manage.configure_plugins`, -can be used to further tweak loading plugins from entrypoints. - -To completely disable loading of plugins from entry points pass an empty dictionary to -``xpublish.Rest(datasets, plugins={})``. - -Example Entry Point -******************* - -Using `xpublish-edr `_ as an example. - -The plugin is named ``CfEdrPlugin`` and is located in ``xpublish_edr/plugin.py``. - -In ``pyproject.toml`` that then is added to the ``[project.entry-points."xpublish.plugin"]`` table. - -.. code-block:: toml - - [project.entry-points."xpublish.plugin"] - cf_edr = "xpublish_edr.plugin:CfEdrPlugin" - -Dependencies ------------- - -To allow plugins to be more adaptable, they should use -:py:meth:`xpublish.Dependencies.dataset` rather than directly -importing :py:func:`xpublish.dependencies.get_dataset`. - -To facilitate this, :py:class:`xpublish.Dependencies` is passed into -router hook methods. - -.. code-block:: python - - from fastapi import APIRouter, Depends - from xpublish import Plugin, Dependencies, hookimpl - - class DatasetAttrs(Plugin): - name = "dataset-attrs" - - @hookimpl - def dataset_router(self, deps: Dependencies): - router = APIRouter() - - @router.get("/attrs") - def get_attrs(ds = Depends(deps.dataset)): - return ds.attrs - - return router - -:py:class:`xpublish.Dependencies` has several other types of dependency functions that -it includes. 
- ----------------------- -Dataset Router Plugins ----------------------- - -Dataset router plugins are the next step from passing routers into -:py:class:`xpublish.Rest`. - -By implementing :py:meth:`xpublish.plugins.hooks.PluginSpec.dataset_router` -a developer can add new routes that respond below ``/datasets//``. - -Most dataset routers will have a prefix on their paths, and apply tags. -To make this reasonably standard, those should be specified as ``dataset_router_prefix`` -and ``dataset_router_tags`` on the plugin allowing them to be reasonably overridden. - -Adapted from `xpublish/plugins/included/dataset_info.py `_ - -.. code-block:: python - - from fastapi import APIRouter, Depends - from xpublish import Plugin, Dependencies, hookimpl - - class DatasetInfoPlugin(Plugin): - name = "dataset-info" - - dataset_router_prefix = "/info" - dataset_router_tags = ["info"] - - @hookimpl - def dataset_router(self, deps: Dependencies): - router = APIRouter(prefix=self.dataset_router_prefix, tags=self.dataset_router_tags) - - @router.get("/keys") - def list_keys(dataset=Depends(deps.dataset): - return dataset.variables - - return router - -This plugin will respond to ``/datasets//info/keys`` with a list of the keys in the dataset. - - ------------------- -App Router Plugins ------------------- - -App routers allow new top level routes to be provided by implementing -:py:meth:`xpublish.plugins.hooks.PluginSpec.app_router`. - -Similar to dataset routers, these should have a prefix (``app_router_prefix``) and tags (``app_router_tags``) that can be user overridable. - -.. 
code-block:: python - - from fastapi import APIRouter, Depends - from xpublish import Plugin, Dependencies, hookimpl - - class PluginInfo(Plugin): - name = "plugin_info" - - app_router_prefix = "/info" - app_router_tags = ["info"] - - @hookimpl - def app_router(self, deps: Dependencies): - router = APIRouter(prefix=self.app_router_prefix, tags=self.app_router_tags) - - @router.get("/plugins") - def plugins(plugins: Dict[str, Plugin] = Depends(deps.plugins)): - return {name: type(plugin) for name, plugin in plugins.items} - - return router - -This will return a dictionary of plugin names, and types at `/info/plugins`. - ------------------------- -Dataset Provider Plugins ------------------------- - -While Xpublish can have datasets passed in to :py:class:`xpublish.Rest` on intialization, -plugins can provide datasets (and they actually have priority over those passed in directly). - -In order for a plugin to provide datasets it needs to implemenent -:py:meth:`xpublish.plugins.hooks.PluginSpec.get_datasets` -and :py:meth:`xpublish.plugins.hooks.PluginSpec.get_dataset` methods. - -The first should return a list of all datasets that a plugin knows about. - -The second is provided a ``dataset_id``. -The plugin should return a dataset if it knows about the dataset corresponding to the id, -otherwise it should return None, so that Xpublish knows to continue looking to the next -plugin or the passed in dictionary of datasets. - -A plugin that provides the Xarray tutorial ``air_temperature`` dataset. - -.. 
code-block:: python - - from xpublish import Plugin, hookimpl - - - class TutorialDataset(Plugin): - name = "xarray-tutorial-dataset" - - @hookimpl - def get_datasets(self): - return ["air"] - - @hookimpl - def get_dataset(self, dataset_id: str): - if dataset_id == "air": - return xr.tutorial.open_dataset("air_temperature") - - return None - - ------------------ -Hook Spec Plugins ------------------ - -Plugins can also provide new hook specifications that other plugins can then implement. -This allows Xpublish to support things that we haven't even thought of yet. - -These return a class of hookspecs from :py:meth:`xpublish.plugins.hooks.PluginSpec.register_hookspec`. diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst deleted file mode 100644 index c273e38..0000000 --- a/docs/source/tutorial.rst +++ /dev/null @@ -1,236 +0,0 @@ -======== -Tutorial -======== - -Server-Side ------------ - -To begin, import Xpublish and open an Xarray :class:`~xarray.Dataset`: - -.. code-block:: python - - import xarray as xr - import xpublish - - ds = xr.tutorial.open_dataset( - "air_temperature", chunks=dict(lat=5, lon=5), - ) - -Publishing the dataset above is straightforward, just use the -:class:`~xpublish.Rest` class: - -.. code-block:: python - - rest = xpublish.Rest(ds) - -Alternatively, you might want to use the :attr:`xarray.Dataset.rest` accessor -for more convenience: - -.. code-block:: python - - ds.rest - -Optional customization of the underlying `FastAPI -application `_ or the server-side `cache -`_ is possible, e.g., - -.. code-block:: python - - ds.rest( - app_kws=dict( - title="My Dataset", - description="Dataset Description", - openapi_url="/dataset.JSON", - ), - cache_kws=dict(available_bytes=1e9) - ) - -Serving the dataset then simply requires calling the -:meth:`~xpublish.Rest.serve` method on the :class:`~xpublish.Rest` instance or -the :attr:`xarray.Dataset.rest` accessor: - -.. 
code-block:: python - - rest.serve() - - # or - - ds.rest.serve() - -:meth:`~xpublish.Rest.serve` passes any keyword arguments on to -:func:`uvicorn.run` (see `Uvicorn docs`_). - -.. _`Uvicorn docs`: https://www.uvicorn.org/deployment/#running-programmatically - -Default API routes -~~~~~~~~~~~~~~~~~~ - -By default, the FastAPI application created with Xpublish provides the following -endpoints to get some information about the published dataset: - -* ``/``: returns xarray's HTML repr. -* ``/keys``: returns a list of variable keys, i.e., those returned by :attr:`xarray.Dataset.variables`. -* ``/info``: returns a JSON dictionary summary of a Dataset variables and attributes, similar to :meth:`xarray.Dataset.info`. -* ``/dict``: returns a JSON dictionary of the full dataset. -* ``/versions``: returns JSON dictionary of the versions of Python, Xarray and related libraries on the server side, similar to :func:`xarray.show_versions`. - -The application also provides data access through a Zarr_ compatible API with the -following endpoints: - -* ``/.zmetadata``: returns a JSON dictionary representing the consolidated Zarr metadata. -* ``/{var}/{key}``: returns a single chunk of an array. - -.. _Zarr: https://zarr.readthedocs.io/en/stable/ - -Custom API routes -~~~~~~~~~~~~~~~~~ - -With Xpublish you have full control on which and how API endpoints are exposed -by the application. - -In the example below, the default API routes are included with custom tags -and using a path prefix for Zarr-like data access: - -.. code-block:: python - - from xpublish.routers import base_router, zarr_router - - ds.rest( - routers=[ - (base_router, {'tags': 'info'}), - (zarr_router, {'tags': 'zarr', 'prefix': '/zarr'}) - ] - ) - - ds.rest.serve() - -Using those settings, the Zarr-specific API endpoints now have the following -paths: - -* ``/zarr/.zmetadata`` -* ``/zarr/{var}/{key}`` - -It is also possible to create custom API routes and serve them via Xpublish. 
In -the example below, we create a minimal application to get the mean value of a -given variable in the published dataset: - -.. code-block:: python - - from fastapi import APIRouter, Depends, HTTPException - from xpublish.dependencies import get_dataset - - - myrouter = APIRouter() - - @myrouter.get("/{var_name}/mean") - def get_mean(var_name: str, dataset: xr.Dataset = Depends(get_dataset)): - if var_name not in dataset.variables: - raise HTTPException( - status_code=404, detail=f"Variable '{var_name}' not found in dataset" - ) - - return float(dataset[var_name].mean()) - - ds.rest(routers=[myrouter]) - - ds.rest.serve() - -Taking the dataset loaded above in this tutorial, this application should behave -like this: - -* ``/air/mean`` returns a floating number -* ``/not_a_variable/mean`` returns a 404 HTTP error - -The :func:`~xpublish.dependencies.get_dataset` function in the example above is -a FastAPI dependency that is used to access the dataset object being served by -the application, either from inside a FastAPI path operation decorated function -or from another FastAPI dependency. Note that ``get_dataset`` can only be used -as a function argument (FastAPI has other ways to reuse a dependency, but those -are not supported in this case). - -Xpublish also provides a :func:`~xpublish.dependencies.get_cache` dependency -function to get/put any useful key-value pair from/into the cache that is -created along with a running instance of the application. - -API Docs -~~~~~~~~ - -Thanks to FastAPI and `Swagger UI`_, automatically generated -interactive documentation is available at the ``/docs`` URL. - -This path can be overridden by setting the ``docs_url`` key in the ``app_kws`` -dictionary argument when initializing the rest accessor. - -.. _`Swagger UI`: https://github.com/swagger-api/swagger-ui - -Serving multiple datasets -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Xpublish also lets you serve multiple datasets via one FastAPI application. 
You -just need to provide a mapping (dictionary) when creating a -:class:`~xpublish.Rest` instance, e.g., - -.. code-block:: python - - ds2 = xr.tutorial.open_dataset('rasm') - - rest_collection = xpublish.Rest({'air_temperature': ds, 'rasm': ds2}) - - rest_collection.serve() - -When multiple datasets are given, all dataset-specific API endpoint URLs have -the ``/datasets/{dataset_id}`` prefix. For example: - -* ``/datasets/rasm/info`` returns information about the ``rasm`` dataset -* ``/datasets/invalid_dataset_id/info`` returns a 404 HTTP error - -The application also has one more API endpoint: - -* ``/datasets``: returns the list of the ids (keys) of all published datasets - -Note that custom routes work for multiple datasets just as well as for a single -dataset. No code change is required. Taking the example above, - -.. code-block:: python - - rest_collection = xpublish.Rest( - {'air_temperature': ds, 'rasm': ds2}, - routers=[myrouter] - ) - - rest_collection.serve() - -The following URLs should return expected results: - -* ``/datasets/air_temperature/air/mean`` -* ``/datasets/rasm/Tair/mean`` - -Client-Side ------------ - -By default, datasets served by Xpublish can be opened by any Zarr client -that implements an HTTPStore. In Python, this can be done with ``fsspec``: - -.. code-block:: python - - import zarr - from fsspec.implementations.http import HTTPFileSystem - - fs = HTTPFileSystem() - - # The URL 'http://0.0.0.0:9000' here serves one dataset - http_map = fs.get_mapper('http://0.0.0.0:9000') - - # open as a zarr group - zg = zarr.open_consolidated(http_map, mode='r') - - # or open as another Xarray Dataset - ds = xr.open_zarr(http_map, consolidated=True) - -Xpublish's endpoints can also be queried programmatically. For example: - -.. 
code-block:: python - - import requests - - response = requests.get('http://0.0.0.0:9000/info').json() diff --git a/docs/source/user-guide/deployment/index.md b/docs/source/user-guide/deployment/index.md new file mode 100644 index 0000000..48e1e51 --- /dev/null +++ b/docs/source/user-guide/deployment/index.md @@ -0,0 +1,7 @@ +# Deploying Xpublish + +```{warning} +More coming soon. +``` + +For now, take a look at some of the [server distributions](../../ecosystem/index.md#server-distributions), or ask in [GitHub Discussions](https://github.com/xpublish-community/xpublish/discussions/categories/q-a?discussions_q=category%3AQ%26A+) or on our [Slack Channel](../../ecosystem/index.md#slack). diff --git a/docs/source/user-guide/index.md b/docs/source/user-guide/index.md new file mode 100644 index 0000000..74a2858 --- /dev/null +++ b/docs/source/user-guide/index.md @@ -0,0 +1,11 @@ +# User Guide + +In this user guide, you will find detailed descriptions and examples that describe many common tasks that you can accomplish with Xpublish. + +```{toctree} +--- +hidden: +--- +plugins +deployment/index +``` diff --git a/docs/source/user-guide/plugins.md b/docs/source/user-guide/plugins.md new file mode 100644 index 0000000..14fde78 --- /dev/null +++ b/docs/source/user-guide/plugins.md @@ -0,0 +1,316 @@ +# Plugins + +While {py:class}`fastapi.APIRouter` can get you started building new endpoints +for datasets quickly, the real extendability of Xpublish comes from its plugin system. + +By using a plugin system, Xpublish becomes incredibly adaptable, and hopefully +easier to develop for also. Individual plugins and their functionality can +evolve independently, there are clear boundaries between types of functionality, +which allows easier reasoning about code. + +There are a few main varieties of plugins that Xpublish supports, but those +provide a lot of flexibility, and can enable whole new categories of plugins +and functionality.
+ +- [Dataset router](#dataset-router-plugins) +- [App router](#app-router-plugins) +- [Dataset provider](#dataset-provider-plugins) +- [Hook spec](#hook-spec-plugins) + +Plugins work by implementing specific methods to support a variety of usage, +and marking the implementations with a decorator. A plugin can also implement +methods for multiple varieties, which may be useful for things like dynamic +data providers. + +```{warning} +Plugins are new to Xpublish, so we're learning how everything works best together. + +If you have any questions, please ask in [Github Discussions](https://github.com/xpublish-community/xpublish/discussions) +(and feel free to tag `@abkfenris` for help with the plugin system). +``` + +## Functionality + +Plugins are built as [Pydantic models](https://docs.pydantic.dev/usage/models/) +and descend from {py:class}`xpublish.plugins.hooks.Plugin`. +This allows there to be a common way of configuring plugins and their functionality. + +```{code-block} python +--- +emphasize-lines: 5 +--- +from xpublish import Plugin + + +class HelloWorldPlugin(Plugin): + name = "hello_world" +``` + +At the minimum, a plugin needs to specify a `name` attribute. + +### Marking implementation methods + +We'll go deeper into the specific methods below, what they have in common is that any +method that a plugin is hoping to expose to the rest of Xpublish needs to be marked +with a `@hookimpl` decorator. + +```{code-block} python +--- +emphasize-lines: 7 +--- +from xpublish import Plugin, hookimpl +from fastapi import APIRouter + +class HelloWorldPlugin(Plugin): + name = "hello_world" + + @hookimpl + def app_router(self): + router = APIRouter() + + @router.get("/hello") + def get_hello(): + return "world" + + return router +``` + +For the plugin system, Xpublish is using [pluggy](https://pluggy.readthedocs.io/en/latest/). 
+Pluggy was developed to support [pytest](https://docs.pytest.org/en/latest/how-to/plugins.html), +but it now is used by several other projects including [Tox](https://tox.wiki/en/latest/plugins.html), +[Datasette](https://docs.datasette.io/en/latest/plugins.html), +and [Conda](https://docs.conda.io/projects/conda/en/latest/dev-guide/plugins/index.html), among others. + +Pluggy implements plugins as a system of hooks, each one is a distinct way for Xpublish +to communicate with plugins. +Each hook has both reference specifications, and plugin provided implementations. + +Most of the specifications are provided by Xpublish and are methods on +{py:class}`xpublish.plugins.hooks.PluginSpec` that are marked with `@hookspec`. + +Plugins can then re-implement these methods with all or a subset of the arguments, +which are then marked with `@hookimpl` +to tell Pluggy to make them accessible to Xpublish (and other plugins). + +```{note} +Over time Xpublish will most likely end up expanding the number of arguments passed +into most hook methods. + +Currently we're starting with a minimum set of arguments as we can always expand, +but currently it is much harder to reduce the number of arguments. + +If there is a new argument that you would like your plugin hooks to have, +please raise an [issue](https://github.com/xpublish-community/xpublish/issues) +to discuss including it in a future version. +``` + +In the specification, Xpublish defines if it's supposed to get responses from all +implementations ({py:meth}`xpublish.plugins.hooks.PluginSpec.get_dataset_ids`), +or the first non-`None` response ({py:meth}`xpublish.plugins.hooks.PluginSpec.get_dataset`). + +Pluggy also provides a lot more advanced functionality that we aren't going to go +into at this point, but could allow for creative things like dataset middleware.
+ +### Loading Local Plugins + +For plugins that you are not distributing, they can either be loaded directly via the +{py:class}`xpublish.Rest` initializer, or they can use +{py:meth}`xpublish.Rest.register_plugin` to load afterwards. + +```python +from xpublish import Rest + +rest = Rest(datasets, plugins={"hello-world": HelloWorldPlugin()}) +``` + +```python +from xpublish import Rest + +rest = Rest(datasets) +rest.register_plugin(HelloWorldPlugin()) +``` + +```{caution} +When plugins are provided directly to the {py:class}`xpublish.Rest` initializer +as keyword arguments, it prevents Xpublish from automatically loading other plugins +that are installed. + +For more details of the automatic plugin loading system, +see [entry points](#entry-points) below. +``` + +### Entry Points + +When you install a plugin library, the library takes advantage of the +[entry point system](https://setuptools.pypa.io/en/latest/userguide/entry_point.html). + +This allows {py:class}`xpublish.Rest` to automatically find and use plugins. +It only does this if plugins **are not** provided as a keyword argument. + +{py:class}`xpublish.Rest` uses {py:func}`plugins.manage.load_default_plugins` to +load plugins from entry points. +It can be used directly and be set to disable specific plugins from being loaded, +or {py:func}`plugins.manage.find_default_plugins` and {py:func}`plugins.manage.configure_plugins`, +can be used to further tweak loading plugins from entry points. + +To completely disable loading of plugins from entry points pass an empty dictionary to +`xpublish.Rest(datasets, plugins={})`. + +#### Example Entry Point + +Using [xpublish-edr](https://github.com/gulfofmaine/xpublish-edr/) as an example. + +The plugin is named `CfEdrPlugin` and is located in `xpublish_edr/plugin.py`. + +In `pyproject.toml` that then is added to the `[project.entry-points."xpublish.plugin"]` table.
+ +```toml +[project.entry-points."xpublish.plugin"] +cf_edr = "xpublish_edr.plugin:CfEdrPlugin" +``` + +### Dependencies + +To allow plugins to be more adaptable, they should use +{py:meth}`xpublish.Dependencies.dataset` rather than directly +importing {py:func}`xpublish.dependencies.get_dataset`. + +To facilitate this, {py:class}`xpublish.Dependencies` is passed into +router hook methods. + +```python +from fastapi import APIRouter, Depends +from xpublish import Plugin, Dependencies, hookimpl + + +class DatasetAttrs(Plugin): + name = "dataset-attrs" + + @hookimpl + def dataset_router(self, deps: Dependencies): + router = APIRouter() + + @router.get("/attrs") + def get_attrs(ds=Depends(deps.dataset)): + return ds.attrs + + return router +``` + +{py:class}`xpublish.Dependencies` has several other types of dependency functions that +it includes. + +## Dataset Router Plugins + +Dataset router plugins are the next step from passing routers into +{py:class}`xpublish.Rest`. + +By implementing {py:meth}`xpublish.plugins.hooks.PluginSpec.dataset_router` +a developer can add new routes that respond below `/datasets/{dataset_id}/`. + +Most dataset routers will have a prefix on their paths, and apply tags. +To make this reasonably standard, those should be specified as `dataset_router_prefix` +and `dataset_router_tags` on the plugin allowing them to be reasonably overridden.
+ +Adapted from [xpublish/plugins/included/dataset_info.py](https://github.com/xpublish-community/xpublish/blob/main/xpublish/plugins/included/dataset_info.py) + +```python +from fastapi import APIRouter, Depends +from xpublish import Plugin, Dependencies, hookimpl + + +class DatasetInfoPlugin(Plugin): + name = "dataset-info" + + dataset_router_prefix = "/info" + dataset_router_tags = ["info"] + + @hookimpl + def dataset_router(self, deps: Dependencies): + router = APIRouter( + prefix=self.dataset_router_prefix, tags=self.dataset_router_tags + ) + + @router.get("/keys") + def list_keys(dataset=Depends(deps.dataset)): + return dataset.variables + + return router +``` + +This plugin will respond to `/datasets/<dataset_id>/info/keys` with a list of the keys in the dataset. + +## App Router Plugins + +App routers allow new top-level routes to be provided by implementing +{py:meth}`xpublish.plugins.hooks.PluginSpec.app_router`. + +Similar to dataset routers, these should have a prefix (`app_router_prefix`) and tags (`app_router_tags`) that users can override. + +```python +from fastapi import APIRouter, Depends +from xpublish import Plugin, Dependencies, hookimpl + + +class PluginInfo(Plugin): + name = "plugin_info" + + app_router_prefix = "/info" + app_router_tags = ["info"] + + @hookimpl + def app_router(self, deps: Dependencies): + router = APIRouter(prefix=self.app_router_prefix, tags=self.app_router_tags) + + @router.get("/plugins") + def plugins(plugins: Dict[str, Plugin] = Depends(deps.plugins)): + return {name: type(plugin) for name, plugin in plugins.items()} + + return router +``` + +This will return a dictionary of plugin names and types at `/info/plugins`. + +## Dataset Provider Plugins + +While Xpublish can have datasets passed in to {py:class}`xpublish.Rest` on initialization, +plugins can provide datasets (and they actually have priority over those passed in directly).
+ +In order for a plugin to provide datasets it needs to implement the +{py:meth}`xpublish.plugins.hooks.PluginSpec.get_datasets` +and {py:meth}`xpublish.plugins.hooks.PluginSpec.get_dataset` methods. + +The first should return a list of the IDs of all datasets that a plugin knows about. + +The second is provided a `dataset_id`. +The plugin should return a dataset if it knows about the dataset corresponding to the id, +otherwise it should return `None`, so that Xpublish knows to continue looking to the next +plugin or the passed-in dictionary of datasets. + +For example, a plugin that provides the Xarray tutorial `air_temperature` dataset: + +```python +from xpublish import Plugin, hookimpl + + +class TutorialDataset(Plugin): + name = "xarray-tutorial-dataset" + + @hookimpl + def get_datasets(self): + return ["air"] + + @hookimpl + def get_dataset(self, dataset_id: str): + if dataset_id == "air": + return xr.tutorial.open_dataset("air_temperature") + + return None +``` + +## Hook Spec Plugins + +Plugins can also provide new hook specifications that other plugins can then implement. +This allows Xpublish to support things that we haven't even thought of yet. + +These return a class of hookspecs from {py:meth}`xpublish.plugins.hooks.PluginSpec.register_hookspec`.
diff --git a/setup.cfg b/setup.cfg index 3e5fdef..f5cf7d5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,7 +9,7 @@ select = B,C,E,F,W,T4,B9 [isort] known_first_party=xpublish -known_third_party=cachey,dask,fastapi,numcodecs,numpy,pandas,pkg_resources,pluggy,pydantic,pytest,setuptools,sphinx_autosummary_accessors,starlette,uvicorn,xarray,zarr +known_third_party=cachey,dask,fastapi,numcodecs,numpy,pandas,pkg_resources,pluggy,pydantic,pytest,requests,setuptools,sphinx_autosummary_accessors,starlette,uvicorn,xarray,zarr multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 diff --git a/setup.py b/setup.py index 1e9ad58..e5b72ee 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ maintainer='Joe Hamman', maintainer_email='jhamman@ucar.edu', classifiers=CLASSIFIERS, - url='https://github.com/xarray-contrib/xpublish', + url='https://github.com/xpublish-community/xpublish', packages=find_packages(exclude=('tests',)), package_dir={'xpublish': 'xpublish'}, include_package_data=True, diff --git a/tests/test_zarr_compat.py b/tests/test_zarr_compat.py index 6616bdc..8f35e59 100644 --- a/tests/test_zarr_compat.py +++ b/tests/test_zarr_compat.py @@ -159,7 +159,7 @@ def test_roundtrip(start, end, freq, nlats, nlons, var_const, calendar, use_cfti This ends up producing unexpected behavior when calling encode_zarr_varible() on datasets with variables containing datetime like dtypes. -See: https://github.com/xarray-contrib/xpublish/pull/10#discussion_r388028417""" +See: https://github.com/xpublish-community/xpublish/pull/10#discussion_r388028417""" @pytest.mark.parametrize( diff --git a/xpublish/rest.py b/xpublish/rest.py index f9a3eb2..65f8581 100644 --- a/xpublish/rest.py +++ b/xpublish/rest.py @@ -341,20 +341,20 @@ def serve(self, host: str = '0.0.0.0', port: int = 9000, log_level: str = 'debug class SingleDatasetRest(Rest): """Used to publish a single Xarray dataset via a REST API (FastAPI application). - Use xpublish.Rest to publish multiple datasets. 
+ Use :class:`xpublish.Rest` to publish multiple datasets. Parameters: ----------- - dataset : :class:`xarray.Dataset` + dataset : A single :class:`xarray.Dataset` object to be served. """ def __init__( self, dataset: xr.Dataset, - routers=None, - cache_kws=None, - app_kws=None, + routers: Optional[APIRouter] = None, + cache_kws: Optional[Dict] = None, + app_kws: Optional[Dict] = None, plugins: Optional[Dict[str, Plugin]] = None, ): self._dataset = dataset