From 0bc65491a6b9058ddd8c464055969e196b2c0570 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 09:16:02 -0800 Subject: [PATCH 01/13] add binder env --- .binder/environment.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .binder/environment.yml diff --git a/.binder/environment.yml b/.binder/environment.yml new file mode 100644 index 0000000..dd6ce38 --- /dev/null +++ b/.binder/environment.yml @@ -0,0 +1,18 @@ +name: xpublish +channels: + - conda-forge +dependencies: + - python=3 + - xarray + - netcdf4 + - zarr + - numcodecs + - fastapi + - uvicorn + - fsspec + - dask + - distributed + - dask-labextension + - toolz + - bokeh + - pip From 5ad2969b2427053447e9b2401fed5aa3585aecb8 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 09:39:55 -0800 Subject: [PATCH 02/13] start/postBuild --- .binder/environment.yml | 1 + .binder/postBuild | 5 +++ .binder/start | 5 +++ test.py => .binder/test.py | 7 ++++ publish.ipynb | 72 -------------------------------------- 5 files changed, 18 insertions(+), 72 deletions(-) create mode 100644 .binder/postBuild create mode 100644 .binder/start rename test.py => .binder/test.py (62%) delete mode 100644 publish.ipynb diff --git a/.binder/environment.yml b/.binder/environment.yml index dd6ce38..30f0202 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -15,4 +15,5 @@ dependencies: - dask-labextension - toolz - bokeh + - ipytree - pip diff --git a/.binder/postBuild b/.binder/postBuild new file mode 100644 index 0000000..a668cac --- /dev/null +++ b/.binder/postBuild @@ -0,0 +1,5 @@ +set -euo pipefail + +# labextensions +jupyter labextension install --clean dask-labextension \ + @jupyter-widgets/jupyterlab-manager \ No newline at end of file diff --git a/.binder/start b/.binder/start new file mode 100644 index 0000000..cfbe4b9 --- /dev/null +++ b/.binder/start @@ -0,0 +1,5 @@ +#!/bin/bash + +python .binder/test.py + +exec "$@" \ No newline at end of file diff 
--git a/test.py b/.binder/test.py similarity index 62% rename from test.py rename to .binder/test.py index bd4a829..3a75dd4 100644 --- a/test.py +++ b/.binder/test.py @@ -1,8 +1,15 @@ +from dask.distributed import Client + import xarray as xr import xpublish # noqa: F401 if __name__ == "__main__": + + client = Client() + print(client.cluster) + print(client.cluster.dashboard_link) + ds = xr.tutorial.open_dataset("air_temperature", chunks=dict(lat=5, lon=5), decode_cf=False) print(ds) diff --git a/publish.ipynb b/publish.ipynb deleted file mode 100644 index edaf13b..0000000 --- a/publish.ipynb +++ /dev/null @@ -1,72 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "\n", - "import xpublish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds = xr.tutorial.open_dataset('air_temperature',\n", - " chunks=dict(lat=5, lon=5),\n", - " decode_cf=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds.rest.serve()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 1f798ee5d78fad4b73e78c2ac5f51037dc2ce554 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 09:46:09 -0800 Subject: [PATCH 03/13] fix postBuild --- 
.binder/postBuild | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.binder/postBuild b/.binder/postBuild index a668cac..b680321 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -1,5 +1,7 @@ +#!/bin/bash + set -euo pipefail # labextensions jupyter labextension install --clean dask-labextension \ - @jupyter-widgets/jupyterlab-manager \ No newline at end of file + @jupyter-widgets/jupyterlab-manager From 4ceeed42f18f5996c4e5e2224c307999bee39b65 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 09:56:56 -0800 Subject: [PATCH 04/13] more binder --- .binder/environment.yml | 1 + .binder/postBuild | 5 +- .binder/start | 2 +- open_dataset.ipynb | 252 +++++++++++++++++++++++++++++++++++++--- xpublish/rest.py | 40 ++++--- 5 files changed, 265 insertions(+), 35 deletions(-) diff --git a/.binder/environment.yml b/.binder/environment.yml index 30f0202..3b9614a 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -13,6 +13,7 @@ dependencies: - dask - distributed - dask-labextension + - jupyter-server-proxy - toolz - bokeh - ipytree diff --git a/.binder/postBuild b/.binder/postBuild index b680321..e491aac 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -2,6 +2,9 @@ set -euo pipefail +pip install -e . 
+ # labextensions jupyter labextension install --clean dask-labextension \ - @jupyter-widgets/jupyterlab-manager + @jupyter-widgets/jupyterlab-manager \ + ipytree diff --git a/.binder/start b/.binder/start index cfbe4b9..e0fa1ec 100644 --- a/.binder/start +++ b/.binder/start @@ -1,5 +1,5 @@ #!/bin/bash -python .binder/test.py +python .binder/test.py > logfile.txt 2>&1 & exec "$@" \ No newline at end of file diff --git a/open_dataset.ipynb b/open_dataset.ipynb index 840c281..5ef264e 100644 --- a/open_dataset.ipynb +++ b/open_dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,69 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from dask.distributed import Client" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jhamman/miniconda3/lib/python3.6/site-packages/distributed/dashboard/core.py:79: UserWarning: \n", + "Port 8787 is already in use. \n", + "Perhaps you already have a cluster running?\n", + "Hosting the diagnostics dashboard on a random port instead.\n", + " warnings.warn(\"\\n\" + msg)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 34.36 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -30,9 +92,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".zmetadata b'{\"zarr_consolidated_format\":1,\"metadata\":{\".zgroup\":{\"zarr_format\":2},\".zattrs\":{\"Conventions\":\"COARDS\",\"title\":\"4x daily NMC reanalysis (1948)\",\"description\":\"Data is from NMC initialized reanalysis\\\\n(4x/day). These are the 0.9950 sigma level values.\",\"platform\":\"Model\",\"references\":\"http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html\"},\"lat/.zattrs\":{\"standard_name\":\"latitude\",\"long_name\":\"Latitude\",\"units\":\"degrees_north\",\"axis\":\"Y\",\"_ARRAY_DIMENSIONS\":[\"lat\"]},\"lat/.zarray\":{\"compressor\":{\"id\":\"blosc\",\"cname\":\"lz4\",\"clevel\":5,\"shuffle\":1,\"blocksize\":0},\"filters\":null,\"chunks\":[25],\"dtype\":\" 918\u001b[0;31m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 919\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 920\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/zarr/util.py\u001b[0m in \u001b[0;36m_ipython_display_\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_ipython_display_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 494\u001b[0;31m 
\u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtree_widget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 495\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ipython_display_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 496\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/zarr/util.py\u001b[0m in \u001b[0;36mtree_widget\u001b[0;34m(group, expand, level)\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 430\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtree_widget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 431\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipytree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mipytree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'ipytree'" + ] + }, + { + "data": { + "text/plain": [ + "/\n", + " ├── air (2920, 25, 53) int16\n", + " ├── lat 
(25,) float32\n", + " ├── lon (53,) float32\n", + " └── time (2920,) float32" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "zg.tree()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n",
+       "Dimensions:  (lat: 25, lon: 53, time: 2920)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
+       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
+       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
+       "Data variables:\n",
+       "    air      (time, lat, lon) float32 dask.array<chunksize=(2920, 5, 5), meta=np.ndarray>\n",
+       "Attributes:\n",
+       "    Conventions:  COARDS\n",
+       "    title:        4x daily NMC reanalysis (1948)\n",
+       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
+       "    platform:     Model\n",
+       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
" + ], + "text/plain": [ + "\n", + "Dimensions: (lat: 25, lon: 53, time: 2920)\n", + "Coordinates:\n", + " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", + " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", + " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", + "Data variables:\n", + " air (time, lat, lon) float32 dask.array\n", + "Attributes:\n", + " Conventions: COARDS\n", + " title: 4x daily NMC reanalysis (1948)\n", + " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", + " platform: Model\n", + " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ds = xr.open_zarr(http_map, consolidated=True, decode_cf=True)\n", "ds" @@ -78,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -90,16 +254,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'long_name': '4xDaily Air temperature at sigma level 995',\n", + " 'units': 'degK',\n", + " 'precision': 2,\n", + " 'GRIB_id': 11,\n", + " 'GRIB_name': 'TMP',\n", + " 'var_desc': 'Air temperature',\n", + " 'dataset': 'NMC Reanalysis',\n", + " 'level_desc': 'Surface',\n", + " 'statistic': 'Individual Obs',\n", + " 'parent_stat': 'Other',\n", + " 'actual_range': array([185.16, 322.1 ], dtype=float32)}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ds_tutorial.air.attrs" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -110,9 +295,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 0, 0)\n", + "(slice(0, 4, None), 0, 0)\n", + "(slice(0, 4, None), slice(0, 4, None), 0)\n", + "(slice(0, 4, None), slice(0, 4, None), slice(0, 4, None))\n", + "(slice(None, -4, None), slice(0, 4, None), slice(0, 4, None))\n", + "(slice(None, None, None), slice(0, 4, None), slice(0, 4, None))\n", + "(slice(None, None, None), slice(None, None, None), slice(0, 4, None))\n", + "(slice(None, None, None), slice(None, None, None), slice(None, None, None))\n" + ] + } + ], "source": [ "# test a bunch of indexing patterns\n", "for index in [(0, 0, 0),\n", @@ -130,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -139,6 +339,20 @@ "assert_identical(ds, ds_tutorial)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -163,7 +377,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.8" } }, "nbformat": 4, diff --git a/xpublish/rest.py b/xpublish/rest.py index d570305..297bac6 100644 --- a/xpublish/rest.py +++ b/xpublish/rest.py @@ -90,6 +90,22 @@ def zmetadata_json(self): return zjson + async def get_key(self, var, chunk): + logger.debug("var is %s", var) + logger.debug("chunk is %s", chunk) + + da = self._variables[var].data + arr_meta = self.zmetadata["metadata"][f"{var}/{array_meta_key}"] + + data_chunk = get_data_chunk(da, chunk, out_shape=arr_meta["chunks"]) + + echunk = _encode_chunk( + data_chunk.tobytes(), + filters=arr_meta["filters"], + compressor=arr_meta["compressor"], + ) + return Response(echunk, media_type="application/octet-stream") + @property def app(self): """ FastAPI app """ @@ -124,22 +140,18 @@ def to_dict(data: bool 
= False): return self._obj.to_dict(data=data) @self._app.get("/{var}/{chunk}") - def get_key(var, chunk): - logger.debug("var is %s", var) - logger.debug("chunk is %s", chunk) + async def get_key(var, chunk): + result = await self.get_key(var, chunk) + return result - da = self._variables[var].data - arr_meta = self.zmetadata["metadata"][f"{var}/{array_meta_key}"] - - data_chunk = get_data_chunk(da, chunk, out_shape=arr_meta["chunks"]) - - echunk = _encode_chunk( - data_chunk.tobytes(), - filters=arr_meta["filters"], - compressor=arr_meta["compressor"], - ) - return Response(echunk, media_type="application/octet-stream") + @self._app.get("/versions") + def versions(): + import io + with io.StringIO() as f: + xr.show_versions(f) + versions = f.getvalue() + return versions return self._app def serve(self, host="0.0.0.0", port=9000, log_level="debug", **kwargs): From 3fa168e2ba97be6f7f7ef98496d6e7e77a91716f Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 10:59:01 -0800 Subject: [PATCH 05/13] dask config for binder --- .binder/dask_config.yaml | 25 +++++++++++++++++++++++++ .binder/postBuild | 6 ++++++ .binder/test.py | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 .binder/dask_config.yaml diff --git a/.binder/dask_config.yaml b/.binder/dask_config.yaml new file mode 100644 index 0000000..68dffc7 --- /dev/null +++ b/.binder/dask_config.yaml @@ -0,0 +1,25 @@ +distributed: + version: 2 + + dashboard: + link: /user/{JUPYTERHUB_USER}/proxy/{port}/status + + scheduler: + idle-timeout: 3600s + + admin: + tick: + limit: 5s + +logging: + distributed: warning + bokeh: critical + tornado: critical + tornado.application: error + +labextension: + factory: + module: distributed + class: LocalCluster + args: [] + kwargs: {} diff --git a/.binder/postBuild b/.binder/postBuild index e491aac..636a228 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -8,3 +8,9 @@ pip install -e . 
jupyter labextension install --clean dask-labextension \ @jupyter-widgets/jupyterlab-manager \ ipytree + +# dask config +# ${KERNEL_PYTHON_PREFIX} is set by repo2docker to sys.prefix +# of the python that the kernel is run in. +mkdir -p ${KERNEL_PYTHON_PREFIX}/etc/dask +cp binder/dask_config.yaml ${KERNEL_PYTHON_PREFIX}/etc/dask/dask.yaml diff --git a/.binder/test.py b/.binder/test.py index 3a75dd4..ad633ce 100644 --- a/.binder/test.py +++ b/.binder/test.py @@ -6,7 +6,7 @@ if __name__ == "__main__": - client = Client() + client = Client(n_workers=4) print(client.cluster) print(client.cluster.dashboard_link) From dfb7f2ef92e8fe201e9ff67681339e59432aaf77 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 11:09:37 -0800 Subject: [PATCH 06/13] update requirements --- .binder/postBuild | 2 +- .binder/test.py | 2 +- dev-requirements.txt | 4 +++- requirements.txt | 8 ++++---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.binder/postBuild b/.binder/postBuild index 636a228..e39dadb 100644 --- a/.binder/postBuild +++ b/.binder/postBuild @@ -13,4 +13,4 @@ jupyter labextension install --clean dask-labextension \ # ${KERNEL_PYTHON_PREFIX} is set by repo2docker to sys.prefix # of the python that the kernel is run in. 
mkdir -p ${KERNEL_PYTHON_PREFIX}/etc/dask -cp binder/dask_config.yaml ${KERNEL_PYTHON_PREFIX}/etc/dask/dask.yaml +cp .binder/dask_config.yaml ${KERNEL_PYTHON_PREFIX}/etc/dask/dask.yaml diff --git a/.binder/test.py b/.binder/test.py index ad633ce..3651793 100644 --- a/.binder/test.py +++ b/.binder/test.py @@ -6,7 +6,7 @@ if __name__ == "__main__": - client = Client(n_workers=4) + client = Client(n_workers=4) print(client.cluster) print(client.cluster.dashboard_link) diff --git a/dev-requirements.txt b/dev-requirements.txt index 04baf5a..199a069 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,7 @@ +fsspec +netcdf4 pytest pytest-sugar pytest-cov -netcdf4 +requests -r requirements.txt diff --git a/requirements.txt b/requirements.txt index b9ec47b..ce81e2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ dask fastapi -fsspec numcodecs -requests +numpy>=1.17 +starlette +toolz uvicorn -xarray +xarray>=0.15 zarr -toolz From 79ba6ab8f5af3f1b02fadcb2e4d76ef74c443a60 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 11:40:04 -0800 Subject: [PATCH 07/13] update notebook --- .binder/test.py | 2 +- examples/open_dataset.ipynb | 225 +++++++++++++++++++++ open_dataset.ipynb | 385 ------------------------------------ 3 files changed, 226 insertions(+), 386 deletions(-) create mode 100644 examples/open_dataset.ipynb delete mode 100644 open_dataset.ipynb diff --git a/.binder/test.py b/.binder/test.py index 3651793..d325677 100644 --- a/.binder/test.py +++ b/.binder/test.py @@ -6,7 +6,7 @@ if __name__ == "__main__": - client = Client(n_workers=4) + client = Client(n_workers=4, dashboard_address=8787) print(client.cluster) print(client.cluster.dashboard_link) diff --git a/examples/open_dataset.ipynb b/examples/open_dataset.ipynb new file mode 100644 index 0000000..6754ece --- /dev/null +++ b/examples/open_dataset.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "import xarray as xr\n", + "import xpublish\n", + "import zarr\n", + "\n", + "import numpy as np\n", + "from json import loads\n", + "\n", + "from fsspec.implementations.http import HTTPFileSystem\n", + "\n", + "from dask.distributed import Client\n", + "from xarray.testing import assert_identical, assert_equal, assert_chunks_equal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's check to make sure our server started alright\n", + "!head logfile.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Start a dask cluster for use on the client side\n", + "client = Client(n_workers=4, dashboard_address=43757)\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now open three more browser tabs/windows:\n", + "\n", + "_Note that you will have to modify the url prefix slightly, to do this, just copy the first part of your browser's URL_\n", + "\n", + "1. Xpublish Web App: e.g. https://hub.gke.mybinder.org/user/jhamman-xpublish-gbbqbxfi/proxy/9000\n", + "2. Xpublish's Dask Cluster Dashboard: e.g. https://hub.gke.mybinder.org/user/jhamman-xpublish-gbbqbxfi/proxy/8787/status\n", + "3. This Notebook's Dask Cluster Dashboard: e.g. https://hub.gke.mybinder.org/user/jhamman-xpublish-gbbqbxfi/proxy/43757/status\n", + "\n", + "_Also note that this port numbers may change. 
The server-side ports are available in `logfile.txt` (see above) and the client-side port is in the cell above._" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can access our API using fsspec's HTTPFileSystem\n", + "fs = HTTPFileSystem()\n", + "\n", + "# The http mapper gives us a dict-like interface to the API\n", + "http_map = fs.get_mapper('http://0.0.0.0:9000')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can access API endpoints by key now...\n", + "for key in ['.zmetadata', 'keys']:\n", + " print(key, http_map[key], '\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The .zmetadata key returns the json dictionary of consolidated zarr metadata\n", + "# We can load/decode that and access one array's attributes\n", + "d = loads(http_map['.zmetadata'])\n", + "d['metadata']['air/.zattrs']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can pass that mapper object directly to zarr's open_consolidated function\n", + "# This returns a zarr group\n", + "zg = zarr.open_consolidated(http_map, mode='r')\n", + "zg.tree()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# And we can do the same with xarray's open_zarr function\n", + "ds = xr.open_zarr(http_map, consolidated=True)\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The rest of this notebook applies some simple tests to show that the served dataset is identical to the\n", + "# \"air_temperature\" dataset in xarray's tutorial dataset.\n", + "ds_tutorial = xr.tutorial.open_dataset(\n", + " 'air_temperature',\n", + " chunks=dict(lat=5, lon=5),\n", + " decode_cf=True)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds_tutorial.air.attrs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test(actual, expected, index):\n", + " '''a simple equality test with index as a parameter'''\n", + " assert np.array_equal(actual[index].values, expected[index].values)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test a bunch of indexing patterns\n", + "for index in [(0, 0, 0),\n", + " (slice(0, 4), 0, 0),\n", + " (slice(0, 4), slice(0, 4), 0),\n", + " (slice(0, 4), slice(0, 4), slice(0, 4)),\n", + " (slice(-4), slice(0, 4), slice(0, 4)),\n", + " (slice(None), slice(0, 4), slice(0, 4)),\n", + " (slice(None), slice(None), slice(0, 4)),\n", + " (slice(None), slice(None), slice(None)),\n", + " ]:\n", + " print(index)\n", + " test(ds_tutorial['air'], ds['air'], index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert_equal(ds, ds_tutorial)\n", + "assert_chunks_equal(ds, ds_tutorial)\n", + "assert_identical(ds, ds_tutorial)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/open_dataset.ipynb b/open_dataset.ipynb deleted file mode 100644 index 5ef264e..0000000 --- a/open_dataset.ipynb +++ /dev/null @@ -1,385 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "import xpublish\n", - "import zarr\n", - "\n", - "import numpy as np\n", - "from json import loads\n", - "\n", - "from fsspec.implementations.http import HTTPFileSystem\n", - "\n", - "from xarray.testing import assert_identical, assert_equal, assert_chunks_equal" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from dask.distributed import Client" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jhamman/miniconda3/lib/python3.6/site-packages/distributed/dashboard/core.py:79: UserWarning: \n", - "Port 8787 is already in use. \n", - "Perhaps you already have a cluster running?\n", - "Hosting the diagnostics dashboard on a random port instead.\n", - " warnings.warn(\"\\n\" + msg)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 34.36 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client = Client()\n", - "client" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "fs = HTTPFileSystem()\n", - "http_map = fs.get_mapper('http://0.0.0.0:9000')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ".zmetadata b'{\"zarr_consolidated_format\":1,\"metadata\":{\".zgroup\":{\"zarr_format\":2},\".zattrs\":{\"Conventions\":\"COARDS\",\"title\":\"4x daily NMC reanalysis (1948)\",\"description\":\"Data is from NMC initialized reanalysis\\\\n(4x/day). These are the 0.9950 sigma level values.\",\"platform\":\"Model\",\"references\":\"http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.html\"},\"lat/.zattrs\":{\"standard_name\":\"latitude\",\"long_name\":\"Latitude\",\"units\":\"degrees_north\",\"axis\":\"Y\",\"_ARRAY_DIMENSIONS\":[\"lat\"]},\"lat/.zarray\":{\"compressor\":{\"id\":\"blosc\",\"cname\":\"lz4\",\"clevel\":5,\"shuffle\":1,\"blocksize\":0},\"filters\":null,\"chunks\":[25],\"dtype\":\" 918\u001b[0;31m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 919\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 920\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/zarr/util.py\u001b[0m in \u001b[0;36m_ipython_display_\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 493\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_ipython_display_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 494\u001b[0;31m \u001b[0mtree\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtree_widget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexpand\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 495\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ipython_display_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 496\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/zarr/util.py\u001b[0m in \u001b[0;36mtree_widget\u001b[0;34m(group, expand, level)\u001b[0m\n\u001b[1;32m 429\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 430\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtree_widget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpand\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 431\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mipytree\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mipytree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTree\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'ipytree'" - ] - }, - { - "data": { - "text/plain": [ - "/\n", - " ├── air (2920, 25, 53) int16\n", - " ├── lat (25,) float32\n", - " ├── lon (53,) float32\n", - " └── time (2920,) float32" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "zg.tree()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
<xarray.Dataset>\n",
-       "Dimensions:  (lat: 25, lon: 53, time: 2920)\n",
-       "Coordinates:\n",
-       "  * lat      (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n",
-       "  * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n",
-       "  * time     (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n",
-       "Data variables:\n",
-       "    air      (time, lat, lon) float32 dask.array<chunksize=(2920, 5, 5), meta=np.ndarray>\n",
-       "Attributes:\n",
-       "    Conventions:  COARDS\n",
-       "    title:        4x daily NMC reanalysis (1948)\n",
-       "    description:  Data is from NMC initialized reanalysis\\n(4x/day).  These a...\n",
-       "    platform:     Model\n",
-       "    references:   http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly...
" - ], - "text/plain": [ - "\n", - "Dimensions: (lat: 25, lon: 53, time: 2920)\n", - "Coordinates:\n", - " * lat (lat) float32 75.0 72.5 70.0 67.5 65.0 ... 25.0 22.5 20.0 17.5 15.0\n", - " * lon (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0\n", - " * time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00\n", - "Data variables:\n", - " air (time, lat, lon) float32 dask.array\n", - "Attributes:\n", - " Conventions: COARDS\n", - " title: 4x daily NMC reanalysis (1948)\n", - " description: Data is from NMC initialized reanalysis\\n(4x/day). These a...\n", - " platform: Model\n", - " references: http://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanaly..." - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = xr.open_zarr(http_map, consolidated=True, decode_cf=True)\n", - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "ds_tutorial = xr.tutorial.open_dataset(\n", - " 'air_temperature',\n", - " chunks=dict(lat=5, lon=5),\n", - " decode_cf=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'long_name': '4xDaily Air temperature at sigma level 995',\n", - " 'units': 'degK',\n", - " 'precision': 2,\n", - " 'GRIB_id': 11,\n", - " 'GRIB_name': 'TMP',\n", - " 'var_desc': 'Air temperature',\n", - " 'dataset': 'NMC Reanalysis',\n", - " 'level_desc': 'Surface',\n", - " 'statistic': 'Individual Obs',\n", - " 'parent_stat': 'Other',\n", - " 'actual_range': array([185.16, 322.1 ], dtype=float32)}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_tutorial.air.attrs" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "def test(actual, expected, index):\n", - " '''a simple equality test with index as a parameter'''\n", 
- " assert np.array_equal(actual[index].values, expected[index].values)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(0, 0, 0)\n", - "(slice(0, 4, None), 0, 0)\n", - "(slice(0, 4, None), slice(0, 4, None), 0)\n", - "(slice(0, 4, None), slice(0, 4, None), slice(0, 4, None))\n", - "(slice(None, -4, None), slice(0, 4, None), slice(0, 4, None))\n", - "(slice(None, None, None), slice(0, 4, None), slice(0, 4, None))\n", - "(slice(None, None, None), slice(None, None, None), slice(0, 4, None))\n", - "(slice(None, None, None), slice(None, None, None), slice(None, None, None))\n" - ] - } - ], - "source": [ - "# test a bunch of indexing patterns\n", - "for index in [(0, 0, 0),\n", - " (slice(0, 4), 0, 0),\n", - " (slice(0, 4), slice(0, 4), 0),\n", - " (slice(0, 4), slice(0, 4), slice(0, 4)),\n", - " (slice(-4), slice(0, 4), slice(0, 4)),\n", - " (slice(None), slice(0, 4), slice(0, 4)),\n", - " (slice(None), slice(None), slice(0, 4)),\n", - " (slice(None), slice(None), slice(None)),\n", - " ]:\n", - " print(index)\n", - " test(ds_tutorial['air'], ds['air'], index)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "assert_equal(ds, ds_tutorial)\n", - "assert_chunks_equal(ds, ds_tutorial)\n", - "assert_identical(ds, ds_tutorial)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 0df8edf221fc80e81dd15a8352dac64b407e4b0a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 11:43:53 -0800 Subject: [PATCH 08/13] black --- xpublish/rest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xpublish/rest.py b/xpublish/rest.py index 297bac6..4d5fb56 100644 --- a/xpublish/rest.py +++ b/xpublish/rest.py @@ -100,9 +100,7 @@ async def get_key(self, var, chunk): data_chunk = get_data_chunk(da, chunk, out_shape=arr_meta["chunks"]) echunk = _encode_chunk( - data_chunk.tobytes(), - filters=arr_meta["filters"], - compressor=arr_meta["compressor"], + data_chunk.tobytes(), filters=arr_meta["filters"], compressor=arr_meta["compressor"], ) return Response(echunk, media_type="application/octet-stream") @@ -152,6 +150,7 @@ def versions(): xr.show_versions(f) versions = f.getvalue() return versions + return self._app def serve(self, host="0.0.0.0", port=9000, log_level="debug", **kwargs): From 7f6bc3a0c4de8aa177dc9bd07febff0ddf0ffa03 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 11:49:03 -0800 Subject: [PATCH 09/13] isort --- .binder/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.binder/test.py b/.binder/test.py index d325677..0a9e1f2 100644 --- a/.binder/test.py +++ b/.binder/test.py @@ -1,6 +1,5 @@ -from dask.distributed import Client - import xarray as xr +from dask.distributed import Client import xpublish # noqa: F401 From 34d26cc3e1752cab5b03d53ff1aef2394c78d126 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 11:55:28 -0800 Subject: [PATCH 10/13] Trigger notification From 8634f3f5a46fd88173dcc9884efa352729ab57f8 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 12:03:10 -0800 Subject: [PATCH 11/13] cleanup notebook --- examples/open_dataset.ipynb | 78 
++++++++++++++----------------------- 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/examples/open_dataset.ipynb b/examples/open_dataset.ipynb index 6754ece..c0218d7 100644 --- a/examples/open_dataset.ipynb +++ b/examples/open_dataset.ipynb @@ -6,17 +6,16 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", - "import xpublish\n", - "import zarr\n", - "\n", - "import numpy as np\n", "from json import loads\n", "\n", + "import numpy as np\n", + "import xarray as xr\n", + "import zarr\n", + "from dask.distributed import Client\n", "from fsspec.implementations.http import HTTPFileSystem\n", + "from xarray.testing import assert_chunks_equal, assert_equal, assert_identical\n", "\n", - "from dask.distributed import Client\n", - "from xarray.testing import assert_identical, assert_equal, assert_chunks_equal" + "import xpublish" ] }, { @@ -53,7 +52,9 @@ "3. This Notebook's Dask Cluster Dashboard: e.g. https://hub.gke.mybinder.org/user/jhamman-xpublish-gbbqbxfi/proxy/43757/status\n", "\n", "_Also note that this port numbers may change. 
The server side ports are available in `logfile.txt` (see above) and the client-side port is in the cell above._" - ] + ], + "execution_count": null, + "outputs": [] }, { "cell_type": "code", @@ -65,7 +66,7 @@ "fs = HTTPFileSystem()\n", "\n", "# The http mapper gives us a dict-like interface to the API\n", - "http_map = fs.get_mapper('http://0.0.0.0:9000')" + "http_map = fs.get_mapper(\"http://0.0.0.0:9000\")" ] }, { @@ -75,8 +76,8 @@ "outputs": [], "source": [ "# We can access API enpoints by key now...\n", - "for key in ['.zmetadata', 'keys']:\n", - " print(key, http_map[key], '\\n')" + "for key in [\".zmetadata\", \"keys\"]:\n", + " print(key, http_map[key], \"\\n\")" ] }, { @@ -87,8 +88,8 @@ "source": [ "# The .zmetadata key returns the json dictionary of consolidated zarr metadata\n", "# We can load/decode that and access one array's attributes\n", - "d = loads(http_map['.zmetadata'])\n", - "d['metadata']['air/.zattrs']" + "d = loads(http_map[\".zmetadata\"])\n", + "d[\"metadata\"][\"air/.zattrs\"]" ] }, { @@ -99,7 +100,7 @@ "source": [ "# We can pass that mapper object directly to zarr's open_consolidated function\n", "# This returns a zarr groups\n", - "zg = zarr.open_consolidated(http_map, mode='r')\n", + "zg = zarr.open_consolidated(http_map, mode=\"r\")\n", "zg.tree()" ] }, @@ -123,9 +124,8 @@ "# The rest of this notebook applies some simple tests to show that the served dataset is indentical to the\n", "# \"air_temperature\" dataset in xarray's tutorial dataset.\n", "ds_tutorial = xr.tutorial.open_dataset(\n", - " 'air_temperature',\n", - " chunks=dict(lat=5, lon=5),\n", - " decode_cf=True)" + " \"air_temperature\", chunks=dict(lat=5, lon=5), decode_cf=True\n", + ")" ] }, { @@ -144,7 +144,7 @@ "outputs": [], "source": [ "def test(actual, expected, index):\n", - " '''a simple equality test with index as a parameter'''\n", + " \"\"\"a simple equality test with index as a parameter\"\"\"\n", " assert np.array_equal(actual[index].values, 
expected[index].values)" ] }, @@ -155,17 +155,18 @@ "outputs": [], "source": [ "# test a bunch of indexing patterns\n", - "for index in [(0, 0, 0),\n", - " (slice(0, 4), 0, 0),\n", - " (slice(0, 4), slice(0, 4), 0),\n", - " (slice(0, 4), slice(0, 4), slice(0, 4)),\n", - " (slice(-4), slice(0, 4), slice(0, 4)),\n", - " (slice(None), slice(0, 4), slice(0, 4)),\n", - " (slice(None), slice(None), slice(0, 4)),\n", - " (slice(None), slice(None), slice(None)),\n", - " ]:\n", + "for index in [\n", + " (0, 0, 0),\n", + " (slice(0, 4), 0, 0),\n", + " (slice(0, 4), slice(0, 4), 0),\n", + " (slice(0, 4), slice(0, 4), slice(0, 4)),\n", + " (slice(-4), slice(0, 4), slice(0, 4)),\n", + " (slice(None), slice(0, 4), slice(0, 4)),\n", + " (slice(None), slice(None), slice(0, 4)),\n", + " (slice(None), slice(None), slice(None)),\n", + "]:\n", " print(index)\n", - " test(ds_tutorial['air'], ds['air'], index)" + " test(ds_tutorial[\"air\"], ds[\"air\"], index)" ] }, { @@ -178,27 +179,6 @@ "assert_chunks_equal(ds, ds_tutorial)\n", "assert_identical(ds, ds_tutorial)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 5fd01da7a481a19373a5af6e1f9bb775d8ccbbf5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 16:13:32 -0800 Subject: [PATCH 12/13] fix install and isort --- requirements.txt | 1 - setup.cfg | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ce81e2f..048f634 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,6 @@ dask fastapi numcodecs numpy>=1.17 -starlette toolz uvicorn xarray>=0.15 diff --git a/setup.cfg b/setup.cfg index 589d3e8..0b88016 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,6 +16,7 
@@ line_length=100 skip= docs/source/conf.py setup.py + .binder/test.py [tool:pytest] log_cli = True From a86ecdabdc6ebcbd42a5ec3417485bc592c01707 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 24 Feb 2020 16:30:31 -0800 Subject: [PATCH 13/13] update docs --- README.md | 2 +- docs/source/index.rst | 7 ++++++- docs/source/tutorial.rst | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b7720f5..946650b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![GitHub Workflow Status](https://img.shields.io/github/workflow/status/jhamman/xpublish/CI?logo=github)](https://github.com/jhamman/xpublish/actions?query=workflow%3ACI) [![Documentation Status](https://readthedocs.org/projects/xpublish/badge/?version=latest)](https://xpublish.readthedocs.io/en/latest/?badge=latest) - +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/jhamman/xpublish/master) # xpublish diff --git a/docs/source/index.rst b/docs/source/index.rst index 2645c22..9eeb8f1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -2,7 +2,12 @@ xpublish ======== -Xpublish lets you publish Xarray datasets via a Zarr-compatible REST API. +**Xpublish lets you publish Xarray datasets via a Zarr-compatible REST API.** + +*You can run a short example application in a live session here:* |Binder| + +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/jhamman/xpublish/master On the server-side, datasets are published using a simple Xarray accessor: diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 68a4b72..d7cf257 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -37,6 +37,7 @@ REST API * ``/keys``: returns a list of variable keys, equivalent to ``list(ds.variables)``. * ``/info``: returns a concise summary of a Dataset variables and attributes, equivalent to ``ds.info()``. * ``/dict``: returns a json dictionary of the full dataset. 
Accpets the ``?data={value}`` parameter to specify if the return dictionary should include the data in addition to the dataset schema. +* ``/versions``: returns a plain text summary of the versions of xarray and related libraries on the server side, equivalent to ``xr.show_versions()``. Zarr API ~~~~~~~~