diff --git a/doc/io.rst b/doc/io.rst index 151f5eb740f..0dc5181f9b8 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -81,6 +81,16 @@ require external libraries and dicts can easily be pickled, or converted to json, or geojson. All the values are converted to lists, so dicts might be quite large. +To export just the dataset schema, without the data itself, use the +``data=False`` option: + +.. ipython:: python + + ds.to_dict(data=False) + +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. + .. _io.netcdf: netCDF @@ -665,7 +675,7 @@ To read a consolidated store, pass the ``consolidated=True`` option to :py:func:`~xarray.open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) - + Xarray can't perform consolidation on pre-existing zarr datasets. This should be done directly from zarr, as described in the `zarr docs `_. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4a351716fab..c408306ffdb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,8 @@ Breaking changes Enhancements ~~~~~~~~~~~~ +- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) + By `Ryan Abernathey `_ - :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f27958b1c77..aa6c35394fb 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1760,7 +1760,7 @@ def to_netcdf(self, *args, **kwargs): return dataset.to_netcdf(*args, **kwargs) - def to_dict(self): + def to_dict(self, data=True): """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -1769,22 +1769,20 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. + See also -------- DataArray.from_dict """ - d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), - 'dims': self.dims} - + d = self.variable.to_dict(data=data) + d.update({'coords': {}, 'name': self.name}) for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) - - d.update({'data': ensure_us_time_resolution(self.values).tolist(), - 'name': self.name}) + d['coords'][k] = self.coords[k].variable.to_dict(data=data) return d @classmethod diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2caf45ce954..3f1c038fc11 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3221,7 +3221,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self): + def to_dict(self, data=True): """ Convert this dataset to a dictionary following xarray naming conventions. @@ -3230,25 +3230,22 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. + See also -------- Dataset.from_dict """ d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), 'dims': dict(self.dims), 'data_vars': {}} - for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['coords'].update({k: self[k].variable.to_dict(data=data)}) for k in self.data_vars: - data = ensure_us_time_resolution(self[k].values).tolist() - d['data_vars'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['data_vars'].update({k: self[k].variable.to_dict(data=data)}) return d @classmethod diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8bd7225efc3..a71b148baf3 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -19,7 +19,8 @@ from .options import _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) -from .utils import OrderedSet, either_dict_or_kwargs +from .utils import (OrderedSet, either_dict_or_kwargs, + decode_numpy_dict_values, ensure_us_time_resolution) try: import dask.array as da @@ -410,6 +411,16 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() + def to_dict(self, data=True): + """Dictionary representation of variable.""" + item = {'dims': self.dims, + 'attrs': decode_numpy_dict_values(self.attrs)} + if data: + item['data'] = ensure_us_time_resolution(self.values).tolist() + else: + item.update({'dtype': str(self.dtype), 'shape': self.shape}) + return item + @property def dims(self): """Tuple of dimension names with which this variable is associated. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index aa02e802fc5..8995fca2f95 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2909,6 +2909,15 @@ def test_to_and_from_dict(self): ValueError, "cannot convert dict without the key 'data'"): DataArray.from_dict(d) + # check the data=False option + expected_no_data = expected.copy() + del expected_no_data['data'] + del expected_no_data['coords']['x']['data'] + expected_no_data['coords']['x'].update({'dtype': '