diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000000..5a8f6596d69 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,58 @@ +# Configuration for probot-stale - https://github.com/probot/stale + +# Number of days of inactivity before an Issue or Pull Request becomes stale +daysUntilStale: 700 # start with a large number and reduce shortly + +# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. +# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. +daysUntilClose: 30 + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - pinned + - security + - "[Status] Maybe Later" + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: false + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: false + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: true + +# Label to use when marking as stale +staleLabel: stale + +# Comment to post when marking as stale. Set to `false` to disable +markComment: | + In order to maintain a list of currently relevant issues, we mark issues as stale after a period of inactivity + If this issue remains relevant, please comment here; otherwise it will be marked as closed automatically + +# Comment to post when removing the stale label. +# unmarkComment: > +# Your comment here. + +# Comment to post when closing a stale Issue or Pull Request. +# closeComment: > +# Your comment here. + +# Limit the number of actions per hour, from 1-30. 
Default is 30 +limitPerRun: 1 # start with a small number + + +# Limit to only `issues` or `pulls` +# only: issues + +# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': +# pulls: +# daysUntilStale: 30 +# markComment: > +# This pull request has been automatically marked as stale because it has not had +# recent activity. It will be closed if no further activity occurs. Thank you +# for your contributions. + +# issues: +# exemptLabels: +# - confirmed \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 8e1866de8d4..a21d4d94413 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,7 +60,7 @@ script: - python --version - python -OO -c "import xarray" - if [[ "$CONDA_ENV" == "docs" ]]; then - conda install -c conda-forge sphinx sphinx_rtd_theme sphinx-gallery numpydoc; + conda install -c conda-forge --override-channels sphinx sphinx_rtd_theme sphinx-gallery numpydoc "gdal>2.2.4"; sphinx-build -n -j auto -b html -d _build/doctrees doc _build/html; elif [[ "$CONDA_ENV" == "lint" ]]; then pycodestyle xarray ; diff --git a/README.rst b/README.rst index a4c8f6d200b..f69f7d95c31 100644 --- a/README.rst +++ b/README.rst @@ -9,49 +9,47 @@ xarray: N-D labeled arrays and datasets :target: https://coveralls.io/r/pydata/xarray .. image:: https://readthedocs.org/projects/xray/badge/?version=latest :target: http://xarray.pydata.org/ -.. image:: https://img.shields.io/pypi/v/xarray.svg - :target: https://pypi.python.org/pypi/xarray/ -.. image:: https://zenodo.org/badge/13221727.svg - :target: https://zenodo.org/badge/latestdoi/13221727 .. image:: http://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat :target: http://pandas.pydata.org/speed/xarray/ -.. image:: https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A - :target: http://numfocus.org +.. 
image:: https://img.shields.io/pypi/v/xarray.svg + :target: https://pypi.python.org/pypi/xarray/ **xarray** (formerly **xray**) is an open source project and Python package that makes working with labelled multi-dimensional arrays simple, efficient, and fun! -Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called -"tensors") are an essential part of computational science. -They are encountered in a wide range of fields, including physics, astronomy, -geoscience, bioinformatics, engineering, finance, and deep learning. -In Python, NumPy_ provides the fundamental data structure and API for -working with raw ND arrays. -However, real-world datasets are usually more than just raw numbers; -they have labels which encode information about how the array values map -to locations in space, time, etc. +Xarray introduces labels in the form of dimensions, coordinates and +attributes on top of raw NumPy_-like arrays, which allows for a more +intuitive, more concise, and less error-prone developer experience. +The package includes a large and growing library of domain-agnostic functions +for advanced analytics and visualization with these data structures. -By introducing *dimensions*, *coordinates*, and *attributes* on top of raw -NumPy-like arrays, xarray is able to understand these labels and use them to -provide a more intuitive, more concise, and less error-prone experience. -Xarray also provides a large and growing library of functions for advanced -analytics and visualization with these data structures. Xarray was inspired by and borrows heavily from pandas_, the popular data analysis package focused on labelled tabular data. -Xarray can read and write data from most common labeled ND-array storage -formats and is particularly tailored to working with netCDF_ files, which were -the source of xarray's data model. 
+It is particularly tailored to working with netCDF_ files, which were the +source of xarray's data model, and integrates tightly with dask_ for parallel +computing. -.. _NumPy: http://www.numpy.org/ +.. _NumPy: http://www.numpy.org .. _pandas: http://pandas.pydata.org +.. _dask: http://dask.org .. _netCDF: http://www.unidata.ucar.edu/software/netcdf Why xarray? ----------- -Adding dimensions names and coordinate indexes to numpy's ndarray_ makes many -powerful array operations possible: +Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called +"tensors") are an essential part of computational science. +They are encountered in a wide range of fields, including physics, astronomy, +geoscience, bioinformatics, engineering, finance, and deep learning. +In Python, NumPy_ provides the fundamental data structure and API for +working with raw ND arrays. +However, real-world datasets are usually more than just raw numbers; +they have labels which encode information about how the array values map +to locations in space, time, etc. + +Xarray doesn't just keep track of labels on arrays -- it uses them to provide a +powerful and concise interface. For example: - Apply operations over dimensions by name: ``x.sum('time')``. - Select values by label instead of integer location: @@ -65,42 +63,10 @@ powerful array operations possible: - Keep track of arbitrary metadata in the form of a Python dictionary: ``x.attrs``. -pandas_ provides many of these features, but it does not make use of dimension -names, and its core data structures are fixed dimensional arrays. - -Why isn't pandas enough? ------------------------- - -pandas_ excels at working with tabular data. That suffices for many statistical -analyses, but physical scientists rely on N-dimensional arrays -- which is -where xarray comes in. - -xarray aims to provide a data analysis toolkit as powerful as pandas_ but -designed for working with homogeneous N-dimensional arrays -instead of tabular data. 
When possible, we copy the pandas API and rely on -pandas's highly optimized internals (in particular, for fast indexing). - -Why netCDF? ------------ - -Because xarray implements the same data model as the netCDF_ file format, -xarray datasets have a natural and portable serialization format. But it is also -easy to robustly convert an xarray ``DataArray`` to and from a numpy ``ndarray`` -or a pandas ``DataFrame`` or ``Series``, providing compatibility with the full -`PyData ecosystem `__. - -Our target audience is anyone who needs N-dimensional labeled arrays, but we -are particularly focused on the data analysis needs of physical scientists -- -especially geoscientists who already know and love netCDF_. - -.. _ndarray: http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html -.. _pandas: http://pandas.pydata.org -.. _netCDF: http://www.unidata.ucar.edu/software/netcdf - Documentation ------------- -The official documentation is hosted on ReadTheDocs at http://xarray.pydata.org/ +Learn more about xarray in its official documentation at http://xarray.pydata.org/ Contributing ------------ @@ -148,7 +114,7 @@ __ http://climate.com/ License ------- -Copyright 2014-2018, xarray Developers +Copyright 2014-2019, xarray Developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index e3933b400e6..11a779ae376 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -40,7 +40,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. - "pythons": ["2.7", "3.6"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. 
An empty diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 311e4a275a8..0ed6dd78c3a 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -20,14 +20,14 @@ dependencies: - scipy - seaborn - toolz - - rasterio + # - rasterio # xref #2683 - bottleneck - zarr - pseudonetcdf>=3.0.1 - eccodes - cdms2 - - pynio - - iris>=1.10 + # - pynio # xref #2683 + # - iris>=1.10 # xref #2683 - pydap - lxml - pip: diff --git a/doc/faq.rst b/doc/faq.rst index 44bc021024b..465a5a6d250 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -18,8 +18,9 @@ pandas is a fantastic library for analysis of low-dimensional labelled data - if it can be sensibly described as "rows and columns", pandas is probably the right choice. However, sometimes we want to use higher dimensional arrays (`ndim > 2`), or arrays for which the order of dimensions (e.g., columns vs -rows) shouldn't really matter. For example, climate and weather data is often -natively expressed in 4 or more dimensions: time, x, y and z. +rows) shouldn't really matter. For example, the images of a movie can be +natively represented as an array with four dimensions: time, row, column and +color. Pandas has historically supported N-dimensional panels, but deprecated them in version 0.20 in favor of Xarray data structures. There are now built-in methods @@ -39,9 +40,8 @@ if you were using Panels: xarray ``Dataset``. You can :ref:`read about switching from Panels to Xarray here `. -Pandas gets a lot of things right, but scientific users need fully multi- -dimensional data structures. - +Pandas gets a lot of things right, but many science, engineering and complex +analytics use cases need fully multi-dimensional data structures. How do xarray data structures differ from those found in pandas? ---------------------------------------------------------------- @@ -65,7 +65,9 @@ multi-dimensional data-structures. 
That said, you should only bother with xarray if some aspect of data is fundamentally multi-dimensional. If your data is unstructured or -one-dimensional, stick with pandas. +one-dimensional, pandas is usually the right choice: it has better performance +for common operations such as ``groupby`` and you'll find far more usage +examples online. Why don't aggregations return Python scalars? diff --git a/doc/index.rst b/doc/index.rst index fe6d2874953..dbe911011cd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -5,29 +5,21 @@ xarray: N-D labeled arrays and datasets in Python that makes working with labelled multi-dimensional arrays simple, efficient, and fun! -Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called -"tensors") are an essential part of computational science. -They are encountered in a wide range of fields, including physics, astronomy, -geoscience, bioinformatics, engineering, finance, and deep learning. -In Python, NumPy_ provides the fundamental data structure and API for -working with raw ND arrays. -However, real-world datasets are usually more than just raw numbers; -they have labels which encode information about how the array values map -to locations in space, time, etc. - -By introducing *dimensions*, *coordinates*, and *attributes* on top of raw -NumPy-like arrays, xarray is able to understand these labels and use them to -provide a more intuitive, more concise, and less error-prone experience. -Xarray also provides a large and growing library of functions for advanced -analytics and visualization with these data structures. +Xarray introduces labels in the form of dimensions, coordinates and +attributes on top of raw NumPy_-like arrays, which allows for a more +intuitive, more concise, and less error-prone developer experience. +The package includes a large and growing library of domain-agnostic functions +for advanced analytics and visualization with these data structures. 
+ Xarray was inspired by and borrows heavily from pandas_, the popular data analysis package focused on labelled tabular data. -Xarray can read and write data from most common labeled ND-array storage -formats and is particularly tailored to working with netCDF_ files, which were -the source of xarray's data model. +It is particularly tailored to working with netCDF_ files, which were the +source of xarray's data model, and integrates tightly with dask_ for parallel +computing. -.. _NumPy: http://www.numpy.org/ +.. _NumPy: http://www.numpy.org .. _pandas: http://pandas.pydata.org +.. _dask: http://dask.org .. _netCDF: http://www.unidata.ucar.edu/software/netcdf Documentation diff --git a/doc/indexing.rst b/doc/indexing.rst index 3878d983cf6..77ec7428991 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -371,7 +371,7 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) da.loc[:, ind] # same as da.sel(y=ind) -These methods may and also be applied to ``Dataset`` objects +These methods may also be applied to ``Dataset`` objects .. ipython:: python diff --git a/doc/io.rst b/doc/io.rst index 151f5eb740f..0dc5181f9b8 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -81,6 +81,16 @@ require external libraries and dicts can easily be pickled, or converted to json, or geojson. All the values are converted to lists, so dicts might be quite large. +To export just the dataset schema, without the data itself, use the +``data=False`` option: + +.. ipython:: python + + ds.to_dict(data=False) + +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. + .. _io.netcdf: netCDF @@ -665,7 +675,7 @@ To read a consolidated store, pass the ``consolidated=True`` option to :py:func:`~xarray.open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) - + Xarray can't perform consolidation on pre-existing zarr datasets. 
This should be done directly from zarr, as described in the `zarr docs `_. diff --git a/doc/related-projects.rst b/doc/related-projects.rst index cf89c715bc7..c89e324ff7c 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -3,7 +3,7 @@ Xarray related projects ----------------------- -Here below is a list of several existing libraries that build +Here below is a list of existing open source projects that build functionality upon xarray. See also section :ref:`internals` for more details on how to build xarray extensions. @@ -39,11 +39,16 @@ Geosciences Machine Learning ~~~~~~~~~~~~~~~~ -- `cesium `_: machine learning for time series analysis +- `ArviZ `_: Exploratory analysis of Bayesian models, built on top of xarray. - `Elm `_: Parallel machine learning on xarray data structures - `sklearn-xarray (1) `_: Combines scikit-learn and xarray (1). - `sklearn-xarray (2) `_: Combines scikit-learn and xarray (2). +Other domains +~~~~~~~~~~~~~ +- `ptsa `_: EEG Time Series Analysis +- `pycalphad `_: Computational Thermodynamics in Python + Extend xarray capabilities ~~~~~~~~~~~~~~~~~~~~~~~~~~ - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions @@ -61,9 +66,10 @@ Visualization - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. -Other -~~~~~ -- `ptsa `_: EEG Time Series Analysis -- `pycalphad `_: Computational Thermodynamics in Python +Non-Python projects +~~~~~~~~~~~~~~~~~~~ +- `xframe `_: C++ data structures inspired by xarray. +- `AxisArrays `_ and + `NamedArrays `_: similar data structures for Julia. More projects can be found at the `"xarray" Github topic `_. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cb6344cceab..97e475bc368 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,8 @@ Breaking changes Enhancements ~~~~~~~~~~~~ +- Add ``data=False`` option to ``to_dict()`` methods. 
(:issue:`2656`) + By `Ryan Abernathey `_ - :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. @@ -36,6 +38,11 @@ Enhancements - Upsampling an array via interpolation with resample is now dask-compatible, as long as the array is not chunked along the resampling dimension. By `Spencer Clark `_. +- :py:func:`xarray.testing.assert_equal` and + :py:func:`xarray.testing.assert_identical` now provide a more detailed + report showing what exactly differs between the two objects (dimensions / + coordinates / variables / attributes) (:issue:`1507`). + By `Benoit Bovy `_. Bug fixes ~~~~~~~~~ diff --git a/doc/why-xarray.rst b/doc/why-xarray.rst index e9f30fe25be..d0a6c591b29 100644 --- a/doc/why-xarray.rst +++ b/doc/why-xarray.rst @@ -1,11 +1,21 @@ Overview: Why xarray? ===================== -Features --------- - -Adding dimensions names and coordinate indexes to numpy's ndarray_ makes many -powerful array operations possible: +What labels enable +------------------ + +Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called +"tensors") are an essential part of computational science. +They are encountered in a wide range of fields, including physics, astronomy, +geoscience, bioinformatics, engineering, finance, and deep learning. +In Python, NumPy_ provides the fundamental data structure and API for +working with raw ND arrays. +However, real-world datasets are usually more than just raw numbers; +they have labels which encode information about how the array values map +to locations in space, time, etc. + +Xarray doesn't just keep track of labels on arrays -- it uses them to provide a +powerful and concise interface. For example: - Apply operations over dimensions by name: ``x.sum('time')``. - Select values by label instead of integer location: @@ -19,9 +29,6 @@ powerful array operations possible: - Keep track of arbitrary metadata in the form of a Python dictionary: ``x.attrs``. 
-pandas_ provides many of these features, but it does not make use of dimension -names, and its core data structures are fixed dimensional arrays. - The N-dimensional nature of xarray's data structures makes it suitable for dealing with multi-dimensional scientific data, and its use of dimension names instead of axis labels (``dim='time'`` instead of ``axis=0``) makes such @@ -29,10 +36,15 @@ arrays much more manageable than the raw numpy ndarray: with xarray, you don't need to keep track of the order of arrays dimensions or insert dummy dimensions (e.g., ``np.newaxis``) to align arrays. +The immediate payoff of using xarray is that you'll write less code. The +long-term payoff is that you'll understand what you were thinking when you come +back to look at it weeks or months later. + Core data structures -------------------- -xarray has two core data structures. Both are fundamentally N-dimensional: +xarray has two core data structures, which build upon and extend the core +strengths of NumPy_ and pandas_. Both are fundamentally N-dimensional: - :py:class:`~xarray.DataArray` is our implementation of a labeled, N-dimensional array. It is an N-D generalization of a :py:class:`pandas.Series`. The name @@ -43,8 +55,6 @@ xarray has two core data structures. Both are fundamentally N-dimensional: shared dimensions, and serves a similar purpose in xarray to the :py:class:`pandas.DataFrame`. -.. _datarray: https://github.com/fperez/datarray - The value of attaching labels to numpy's :py:class:`numpy.ndarray` may be fairly obvious, but the dataset may need more motivation. @@ -69,23 +79,33 @@ metadata once, not every time you save a file. Goals and aspirations --------------------- -pandas_ excels at working with tabular data. That suffices for many statistical -analyses, but physical scientists rely on N-dimensional arrays -- which is -where xarray comes in. 
+Xarray contributes domain-agnostic data-structures and tools for labeled +multi-dimensional arrays to Python's SciPy_ ecosystem for numerical computing. +In particular, xarray builds upon and integrates with NumPy_ and pandas_: + +- Our user-facing interfaces aim to be more explicit versions of those found in + NumPy/pandas. +- Compatibility with the broader ecosystem is a major goal: it should be easy + to get your data in and out. +- We try to keep a tight focus on functionality and interfaces related to + labeled data, and leverage other Python libraries for everything else, e.g., + NumPy/pandas for fast arrays/indexing (xarray itself contains no compiled + code), Dask_ for parallel computing, matplotlib_ for plotting, etc. + +Xarray is a collaborative and community driven project, run entirely on +volunteer effort (see :ref:`contributing`). +Our target audience is anyone who needs N-dimensional labeled arrays in Python. +Originally, development was driven by the data analysis needs of physical +scientists (especially geoscientists who already know and love +netCDF_), but it has become a much more broadly useful tool, and is still +under active development. +See our technical :ref:`roadmap` for more details, and feel free to reach out +with questions about whether xarray is the right tool for your needs. -xarray aims to provide a data analysis toolkit as powerful as pandas_ but -designed for working with homogeneous N-dimensional arrays -instead of tabular data. When possible, we copy the pandas API and rely on -pandas's highly optimized internals (in particular, for fast indexing). - -Importantly, xarray has robust support for converting its objects to and -from a numpy ``ndarray`` or a pandas ``DataFrame`` or ``Series``, providing -compatibility with the full `PyData ecosystem `__. 
- -Our target audience is anyone who needs N-dimensional labeled arrays, but we -are particularly focused on the data analysis needs of physical scientists -- -especially geoscientists who already know and love netCDF_. - -.. _ndarray: http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html +.. _datarray: https://github.com/fperez/datarray +.. _Dask: http://dask.org +.. _matplotlib: http://matplotlib.org .. _netCDF: http://www.unidata.ucar.edu/software/netcdf +.. _NumPy: http://www.numpy.org .. _pandas: http://pandas.pydata.org +.. _SciPy: http://www.scipy.org diff --git a/setup.py b/setup.py index 8c0c98ab33d..ff667d7a113 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,25 @@ that makes working with labelled multi-dimensional arrays simple, efficient, and fun! +Xarray introduces labels in the form of dimensions, coordinates and +attributes on top of raw NumPy_-like arrays, which allows for a more +intuitive, more concise, and less error-prone developer experience. +The package includes a large and growing library of domain-agnostic functions +for advanced analytics and visualization with these data structures. + +Xarray was inspired by and borrows heavily from pandas_, the popular data +analysis package focused on labelled tabular data. +It is particularly tailored to working with netCDF_ files, which were the +source of xarray's data model, and integrates tightly with dask_ for parallel +computing. + +.. _NumPy: http://www.numpy.org/ +.. _pandas: http://pandas.pydata.org +.. _netCDF: http://www.unidata.ucar.edu/software/netcdf + +Why xarray? +----------- + Multi-dimensional (a.k.a. N-dimensional, ND) arrays (sometimes called "tensors") are an essential part of computational science. They are encountered in a wide range of fields, including physics, astronomy, @@ -48,25 +67,25 @@ they have labels which encode information about how the array values map to locations in space, time, etc. 
-By introducing *dimensions*, *coordinates*, and *attributes* on top of raw -NumPy-like arrays, xarray is able to understand these labels and use them to -provide a more intuitive, more concise, and less error-prone experience. -Xarray also provides a large and growing library of functions for advanced -analytics and visualization with these data structures. -Xarray was inspired by and borrows heavily from pandas_, the popular data -analysis package focused on labelled tabular data. -Xarray can read and write data from most common labeled ND-array storage -formats and is particularly tailored to working with netCDF_ files, which were -the source of xarray's data model. +Xarray doesn't just keep track of labels on arrays -- it uses them to provide a +powerful and concise interface. For example: -.. _NumPy: http://www.numpy.org/ -.. _pandas: http://pandas.pydata.org -.. _netCDF: http://www.unidata.ucar.edu/software/netcdf +- Apply operations over dimensions by name: ``x.sum('time')``. +- Select values by label instead of integer location: + ``x.loc['2014-01-01']`` or ``x.sel(time='2014-01-01')``. +- Mathematical operations (e.g., ``x - y``) vectorize across multiple + dimensions (array broadcasting) based on dimension names, not shape. +- Flexible split-apply-combine operations with groupby: + ``x.groupby('time.dayofyear').mean()``. +- Database like alignment based on coordinate labels that smoothly + handles missing values: ``x, y = xr.align(x, y, join='outer')``. +- Keep track of arbitrary metadata in the form of a Python dictionary: + ``x.attrs``. 
-Important links ---------------- +Learn more +---------- -- HTML documentation: http://xarray.pydata.org +- Documentation: http://xarray.pydata.org - Issue tracker: http://github.com/pydata/xarray/issues - Source code: http://github.com/pydata/xarray - SciPy2015 talk: https://www.youtube.com/watch?v=X0pAhJgySxk diff --git a/xarray/core/combine.py b/xarray/core/combine.py index e552d8d900c..0327a65ab1b 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -493,16 +493,21 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars, return new_combined_ids +def vars_as_keys(ds): + return tuple(sorted(ds)) + + def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT, compat='no_conflicts', data_vars='all', coords='different'): # This is just the old auto_combine function (which only worked along 1D) if concat_dim is not None: dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - grouped = itertools.groupby(datasets, key=lambda ds: tuple(sorted(ds))) + sorted_datasets = sorted(datasets, key=vars_as_keys) + grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) concatenated = [_auto_concat(list(ds_group), dim=dim, data_vars=data_vars, coords=coords) - for id, ds_group in grouped] + for id, ds_group in grouped_by_vars] else: concatenated = datasets merged = merge(concatenated, compat=compat) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f27958b1c77..aa6c35394fb 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1760,7 +1760,7 @@ def to_netcdf(self, *args, **kwargs): return dataset.to_netcdf(*args, **kwargs) - def to_dict(self): + def to_dict(self, data=True): """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -1769,22 +1769,20 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. 
+ Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. + See also -------- DataArray.from_dict """ - d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), - 'dims': self.dims} - + d = self.variable.to_dict(data=data) + d.update({'coords': {}, 'name': self.name}) for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) - - d.update({'data': ensure_us_time_resolution(self.values).tolist(), - 'name': self.name}) + d['coords'][k] = self.coords[k].variable.to_dict(data=data) return d @classmethod diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ef73f3cef12..d748ac67ede 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3222,7 +3222,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self): + def to_dict(self, data=True): """ Convert this dataset to a dictionary following xarray naming conventions. @@ -3231,25 +3231,22 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. 
+ See also -------- Dataset.from_dict """ d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), 'dims': dict(self.dims), 'data_vars': {}} - for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['coords'].update({k: self[k].variable.to_dict(data=data)}) for k in self.data_vars: - data = ensure_us_time_resolution(self[k].values).tolist() - d['data_vars'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['data_vars'].update({k: self[k].variable.to_dict(data=data)}) return d @classmethod diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 5dd3cf06025..50fa64c9987 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd +from .duck_array_ops import array_equiv from .options import OPTIONS from .pycompat import ( PY2, bytes_type, dask_array_type, unicode_type, zip_longest) @@ -411,6 +412,15 @@ def short_dask_repr(array, show_dtype=True): return 'dask.array' % (array.shape, chunksize) +def short_data_repr(array): + if isinstance(getattr(array, 'variable', array)._data, dask_array_type): + return short_dask_repr(array) + elif array._in_memory or array.size < 1e5: + return short_array_repr(array.values) + else: + return u'[%s values with dtype=%s]' % (array.size, array.dtype) + + def array_repr(arr): # used for DataArray, Variable and IndexVariable if hasattr(arr, 'name') and arr.name is not None: @@ -421,12 +431,7 @@ def array_repr(arr): summary = [u'' % (type(arr).__name__, name_str, dim_summary(arr))] - if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type): - summary.append(short_dask_repr(arr)) - elif arr._in_memory or arr.size < 1e5: - summary.append(short_array_repr(arr.values)) - else: - summary.append(u'[%s values with dtype=%s]' % 
(arr.size, arr.dtype)) + summary.append(short_data_repr(arr)) if hasattr(arr, 'coords'): if arr.coords: @@ -463,3 +468,132 @@ def dataset_repr(ds): summary.append(attrs_repr(ds.attrs)) return u'\n'.join(summary) + + +def diff_dim_summary(a, b): + if a.dims != b.dims: + return "Differing dimensions:\n ({}) != ({})".format( + dim_summary(a), dim_summary(b)) + else: + return "" + + +def _diff_mapping_repr(a_mapping, b_mapping, compat, + title, summarizer, col_width=None): + + def extra_items_repr(extra_keys, mapping, ab_side): + extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys] + if extra_repr: + header = "{} only on the {} object:".format(title, ab_side) + return [header] + extra_repr + else: + return [] + + a_keys = set(a_mapping) + b_keys = set(b_mapping) + + summary = [] + + diff_items = [] + + for k in a_keys & b_keys: + try: + # compare xarray variable + compatible = getattr(a_mapping[k], compat)(b_mapping[k]) + is_variable = True + except AttributeError: + # compare attribute value + compatible = a_mapping[k] == b_mapping[k] + is_variable = False + + if not compatible: + temp = [summarizer(k, vars[k], col_width) + for vars in (a_mapping, b_mapping)] + + if compat == 'identical' and is_variable: + attrs_summary = [] + + for m in (a_mapping, b_mapping): + attr_s = "\n".join([summarize_attr(ak, av) + for ak, av in m[k].attrs.items()]) + attrs_summary.append(attr_s) + + temp = ["\n".join([var_s, attr_s]) if attr_s else var_s + for var_s, attr_s in zip(temp, attrs_summary)] + + diff_items += [ab_side + s[1:] + for ab_side, s in zip(('L', 'R'), temp)] + + if diff_items: + summary += ["Differing {}:".format(title.lower())] + diff_items + + summary += extra_items_repr(a_keys - b_keys, a_mapping, "left") + summary += extra_items_repr(b_keys - a_keys, b_mapping, "right") + + return "\n".join(summary) + + +diff_coords_repr = functools.partial(_diff_mapping_repr, + title="Coordinates", + summarizer=summarize_coord) + + +diff_data_vars_repr = 
functools.partial(_diff_mapping_repr, + title="Data variables", + summarizer=summarize_datavar) + + +diff_attrs_repr = functools.partial(_diff_mapping_repr, + title="Attributes", + summarizer=summarize_attr) + + +def _compat_to_str(compat): + if compat == "equals": + return "equal" + else: + return compat + + +def diff_array_repr(a, b, compat): + # used for DataArray, Variable and IndexVariable + summary = ["Left and right {} objects are not {}" + .format(type(a).__name__, _compat_to_str(compat))] + + summary.append(diff_dim_summary(a, b)) + + if not array_equiv(a.data, b.data): + temp = [wrap_indent(short_array_repr(obj), start=' ') + for obj in (a, b)] + diff_data_repr = [ab_side + "\n" + ab_data_repr + for ab_side, ab_data_repr in zip(('L', 'R'), temp)] + summary += ["Differing values:"] + diff_data_repr + + if hasattr(a, 'coords'): + col_width = _calculate_col_width(set(a.coords) | set(b.coords)) + summary.append(diff_coords_repr(a.coords, b.coords, compat, + col_width=col_width)) + + if compat == 'identical': + summary.append(diff_attrs_repr(a.attrs, b.attrs, compat)) + + return "\n".join(summary) + + +def diff_dataset_repr(a, b, compat): + summary = ["Left and right {} objects are not {}" + .format(type(a).__name__, _compat_to_str(compat))] + + col_width = _calculate_col_width( + set(_get_col_items(a.variables) + _get_col_items(b.variables))) + + summary.append(diff_dim_summary(a, b)) + summary.append(diff_coords_repr(a.coords, b.coords, compat, + col_width=col_width)) + summary.append(diff_data_vars_repr(a.data_vars, b.data_vars, compat, + col_width=col_width)) + + if compat == 'identical': + summary.append(diff_attrs_repr(a.attrs, b.attrs, compat)) + + return "\n".join(summary) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8bd7225efc3..a71b148baf3 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -19,7 +19,8 @@ from .options import _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, 
integer_types, zip) -from .utils import OrderedSet, either_dict_or_kwargs +from .utils import (OrderedSet, either_dict_or_kwargs, + decode_numpy_dict_values, ensure_us_time_resolution) try: import dask.array as da @@ -410,6 +411,16 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() + def to_dict(self, data=True): + """Dictionary representation of variable.""" + item = {'dims': self.dims, + 'attrs': decode_numpy_dict_values(self.attrs)} + if data: + item['data'] = ensure_us_time_resolution(self.values).tolist() + else: + item.update({'dtype': str(self.dtype), 'shape': self.shape}) + return item + @property def dims(self): """Tuple of dimension names with which this variable is associated. diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index fb67a1f9a33..2f5be1b33a8 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -7,10 +7,9 @@ import numpy as np from ..core.formatting import format_item -from ..core.pycompat import getargspec from .utils import ( - _determine_cmap_params, _infer_xy_labels, import_matplotlib_pyplot, - label_from_attrs) + _infer_line_data, _infer_xy_labels, _process_cmap_cbar_kwargs, + import_matplotlib_pyplot, label_from_attrs) # Overrides axes.labelsize, xtick.major.size, ytick.major.size # from mpl.rcParams @@ -199,38 +198,6 @@ def _left_axes(self): def _bottom_axes(self): return self.axes[-1, :] - def _process_cmap(self, func, kwargs, data): - cmapkw = kwargs.get('cmap') - colorskw = kwargs.get('colors') - cbar_kwargs = kwargs.pop('cbar_kwargs', {}) - cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs) - - if kwargs.get('cbar_ax', None) is not None: - raise ValueError('cbar_ax not supported by FacetGrid.') - - # colors is mutually exclusive with cmap - if cmapkw and colorskw: - raise ValueError("Can't specify both cmap and colors.") - - # These should be consistent with xarray.plot._plot2d - cmap_kwargs = {'plot_data': data.values, - # 
MPL default - 'levels': 7 if 'contour' in func.__name__ else None, - 'filled': func.__name__ != 'contour', - } - - cmap_args = getargspec(_determine_cmap_params).args - cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) - - cmap_params = _determine_cmap_params(**cmap_kwargs) - - if colorskw is not None: - cmap_params['cmap'] = None - - self._cmap_extend = cmap_params.get('extend') - - return cmap_params, cbar_kwargs - def map_dataarray(self, func, x, y, **kwargs): """ Apply a plotting function to a 2d facet's subset of the data. @@ -253,7 +220,13 @@ def map_dataarray(self, func, x, y, **kwargs): """ - cmap_params, cbar_kwargs = self._process_cmap(func, kwargs, self.data) + if kwargs.get('cbar_ax', None) is not None: + raise ValueError('cbar_ax not supported by FacetGrid.') + + cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( + func, kwargs, self.data.values) + + self._cmap_extend = cmap_params.get('extend') # Order is important func_kwargs = kwargs.copy() @@ -269,7 +242,7 @@ def map_dataarray(self, func, x, y, **kwargs): # None is the sentinel value if d is not None: subset = self.data.loc[d] - mappable = func(subset, x, y, ax=ax, **func_kwargs) + mappable = func(subset, x=x, y=y, ax=ax, **func_kwargs) self._mappables.append(mappable) self._finalize_grid(x, y) @@ -279,37 +252,23 @@ def map_dataarray(self, func, x, y, **kwargs): return self - def map_dataarray_line(self, x=None, y=None, hue=None, **kwargs): - """ - Apply a line plot to a 2d facet subset of the data. 
- - Parameters - ---------- - x, y, hue: string - dimension names for the axes and hues of each facet - - Returns - ------- - self : FacetGrid object - - """ - from .plot import _infer_line_data, line + def map_dataarray_line(self, func, x, y, **kwargs): add_legend = kwargs.pop('add_legend', True) kwargs['add_legend'] = False + func_kwargs = kwargs.copy() + func_kwargs['_labels'] = False for d, ax in zip(self.name_dicts.flat, self.axes.flat): # None is the sentinel value if d is not None: subset = self.data.loc[d] - mappable = line(subset, x=x, y=y, hue=hue, - ax=ax, _labels=False, - **kwargs) + mappable = func(subset, x=x, y=y, ax=ax, **func_kwargs) self._mappables.append(mappable) _, _, hueplt, xlabel, ylabel, huelabel = _infer_line_data( darray=self.data.loc[self.name_dicts.flat[0]], - x=x, y=y, hue=hue) + x=x, y=y, hue=func_kwargs['hue']) self._hue_var = hueplt self._hue_label = huelabel @@ -571,9 +530,9 @@ def map(self, func, *args, **kwargs): return self -def _easy_facetgrid(data, plotfunc, x=None, y=None, kind=None, row=None, col=None, - col_wrap=None, sharex=True, sharey=True, aspect=None, - size=None, subplot_kws=None, **kwargs): +def _easy_facetgrid(data, plotfunc, kind, x=None, y=None, row=None, + col=None, col_wrap=None, sharex=True, sharey=True, + aspect=None, size=None, subplot_kws=None, **kwargs): """ Convenience method to call xarray.plot.FacetGrid from 2d plotting methods @@ -594,9 +553,8 @@ def _easy_facetgrid(data, plotfunc, x=None, y=None, kind=None, row=None, col=Non sharex=sharex, sharey=sharey, figsize=figsize, aspect=aspect, size=size, subplot_kws=subplot_kws) + if kind == 'line': + return g.map_dataarray_line(plotfunc, x, y, **kwargs) + if kind == 'dataarray': return g.map_dataarray(plotfunc, x, y, **kwargs) - elif kind == 'array line': - return g.map_dataarray_line(hue=kwargs.pop('hue'), **kwargs) - elif kind == 'dataset': - return g.map_dataset(plotfunc, x, y, **kwargs) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 
ee509c3d54a..4bedf206923 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -9,7 +9,6 @@ from __future__ import absolute_import, division, print_function import functools -import warnings import numpy as np import pandas as pd @@ -17,14 +16,13 @@ from xarray.core.common import contains_cftime_datetimes from xarray.core.pycompat import basestring -from .facetgrid import FacetGrid, _easy_facetgrid +from .facetgrid import _easy_facetgrid from .utils import ( - _add_colorbar, _determine_cmap_params, - _ensure_plottable, _infer_interval_breaks, + _add_colorbar, _ensure_plottable, _infer_interval_breaks, _infer_line_data, _infer_xy_labels, _interval_to_double_bound_points, - _interval_to_mid_points, _is_monotonic, _rescale_imshow_rgb, - _resolve_intervals_2dplot, _update_axes, _valid_other_type, - get_axis, import_matplotlib_pyplot, label_from_attrs) + _interval_to_mid_points, _process_cmap_cbar_kwargs, _rescale_imshow_rgb, + _resolve_intervals_2dplot, _update_axes, _valid_other_type, get_axis, + import_matplotlib_pyplot, label_from_attrs) def plot(darray, row=None, col=None, col_wrap=None, ax=None, hue=None, @@ -112,79 +110,6 @@ def plot(darray, row=None, col=None, col_wrap=None, ax=None, hue=None, return plotfunc(darray, **kwargs) -def _infer_line_data(darray, x, y, hue): - error_msg = ('must be either None or one of ({0:s})' - .format(', '.join([repr(dd) for dd in darray.dims]))) - ndims = len(darray.dims) - - if x is not None and x not in darray.dims and x not in darray.coords: - raise ValueError('x ' + error_msg) - - if y is not None and y not in darray.dims and y not in darray.coords: - raise ValueError('y ' + error_msg) - - if x is not None and y is not None: - raise ValueError('You cannot specify both x and y kwargs' - 'for line plots.') - - if ndims == 1: - dim, = darray.dims # get the only dimension name - huename = None - hueplt = None - hue_label = '' - - if (x is None and y is None) or x == dim: - xplt = darray[dim] - yplt = darray - - else: - 
yplt = darray[dim] - xplt = darray - - else: - if x is None and y is None and hue is None: - raise ValueError('For 2D inputs, please' - 'specify either hue, x or y.') - - if y is None: - xname, huename = _infer_xy_labels(darray=darray, x=x, y=hue) - xplt = darray[xname] - if xplt.ndim > 1: - if huename in darray.dims: - otherindex = 1 if darray.dims.index(huename) == 0 else 0 - otherdim = darray.dims[otherindex] - yplt = darray.transpose(otherdim, huename) - xplt = xplt.transpose(otherdim, huename) - else: - raise ValueError('For 2D inputs, hue must be a dimension' - + ' i.e. one of ' + repr(darray.dims)) - - else: - yplt = darray.transpose(xname, huename) - - else: - yname, huename = _infer_xy_labels(darray=darray, x=y, y=hue) - yplt = darray[yname] - if yplt.ndim > 1: - if huename in darray.dims: - otherindex = 1 if darray.dims.index(huename) == 0 else 0 - xplt = darray.transpose(otherdim, huename) - else: - raise ValueError('For 2D inputs, hue must be a dimension' - + ' i.e. one of ' + repr(darray.dims)) - - else: - xplt = darray.transpose(yname, huename) - - hue_label = label_from_attrs(darray[huename]) - hueplt = darray[huename] - - xlabel = label_from_attrs(xplt) - ylabel = label_from_attrs(yplt) - - return xplt, yplt, hueplt, xlabel, ylabel, hue_label - - # This function signature should not change so that it can use # matplotlib format strings def line(darray, *args, **kwargs): @@ -241,9 +166,7 @@ def line(darray, *args, **kwargs): allargs = locals().copy() allargs.update(allargs.pop('kwargs')) allargs.pop('darray') - allargs['data'] = darray - allargs['plotfunc'] = line - return _easy_facetgrid(kind='array line', **allargs) + return _easy_facetgrid(darray, line, kind='line', **allargs) ndims = len(darray.dims) if ndims > 2: @@ -592,40 +515,23 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None, allargs = locals().copy() allargs.pop('imshow_rgb') allargs.update(allargs.pop('kwargs')) - + allargs.pop('darray') # Need the decorated plotting 
function allargs['plotfunc'] = globals()[plotfunc.__name__] - allargs['data'] = darray - del allargs['darray'] - - return _easy_facetgrid(kind='dataarray', **allargs) + return _easy_facetgrid(darray, kind='dataarray', **allargs) plt = import_matplotlib_pyplot() - # colors is mutually exclusive with cmap - if cmap and colors: - raise ValueError("Can't specify both cmap and colors.") - # colors is only valid when levels is supplied or the plot is of type - # contour or contourf - if colors and (('contour' not in plotfunc.__name__) and (not levels)): - raise ValueError("Can only specify colors with contour or levels") - # we should not be getting a list of colors in cmap anymore - # is there a better way to do this test? - if isinstance(cmap, (list, tuple)): - warnings.warn("Specifying a list of colors in cmap is deprecated. " - "Use colors keyword instead.", - DeprecationWarning, stacklevel=3) - rgb = kwargs.pop('rgb', None) - xlab, ylab = _infer_xy_labels( - darray=darray, x=x, y=y, imshow=imshow_rgb, rgb=rgb) - if rgb is not None and plotfunc.__name__ != 'imshow': raise ValueError('The "rgb" keyword is only valid for imshow()') elif rgb is not None and not imshow_rgb: raise ValueError('The "rgb" keyword is only valid for imshow()' 'with a three-dimensional array (per facet)') + xlab, ylab = _infer_xy_labels( + darray=darray, x=x, y=y, imshow=imshow_rgb, rgb=rgb) + # better to pass the ndarrays directly to plotting functions xval = darray[xlab].values yval = darray[ylab].values @@ -659,22 +565,8 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None, _ensure_plottable(xplt, yplt) - if 'contour' in plotfunc.__name__ and levels is None: - levels = 7 # this is the matplotlib default - - cmap_kwargs = {'plot_data': zval.data, - 'vmin': vmin, - 'vmax': vmax, - 'cmap': colors if colors else cmap, - 'center': center, - 'robust': robust, - 'extend': extend, - 'levels': levels, - 'filled': plotfunc.__name__ != 'contour', - 'norm': norm, - } - - cmap_params = 
_determine_cmap_params(**cmap_kwargs) + cmap_params, cbar_kwargs = _process_cmap_cbar_kwargs( + plotfunc, locals(), zval.data) if 'contour' in plotfunc.__name__: # extend is a keyword argument only for contour and contourf, but @@ -710,13 +602,11 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None, ax.set_title(darray._title_for_slice()) if add_colorbar: - cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs) if add_labels and 'label' not in cbar_kwargs: cbar_kwargs['label'] = label_from_attrs(darray) cbar = _add_colorbar(primitive, ax, cbar_ax, cbar_kwargs, cmap_params) - - elif cbar_ax is not None or cbar_kwargs is not None: + elif (cbar_ax is not None or cbar_kwargs): # inform the user about keywords which aren't used raise ValueError("cbar_ax and cbar_kwargs can't be used with " "add_colorbar=False.") diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 31fdda34e4f..e0266adf2f3 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -3,14 +3,13 @@ import itertools import textwrap import warnings - from datetime import datetime import numpy as np import pandas as pd from ..core.options import OPTIONS -from ..core.pycompat import basestring +from ..core.pycompat import basestring, getargspec from ..core.utils import is_scalar ROBUST_PERCENTILE = 2.0 @@ -349,6 +348,79 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None): return x, y +def _infer_line_data(darray, x, y, hue): + error_msg = ('must be either None or one of ({0:s})' + .format(', '.join([repr(dd) for dd in darray.dims]))) + ndims = len(darray.dims) + + if x is not None and x not in darray.dims and x not in darray.coords: + raise ValueError('x ' + error_msg) + + if y is not None and y not in darray.dims and y not in darray.coords: + raise ValueError('y ' + error_msg) + + if x is not None and y is not None: + raise ValueError('You cannot specify both x and y kwargs' + 'for line plots.') + + if ndims == 1: + dim, = darray.dims # get the only dimension name 
+ huename = None + hueplt = None + huelabel = '' + + if (x is None and y is None) or x == dim: + xplt = darray[dim] + yplt = darray + + else: + yplt = darray[dim] + xplt = darray + + else: + if x is None and y is None and hue is None: + raise ValueError('For 2D inputs, please' + 'specify either hue, x or y.') + + if y is None: + xname, huename = _infer_xy_labels(darray=darray, x=x, y=hue) + xplt = darray[xname] + if xplt.ndim > 1: + if huename in darray.dims: + otherindex = 1 if darray.dims.index(huename) == 0 else 0 + otherdim = darray.dims[otherindex] + yplt = darray.transpose(otherdim, huename) + xplt = xplt.transpose(otherdim, huename) + else: + raise ValueError('For 2D inputs, hue must be a dimension' + + ' i.e. one of ' + repr(darray.dims)) + + else: + yplt = darray.transpose(xname, huename) + + else: + yname, huename = _infer_xy_labels(darray=darray, x=y, y=hue) + yplt = darray[yname] + if yplt.ndim > 1: + if huename in darray.dims: + otherindex = 1 if darray.dims.index(huename) == 0 else 0 + xplt = darray.transpose(darray.dims[otherindex], huename) + else: + raise ValueError('For 2D inputs, hue must be a dimension' + + ' i.e.
one of ' + repr(darray.dims)) + + else: + xplt = darray.transpose(yname, huename) + + huelabel = label_from_attrs(darray[huename]) + hueplt = darray[huename] + + xlabel = label_from_attrs(xplt) + ylabel = label_from_attrs(yplt) + + return xplt, yplt, hueplt, xlabel, ylabel, huelabel + + def get_axis(figsize, size, aspect, ax): import matplotlib as mpl import matplotlib.pyplot as plt @@ -633,3 +705,58 @@ def _infer_interval_breaks(coord, axis=0, check_monotonic=False): trim_last = tuple(slice(None, -1) if n == axis else slice(None) for n in range(coord.ndim)) return np.concatenate([first, coord[trim_last] + deltas, last], axis=axis) + + +def _process_cmap_cbar_kwargs(func, kwargs, data): + """ + Parameters + ========== + func : plotting function + kwargs : dict, + Dictionary with arguments that need to be parsed + data : ndarray, + Data values + + Returns + ======= + cmap_params + + cbar_kwargs + """ + + cmap = kwargs.pop('cmap', None) + colors = kwargs.pop('colors', None) + + cbar_kwargs = kwargs.pop('cbar_kwargs', {}) + cbar_kwargs = {} if cbar_kwargs is None else dict(cbar_kwargs) + + levels = kwargs.pop('levels', None) + if 'contour' in func.__name__ and levels is None: + levels = 7 # this is the matplotlib default + + # colors is mutually exclusive with cmap + if cmap and colors: + raise ValueError("Can't specify both cmap and colors.") + + # colors is only valid when levels is supplied or the plot is of type + # contour or contourf + if colors and (('contour' not in func.__name__) and (not levels)): + raise ValueError("Can only specify colors with contour or levels") + + # we should not be getting a list of colors in cmap anymore + # is there a better way to do this test? + if isinstance(cmap, (list, tuple)): + warnings.warn("Specifying a list of colors in cmap is deprecated. 
" + "Use colors keyword instead.", + DeprecationWarning, stacklevel=3) + + cmap_kwargs = {'plot_data': data, + 'levels': levels, + 'cmap': colors if colors else cmap, + 'filled': func.__name__ != 'contour'} + + cmap_args = getargspec(_determine_cmap_params).args + cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) + cmap_params = _determine_cmap_params(**cmap_kwargs) + + return cmap_params, cbar_kwargs diff --git a/xarray/testing.py b/xarray/testing.py index c2bb5044ef4..418f1a08668 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -4,6 +4,7 @@ import numpy as np from xarray.core import duck_array_ops +from xarray.core import formatting def _decode_string_data(data): @@ -49,8 +50,10 @@ def assert_equal(a, b): import xarray as xr __tracebackhide__ = True # noqa: F841 assert type(a) == type(b) # noqa - if isinstance(a, (xr.Variable, xr.DataArray, xr.Dataset)): - assert a.equals(b), '{}\n{}'.format(a, b) + if isinstance(a, (xr.Variable, xr.DataArray)): + assert a.equals(b), formatting.diff_array_repr(a, b, 'equals') + elif isinstance(a, xr.Dataset): + assert a.equals(b), formatting.diff_dataset_repr(a, b, 'equals') else: raise TypeError('{} not supported by assertion comparison' .format(type(a))) @@ -76,11 +79,13 @@ def assert_identical(a, b): import xarray as xr __tracebackhide__ = True # noqa: F841 assert type(a) == type(b) # noqa - if isinstance(a, xr.DataArray): + if isinstance(a, xr.Variable): + assert a.identical(b), formatting.diff_array_repr(a, b, 'identical') + elif isinstance(a, xr.DataArray): assert a.name == b.name - assert_identical(a._to_temp_dataset(), b._to_temp_dataset()) + assert a.identical(b), formatting.diff_array_repr(a, b, 'identical') elif isinstance(a, (xr.Dataset, xr.Variable)): - assert a.identical(b), '{}\n{}'.format(a, b) + assert a.identical(b), formatting.diff_dataset_repr(a, b, 'identical') else: raise TypeError('{} not supported by assertion comparison' .format(type(a))) diff --git 
a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 3c4fc67c5eb..3fe014bdaba 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -799,6 +799,7 @@ def test_to_datetimeindex_feb_29(calendar): @pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.xfail(reason='https://github.com/pandas-dev/pandas/issues/24263') def test_multiindex(): index = xr.cftime_range('2001-01-01', periods=100, calendar='360_day') mindex = pd.MultiIndex.from_arrays([index]) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 9ea38e7d5f2..e978350d322 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -650,7 +650,7 @@ def test_merge_one_dim_concat_another(self): expected = Dataset({'foo': ('x', [0, 1, 2, 3]), 'bar': ('x', [10, 20, 30, 40])}) - actual = auto_combine(objs, concat_dim=['x', None]) + actual = auto_combine(objs, concat_dim=['x', None], compat='equals') assert_identical(expected, actual) actual = auto_combine(objs) @@ -661,7 +661,19 @@ def test_merge_one_dim_concat_another(self): Dataset({'foo': ('x', [2, 3])})], [Dataset({'bar': ('x', [10, 20])}), Dataset({'bar': ('x', [30, 40])})]] - actual = auto_combine(objs, concat_dim=[None, 'x']) + actual = auto_combine(objs, concat_dim=[None, 'x'], compat='equals') + assert_identical(expected, actual) + + def test_internal_ordering(self): + # This gives a MergeError if _auto_combine_1d is not sorting by + # data_vars correctly, see GH #2662 + objs = [Dataset({'foo': ('x', [0, 1])}), + Dataset({'bar': ('x', [10, 20])}), + Dataset({'foo': ('x', [2, 3])}), + Dataset({'bar': ('x', [30, 40])})] + actual = auto_combine(objs, concat_dim='x', compat='equals') + expected = Dataset({'foo': ('x', [0, 1, 2, 3]), + 'bar': ('x', [10, 20, 30, 40])}) assert_identical(expected, actual) def test_combine_concat_over_redundant_nesting(self): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 
aa02e802fc5..8995fca2f95 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2909,6 +2909,15 @@ def test_to_and_from_dict(self): ValueError, "cannot convert dict without the key 'data'"): DataArray.from_dict(d) + # check the data=False option + expected_no_data = expected.copy() + del expected_no_data['data'] + del expected_no_data['coords']['x']['data'] + expected_no_data['coords']['x'].update({'dtype': '