Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attr tracking #142

Merged
merged 9 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 169 additions & 11 deletions src/gval/accessors/gval_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from gval.comparison.tabulation import _crosstab_Datasets, _crosstab_DataArrays
from gval.comparison.compute_categorical_metrics import _compute_categorical_metrics
from gval.comparison.compute_continuous_metrics import _compute_continuous_metrics
from gval.utils.schemas import Crosstab_df, Metrics_df
from gval.attributes.attributes import _attribute_tracking_xarray
from gval.utils.schemas import Crosstab_df, Metrics_df, AttributeTrackingDf
from gval.utils.visualize import _map_plot
from gval.comparison.pairing_functions import difference

Expand Down Expand Up @@ -57,6 +58,41 @@ def check_same_type(self, benchmark_map: Union[xr.Dataset, xr.DataArray]):
if not isinstance(benchmark_map, self.data_type):
raise TypeError(f"Benchmark Map needs to be data type of {self.data_type}")

def __handle_attribute_tracking(
    self,
    candidate_map: Union[xr.Dataset, xr.DataArray],
    benchmark_map: Union[xr.Dataset, xr.DataArray],
    agreement_map: Optional[Union[xr.Dataset, xr.DataArray]] = None,
    attribute_tracking_kwargs: Optional[Dict] = None,
):  # pragma: no cover
    """
    Handles attribute tracking for categorical and continuous comparisons
    by delegating to the ``attribute_tracking`` accessor method.

    Parameters
    ----------
    candidate_map : Union[xr.Dataset, xr.DataArray]
        Candidate map whose attributes are tracked.
    benchmark_map : Union[xr.Dataset, xr.DataArray]
        Benchmark map whose attributes are tracked.
    agreement_map : Optional[Union[xr.Dataset, xr.DataArray]], default = None
        Agreement map to receive the combined attributes. Tracking on the
        agreement map can be disabled by passing ``{"agreement_map": None}``
        within ``attribute_tracking_kwargs``.
    attribute_tracking_kwargs : Optional[Dict], default = None
        Extra keyword arguments forwarded to ``attribute_tracking``. Any
        ``benchmark_map``/``agreement_map`` entries are removed because those
        arguments are supplied explicitly here.

    Returns
    -------
    Union[DataFrame, Tuple[DataFrame, Union[xr.Dataset, xr.DataArray]]]
        Result of ``attribute_tracking``: the attribute dataframe, plus the
        updated agreement map when one is used.
    """

    # use user passed attribute_tracking_kwargs to pass arguments to attribute_tracking()
    if attribute_tracking_kwargs is not None:
        # work on a shallow copy so the caller's dict is not mutated
        # by the key removals below
        attribute_tracking_kwargs = dict(attribute_tracking_kwargs)

        # benchmark_map is always taken from the explicit argument
        attribute_tracking_kwargs.pop("benchmark_map", None)

        if "agreement_map" in attribute_tracking_kwargs:
            # A user-supplied None disables agreement-map attribute updates;
            # any other value is ignored in favor of the explicit argument.
            # The key is always removed so the keyword is not passed twice
            # (once explicitly and once via **kwargs).
            if attribute_tracking_kwargs.pop("agreement_map") is None:
                agreement_map = None

        # NOTE: the accessor method defined in this file is named
        # `attribute_tracking` (not `attribute_tracking_xarray`), so call it
        results = candidate_map.gval.attribute_tracking(
            benchmark_map=benchmark_map,
            agreement_map=agreement_map,
            **attribute_tracking_kwargs,
        )

    else:
        results = candidate_map.gval.attribute_tracking(
            benchmark_map=benchmark_map, agreement_map=agreement_map
        )

    return results

def categorical_compare(
self,
benchmark_map: Union[gpd.GeoDataFrame, xr.Dataset, xr.DataArray],
Expand All @@ -77,8 +113,22 @@ def categorical_compare(
average: str = "micro",
weights: Optional[Iterable[Number]] = None,
rasterize_attributes: Optional[list] = None,
attribute_tracking: bool = False,
attribute_tracking_kwargs: Optional[Dict] = None,
) -> Tuple[
Union[xr.Dataset, xr.DataArray], DataFrame[Crosstab_df], DataFrame[Metrics_df]
Union[
Tuple[
Union[xr.Dataset, xr.DataArray],
DataFrame[Crosstab_df],
DataFrame[Metrics_df],
],
Tuple[
Union[xr.Dataset, xr.DataArray],
DataFrame[Crosstab_df],
DataFrame[Metrics_df],
DataFrame[AttributeTrackingDf],
],
]
]:
"""
Computes comparison between two categorical value xarray's.
Expand Down Expand Up @@ -138,11 +188,18 @@ def categorical_compare(
rasterize_attributes: Optional[list], default = None
Numerical attributes of a Benchmark Map GeoDataFrame to rasterize. Only applicable if benchmark map is a vector file.
This cannot be none if the benchmark map is a vector file.
attribute_tracking: bool, default = False
Whether to return a dataframe with the attributes of the candidate and benchmark maps.
attribute_tracking_kwargs: Optional[Dict], default = None
Keyword arguments to pass to `gval.attribute_tracking()`. This is only used if `attribute_tracking` is True. By default, agreement maps are used for attribute tracking but this can be set to None within this argument to override. See `gval.attribute_tracking` for more information.

Returns
-------
Union[xr.Dataset, xr.DataArray], DataFrame[Crosstab_df], DataFrame[Metrics_df]
Tuple with agreement map, cross-tabulation table, and metric table
Union[
fernando-aristizabal marked this conversation as resolved.
Show resolved Hide resolved
Tuple[Union[xr.Dataset, xr.DataArray], DataFrame[Crosstab_df], DataFrame[Metrics_df]],
Tuple[Union[xr.Dataset, xr.DataArray], DataFrame[Crosstab_df], DataFrame[Metrics_df], DataFrame[AttributeTrackingDf]]
]
Tuple with agreement map, cross-tabulation table, and metric table. Possibly attribute tracking table as well.
"""

# using homogenize accessor to avoid code reuse
Expand All @@ -168,9 +225,6 @@ def categorical_compare(
comparison_function=comparison_function,
)

# clear memory
del candidate, benchmark

metrics_df = _compute_categorical_metrics(
crosstab_df=crosstab_df,
metrics=metrics,
Expand All @@ -180,6 +234,21 @@ def categorical_compare(
weights=weights,
)

if attribute_tracking:
results = self.__handle_attribute_tracking(
candidate_map=candidate,
benchmark_map=benchmark,
agreement_map=agreement_map,
attribute_tracking_kwargs=attribute_tracking_kwargs,
)

if len(results) == 2:
attributes_df, agreement_map = results
else:
attributes_df = results

return agreement_map, crosstab_df, metrics_df, attributes_df

return agreement_map, crosstab_df, metrics_df

def continuous_compare(
Expand All @@ -191,8 +260,17 @@ def continuous_compare(
nodata: Optional[Number] = None,
encode_nodata: Optional[bool] = False,
rasterize_attributes: Optional[list] = None,
attribute_tracking: bool = False,
attribute_tracking_kwargs: Optional[Dict] = None,
) -> Tuple[
Union[xr.Dataset, xr.DataArray], DataFrame[Crosstab_df], DataFrame[Metrics_df]
Union[
Tuple[Union[xr.Dataset, xr.DataArray], DataFrame[Metrics_df]],
Tuple[
Union[xr.Dataset, xr.DataArray],
DataFrame[Metrics_df],
DataFrame[AttributeTrackingDf],
],
]
]:
"""
Computes comparison between two continuous value xarray's.
Expand Down Expand Up @@ -220,11 +298,18 @@ def continuous_compare(
Encoded no data value to write to agreement map output. A nodata argument must be passed. This will use `rxr.rio.write_nodata(nodata, encode=encode_nodata)`.
rasterize_attributes: Optional[list], default = None
Numerical attributes of a GeoDataFrame to rasterize.
attribute_tracking: bool, default = False
Whether to return a dataframe with the attributes of the candidate and benchmark maps.
attribute_tracking_kwargs: Optional[Dict], default = None
Keyword arguments to pass to `gval.attribute_tracking()`. This is only used if `attribute_tracking` is True. By default, agreement maps are used for attribute tracking but this can be set to None within this argument to override. See `gval.attribute_tracking` for more information.

Returns
-------
Union[xr.Dataset, xr.DataArray], DataFrame[Metrics_df]
Tuple with agreement map and metric table.
Union[
fernando-aristizabal marked this conversation as resolved.
Show resolved Hide resolved
Tuple[Union[xr.Dataset, xr.DataArray], DataFrame[Metrics_df]],
Tuple[Union[xr.Dataset, xr.DataArray], DataFrame[Metrics_df], DataFrame[AttributeTrackingDf]]
]
Tuple with agreement map and metric table, possibly attribute tracking table as well.
"""

# using homogenize accessor to avoid code reuse
Expand All @@ -246,7 +331,20 @@ def continuous_compare(
metrics=metrics,
)

del candidate, benchmark
if attribute_tracking:
results = self.__handle_attribute_tracking(
candidate_map=candidate,
benchmark_map=benchmark,
agreement_map=agreement_map,
attribute_tracking_kwargs=attribute_tracking_kwargs,
)

if len(results) == 2:
attributes_df, agreement_map = results
else:
attributes_df = results

return agreement_map, metrics_df, attributes_df

return agreement_map, metrics_df

Expand Down Expand Up @@ -423,6 +521,66 @@ def compute_crosstab(
comparison_function,
)

def attribute_tracking(
    self,
    benchmark_map: Union[xr.DataArray, xr.Dataset],
    agreement_map: Optional[Union[xr.DataArray, xr.Dataset]] = None,
    candidate_suffix: Optional[str] = "_candidate",
    benchmark_suffix: Optional[str] = "_benchmark",
    candidate_include: Optional[Iterable[str]] = None,
    candidate_exclude: Optional[Iterable[str]] = None,
    benchmark_include: Optional[Iterable[str]] = None,
    benchmark_exclude: Optional[Iterable[str]] = None,
) -> Union[
    DataFrame[AttributeTrackingDf],
    Tuple[DataFrame[AttributeTrackingDf], Union[xr.DataArray, xr.Dataset]],
]:
    """
    Collect the attributes of this (candidate) xarray object and a benchmark
    xarray object into a single pandas dataframe.

    Parameters
    ----------
    benchmark_map : Union[xr.DataArray, xr.Dataset]
        Benchmark map xarray object whose attributes are tracked alongside
        those of self (the candidate map).
    agreement_map : Optional[Union[xr.DataArray, xr.Dataset]], default = None
        When provided, the combined attributes are also assigned onto this
        map and it is returned alongside the dataframe.
    candidate_suffix : Optional[str], default = '_candidate'
        Suffix appended to candidate map attribute names in the output.
    benchmark_suffix : Optional[str], default = '_benchmark'
        Suffix appended to benchmark map attribute names in the output.
    candidate_include : Optional[Iterable[str]], default = None
        Attributes to keep from the candidate map. Mutually exclusive with
        candidate_exclude.
    candidate_exclude : Optional[Iterable[str]], default = None
        Attributes to drop from the candidate map. Mutually exclusive with
        candidate_include.
    benchmark_include : Optional[Iterable[str]], default = None
        Attributes to keep from the benchmark map. Mutually exclusive with
        benchmark_exclude.
    benchmark_exclude : Optional[Iterable[str]], default = None
        Attributes to drop from the benchmark map. Mutually exclusive with
        benchmark_include.

    Raises
    ------
    ValueError
        If candidate_include and candidate_exclude are both not None.
    ValueError
        If benchmark_include and benchmark_exclude are both not None.

    Returns
    -------
    Union[DataFrame[AttributeTrackingDf], Tuple[DataFrame[AttributeTrackingDf], Union[xr.DataArray, xr.Dataset]]]
        Single-row dataframe of suffixed candidate and benchmark attributes.
        If agreement_map is not None, a tuple of the dataframe and the
        attribute-updated agreement map.
    """
    # gather every argument by keyword and delegate to the module-level
    # implementation; self._obj is the candidate map backing this accessor
    tracking_args = dict(
        candidate_map=self._obj,
        benchmark_map=benchmark_map,
        agreement_map=agreement_map,
        candidate_suffix=candidate_suffix,
        benchmark_suffix=benchmark_suffix,
        candidate_include=candidate_include,
        candidate_exclude=candidate_exclude,
        benchmark_include=benchmark_include,
        benchmark_exclude=benchmark_exclude,
    )
    return _attribute_tracking_xarray(**tracking_args)

def cat_plot(
self,
title: str = "Categorical Map",
Expand Down
131 changes: 131 additions & 0 deletions src/gval/attributes/attributes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""
Carries over attributes from xarray's DataArray and Dataset classes.

TODO:
- [ ] implement tests with xr.testing.assert_identical
- [ ] make additional test cases???
- [ ] add to accessor methods
- [ ] documentation
"""

__author__ = "Fernando Aristizabal"

from typing import Optional, Iterable, Union, Tuple

import pandas as pd
import xarray as xr
from pandera.typing import DataFrame

from gval.utils.schemas import AttributeTrackingDf


def _attribute_tracking_xarray(
    candidate_map: Union[xr.DataArray, xr.Dataset],
    benchmark_map: Union[xr.DataArray, xr.Dataset],
    agreement_map: Optional[Union[xr.DataArray, xr.Dataset]] = None,
    candidate_suffix: Optional[str] = "_candidate",
    benchmark_suffix: Optional[str] = "_benchmark",
    candidate_include: Optional[Iterable[str]] = None,
    candidate_exclude: Optional[Iterable[str]] = None,
    benchmark_include: Optional[Iterable[str]] = None,
    benchmark_exclude: Optional[Iterable[str]] = None,
) -> Union[
    DataFrame[AttributeTrackingDf],
    Tuple[DataFrame[AttributeTrackingDf], Union[xr.DataArray, xr.Dataset]],
]:
    """
    Combine the attributes of a candidate and a benchmark xarray object into
    one single-row pandas dataframe, suffixing each column by its origin.

    Parameters
    ----------
    candidate_map : Union[xr.DataArray, xr.Dataset]
        Candidate map xarray object.
    benchmark_map : Union[xr.DataArray, xr.Dataset]
        Benchmark map xarray object.
    agreement_map : Optional[Union[xr.DataArray, xr.Dataset]], default = None
        When provided, the suffixed attributes are also assigned onto this
        map and it is returned together with the dataframe.
    candidate_suffix : Optional[str], default = '_candidate'
        Suffix appended to candidate map attribute names.
    benchmark_suffix : Optional[str], default = '_benchmark'
        Suffix appended to benchmark map attribute names.
    candidate_include : Optional[Iterable[str]], default = None
        Attributes to keep from the candidate map. Mutually exclusive with
        candidate_exclude.
    candidate_exclude : Optional[Iterable[str]], default = None
        Attributes to drop from the candidate map. Mutually exclusive with
        candidate_include.
    benchmark_include : Optional[Iterable[str]], default = None
        Attributes to keep from the benchmark map. Mutually exclusive with
        benchmark_exclude.
    benchmark_exclude : Optional[Iterable[str]], default = None
        Attributes to drop from the benchmark map. Mutually exclusive with
        benchmark_include.

    Raises
    ------
    ValueError
        If candidate_include and candidate_exclude are both not None.
    ValueError
        If benchmark_include and benchmark_exclude are both not None.

    Returns
    -------
    Union[DataFrame[AttributeTrackingDf], Tuple[DataFrame[AttributeTrackingDf], Union[xr.DataArray, xr.Dataset]]]
        Single-row dataframe of suffixed candidate and benchmark attributes.
        If agreement_map is not None, a tuple of the dataframe and the
        attribute-updated agreement map.
    """

    def _select(attrs, include, exclude, which):
        # include/exclude are mutually exclusive filters on attribute keys
        if (include is not None) and (exclude is not None):
            raise ValueError(
                f"{which}_include and {which}_exclude are mutually exclusive"
            )
        if include is not None:
            return {k: v for k, v in attrs.items() if k in include}
        if exclude is not None:
            return {k: v for k, v in attrs.items() if k not in exclude}
        return dict(attrs)

    # candidate is filtered (and validated) first, then benchmark,
    # matching the original validation order
    candidate_attrs = _select(
        candidate_map.attrs, candidate_include, candidate_exclude, "candidate"
    )
    benchmark_attrs = _select(
        benchmark_map.attrs, benchmark_include, benchmark_exclude, "benchmark"
    )

    # one-row frames; the suffix on each column name records its origin
    candidate_df = pd.DataFrame([candidate_attrs], index=[0]).add_suffix(
        candidate_suffix
    )
    benchmark_df = pd.DataFrame([benchmark_attrs], index=[0]).add_suffix(
        benchmark_suffix
    )

    # side-by-side concatenation keeps the single row
    combined_df = pd.concat([candidate_df, benchmark_df], axis=1)

    # validate schema
    AttributeTrackingDf.validate_column_suffixes(
        combined_df, candidate_suffix, benchmark_suffix
    )

    if agreement_map is None:
        return combined_df

    # write the suffixed attributes onto the agreement map as well;
    # benchmark values win on any (unlikely) column-name collision,
    # as with the original dict merge
    new_attrs = {
        **candidate_df.to_dict(orient="records")[0],
        **benchmark_df.to_dict(orient="records")[0],
    }
    return combined_df, agreement_map.assign_attrs(new_attrs)
Loading