diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD index df1fad16..5202a8cc 100644 --- a/CONTRIBUTING.MD +++ b/CONTRIBUTING.MD @@ -88,12 +88,7 @@ that will lint the code. Make sure all of these checks pass, if not make changes ## Versioning -The repository will adhere to the following set of general guidelines to update versions. -Given the arbitrary version 1.2.3: - -- x.x.3 Refers to accumulation of updates that do not introduce significantly new functionality or change core dependencies. -- x.2.x This accumulation of updates introduces significantly new functionality and possibly the need to deprecate previous functionality and changing core dependencies. -- 1.x.x Such a change introduces not only a significant accumulation of updates but changes to core functions necessitating major updates to documentation and core dependencies. +The repository adheres to [Semantic Versioning 2.0.0](https://semver.org/). ## Docker Use diff --git a/docs/sphinx/SPHINX_CONTRIBUTING.MD b/docs/sphinx/SPHINX_CONTRIBUTING.MD index df1fad16..5202a8cc 100644 --- a/docs/sphinx/SPHINX_CONTRIBUTING.MD +++ b/docs/sphinx/SPHINX_CONTRIBUTING.MD @@ -88,12 +88,7 @@ that will lint the code. Make sure all of these checks pass, if not make changes ## Versioning -The repository will adhere to the following set of general guidelines to update versions. -Given the arbitrary version 1.2.3: - -- x.x.3 Refers to accumulation of updates that do not introduce significantly new functionality or change core dependencies. -- x.2.x This accumulation of updates introduces significantly new functionality and possibly the need to deprecate previous functionality and changing core dependencies. -- 1.x.x Such a change introduces not only a significant accumulation of updates but changes to core functions necessitating major updates to documentation and core dependencies. 
+The repository adheres to [Semantic Versioning 2.0.0](https://semver.org/). ## Docker Use diff --git a/docs/sphinx/SphinxTutorial.ipynb b/docs/sphinx/SphinxTutorial.ipynb index 65c465fc..8cb24a51 100644 --- a/docs/sphinx/SphinxTutorial.ipynb +++ b/docs/sphinx/SphinxTutorial.ipynb @@ -103,21 +103,45 @@ "id": "c24dfc06", "metadata": {}, "source": [ - "The agreement map compares the encodings of the benchmark map and candidate map using a \"comparison function\" to then output unique encodings. In this particular case the \"Szudzik\" comparison function was used by default since no argument was passed in for the `comparison_function` argument. The Szudzik function is defined below:" + "The agreement map compares the encodings of the benchmark map and candidate map using a \"comparison function\" to then output unique encodings. In this particular case the \"Szudzik\" comparison function was used by default since no argument was passed in for the `comparison_function` argument. First, a negative value transformation (nvt) is used to support negative number encodings:" ] }, { "cell_type": "markdown", - "id": "27c2d169", + "id": "6b2dec44", "metadata": {}, "source": [ "$$\n", "c = \\text{candidate value} \\\\\n", "b = \\text{benchmark value} \\\\\n", - "f(x)= \n", + "nvt(x)= \n", "\\begin{cases}\n", - " c^{2} + c + b,& \\text{if } c\\geq b\\\\\n", - " b^{2} + c, & \\text{otherwise}\n", + " 2 * x,& \\text{if } x \\geq 0\\\\\n", + " -2 * x -1, & \\text{otherwise}\n", + "\\end{cases} \\\\\n", + "ct = nvt(c) \\\\\n", + "bt = nvt(b) \\\\\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "id": "5ba5f9b0", + "metadata": {}, + "source": [ + "Then the Szudzik function is applied to the transformed values:" + ] + }, + { + "cell_type": "markdown", + "id": "94e6bfbd", + "metadata": {}, + "source": [ + "$$\n", + "szudzik(ct, bt)= \n", + "\\begin{cases}\n", + " ct^{2} + ct + bt,& \\text{if } ct\\geq bt\\\\\n", + " bt^{2} + ct, & \\text{otherwise}\n", "\\end{cases}\n", 
"$$" ] diff --git a/docs/sphinx/utils.rst b/docs/sphinx/utils.rst index 6284a21e..06d76573 100644 --- a/docs/sphinx/utils.rst +++ b/docs/sphinx/utils.rst @@ -10,4 +10,3 @@ Utilities loading_datasets exceptions schemas - visualize diff --git a/docs/sphinx/visualize.rst b/docs/sphinx/visualize.rst deleted file mode 100644 index c9fbaeaa..00000000 --- a/docs/sphinx/visualize.rst +++ /dev/null @@ -1,7 +0,0 @@ -Visualize -######### - -:doc:`Return to Homepage <../index>` - -.. automodule:: gval.utils.visualize - :members: diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb index 65c465fc..8cb24a51 100644 --- a/notebooks/Tutorial.ipynb +++ b/notebooks/Tutorial.ipynb @@ -103,21 +103,45 @@ "id": "c24dfc06", "metadata": {}, "source": [ - "The agreement map compares the encodings of the benchmark map and candidate map using a \"comparison function\" to then output unique encodings. In this particular case the \"Szudzik\" comparison function was used by default since no argument was passed in for the `comparison_function` argument. The Szudzik function is defined below:" + "The agreement map compares the encodings of the benchmark map and candidate map using a \"comparison function\" to then output unique encodings. In this particular case the \"Szudzik\" comparison function was used by default since no argument was passed in for the `comparison_function` argument. 
First, a negative value transformation (nvt) is used to support negative number encodings:" ] }, { "cell_type": "markdown", - "id": "27c2d169", + "id": "6b2dec44", "metadata": {}, "source": [ "$$\n", "c = \\text{candidate value} \\\\\n", "b = \\text{benchmark value} \\\\\n", - "f(x)= \n", + "nvt(x)= \n", "\\begin{cases}\n", - " c^{2} + c + b,& \\text{if } c\\geq b\\\\\n", - " b^{2} + c, & \\text{otherwise}\n", + " 2 * x,& \\text{if } x \\geq 0\\\\\n", + " -2 * x -1, & \\text{otherwise}\n", + "\\end{cases} \\\\\n", + "ct = nvt(c) \\\\\n", + "bt = nvt(b) \\\\\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "id": "5ba5f9b0", + "metadata": {}, + "source": [ + "Then the Szudzik function is applied to the transformed values:" + ] + }, + { + "cell_type": "markdown", + "id": "94e6bfbd", + "metadata": {}, + "source": [ + "$$\n", + "szudzik(ct, bt)= \n", + "\\begin{cases}\n", + " ct^{2} + ct + bt,& \\text{if } ct\\geq bt\\\\\n", + " bt^{2} + ct, & \\text{otherwise}\n", "\\end{cases}\n", "$$" ] diff --git a/src/gval/statistics/categorical_stat_funcs.py b/src/gval/statistics/categorical_stat_funcs.py index 526c393d..639db4e9 100644 --- a/src/gval/statistics/categorical_stat_funcs.py +++ b/src/gval/statistics/categorical_stat_funcs.py @@ -434,3 +434,61 @@ def fowlkes_mallows_index(tp: Number, fp: Number, fn: Number) -> float: .. [1] [Fowlkes-Mallows Index](https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index) """ return math.sqrt((tp / (tp + fp)) * (tp / (tp + fn))) + + +def equitable_threat_score(tp: Number, tn: Number, fp: Number, fn: Number) -> float: + """ + Computes Equitable Threat Score (Gilbert Score) + + Parameters + ---------- + tp: Number + Count reflecting true positive + tn: Number + Count reflecting true negative + fp: Number + Count reflecting false positive + fn: Number + Count reflecting false negative + + Returns + ------- + float + Equitable threat score from -1/3 to 1 + + References + ---------- + .. 
[1] [Equitable Threat Score](https://resources.eumetrain.org/data/4/451/english/msg/ver_categ_forec/uos2/uos2_ko4.htm) + + """ + total_population = tp + tn + fp + fn + a_ref = ((tp + fp) * (tp + fn)) / total_population + return (tp - a_ref) / (tp - a_ref + fp + fn) + + +def balanced_accuracy(tp: Number, tn: Number, fp: Number, fn: Number) -> float: + """ + Computes Balanced Accuracy + + Parameters + ---------- + tp: Number + Count reflecting true positive + tn: Number + Count reflecting true negative + fp: Number + Count reflecting false positive + fn: Number + Count reflecting false negative + + Returns + ------- + float + Balanced Accuracy from 0 to 1 + + References + ---------- + .. [1] [Balanced Accuracy](https://neptune.ai/blog/balanced-accuracy#Balanced%20Accuracy) + """ + + return ((tp / (tp + fn)) + (tn / (tn + fp))) / 2 diff --git a/tests/cases_compute_categorical_metrics.py b/tests/cases_compute_categorical_metrics.py index a0bb6709..2ddfb618 100644 --- a/tests/cases_compute_categorical_metrics.py +++ b/tests/cases_compute_categorical_metrics.py @@ -20,7 +20,9 @@ "tn": {0: 4845.0}, "tp": {0: 22248.0}, "accuracy": {0: 0.671433173899036}, + "balanced_accuracy": {0: 0.6008362234427282}, "critical_success_index": {0: 0.6265983214104658}, + "equitable_threat_score": {0: 0.10732415444447231}, "f_score": {0: 0.7704401426741005}, "false_discovery_rate": {0: 0.21081196126423327}, "false_negative_rate": {0: 0.24743767547271928}, @@ -46,10 +48,12 @@ "tn": {0: 9489.0, 1: 5200.0}, "tp": {0: 2470.0, 1: 4845.0}, "accuracy": {0: 0.646397492027458, 1: 0.6199086645272772}, + "balanced_accuracy": {0: 0.6638514325121567, 1: 0.6228475926801269}, "critical_success_index": { 0: 0.2740790057700843, 1: 0.44029443838604143, }, + "equitable_threat_score": {0: 0.12607286705706663, 1: 0.1387798441467566}, "f_score": {0: 0.43023863438425364, 1: 0.6113950406965739}, "false_discovery_rate": {0: 0.6878948698508971, 1: 0.33766233766233766}, "false_negative_rate": {0: 0.3077354260089686, 
1: 0.43227091633466136}, diff --git a/tests/cases_statistics.py b/tests/cases_statistics.py index 9972aa50..3413377a 100644 --- a/tests/cases_statistics.py +++ b/tests/cases_statistics.py @@ -25,7 +25,9 @@ expected_results = [ [ 0.8823529411764706, + 0.8365384615384616, 0.8571428571428571, + 0.5072463768115942, 0.9230769230769231, 0.07692307692307693, 0.07692307692307693,