Merge pull request #19 from martinvonk/dev

Update main to v2.2.3
martinvonk · Jan 16, 2023 · f3a2a17 · f3a2a17
2 parents 3fbc927 + a468f1c
commit f3a2a17
Show file tree

Hide file tree

Showing 13 changed files with 530 additions and 143 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -13,9 +13,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: '3.x'
     - name: Install dependencies
@@ -28,4 +28,4 @@ jobs:
         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
       run: |
         python3 -m build
-        python3 -m twine upload --repository pypi dist/*
+        python3 -m twine upload --repository pypi dist/*
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,4 @@
 
 /src/spei.egg-info
 /dist
-/tests
+/references
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@ SPEI is a simple Python package to calculate drought indices for time series suc
 
 This package uses popular Python packages such as Pandas and SciPy to make it easy and versatile for the user to calculate the drought indices. Pandas Series are great for dealing with time series; providing interpolation, rolling average and other manipulation options. SciPy gives the option to use all [distributions](https://docs.scipy.org/doc/scipy/reference/stats.html) available in the library to fit the data.
 
-For the calculation of potential evaporation, ta a look at [pyet](https://github.com/phydrus/pyet). This is another great package that uses pandas Series to calculate different kinds of potential evaporation time series.
+For the calculation of potential evaporation, take a look at [pyet](https://github.com/phydrus/pyet). This is another great package that uses pandas Series to calculate different kinds of potential evaporation time series.
 
 Please feel free to contribute or ask questions!
 

diff --git a/examples/example01_indices.ipynb b/examples/example01_indices.ipynb
diff --git a/examples/example02_distributions.ipynb b/examples/example02_distributions.ipynb
diff --git a/examples/example03_drought_NL.ipynb b/examples/example03_drought_NL.ipynb
diff --git a/examples/example04_package_comparison.ipynb b/examples/example04_package_comparison.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,13 +1,13 @@
 [project]
 name = "spei"
-version = "0.2.2"
+version = "0.2.3"
 authors = [
   { name="Martin Vonk" },
 ]
 description = "A simple Python package to calculate drought indices for time series such as the SPI, SPEI and SGI."
 readme = "README.md"
 license = { file="LICENSE" }
-requires-python = ">=3.7"
+requires-python = ">=3.8"
 dependencies = [
   "numpy", "scipy", "matplotlib", "pandas"
 ]

diff --git a/src/spei/__init__.py b/src/spei/__init__.py
@@ -1 +1,2 @@
-from . import si, plot, utils
+from . import si, plot, utils
+from .si import spi, spei, sgi, ssfi
diff --git a/src/spei/plot.py b/src/spei/plot.py
@@ -1,15 +1,20 @@
 import matplotlib.pyplot as plt
 
-from typing import Any
 from itertools import cycle
 from calendar import month_name, month_abbr
 from pandas import Series
 from numpy import meshgrid, linspace, array, reshape
 from scipy.stats import gaussian_kde
 from .utils import check_series, dist_test
 
+# Type Hinting
+from typing import List, Optional
+from .typing import Axes, ContinuousDist
 
-def si(si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Any = None) -> Any:
+
+def si(
+    si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Axes = None
+) -> Axes:
     """Plot the standardized index values as a time series.
 
     Parameters
@@ -52,13 +57,13 @@ def si(si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Any = None)
 
 def dist(
     series: Series,
-    dist: Any,
+    dist: ContinuousDist,
     cumulative: bool = False,
     test_dist: bool = True,
-    cmap: str = None,
+    cmap: Optional[str] = None,
     figsize: tuple = (8, 10),
     legend: bool = True,
-) -> Any:
+) -> Axes:
     """Plot the (cumulative) histogram and scipy fitted distribution
     for the time series on a monthly basis.
 
@@ -145,21 +150,21 @@ def dist(
 
 def monthly_density(
     si: Series,
-    years: list[int] = [],
-    months: list[int] = [],
+    years: Optional[List[int]] = None,
+    months: Optional[List[int]] = None,
     cmap: str = "tab20c",
-    ax: Any = None,
-) -> Any:
+    ax: Optional[Axes] = None,
+) -> Axes:
     """Plot the monthly kernel-density estimate for a specific year.
 
     Parameters
     ----------
     si : pandas.Series
         Series of the standardized index
-    year : list
+    years : list, optional
         List of years as int
-    months : list
-        List of months as int
+    months : list, optional
+        List of months as int, by default all months
     cmap : str, optional
         matplotlib colormap, by default 'tab20c'
     ax : matplotlib.Axes, optional
@@ -172,6 +177,13 @@ def monthly_density(
     """
     if ax is None:
         _, ax = plt.subplots(figsize=(6, 4))
+
+    if years is None:
+        years = []
+
+    if months is None:
+        months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+
     cm = plt.get_cmap(cmap, 20)
     colors = reshape(array([cm(x) for x in range(20)], dtype="f,f,f,f"), (5, 4))
     lsts = cycle(["--", "-.", ":"])

diff --git a/src/spei/si.py b/src/spei/si.py
@@ -1,16 +1,39 @@
-from typing import Any
 from pandas import Series
 from numpy import linspace
 from scipy.stats import norm, gamma, fisk, genextreme
 from .utils import check_series
 
+# Type Hinting
+from typing import Optional
+from .typing import ContinuousDist
+
 
 def get_si_ppf(
     series: Series,
-    dist: Any,
+    dist: ContinuousDist,
     sgi: bool = False,
     prob_zero: bool = False,
 ) -> Series:
+    """Internal helper function to calculate drought index
+
+    Parameters
+    ----------
+    series : Series
+        Series with observations
+    dist : ContinuousDist
+        Continuous distribution from the SciPy library
+    sgi : bool, optional
+        Whether to calculate the standardized groundwater index or not, by
+        default False
+    prob_zero : bool, optional
+        Apply logic to observations of zero and calculate the probability
+        separately, by default False
+
+    Returns
+    -------
+    Series
+        Series with probability point function ppf
+    """
 
     check_series(series)
 
@@ -24,7 +47,7 @@ def get_si_ppf(
         else:
             if prob_zero:
                 p0 = (data == 0.0).sum() / len(data)
-                pars, loc, scale = dist.fit(data[data != 0.0], scale=data.std())
+                *pars, loc, scale = dist.fit(data[data != 0.0], scale=data.std())
                 cdf_sub = dist.cdf(data, pars, loc=loc, scale=scale)
                 cdf = p0 + (1 - p0) * cdf_sub
                 cdf[data == 0.0] = p0
@@ -61,15 +84,17 @@ def sgi(series: Series) -> Series:
     return get_si_ppf(series, None, sgi=True)
 
 
-def spi(series: Series, dist: Any = None, prob_zero: bool = False) -> Series:
+def spi(
+    series: Series, dist: Optional[ContinuousDist] = None, prob_zero: bool = False
+) -> Series:
     """Method to compute the Standardized Precipitation Index [spi_2002]_.
 
     Parameters
     ----------
     series: pandas.Series
         Pandas time series of the precipitation. Time series index
         should be a pandas DatetimeIndex.
-    dist: scipy.stats._continuous_distns
+    dist: scipy.stats.rv_continuous
         Can be any continuous distribution from the scipy.stats library.
         However, for the SPI generally the Gamma probability density
         function is recommended. Other appropriate choices could be the
@@ -89,21 +114,23 @@ def spi(series: Series, dist: Any = None, prob_zero: bool = False) -> Series:
        22, 1571-1592, 2002.
     """
 
-    if dist == None:
+    if dist is None:
         dist = gamma
 
-    return get_si_ppf(series, dist, prob_zero)
+    return get_si_ppf(series, dist, prob_zero=prob_zero)
+
 
+def spei(series: Series, dist: Optional[ContinuousDist] = None) -> Series:
 
-def spei(series: Series, dist: Any = None) -> Series:
-    """Method to compute the Standardized Precipitation Evaporation Index [spei_2010]_.
+    """Method to compute the Standardized Precipitation Evaporation Index
+    [spei_2010]_.
 
     Parameters
     ----------
     series: pandas.Series
         Pandas time series of the precipitation. Time series index
         should be a pandas DatetimeIndex.
-    dist: scipy.stats._continuous_distns
+    dist: scipy.stats.rv_continuous
         Can be any continuous distribution from the scipy.stats library.
         However, for the SPEI generally the log-logistic (fisk) probability
         density function is recommended. Other appropriate choices could be
@@ -121,21 +148,21 @@ def spei(series: Series, dist: Any = None) -> Series:
        Journal of Climate, 23, 1696-1718, 2010.
     """
 
-    if dist == None:
+    if dist is None:
         dist = fisk  # log-logistic
 
     return get_si_ppf(series, dist)
 
 
-def ssfi(series: Series, dist: Any = None) -> Series:
+def ssfi(series: Series, dist: Optional[ContinuousDist] = None) -> Series:
     """Method to compute the Standardized StreamFlow Index [ssfi_2020]_.
 
     Parameters
     ----------
     series: pandas.Series
         Pandas time series of the precipitation. Time series index
         should be a pandas DatetimeIndex.
-    dist: scipy.stats._continuous_distns
+    dist: scipy.stats.rv_continuous
         Can be any continuous distribution from the scipy.stats library.
         However, for the SSFI generally the gamma probability density
         function is recommended. Other appropriate choices could be the
@@ -153,7 +180,7 @@ def ssfi(series: Series, dist: Any = None) -> Series:
        and nonparametric methods. Water Resources Research, 56, 2020.
     """
 
-    if dist == None:
+    if dist is None:
         dist = genextreme
 
     return get_si_ppf(series, dist)
diff --git a/src/spei/typing.py b/src/spei/typing.py
@@ -0,0 +1,9 @@
+from typing import TypeVar
+
+from scipy.stats._continuous_distns import rv_continuous
+from matplotlib.axes._base import _AxesBase
+from numpy.typing import ArrayLike
+
+ContinuousDist = TypeVar("ContinuousDist", bound=rv_continuous)
+Axes = TypeVar("Axes", bound=_AxesBase)
+ArrayLike = TypeVar("ArrayLike", bound=ArrayLike)
diff --git a/src/spei/utils.py b/src/spei/utils.py
@@ -1,4 +1,3 @@
-from typing import Any
 from pandas import Series, DataFrame
 from scipy.stats import (
     norm,
@@ -12,6 +11,10 @@
     kstest,
 )
 
+# Type Hinting
+from typing import List, Optional, Tuple
+from .typing import ArrayLike, ContinuousDist
+
 
 def check_series(series: Series) -> None:
     """Check if provided time series is of type pandas.Series
@@ -30,13 +33,16 @@ def check_series(series: Series) -> None:
     if not isinstance(series, Series):
         if isinstance(series, DataFrame):
             raise TypeError(
-                f"Please convert pandas.DataFrame to a pandas.Series using .squeeze()"
+                "Please convert pandas.DataFrame to a"
+                "pandas.Series using DataFrame.squeeze()"
             )
         else:
             raise TypeError(f"Please provide a Pandas Series instead of {type(series)}")
 
 
-def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any:
+def dist_test(
+    data: Series, dist: List[ContinuousDist], N: int = 100, alpha: float = 0.05
+) -> Tuple[str, float, bool, ArrayLike]:
     """Fit a distribution and perform the two-sided
     Kolmogorov-Smirnov test for goodness of fit. The
     null hypothesis is that the data and distributions
@@ -47,7 +53,7 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any
     ----------
     data : array_like
         1-D array of observations of random variables
-    dist: scipy.stats._continuous_distns
+    dist: scipy.stats.rv_continuous
         Can be any continuous distribution from the
         scipy.stats library.
     N : int, optional
@@ -61,7 +67,7 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any
 
     Returns
     -------
-    string, float, bool, floats
+    string, float, bool, array_like
         distribution name, p-value, whether H0 is rejected and fitted parameters
 
     References
@@ -77,7 +83,10 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any
 
 
 def dists_test(
-    data: Series, distributions: list[Any] = None, N: int = 100, alpha: float = 0.05
+    data: Series,
+    distributions: Optional[List[ContinuousDist]] = None,
+    N: int = 100,
+    alpha: float = 0.05,
 ) -> DataFrame:
     """Fit a list of distribution and perform the
     two-sided Kolmogorov-Smirnov test for goodness
@@ -89,8 +98,9 @@ def dists_test(
     ----------
     data : array_like
         1-D array of observations of random variables
-    distributions : list of scipy.stats._continuous_distns, optional
-        A list of (can be) any continuous distribution from the scipy.stats library, by default None
+    distributions : list of scipy.stats.rv_continuous, optional
+        A list of (can be) any continuous distribution from the scipy.stats
+        library, by default None which makes a custom selection
     N : int, optional
         Sample size, by default 100
     alpha : float, optional
@@ -125,7 +135,7 @@ def dists_test(
         ]
 
     df = DataFrame([dist_test(data, D, N, alpha) for D in distributions])
-    cols = ["Distribution", "KS p-value", f"Reject H0"]
+    cols = ["Distribution", "KS p-value", "Reject H0"]
     cols += [f"Param {i+1}" for i in range(df.columns.stop - len(cols))]
     df.columns = cols
     df = df.set_index(cols[0])