Skip to content

Commit

Permalink
Merge pull request #19 from martinvonk/dev
Browse files Browse the repository at this point in the history
Update main to v2.2.3
  • Loading branch information
martinvonk authored Jan 16, 2023
2 parents 3fbc927 + a468f1c commit f3a2a17
Show file tree
Hide file tree
Showing 13 changed files with 530 additions and 143 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
Expand All @@ -28,4 +28,4 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m build
python3 -m twine upload --repository pypi dist/*
python3 -m twine upload --repository pypi dist/*
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

/src/spei.egg-info
/dist
/tests
/references
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SPEI is a simple Python package to calculate drought indices for time series suc

This package uses the popular Python packages such as Pandas and Scipy to make it easy and versatile for the user to calculate the drought indices. Pandas Series are great for dealing with time series; providing interpolation, rolling average and other manipulation options. SciPy gives the option to use all [distributions](https://docs.scipy.org/doc/scipy/reference/stats.html) available in the library to fit the data.

For the calculation of potential evaporation, ta a look at [pyet](https://github.com/phydrus/pyet). This is another great package that uses pandas Series to calculate different kinds of potential evaporation time series.
For the calculation of potential evaporation, take a look at [pyet](https://github.com/phydrus/pyet). This is another great package that uses pandas Series to calculate different kinds of potential evaporation time series.

Please feel free to contribute or ask questions!

Expand Down
42 changes: 18 additions & 24 deletions examples/example01_indices.ipynb

Large diffs are not rendered by default.

50 changes: 20 additions & 30 deletions examples/example02_distributions.ipynb

Large diffs are not rendered by default.

84 changes: 38 additions & 46 deletions examples/example03_drought_NL.ipynb

Large diffs are not rendered by default.

352 changes: 352 additions & 0 deletions examples/example04_package_comparison.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
[project]
name = "spei"
version = "0.2.2"
version = "0.2.3"
authors = [
{ name="Martin Vonk" },
]
description = "A simple Python package to calculate drought indices for time series such as the SPI, SPEI and SGI."
readme = "README.md"
license = { file="LICENSE" }
requires-python = ">=3.7"
requires-python = ">=3.8"
dependencies = [
"numpy", "scipy", "matplotlib", "pandas"
]
Expand Down
3 changes: 2 additions & 1 deletion src/spei/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from . import si, plot, utils
from . import si, plot, utils
from .si import spi, spei, sgi, ssfi
36 changes: 24 additions & 12 deletions src/spei/plot.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import matplotlib.pyplot as plt

from typing import Any
from itertools import cycle
from calendar import month_name, month_abbr
from pandas import Series
from numpy import meshgrid, linspace, array, reshape
from scipy.stats import gaussian_kde
from .utils import check_series, dist_test

# Type Hinting
from typing import List, Optional
from .typing import Axes, ContinuousDist

def si(si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Any = None) -> Any:

def si(
si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Axes = None
) -> Axes:
"""Plot the standardized index values as a time series.
Parameters
Expand Down Expand Up @@ -52,13 +57,13 @@ def si(si: Series, bound: float = 3.0, figsize: tuple = (8, 4), ax: Any = None)

def dist(
series: Series,
dist: Any,
dist: ContinuousDist,
cumulative: bool = False,
test_dist: bool = True,
cmap: str = None,
cmap: Optional[str] = None,
figsize: tuple = (8, 10),
legend: bool = True,
) -> Any:
) -> Axes:
"""Plot the (cumulative) histogram and scipy fitted distribution
for the time series on a monthly basis.
Expand Down Expand Up @@ -145,21 +150,21 @@ def dist(

def monthly_density(
si: Series,
years: list[int] = [],
months: list[int] = [],
years: Optional[List[int]] = None,
months: Optional[List[int]] = None,
cmap: str = "tab20c",
ax: Any = None,
) -> Any:
ax: Optional[Axes] = None,
) -> Axes:
"""Plot the monthly kernel-density estimate for a specific year.
Parameters
----------
si : pandas.Series
Series of the standardized index
year : list
years : list, optional
List of years as int
months : list
List of months as int
months : list, optional
List of months as int, by default all months
cmap : str, optional
matplotlib colormap, by default 'tab20c'
ax : matplotlib.Axes, optional
Expand All @@ -172,6 +177,13 @@ def monthly_density(
"""
if ax is None:
_, ax = plt.subplots(figsize=(6, 4))

if years is None:
years = []

if months is None:
months = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

cm = plt.get_cmap(cmap, 20)
colors = reshape(array([cm(x) for x in range(20)], dtype="f,f,f,f"), (5, 4))
lsts = cycle(["--", "-.", ":"])
Expand Down
55 changes: 41 additions & 14 deletions src/spei/si.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,39 @@
from typing import Any
from pandas import Series
from numpy import linspace
from scipy.stats import norm, gamma, fisk, genextreme
from .utils import check_series

# Type Hinting
from typing import Optional
from .typing import ContinuousDist


def get_si_ppf(
series: Series,
dist: Any,
dist: ContinuousDist,
sgi: bool = False,
prob_zero: bool = False,
) -> Series:
"""Internal helper function to calculate drought index
Parameters
----------
series : Series
Series with observations
dist : ContinuousDist
Continuous distribution from the SciPy library
sgi : bool, optional
Whether to calculate the standardized groundwater index or not, by
default False
prob_zero : bool, optional
Apply logic to observations of zero and calculate the probability
separately, by default False
Returns
-------
Series
Series with probability point function ppf
"""

check_series(series)

Expand All @@ -24,7 +47,7 @@ def get_si_ppf(
else:
if prob_zero:
p0 = (data == 0.0).sum() / len(data)
pars, loc, scale = dist.fit(data[data != 0.0], scale=data.std())
*pars, loc, scale = dist.fit(data[data != 0.0], scale=data.std())
cdf_sub = dist.cdf(data, pars, loc=loc, scale=scale)
cdf = p0 + (1 - p0) * cdf_sub
cdf[data == 0.0] = p0
Expand Down Expand Up @@ -61,15 +84,17 @@ def sgi(series: Series) -> Series:
return get_si_ppf(series, None, sgi=True)


def spi(series: Series, dist: Any = None, prob_zero: bool = False) -> Series:
def spi(
series: Series, dist: Optional[ContinuousDist] = None, prob_zero: bool = False
) -> Series:
"""Method to compute the Standardized Precipitation Index [spi_2002]_.
Parameters
----------
series: pandas.Series
Pandas time series of the precipitation. Time series index
should be a pandas DatetimeIndex.
dist: scipy.stats._continuous_distns
dist: scipy.stats.rv_continuous
Can be any continuous distribution from the scipy.stats library.
However, for the SPI generally the Gamma probability density
function is recommended. Other appropriate choices could be the
Expand All @@ -89,21 +114,23 @@ def spi(series: Series, dist: Any = None, prob_zero: bool = False) -> Series:
22, 1571-1592, 2002.
"""

if dist == None:
if dist is None:
dist = gamma

return get_si_ppf(series, dist, prob_zero)
return get_si_ppf(series, dist, prob_zero=prob_zero)


def spei(series: Series, dist: Optional[ContinuousDist] = None) -> Series:

def spei(series: Series, dist: Any = None) -> Series:
"""Method to compute the Standardized Precipitation Evaporation Index [spei_2010]_.
"""Method to compute the Standardized Precipitation Evaporation Index
[spei_2010]_.
Parameters
----------
series: pandas.Series
Pandas time series of the precipitation. Time series index
should be a pandas DatetimeIndex.
dist: scipy.stats._continuous_distns
dist: scipy.stats.rv_continuous
Can be any continuous distribution from the scipy.stats library.
However, for the SPEI generally the log-logistic (fisk) probability
density function is recommended. Other appropriate choices could be
Expand All @@ -121,21 +148,21 @@ def spei(series: Series, dist: Any = None) -> Series:
Journal of Climate, 23, 1696-1718, 2010.
"""

if dist == None:
if dist is None:
dist = fisk # log-logistic

return get_si_ppf(series, dist)


def ssfi(series: Series, dist: Any = None) -> Series:
def ssfi(series: Series, dist: Optional[ContinuousDist] = None) -> Series:
"""Method to compute the Standardized StreamFlow Index [ssfi_2020]_.
Parameters
----------
series: pandas.Series
Pandas time series of the precipitation. Time series index
should be a pandas DatetimeIndex.
dist: scipy.stats._continuous_distns
dist: scipy.stats.rv_continuous
Can be any continuous distribution from the scipy.stats library.
However, for the SSFI generally the gamma probability density
function is recommended. Other appropriate choices could be the
Expand All @@ -153,7 +180,7 @@ def ssfi(series: Series, dist: Any = None) -> Series:
and nonparametric methods. Water Resources Research, 56, 2020.
"""

if dist == None:
if dist is None:
dist = genextreme

return get_si_ppf(series, dist)
9 changes: 9 additions & 0 deletions src/spei/typing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import TypeVar

from scipy.stats._continuous_distns import rv_continuous
from matplotlib.axes._base import _AxesBase
from numpy.typing import ArrayLike

# Annotation helper for SciPy continuous distributions: a TypeVar bounded by
# rv_continuous.
ContinuousDist = TypeVar("ContinuousDist", bound=rv_continuous)
# Annotation helper for matplotlib axes: a TypeVar bounded by _AxesBase.
Axes = TypeVar("Axes", bound=_AxesBase)
# NOTE: this rebinds the name ArrayLike imported from numpy.typing above. The
# right-hand side is evaluated first, so the bound is numpy's ArrayLike; after
# this line the module-level name ArrayLike refers to the TypeVar instead.
ArrayLike = TypeVar("ArrayLike", bound=ArrayLike)
28 changes: 19 additions & 9 deletions src/spei/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Any
from pandas import Series, DataFrame
from scipy.stats import (
norm,
Expand All @@ -12,6 +11,10 @@
kstest,
)

# Type Hinting
from typing import List, Optional, Tuple
from .typing import ArrayLike, ContinuousDist


def check_series(series: Series) -> None:
"""Check if provided time series is of type pandas.Series
Expand All @@ -30,13 +33,16 @@ def check_series(series: Series) -> None:
if not isinstance(series, Series):
if isinstance(series, DataFrame):
raise TypeError(
f"Please convert pandas.DataFrame to a pandas.Series using .squeeze()"
"Please convert pandas.DataFrame to a"
"pandas.Series using DataFrame.squeeze()"
)
else:
raise TypeError(f"Please provide a Pandas Series instead of {type(series)}")


def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any:
def dist_test(
data: Series, dist: List[ContinuousDist], N: int = 100, alpha: float = 0.05
) -> Tuple[str, float, bool, ArrayLike]:
"""Fit a distribution and perform the two-sided
Kolmogorov-Smirnov test for goodness of fit. The
null hypothesis is that the data and distributions
Expand All @@ -47,7 +53,7 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any
----------
data : array_like
1-D array of observations of random variables
dist: scipy.stats._continuous_distns
dist: scipy.stats.rv_continuous
Can be any continuous distribution from the
scipy.stats library.
N : int, optional
Expand All @@ -61,7 +67,7 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any
Returns
-------
string, float, bool, floats
string, float, bool, array_like
distribution name, p-value and fitted parameters
References
Expand All @@ -77,7 +83,10 @@ def dist_test(data: Series, dist: Any, N: int = 100, alpha: float = 0.05) -> Any


def dists_test(
data: Series, distributions: list[Any] = None, N: int = 100, alpha: float = 0.05
data: Series,
distributions: Optional[List[ContinuousDist]] = None,
N: int = 100,
alpha: float = 0.05,
) -> DataFrame:
"""Fit a list of distribution and perform the
two-sided Kolmogorov-Smirnov test for goodness
Expand All @@ -89,8 +98,9 @@ def dists_test(
----------
data : array_like
1-D array of observations of random variables
distributions : list of scipy.stats._continuous_distns, optional
A list of (can be) any continuous distribution from the scipy.stats library, by default None
distributions : list of scipy.stats.rv_continuous, optional
A list of (can be) any continuous distribution from the scipy.stats
library, by default None which makes a custom selection
N : int, optional
Sample size, by default 100
alpha : float, optional
Expand Down Expand Up @@ -125,7 +135,7 @@ def dists_test(
]

df = DataFrame([dist_test(data, D, N, alpha) for D in distributions])
cols = ["Distribution", "KS p-value", f"Reject H0"]
cols = ["Distribution", "KS p-value", "Reject H0"]
cols += [f"Param {i+1}" for i in range(df.columns.stop - len(cols))]
df.columns = cols
df = df.set_index(cols[0])
Expand Down

0 comments on commit f3a2a17

Please sign in to comment.