Skip to content

Commit

Permalink
Fixed references to things that have been moved or renamed.
Browse files Browse the repository at this point in the history
  • Loading branch information
Eli authored and Eli committed Jan 3, 2024
1 parent b3a730c commit 86accf7
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 93 deletions.
2 changes: 1 addition & 1 deletion tests/test_download_cimis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
import pandas as pd
from vtools.datastore import download_cimis
from dms_datastore import download_cimis

def test_download_davis():
davisdf=download_cimis.fetch_data(6)
Expand Down
2 changes: 1 addition & 1 deletion vtools/data/gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
from vtools.datastore.read_ts import *
from dms_datastore.read_ts import *

import pandas as pd
import numpy as np
Expand Down
2 changes: 0 additions & 2 deletions vtools/datastore/read_ts.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import warnings

all = ["read_ts"]

def read_ts(fpath, start=None, end=None, force_regular=True,nrows=None, selector = None,hint=None):
warnings.warn("Moved to the dms_datastore package." , PendingDeprecationWarning)
import dms_datastore.read_ts
Expand Down
3 changes: 2 additions & 1 deletion vtools/functions/climatology.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pandas as pd


__all__ = ['climatology','apply_climatology','climatology_quantiles']


Expand Down Expand Up @@ -108,7 +109,7 @@ def climatology_quantiles(ts,min_day_year,max_day_year,

if __name__=='__main__':
import matplotlib.pyplot as plt
from vtools.datastore.read_ts import read_ts
from dms_datastore.read_ts import read_ts
fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/des_twi_405_turbidity_*.csv"
fname = "//cnrastore-bdo/Modeling_Data/continuous_station_repo/raw/usgs_lib*turbidity*.rdb"
selector = "16127_63680"
Expand Down
126 changes: 38 additions & 88 deletions vtools/functions/error_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import dask.dataframe as dd
import numpy as np
import numpy.ma as ma
import pandas as pd
Expand All @@ -12,6 +13,7 @@
from scipy.stats import iqr as scipy_iqr

from vtools.data.gap import *
from vtools.data.timeseries import to_dataframe


'''
Expand Down Expand Up @@ -100,28 +102,42 @@ def threshold(ts,bounds,copy=True):
ts_out.mask(ts_out > bounds[1],inplace=True)
return ts_out

def bounds_test(ts, bounds):
    """Flag values that fall outside a ``(lower, upper)`` interval.

    Parameters
    ----------
    ts : pandas.Series or pandas.DataFrame
        Data to screen.
    bounds : tuple or None
        ``(lower, upper)`` limits. Either limit may be ``None`` to leave that
        side unbounded; ``bounds=None`` disables the test entirely.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Boolean mask labelled like `ts`. ``True`` marks a value strictly
        below `lower` or strictly above `upper`. Missing values compare as
        ``False`` on both sides, so they are never flagged.
    """
    # Build the all-False mask directly from the labels of ``ts``.  Going
    # through ``pd.DataFrame(dtype=bool).reindex_like(ts)`` only works for
    # DataFrame input (a Series has no ``columns`` axis) and does not
    # guarantee a boolean dtype for the reindexed result.
    if isinstance(ts, pd.Series):
        anomaly = pd.Series(False, index=ts.index, name=ts.name)
    else:
        anomaly = pd.DataFrame(False, index=ts.index, columns=ts.columns)

    if bounds is None:
        return anomaly

    lower, upper = bounds[0], bounds[1]
    if lower is not None:
        anomaly = anomaly | (ts < lower)
    if upper is not None:
        anomaly = anomaly | (ts > upper)
    return anomaly


def median_test(ts, level = 4, filt_len = 7, quantiles=(0.005,0.095),copy = True):
    """Return the anomaly mask produced by ``med_outliers`` for `ts`.

    Thin wrapper: `level`, `filt_len` and `quantiles` are forwarded as given,
    and ``med_outliers`` is always called with ``copy=False`` and
    ``as_anomaly=True``.  The `copy` argument is accepted but not forwarded.
    """
    forwarded = dict(level=level, filt_len=filt_len, quantiles=quantiles)
    return med_outliers(ts, copy=False, as_anomaly=True, **forwarded)


def median_test_oneside(ts, scale=None,level = 4, filt_len = 6, quantiles=(0.005,0.095),
copy = True,reverse=False):
copy = True,reverse=False):
if copy:
ts=ts.copy()
kappa = filt_len//2
ts.to_csv("forward.csv")
if reverse:
original_index = ts.index
vals = ts[::-1]
#vals.reset_index(inplace=True,drop=True)
else:
vals = ts
vals = vals.to_frame()
vals = to_dataframe(vals)
vals.columns=["ts"]

vals["z"]=vals.ts.diff()
min_periods = kappa*2 - 1
vals['my']= vals['ts'].shift().rolling(kappa*2,min_periods=min_periods).median()
vals['mz'] =vals.z.shift().rolling(kappa*2,min_periods=min_periods).median()
vals['pred'] = vals.my + kappa*vals.mz
res = vals.ts - vals.pred

dds = dd.from_pandas(vals,npartitions=50)
dds['my'] = dds['ts'].shift().rolling(kappa*2,min_periods=min_periods).median()
dds['mz'] = dds.z.shift().rolling(kappa*2,min_periods=min_periods).median()
dds['pred'] = dds.my + kappa*dds.mz
res = (dds.ts - dds.pred).compute()
if scale is None:
qq = res.quantile( q=quantiles)
scale = qq.loc[quantiles[1]] - qq.loc[quantiles[0]]
Expand All @@ -130,12 +146,11 @@ def median_test_oneside(ts, scale=None,level = 4, filt_len = 6, quantiles=(0.005
if reverse:
anomaly = anomaly[::-1]
anomaly.index = original_index
#print("anomaly",reverse,anomaly.loc[pd.Timestamp(2003,3,26,18)])
#print("vals",vals.loc[pd.Timestamp(2003,3,26,18),:])

#anomaly=anomaly #.astype(int)
return anomaly



def med_outliers(ts,level=4.,scale = None,\
filt_len=7,range=(None,None),
quantiles = (0.01,0.99),
Expand Down Expand Up @@ -203,10 +218,12 @@ def med_outliers(ts,level=4.,scale = None,\
warnings.resetwarnings()
return ts_out

def med_outliers7(ts,level=4.,scale = None,\
filt_len=7,range=(None,None),


def median_test_twoside(ts,level=4.,scale = None,\
filt_len=7,
quantiles = (0.01,0.99),
copy = True,as_anomaly=False):
copy = True,as_anomaly=True):
"""
Detect outliers by running a median filter, subtracting it
from the original series and comparing the resulting residuals
Expand Down Expand Up @@ -243,9 +260,7 @@ def med_outliers7(ts,level=4.,scale = None,\
import warnings
ts_out = ts.copy() if copy else ts
warnings.filterwarnings("ignore")

if range is not None:
threshold(ts_out,range,copy=False)


vals = ts_out.to_numpy()
#if ts_out.ndim == 1:
Expand All @@ -259,16 +274,16 @@ def mseq(flen):
b = np.arange(halflen+1,flen)
return np.concatenate((a,b))
medseq = mseq(filt_len)
filt = ts_out.rolling(filt_len,center=True,axis=0).apply(lambda x: np.nanmedian(x[medseq]))

res = ts_out - filt


dds = dd.from_pandas(ts_out,npartitions=50)
filt = dds.rolling(filt_len,center=True,axis=0).apply(lambda x: np.nanmedian(x[medseq]),raw=True,engine='numba').compute()
res = (ts_out - filt)

if scale is None:
qq = res.quantile( q=quantiles)
scale = qq.loc[quantiles[1]] - qq.loc[quantiles[0]]

anomaly = (res.abs() > level*scale) | (res.abs() < -level*scale)
anomaly = ((res.abs() > level*scale) | (res.abs() < -level*scale))
if as_anomaly:
return anomaly
# apply anomaly by setting values to nan
Expand All @@ -284,7 +299,7 @@ def gapdist_test_series(ts,smallgaplen=0):
testgapnull = test_gap.isnull()
is_small_gap = (gapcount <= smallgaplen)
smallgap = testgapnull & is_small_gap
test_gap.loc[smallgap] = -99999999.
test_gap.where(~smallgap,-99999999.,inplace=True)
return test_gap

def steep_then_nan(ts,level=4.,scale = None,\
Expand Down Expand Up @@ -371,77 +386,12 @@ def steep_then_nan(ts,level=4.,scale = None,\
if not as_anomaly:
values = np.where(outlier,np.nan,ts_out.values)
ts_out.iloc[:]= values

warnings.resetwarnings()


return outlier if as_anomaly else ts_out




# med_outliers2: two-pass median-filter outlier removal.
# An optional secondary pass (window secfilt_len) and then a main pass
# (window filt_len) each subtract a median filter from the series and set
# samples to NaN when the residual exceeds a multiple (seclevel / level) of a
# scale. The scale is secscale / scale when given (truthy); otherwise it is
# scipy_iqr computed over a window of the original ts.data around the sample.
# Returns ts_out, which is a copy of ts when copy is true, else ts itself.
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the loops and conditionals below cannot be confirmed from here.
# NOTE(review): medfilt is not imported in the visible part of the file --
# presumably scipy.signal.medfilt; confirm.
# NOTE(review): the `quantiles` parameter is never read in this function.
def med_outliers2(ts,secfilt=True,level=3.0,scale=None,filt_len=7,
quantiles=(25,75),seclevel=3.0,secscale=None,
secfilt_len=241,secquantiles=(25,75),copy=True):

import warnings
ts_out = ts.copy() if copy else ts
# Silences every warning category for the rest of the call.
warnings.filterwarnings("ignore")

#Secondary filter - median filter is first applied on a larger scale
# todo: reroute to scipy.ndimage.median_filter
if secfilt:
if ts_out.ndim == 1:
filt = medfilt(ts_out,secfilt_len)
else:
# Multi-dimensional input: run medfilt along axis 0.
filt = np.apply_along_axis(medfilt,0,ts_out,secfilt_len)
# Residual of the (possibly copied) series about its median filter.
res = ts_out - filt


# The loop length and the windows come from the original ts.data, not ts_out.
for k in range(len(ts.data)):
if not secscale:
# Window centred on k spanning up to secfilt_len samples, clipped to the
# ends of the data.
slicelow = int(max(0, k-((secfilt_len - 1)/2)))
slicehigh = int(min(len(ts.data), k + ((secfilt_len - 1)/2) + 1))
rwindow = ts.data[slicelow:slicehigh]
# NaNs are dropped before the range between the secquantiles percentiles
# is computed.
iqr = scipy_iqr(rwindow[~np.isnan(rwindow)], None, secquantiles)
else:
iqr = secscale

if (res[k] > seclevel*iqr) or (res[k] < -seclevel*iqr):
ts_out.data[k]= np.nan

#Main filter - performs a median filter on the data
#ts_out.data = ts_out.data.flatten()
if ts_out.ndim == 1:
filt = medfilt(ts_out.values,filt_len)
else:
filt = np.apply_along_axis(medfilt,0,ts_out.values,filt_len)
res = ts_out - filt

for k in range(len(ts.data)):
if not scale:
# Same clipped, centred window as the secondary pass, sized by filt_len.
slicelow = int(max(0, k-((filt_len - 1)/2)))
slicehigh = int(min(len(ts.data), k + ((filt_len - 1)/2) + 1))
rwindow = ts.data[slicelow:slicehigh]
# NOTE(review): this main pass passes secquantiles, not quantiles -- looks
# like a copy/paste slip; confirm the intended percentile range.
iqr = scipy_iqr(rwindow[~np.isnan(rwindow)], None, secquantiles)
#low,high = mquantiles(rwindow[~ np.isnan(rwindow)],quantiles)
#iqr = high - low
else:
iqr = scale

if (res[k] > level*iqr) or (res[k] < -level*iqr):
# NOTE(review): the secondary pass writes ts_out.data[k], while this pass
# writes column k via ts_out.iloc[:,k]; the two look inconsistent -- confirm.
ts_out.iloc[:,k]= np.nan

# Resets the warnings filter globally, which also discards filters that were
# installed before this function was called.
warnings.resetwarnings()

# The filtered series is dropped rather than returned.
filt = None #rts(filt,ts.start,ts.interval)

return ts_out

def rolling_window(data, block):
    """Return every length-`block` window along the last axis of `data`.

    The result is built with ``numpy.lib.stride_tricks.as_strided``, so it is
    a view onto `data` rather than a copy.  Its shape is the shape of `data`
    with the last axis replaced by ``(n - block + 1, block)``, where ``n`` is
    the length of that last axis.
    """
    leading_dims = data.shape[:-1]
    window_count = data.shape[-1] - block + 1
    # The added trailing axis steps through memory exactly like the original
    # last axis, so consecutive windows overlap by ``block - 1`` samples.
    view_strides = data.strides + (data.strides[-1],)
    view_shape = leading_dims + (window_count, block)
    return np.lib.stride_tricks.as_strided(
        data, shape=view_shape, strides=view_strides
    )


def despike(arr, n1=2, n2=20, block=10):
Expand Down

0 comments on commit 86accf7

Please sign in to comment.