-
Notifications
You must be signed in to change notification settings - Fork 0
/
modules.py
304 lines (257 loc) · 13.1 KB
/
modules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import datacube
import numpy as np
import xarray as xr
import pandas as pd
from deafrica_tools.datahandling import load_ard, mostcommon_crs
from deafrica_tools.bandindices import calculate_indices
from dea_tools.coastal import model_tides, tidal_tag, pixel_tides, tidal_stats
from deafrica_tools.dask import create_local_dask_cluster
from coastlines.raster import tide_cutoffs
from scipy.ndimage.filters import uniform_filter
from scipy.ndimage.measurements import variance
from skimage.filters import threshold_minimum, threshold_otsu
from skimage.morphology import binary_erosion,binary_dilation,disk
import random
from sklearn.model_selection import train_test_split
def lee_filter(da, size):
"""
Apply lee filter of specified window size.
Adapted from https://stackoverflow.com/questions/39785970/speckle-lee-filter-in-python
"""
img = da.values
img_mean = uniform_filter(img, (size, size))
img_sqr_mean = uniform_filter(img**2, (size, size))
img_variance = img_sqr_mean - img_mean**2
overall_variance = variance(img)
img_weights = img_variance / (img_variance + overall_variance)
img_output = img_mean + img_weights * (img - img_mean)
return img_output
def filter_by_tide_height(ds,tide_centre=0.0):
# per-pixel tide modelling
ds["tide_m"], tides_lowres = pixel_tides(ds, resample=True)
# Determine tide cutoff
tide_cutoff_min, tide_cutoff_max = tide_cutoffs(
ds, tides_lowres, tide_centre=tide_centre
)
# Filter observations using calculated tide cutoffs
tide_bool = (ds.tide_m >= tide_cutoff_min) & (
ds.tide_m <= tide_cutoff_max
)
ds_filtered = ds.sel(time=tide_bool.sum(dim=["x", "y"]) > 0)
# Apply mask, and load in corresponding tide masked data
ds_filtered = ds_filtered.where(tide_bool)
return ds_filtered
def load_s1_by_orbits(dc,query,select_orbit='all'):
'''
Function to query and load ascending and descending Sentinel-1 data
and add a variable to denote acquisition orbits
Parameters:
dc: connected datacube
query: a query dictionary to define spatial extent, measurements, time range and spatial resolution
select_orbit: String ('ascending'/'descending'/'all'), whether to select only one orbit (descending or ascending) or all observations
Returns:
Queried dataset with variable 'is_ascending' added to denote orbit path
'''
if select_orbit=='all':
# load ascending data
print('\nQuerying and loading Sentinel-1 ascending data...')
try:
ds_s1_ascending=load_ard(dc=dc,products=['s1_rtc'],resampling='bilinear',align=(10, 10),
dtype='native',sat_orbit_state='ascending',**query)
# add an variable denoting data source
ds_s1_ascending['is_ascending']=xr.DataArray(np.ones(len(ds_s1_ascending.time)),
dims=('time'),coords={'time': ds_s1_ascending.time})
except:
ds_s1_ascending=None
# load descending data
print('\nQuerying and loading Sentinel-1 descending data...')
try:
ds_s1_descending=load_ard(dc=dc,products=['s1_rtc'],resampling='bilinear',align=(10, 10),
dtype='native',sat_orbit_state='descending',**query)
# add an variable denoting data source
ds_s1_descending['is_ascending']=xr.DataArray(np.zeros(len(ds_s1_descending.time)),
dims=('time'),coords={'time': ds_s1_descending.time})
except:
ds_s1_descending=None
# merge datasets together
if ds_s1_ascending is None:
ds_s1=ds_s1_descending
elif ds_s1_descending is None:
ds_s1=ds_s1_ascending
else:
ds_s1=xr.concat([ds_s1_ascending,ds_s1_descending],dim='time').sortby('time')
else:
print('\nQuerying and loading Sentinel-1 ',select_orbit,' data...')
try:
ds_s1=load_ard(dc=dc,products=['s1_rtc'],resampling='bilinear',align=(10, 10),
dtype='native',sat_orbit_state=select_orbit,**query)
except:
ds_s1=None
return ds_s1
def filter_obs_by_orbit(ds_s1):
'''
Function to impliment per-pixel filtering of Sentinel-1 observations
to keep only observations from the orbit (ascending/descending) with higher frequency over time.
Each of the Sentinel-1 observations was acquired from either a descending or ascending orbit,
which has impacts on the local incidence angle and backscattering value.
Here we do the filtering to minimise the effects of inconsistent looking angle and obit direction for each individual pixel.
Parameters:
ds_s1: xarray.Dataset
Time-series observations of Sentinel-1 data,
with two required variables: 'is_ascending' denoting orbit path and 'mask' to identify acquisition exent
Returns:
ds_s1_filtered: xarray.Dataset
Filtered dataset
'''
print('\nFiltering Sentinel-1 product by orbit...')
cnt_ascending=((ds_s1["is_ascending"]==1)&(ds_s1['mask']!=0)).sum(dim='time')
cnt_descending=((ds_s1["is_ascending"]==0)&(ds_s1['mask']!=0)).sum(dim='time')
ds_s1_filtered=ds_s1.where(((cnt_ascending>=cnt_descending)&(ds_s1["is_ascending"]==1))|
((cnt_ascending<cnt_descending)&(ds_s1["is_ascending"]==0)))
# remove intermediate variable
ds_s1_filtered=ds_s1_filtered.drop_vars(["is_ascending"])
# drop all-nan time steps
ds_s1_filtered=ds_s1_filtered.dropna(dim='time',how='all')
return ds_s1_filtered
def process_features_s1(ds_s1,filter_size=None,s1_orbit_filtering=True,time_step='1Y',selected_orbit='all'):
'''
Function to implement preprocessing and features generation on Sentinel-1 data
preprocessing includes speckle filtering (optional), filtering observations by orbit (optional) and conversion to dB
features include annual median and std of bands/band math
Parameters:
ds_s1: xarray.Dataset
Time-series of Sentinel-1 data, with variable 'vh' required
filter_size: integer or None
Speckle filtering size
s1_orbit_filtering: Boolean
Whether to filter Sentinel-1 observations by orbit
time_step: string
time step used to generate temporal composite
selected_orbit: String ('ascending'/'descending'/'all')
whether only one or boths orbit (descending or ascending) were selected when data was queried
Returns:
xarray.Dataset
Preprocessed Sentinel-1 data
'''
ds_s1_filtered=ds_s1
# apply Lee filtering if required
if not filter_size is None:
print('Applying Lee filtering using filtering size of {} pixels...'.format(filter_size))
# The lee filter above doesn't handle null values
# We therefore set null values to 0 before applying the filter
ds_s1_filtered = ds_s1.where(np.isfinite(ds_s1), 0)
# Create a new entry in dataset corresponding to filtered VV and VH data
ds_s1_filtered["vh"] = ds_s1_filtered.vh.groupby("time").apply(lee_filter, size=filter_size)
# Null pixels should remain null, but also including pixels changed to 0 due to the filtering
ds_s1_filtered['vh'] = ds_s1_filtered.vh.where(ds_s1_filtered.vh!=0,np.nan)
# filter observations by orbit if required
if s1_orbit_filtering and (selected_orbit=='all'):
ds_s1_filtered=filter_obs_by_orbit(ds_s1_filtered)
# Scale to plot data in decibels
ds_s1_filtered['vh'] = 10 * np.log10(ds_s1_filtered.vh)
ds_s1_filtered['vv'] = 10 * np.log10(ds_s1_filtered.vv)
# generate features
print('Calculating features for Sentinel-1')
ds_s1_filtered['vv_a_vh']=ds_s1_filtered['vv']+ds_s1_filtered['vh']
ds_s1_filtered['vv_m_vh']=ds_s1_filtered['vv']-ds_s1_filtered['vh']
# annual composites
print('Generate temporal composites...')
# median
ds_summaries_s1 = (ds_s1_filtered[['vh','vv','vv_a_vh','vv_m_vh','area','angle']]
.resample(time=time_step)
.median('time')
.compute()
)
ds_summaries_s1=ds_summaries_s1.rename({var:var+'_med' for var in list(ds_summaries_s1.data_vars)})
# max
ds_max_s1 = (ds_s1_filtered[['vh','vv','vv_a_vh','vv_m_vh']]
.resample(time=time_step)
.max('time')
.compute()
)
ds_max_s1=ds_max_s1.rename({var:var+'_max' for var in list(ds_max_s1.data_vars)})
# min
ds_min_s1 = (ds_s1_filtered[['vh','vv','vv_a_vh','vv_m_vh']]
.resample(time=time_step)
.min('time')
.compute()
)
ds_min_s1=ds_min_s1.rename({var:var+'_min' for var in list(ds_min_s1.data_vars)})
# std
ds_std_s1 = (ds_s1_filtered[['vh','vv','vv_a_vh','vv_m_vh']]
.resample(time=time_step)
.std('time')
.compute()
)
ds_std_s1=ds_std_s1.rename({var:var+'_std' for var in list(ds_std_s1.data_vars)})
# merge all datasets
ds_summaries_s1=xr.merge([ds_summaries_s1,ds_std_s1,ds_max_s1,ds_min_s1])
return ds_summaries_s1
def create_coastal_mask(da,buffer_pixels):
'''
Create a simplified coastal zone mask based on time series of Sentinel-2 MNDWI data
Parameters:
ds_summaries: xarray.DataArray
Time series of Sentinel-2 MNDWI data
buffer_pixels: integer
Number of pixels to buffer coastal zone
Returns:
coastal_mask: xarray.DataArray
A single time buffered coastal zone mask (0: non-coastal and 1: coastal)
'''
print('\nCalculating simplified coastal zone mask...')
# apply thresholding and re-apply nodata values
nodata = da.isnull()
thresholded_ds = da>=0
thresholded_ds = thresholded_ds.where(~nodata)
# use 20% ~ 80% wet frequency to identify potential coastal zone
coastal_mask=(thresholded_ds.mean(dim='time') >= 0.2)&(thresholded_ds.mean(dim='time') <= 0.8)
# buffering
print('\nApplying buffering of {} Sentinel-2 pixels...'.format(buffer_pixels))
coastal_mask=xr.apply_ufunc(binary_dilation,coastal_mask.compute(),disk(buffer_pixels))
return coastal_mask
def collect_training_samples(ds_summaries_s2,ds_summaries_s1,coastal_mask,max_samples):
'''
Collect training samples of land and water from Sentinel-1 data at coastal region using classified Sentinel-2 MNDWI
Parameters:
ds_summaries_s2: xarray.Dataset
Temporarily aggregated Sentinel-2 data
ds_summaries_s1: xarray.Dataset
Temporarily aggregated Sentinel-1 features
coastal_mask:xarray.DataArray
A single time buffered coastal zone mask (0: non-coastal and 1: coastal)
max_samples: integer
Maximum number of training samples to collect for land/water class
Returns:
data,labels: Numpy arrays of Sentinel-1 features and land/water class labels
ds_summaries_s2['iswater']: xarray.DataArray of land/water classification masked to coastal region
'''
print('\nCollecting water/non-water samples...')
# subset s2 data for areas with less Sentinel-1 unavailable for some years
dim_time=list(ds_summaries_s1.dims)[0] # whether it is still named as 'time' or changed as 'year'
if len(ds_summaries_s1[dim_time])<len(ds_summaries_s2[dim_time]):
print('subseting Sentinel-2 data to match Sentinel-1 availability...')
ds_summaries_s2=ds_summaries_s2.where(ds_summaries_s2[dim_time]==ds_summaries_s1[dim_time])
# classs: water (1) and land (0)
ds_summaries_s2['iswater']=xr.where((ds_summaries_s2['MNDWI']>=0)&(coastal_mask==1),1,
xr.where((ds_summaries_s2['MNDWI']<0)&(coastal_mask==1),0,np.nan))
ds_summaries_s1=ds_summaries_s1.where(~ds_summaries_s2['iswater'].isnull(),np.nan)
# reshape arrays for input to sklearn
s1_data=ds_summaries_s1.to_array(dim='variable').transpose('x','y',dim_time, 'variable').values
# s1_data=ds_summaries_s1.to_array(dim='variable').transpose('x','y','time', 'variable').values
data_shape = s1_data.shape
data=s1_data.reshape(data_shape[0]*data_shape[1]*data_shape[2],data_shape[3])
labels=ds_summaries_s2.iswater.transpose('x','y',dim_time).values.reshape(data_shape[0]*data_shape[1]*data_shape[2],)
# labels=ds_summaries_s2.iswater.transpose('x','y','time').values.reshape(data_shape[0]*data_shape[1]*data_shape[2],)
# remove NaNs and Infinities
labels=labels[np.isfinite(data).all(axis=1)]
data=data[np.isfinite(data).all(axis=1)]
print('Number of samples available: ',data.shape[0])
# random sampling maximum number of samples per location
n_samples=np.min([int(max_samples),np.sum(labels==1),np.sum(labels==0)])
ind_water=random.sample(sorted(np.where(labels==0)[0]), n_samples)
ind_land=random.sample(sorted(np.where(labels==1)[0]), n_samples)
rand_indices=ind_water+ind_land
labels=labels[rand_indices]
data=data[rand_indices]
return data,labels,ds_summaries_s2['iswater']