Skip to content

Commit

Permalink
fix: adress Josh's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
steven-murray committed Aug 17, 2023
1 parent 2b3a755 commit b24b706
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 11 deletions.
20 changes: 17 additions & 3 deletions hera_cal/datacontainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
from collections import OrderedDict as odict
import copy
import warnings

from typing import Sequence
from .utils import conj_pol, comply_pol, make_bl, comply_bl, reverse_bl
Expand Down Expand Up @@ -515,14 +516,17 @@ def select_or_expand_times(self, new_times, in_place=True, skip_bda_check=False)
if not in_place:
return dc

def select_or_expand_freqs(
def select_freqs(
self,
freqs: np.ndarray | None = None,
channels: np.ndarray | slice | None = None,
in_place: bool=True
):
"""Update the object with a subset of frequencies (which may be repeated).
While typically this will be used to down-select frequencies, one can
'expand' the frequencies by duplicating channels.
Parameters
----------
freqs : np.ndarray, optional
Expand All @@ -532,6 +536,12 @@ def select_or_expand_freqs(
Only one of freqs or channels can be given.
in_place : bool, optional
If True, modify the object in place. Otherwise, return a modified copy.
Even if `in_place` is True, the object is still returned for convenience.
Returns
-------
DataContainer
The modified object. If `in_place` is True, this is the same object.
"""
obj = self if in_place else copy.deepcopy(self)
if freqs is None and channels is None:
Expand All @@ -540,11 +550,15 @@ def select_or_expand_freqs(
raise ValueError('Cannot specify both freqs and channels.')

if freqs is not None:
if not np.all([fq in dc.freqs for fq in freqs]):
if not np.all([fq in obj.freqs for fq in freqs]):
raise ValueError('All freqs must be in self.freqs.')
channels = np.searchsorted(obj.freqs, freqs)

axis = obj[next(iter(dc.keys()))].shape.index(len(obj.freqs))
if obj.freqs is None:
warnings.warn("It is impossible to automatically detect which axis is frequency. Trying last axis.")
axis = -1
else:
axis = obj[next(iter(obj.keys()))].shape.index(len(obj.freqs))
for bl in obj:
obj[bl] = obj[bl].take(channels, axis=axis)

Expand Down
4 changes: 2 additions & 2 deletions hera_cal/lstbin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,8 +1076,8 @@ def sigma_clip(array: np.ndarray | np.ma.MaskedArray, sigma: float=4.0, min_N: i
return np.zeros_like(array, dtype=bool)

if isinstance(array, np.ma.MaskedArray):
location = np.expand_dims(np.median(array, axis=axis), axis=axis)
scale = np.expand_dims(np.median(np.abs(array - location), axis=axis) * 1.482579, axis=axis)
location = np.expand_dims(np.ma.median(array, axis=axis), axis=axis)
scale = np.expand_dims(np.ma.median(np.abs(array - location), axis=axis) * 1.482579, axis=axis)
else:
# get robust location
location = np.expand_dims(np.nanmedian(array, axis=axis), axis=axis)
Expand Down
30 changes: 24 additions & 6 deletions hera_cal/lstbin_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,11 @@ def lst_average(
# Now do sigma-clipping.
if sigma_clip_thresh is not None:
if inpainted_mode:
warnings.warn("Sigma-clipping in in-painted mode is a bad idea.")
warnings.warn(
"Sigma-clipping in in-painted mode is a bad idea, because it creates "
"non-uniform flags over frequency, which can cause artificial spectral "
"structure. In-painted mode specifically attempts to avoid this."
)

nflags = np.sum(flags)
clip_flags = sigma_clip(data.real, sigma=sigma_clip_thresh, min_N = sigma_clip_min_N)
Expand All @@ -480,17 +484,19 @@ def lst_average(
# Update the "non_inpainted" mask
non_inpainted = data.mask

nsamples[non_inpainted] = 0

# Here we do a check to make sure Nsamples is uniform across frequency.
# Do this before setting non_inpainted to zero nsamples.
ndiff = np.diff(nsamples, axis=2)
if np.any(ndiff != 0):
warnings.warn("Nsamples is not uniform across frequency. This will result in spectral structure.")

nsamples[non_inpainted] = 0

norm = np.sum(nsamples, axis=0)
ndays_binned = np.sum((~flags).astype(int), axis=0)

logger.info("Calculating mean")
# np.sum works the same for both masked and non-masked arrays.
meandata = np.sum(data * nsamples, axis=0)

lstbin_flagged = np.all(flags, axis=0)
Expand All @@ -499,6 +505,7 @@ def lst_average(

normalizable = norm > 0
meandata[normalizable] /= norm[normalizable]
# Multiply by nan instead of just setting as nan, so both real and imag parts are nan
meandata[~normalizable] *= np.nan

# get other stats
Expand Down Expand Up @@ -748,7 +755,7 @@ def lst_bin_files_for_baselines(
# We need to down-selecton times/freqs (bls and pols will be sub-selected
# based on those in the data through the next loop)
inpainted.select_or_expand_times(times=tarr, skip_bda_check=True)
inpainted.select_or_expand_freqs(channels=freq_chans)
inpainted.select_freqs(channels=freq_chans)
else:
inpainted = None

Expand Down Expand Up @@ -977,6 +984,7 @@ def lst_bin_files_single_outfile(
output_inpainted: bool | None = None,
output_flagged: bool = True,
where_inpainted_file_rules: list[tuple[str, str]] | None = None,
sigma_clip_in_inpainted_mode: bool = False,
) -> dict[str, Path]:
"""
Bin data files into LST bins, and write all bins to disk in a single file.
Expand Down Expand Up @@ -1118,7 +1126,13 @@ def lst_bin_files_single_outfile(
of the data object). If not provided, but `output_inpainted` is set to True,
all data-flags will be considered in-painted except for baseline-times that are
fully flagged, which will be completely ignored.
sigma_clip_in_inpainted_mode
If True, sigma clip the data in inpainted mode (if sigma-clipping is turned on).
This is generally not a good idea, since the point of inpainting is to get
smoother spectra, and sigma-clipping creates non-uniform Nsamples, which can
lead to less smooth spectra. This option is only here to enable sigma-clipping
to be turned on for flagged mode, and off for inpainted mode.
Returns
-------
out_files
Expand Down Expand Up @@ -1417,7 +1431,10 @@ def lst_bin_files_single_outfile(
where_inpainted=where_inpainted,
inpainted_mode=inpainted,
flag_thresh=flag_thresh,
sigma_clip_thresh = sigma_clip_thresh,
sigma_clip_thresh = (
None if inpainted and not sigma_clip_in_inpainted_mode
else sigma_clip_thresh
),
sigma_clip_min_N = sigma_clip_min_N,
flag_below_min_N=flag_below_min_N,
)
Expand Down Expand Up @@ -2037,4 +2054,5 @@ def lst_bin_arg_parser():
a.add_argument("--no-flagged-mode", action='store_true', help="turn off output of flagged mode LST-binning")
a.add_argument("--do-inpaint-mode", action='store_true', default=None, help="turn on inpainting mode LST-binning")
a.add_argument("--where-inpainted-file-rules", nargs='*', type=str, help="rules to convert datafile names to where-inpainted-file names. A series of two strings where the first will be replaced by the latter")
a.add_argument('--sigma-clip-in-inpainted-mode', action='store_true', default=False, help='allow sigma-clipping in inpainted mode')
return a

0 comments on commit b24b706

Please sign in to comment.