Skip to content

Commit

Permalink
Merge pull request #912 from HERA-Team/fix-where-inpainted-redavg
Browse files Browse the repository at this point in the history
Fix LSTBIN for redundantly-averaged inpainted data
  • Loading branch information
steven-murray committed Sep 21, 2023
2 parents 98efdac + 99803bb commit 54cfb9a
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 12 deletions.
31 changes: 28 additions & 3 deletions hera_cal/lstbin_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,7 @@ def lst_bin_files_for_baselines(
)
if inpfile is not None:
# This returns a DataContainer (unless something went wrong) since it should
# always be a 'baseline' type of UVFlag.s
# always be a 'baseline' type of UVFlag.
inpainted = io.load_flags(inpfile)
if not isinstance(inpainted, DataContainer):
raise ValueError(f"Expected {inpfile} to be a DataContainer")
Expand Down Expand Up @@ -881,14 +881,29 @@ def lst_bin_files_for_baselines(
for i, bl in enumerate(antpairs):
if redundantly_averaged:
bl = keyed.get_ubl_key(bl)

for j, pol in enumerate(pols):
blpol = bl + (pol,)

if blpol in _data: # DataContainer takes care of conjugates.
data[slc, i, :, j] = _data[blpol]
flags[slc, i, :, j] = _flags[blpol]
nsamples[slc, i, :, j] = _nsamples[blpol]

if inpainted is not None:
# Get the representative baseline key from this bl group that
# exists in the where_inpainted data.
if redundantly_averaged:
for inpbl in reds[bl]:
if inpbl + (pol,) in inpainted:
blpol = inpbl + (pol,)
break
else:
raise ValueError(
f"Could not find any baseline from group {bl} in "
"inpainted file"
)

where_inpainted[slc, i, :, j] = inpainted[blpol]
else:
# This baseline+pol doesn't exist in this file. That's
Expand Down Expand Up @@ -1309,6 +1324,7 @@ def lst_bin_files_single_outfile(
where_inpainted_files = _get_where_inpainted_files(
data_files, where_inpainted_file_rules
)

output_flagged, output_inpainted = _configure_inpainted_mode(
output_flagged, output_inpainted, where_inpainted_files
)
Expand All @@ -1317,6 +1333,8 @@ def lst_bin_files_single_outfile(
# they have no associated calibration)
data_files = [df for df in data_files if df]
input_cals = [cf for cf in input_cals if cf]
if where_inpainted_files is not None:
where_inpainted_files = [wif for wif in where_inpainted_files if wif]

logger.info("Got the following numbers of data files per night:")
for dflist in data_files:
Expand Down Expand Up @@ -1419,6 +1437,7 @@ def lst_bin_files_single_outfile(
input_cals,
where_inpainted_files,
)

# If we have no times at all for this file, just return
if len(all_lsts) == 0:
return {}
Expand Down Expand Up @@ -1579,6 +1598,7 @@ def lst_bin_files_single_outfile(
flags=rdc["flags"],
nsamples=rdc["nsamples"],
)

write_baseline_slc_to_file(
fl=out_files[("STD", inpainted)],
slc=slc,
Expand All @@ -1596,7 +1616,7 @@ def lst_bin_files_single_outfile(
nsamples=rdc["nsamples"],
)
write_baseline_slc_to_file(
fl=out_files[("STD", inpainted)],
fl=out_files[("MAD", inpainted)],
slc=slc,
data=rdc["mad"],
flags=rdc["flags"],
Expand Down Expand Up @@ -1858,14 +1878,19 @@ def create_lstbin_output_file(
if lst < lst_branch_cut:
lst += 2 * np.pi

fname = outdir / fname_format.format(
fname = fname_format.format(
kind=kind,
lst=lst,
pol="".join(pols),
inpaint_mode="inpaint"
if inpaint_mode
else ("flagged" if inpaint_mode is False else ""),
)
# There's a weird gotcha with pathlib: joining with an absolute path, e.g.
# path / "/file.name", discards `path` and yields just "/file.name" (in root).
if fname.startswith('/'):
fname = fname[1:]
fname = outdir / fname

logger.info(f"Initializing {fname}")

Expand Down
10 changes: 4 additions & 6 deletions hera_cal/tests/mock_uvdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
with open(f"{DATA_PATH}/hera_antpos.yaml", "r") as fl:
HERA_ANTPOS = yaml.safe_load(fl)

start: 46920776.3671875
end: 234298706.0546875
delta: 122070.3125
PHASEII_FREQS = np.arange(
46920776.3671875, 234298706.0546875 + 10.0, 122070.3125
)


def create_mock_hera_obs(
Expand All @@ -35,9 +35,7 @@ def create_mock_hera_obs(
lst_start=0.1,
jd_start: float | None = None,
ntimes: int = 2,
freqs: np.ndarray = np.arange(
46920776.3671875, 234298706.0546875 + 10.0, 122070.3125
),
freqs: np.ndarray = PHASEII_FREQS,
pols: list[str] = ["xx", "yy", "xy", "yx"],
ants: list[int] | None = None,
antpairs: list[tuple[int, int]] | None = None,
Expand Down
83 changes: 81 additions & 2 deletions hera_cal/tests/test_lstbin_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,6 @@ def test_flag_below_min_N(self):
assert np.all(norm_n[0] == 2)
assert not np.any(np.isinf(std_n[0]))

print(np.sum(flg_n[1:]), flg_n[1:].size)
assert not np.any(flg_n[1:])
assert np.all(norm_n[1:] == 7)
assert not np.any(np.isinf(std_n[1:]))
Expand Down Expand Up @@ -859,6 +858,83 @@ def test_simple_redundant_averaged_file(self, uvd_redavg, uvd_redavg_file):
uvd_redavg.Npols,
)

def test_redavg_with_where_inpainted(self, tmp_path):
    """LST-bin redundantly-averaged data together with where-inpainted files.

    Two scenarios are exercised:

    1. Data files and their ``.where_inpainted.h5`` companions come from the
       same 7-antenna dataset; a single LST bin is expected back.
    2. The data files are the 7-antenna originals, but the where-inpainted
       files on disk were written for a 5-antenna dataset; a ``ValueError``
       matching "Could not find any baseline from group" is expected.
    """
    # Settings shared by both mock datasets; only the antenna count differs.
    dataset_kw = dict(
        ndays=2,
        nfiles=3,
        ntimes=2,
        creator=mockuvd.create_uvd_identifiable,
        freqs=mockuvd.PHASEII_FREQS[:25],
        pols=['xx', 'xy'],
        redundantly_averaged=True,
    )

    uvds = mockuvd.make_dataset(ants=np.arange(7), **dataset_kw)
    uvd_files = mockuvd.write_files_in_hera_format(
        uvds, tmp_path, add_where_inpainted_files=True
    )

    ap = uvds[0][0].get_antpairs()
    reds = RedundantGroups.from_antpos(
        dict(zip(uvds[0][0].antenna_numbers, uvds[0][0].antenna_positions)),
    )

    def run_lstbin(file_lists):
        # Flatten the per-night list-of-lists once and derive the matching
        # where-inpainted file name for every data file.
        flat = [fl for flist in file_lists for fl in flist]
        return lstbin_simple.lst_bin_files_for_baselines(
            data_files=flat,
            lst_bin_edges=[0, 1.9 * np.pi],
            redundantly_averaged=True,
            rephase=False,
            antpairs=ap,
            reds=reds,
            where_inpainted_files=[
                str(Path(f).with_suffix(".where_inpainted.h5")) for f in flat
            ],
        )

    lstbins, _d0, _f0, _n0, _inpflg, _times0 = run_lstbin(uvd_files)
    assert len(lstbins) == 1

    # Also test that if a where_inpainted file has missing baselines, an error
    # is raised.
    # This is kind of a dodgy way to test it: move the original data files
    # aside, write a whole new dataset in the same place but with fewer
    # baselines, then move the original data files (but not the where_inpainted
    # files) back, so they mismatch.
    for flist in uvd_files:
        for fl in flist:
            fl = Path(fl)
            # with_suffix() already yields the full path, so there is no need
            # to re-join it onto the parent (which would break relative paths).
            fl.rename(fl.with_suffix('.bk'))
            fl.with_suffix(".where_inpainted.h5").unlink()

    uvds = mockuvd.make_dataset(
        ants=np.arange(5),  # fewer than the original
        **dataset_kw,
    )
    uvd_files = mockuvd.write_files_in_hera_format(
        uvds, tmp_path, add_where_inpainted_files=True
    )

    # Move back the originals, replacing the freshly written data files.
    for flist in uvd_files:
        for fl in flist:
            fl = Path(fl)
            fl.unlink()
            fl.with_suffix('.bk').rename(fl)

    with pytest.raises(ValueError, match="Could not find any baseline from group"):
        run_lstbin(uvd_files)


def test_make_lst_grid():
lst_grid = lstbin_simple.make_lst_grid(0.01, begin_lst=None)
Expand Down Expand Up @@ -1463,9 +1539,12 @@ def test_inpaint_mode_no_flags(self, tmp_path_factory):
ntimes_per_file=2,
clobber=True,
)

# Additionally try fname format with leading / which should be removed
# automatically in the writing.
out_files = lstbin_simple.lst_bin_files(
config_file=cfl,
fname_format="zen.{kind}.{lst:7.5f}{inpaint_mode}.uvh5",
fname_format="/zen.{kind}.{lst:7.5f}.{inpaint_mode}.uvh5",
rephase=False,
sigma_clip_thresh=None,
sigma_clip_min_N=2,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools>=30.3.0", "wheel", "setuptools_scm[toml]>=6.2"]
requires = ["setuptools>=30.3.0", "wheel", "setuptools_scm[toml]>=6.2,!=8.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]
Expand Down

0 comments on commit 54cfb9a

Please sign in to comment.