Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor fixes for handling of older SMA data #1399

Merged
merged 6 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ positions are near surface of whatever celestial body their positions are refere
(either the Earth or Moon, currently).

### Changed
- Made the determination of whether or not to create a flex-pol dataset when reading
in a MIR dataset more robust (particularly with pre-V3 data formats).
- Reading in of MIR data sets into `UVData` objects will now use pyuvdata-calculated
values for `lst_array` via (`UVData.set_lsts_from_time_array`) instead of those read in
from the file due to known precision issues in the latter (~25 ms), so long as the two
agree within this known precision limit.
- `UVFlag.to_baseline` and `UVFlag.to_antenna` are now more robust to differences
in antenna metadata sorting.
- Made `MirParser` more robust against metadata indexing errors.
Expand Down Expand Up @@ -68,6 +74,8 @@ fully tested and didn't work properly.
- Having `freq_array` and `channel_width` defined on wide-band UVCal objects.

### Fixed
- A bug where `time_array` was not being correctly calculated for older (pre-V3) MIR
data formats.
- A small bug in UVFlag that could occur when reading in an older UVFlag HDF5
file with missing antenna metadata.
- A small bug (mostly affecting continuous integration) that threw an error when the
Expand Down
146 changes: 83 additions & 63 deletions pyuvdata/uvdata/mir.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,16 +343,22 @@ def _init_from_mir_parser(
if len(pol_dict) == 2:
# If dual-pol, then we need to check if the tunings are split, because
# the two polarizations will effectively be concat'd across the freq
# axis instead of the pol axis.
# axis instead of the pol axis. First, see if we have two diff receivers
rxa_mask = ant1_rxa_mask & ant2_rxa_mask
rxb_mask = ~(ant1_rxa_mask | ant2_rxa_mask)

if np.any(rxa_mask) and np.any(rxb_mask):
# If we have both VV and HH data, check to see that the tunings of the
# two receivers match.
loa_freq = np.median(mir_data.sp_data["gunnLO"][rxa_mask])
lob_freq = np.median(mir_data.sp_data["gunnLO"][rxb_mask])
pol_split_tuning = not np.isclose(loa_freq, lob_freq)
# If we have both VV and HH data, check to see that the frequencies of
# each of the spectral chunks match. If they do, then we can concat
# across the polarization axis, but if _not_, we should treat this as
# a pol-split data set.
fsky_vals = mir_data.sp_data["fsky"]
chunk_vals = mir_data.sp_data["corrchunk"]
loa_chunks = set(zip(fsky_vals[rxa_mask], chunk_vals[rxa_mask]))
lob_chunks = set(zip(fsky_vals[rxb_mask], chunk_vals[rxb_mask]))
pol_split_tuning = not (
loa_chunks.issubset(lob_chunks) or lob_chunks.issubset(loa_chunks)
)

# Map MIR pol code to pyuvdata/AIPS polarization number
pol_code_dict = {}
Expand Down Expand Up @@ -419,9 +425,13 @@ def _init_from_mir_parser(

# Make sure that something weird hasn't happened with the metadata (this
# really should never happen, only one value should exist per window).
assert np.allclose(spw_fsky, mir_data.sp_data["fsky"][data_mask])
assert np.allclose(spw_fres, mir_data.sp_data["fres"][data_mask])
assert np.allclose(spw_nchan, mir_data.sp_data["nch"][data_mask])
for val, item in zip(
[spw_fsky, spw_fres, spw_nchan], ["fsky", "fres", "nch"]
):
if not np.allclose(val, mir_data.sp_data[item][data_mask]):
warnings.warn(
"Discrepancy in %s for win %i sb %i pol %i." % (item, *spdx)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest adding some guidance in the warning message on whether this is a small discrepancy vs. something critical with the data.

Or would that be caught in the checks elsewhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I extended the error message here in aa4bd2f -- the one time I saw this pop up (which used to throw an AssertionError) it was because a single record was bad, but it can also get triggered if there's a more significant issue to be checked. In the end I figured probably best to just point the user to what values should be looked at...

)

# Get the data in the right units and dtype
spw_fsky = float(spw_fsky * 1e9) # GHz -> Hz
Expand Down Expand Up @@ -499,57 +509,6 @@ def _init_from_mir_parser(
for key in spdx_dict:
spdx_dict[key]["ch_slice"] = spw_dict[spdx_dict[key]["spw_id"]]["ch_slice"]

# Create arrays to plug visibilities and flags into. The array is shaped this
# way since when reading in a MIR file, we scan through the blt-axis the
# slowest and the freq-axis the fastest (i.e., the data is roughly ordered by
# blt, pol, freq).
self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64)
self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool)
self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32)

# Get a list of the current inhid values for later
inhid_list = mir_data.in_data["inhid"].copy()

# Store a backup of the selection masks
backup_masks = {}
for item, obj in mir_data._metadata_attrs.items():
backup_masks[item] = obj._mask.copy()

# If no data is loaded, we want to load subsets of data to start rather than
# the whole block in one go, since this will save us 2x in memory.
inhid_step = len(inhid_list)
if (mir_data.vis_data is None) and (mir_data.auto_data is None):
inhid_step = (inhid_step // 8) + 1

for start in range(0, len(inhid_list), inhid_step):
# If no data is loaded, load up an eighth of the data at a time. This
# keeps the "extra" memory usage below that for the nsample_array attr,
# which is generated and filled _after_ this step (thus no extra memory
# should be required)
if (mir_data.vis_data is None) and (mir_data.auto_data is None):
# Note that the masks are combined via and operation.
mir_data.select(
where=("inhid", "eq", inhid_list[start : start + inhid_step])
)

# Call this convenience function in case we need to run the data filling
# multiple times (if we are loading up subsets of data)
self._prep_and_insert_data(
mir_data,
sphid_dict,
spdx_dict,
blhid_blt_order,
apply_flags=apply_flags,
apply_tsys=apply_tsys,
apply_dedoppler=apply_dedoppler,
)

for item, obj in mir_data._metadata_attrs.items():
# Because the select operation messes with the masks, we want to restore
# those in case we mucked with them earlier (so that subsequent selects
# behave as expected).
obj._mask = backup_masks[item].copy()

# Now assign our flexible arrays to the object itself
self.freq_array = freq_array
self.Nfreqs = Nfreqs
Expand Down Expand Up @@ -684,7 +643,6 @@ def _init_from_mir_parser(
for blt_key in blt_temp_dict.keys():
temp_dict = blt_temp_dict[blt_key]
integration_time[blt_key] = temp_dict["rinteg"]
# TODO: Using MIR V3 convention for lst, will need make it V2 compatible.
lst_array[blt_key] = temp_dict["lst"] * (np.pi / 12.0) # Hours -> rad
mjd_array[blt_key] = temp_dict["mjd"]
ant_1_array[blt_key] = temp_dict["iant1"]
Expand All @@ -702,9 +660,22 @@ def _init_from_mir_parser(
self.baseline_array = self.antnums_to_baseline(
self.ant_1_array, self.ant_2_array, attempt256=False
)
self.integration_time = integration_time
self.lst_array = lst_array
self.time_array = Time(mjd_array, scale="tt", format="mjd").utc.jd
self.integration_time = integration_time

# There is a minor issue w/ MIR datasets where the LSTs are calculated via
# a polled average rather than calculated for the mid-point, which results
# in some imprecision (and some nuisance warnings). Fix this by calculating the
# LSTs here, checking to make sure that they agree to within the expected
# precision (sampling rate is 20 Hz, and the max error to worry about is half a
# sample: 25 ms, or in radians, 2*pi/(40 * 86400) = pi / 1728000.
# TODO: Re-evaluate this if/when MIR data writer stops calculating LST this way
self.set_lsts_from_time_array()
if not np.allclose(lst_array, self.lst_array, rtol=0, atol=np.pi / 1728000.0):
# If this check fails, it means that there's something off w/ the lst values
# (to a larger degree than expected), and we'll pass them back to the user,
# who can inspect them directly and decide what to do.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should get printed somewhere with suggestions on what to check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I've added a warning message here in aa4bd2f -- it's a little on the long side, although it hopefully gives enough guidance to the user on what to do and when to worry (it looks like the polling rate wasn't always 20 Hz, so this still triggers for some older data sets, but still in most cases it's just a problem with the metadata, not actually the visibilities).

self.lst_array = lst_array

self.polarization_array = polarization_array
self.flex_spw_polarization_array = flex_pol
Expand Down Expand Up @@ -789,6 +760,55 @@ def _init_from_mir_parser(
self.filename = [os.path.basename(basename)]
self._filename.form = (1,)

# Finally, start the heavy lifting of loading the full data. We start this by
# creating arrays to plug visibilities and flags into. The array is shaped this
# way since when reading in a MIR file, we scan through the blt-axis the
# slowest and the freq-axis the fastest (i.e., the data is roughly ordered by
# blt, pol, freq).
self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64)
self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool)
self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32)

# Get a list of the current inhid values for later
inhid_list = mir_data.in_data["inhid"].copy()

# Store a backup of the selection masks
mir_data.save_mask("pre-select")

# If no data is loaded, we want to load subsets of data to start rather than
# the whole block in one go, since this will save us 2x in memory.
inhid_step = len(inhid_list)
if (mir_data.vis_data is None) and (mir_data.auto_data is None):
inhid_step = (inhid_step // 8) + 1

for start in range(0, len(inhid_list), inhid_step):
# If no data is loaded, load up an eighth of the data at a time. This
# keeps the "extra" memory usage below that for the nsample_array attr,
# which is generated and filled _after_ this step (thus no extra memory
# should be required)
if (mir_data.vis_data is None) and (mir_data.auto_data is None):
# Note that the masks are combined via an "and" operation.
mir_data.select(
where=("inhid", "eq", inhid_list[start : start + inhid_step])
)

# Call this convenience function in case we need to run the data filling
# multiple times (if we are loading up subsets of data)
self._prep_and_insert_data(
mir_data,
sphid_dict,
spdx_dict,
blhid_blt_order,
apply_flags=apply_flags,
apply_tsys=apply_tsys,
apply_dedoppler=apply_dedoppler,
)

# Because the select operation messes with the masks, we want to restore
# those in case we mucked with them earlier (so that subsequent selects
# behave as expected).
mir_data.restore_mask("pre-select")

# We call transpose here since vis_data above is shape (Nblts, Npols, Nfreqs),
# and we need to get it to (Nblts,Nfreqs, Npols) to match what UVData expects.
self.data_array = np.transpose(self.data_array, (0, 2, 1))
Expand Down
11 changes: 7 additions & 4 deletions pyuvdata/uvdata/mir_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4148,15 +4148,18 @@ def _make_v3_compliant(self):
for key, value in mjd_day_dict.items():
if isinstance(value, str):
mjd_day_dict[key] = Time(
datetime.strptime(value, "%b %d, %Y"), scale="tt"
).tt.mjd
datetime.strptime(value, "%b %d, %Y"), scale="utc"
).mjd

mjd_arr = (self.in_data["dhrs"] / 24.0) + np.array(
[mjd_day_dict[idx] for idx in self.in_data["iref_time"]]
)

# Tally the JD dates, since that's used for various helper functions
jd_arr = Time(mjd_arr, format="mjd", scale="utc").utc.jd
jd_arr = Time(mjd_arr, format="mjd", scale="utc").jd

# Also, convert MJD back into the expected TT timescale
mjd_arr = Time(mjd_arr, format="mjd", scale="utc").tt.mjd

# Calculate the LST at the time of obs
lst_arr = (12.0 / np.pi) * uvutils.get_lst_for_time(
Expand All @@ -4183,7 +4186,7 @@ def _make_v3_compliant(self):
# bl_data updates: ant1rx, ant2rx, u, v, w
# First, update the antenna receiver if these values are unfilled (true in some
# earlier tracks, no version demarcation notes it).
if np.all(self.bl_data["ant1rx"] == 0) and np.all(self.bl_data["ant2rx"] == 0):
if np.all(self.bl_data["ant1rx"] == 0) or np.all(self.bl_data["ant2rx"] == 0):
ipol = self.bl_data["ipol"]
irec = self.bl_data["irec"]

Expand Down
10 changes: 10 additions & 0 deletions pyuvdata/uvdata/tests/test_mir.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,3 +789,13 @@ def test_generate_sma_antpos_dict(use_file, sma_mir):
ant_dict = generate_sma_antpos_dict(filepath)
for ant_num, xyz_pos in zip(sma_mir.antenna_numbers, sma_mir.antenna_positions):
assert np.allclose(ant_dict[ant_num], xyz_pos)


def test_spw_consistency_warning(mir_data):
    """Check that inconsistent per-window metadata raises a warning, not an error."""
    # Corrupt the frequency resolution of a single spectral record so that it
    # disagrees with the rest of its window.
    mir_data.sp_data._data["fres"][1] *= 2

    # Zero out both receiver codes so the data set is treated as single-receiver.
    mir_data.bl_data._data["ant2rx"][:] = 0
    mir_data.bl_data._data["ant1rx"][:] = 0

    uv_obj = Mir()
    with uvtest.check_warnings(UserWarning, match="Discrepancy in fres"):
        uv_obj._init_from_mir_parser(mir_data)
12 changes: 0 additions & 12 deletions pyuvdata/uvdata/tests/test_mir_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,6 @@
)


@pytest.fixture(scope="session")
def mir_data_main():
mir_data = MirParser()

yield mir_data._load_test_data(load_cross=True, load_auto=True, has_auto=True)


@pytest.fixture(scope="function")
def mir_data(mir_data_main):
yield mir_data_main.copy()


@pytest.fixture(scope="module")
def compass_soln_file(tmp_path_factory):
tmp_path = tmp_path_factory.mktemp("mir_parser", numbered=True)
Expand Down
Loading