Minor fixes for handling of older SMA data #1399
@@ -343,16 +343,22 @@ def _init_from_mir_parser(
         if len(pol_dict) == 2:
             # If dual-pol, then we need to check if the tunings are split, because
             # the two polarizations will effectively be concat'd across the freq
-            # axis instead of the pol axis.
+            # axis instead of the pol axis. First, see if we have two diff receivers
             rxa_mask = ant1_rxa_mask & ant2_rxa_mask
             rxb_mask = ~(ant1_rxa_mask | ant2_rxa_mask)

             if np.any(rxa_mask) and np.any(rxb_mask):
-                # If we have both VV and HH data, check to see that the tunings of the
-                # two receivers match.
-                loa_freq = np.median(mir_data.sp_data["gunnLO"][rxa_mask])
-                lob_freq = np.median(mir_data.sp_data["gunnLO"][rxb_mask])
-                pol_split_tuning = not np.isclose(loa_freq, lob_freq)
+                # If we have both VV and HH data, check to see that the frequencies of
+                # each of the spectral chunks match. If they do, then we can concat
+                # across the polarization axis, but if _not_, we should treat this as
+                # a pol-split data set.
+                fsky_vals = mir_data.sp_data["fsky"]
+                chunk_vals = mir_data.sp_data["corrchunk"]
+                loa_chunks = set(zip(fsky_vals[rxa_mask], chunk_vals[rxa_mask]))
+                lob_chunks = set(zip(fsky_vals[rxb_mask], chunk_vals[rxb_mask]))
+                pol_split_tuning = not (
+                    loa_chunks.issubset(lob_chunks) or lob_chunks.issubset(loa_chunks)
+                )

         # Map MIR pol code to pyuvdata/AIPS polarization number
         pol_code_dict = {}
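As context for the new subset test, here is a minimal runnable sketch with toy values (the arrays and masks below are hypothetical stand-ins for the sp_data fields and receiver masks used above):

    import numpy as np

    # Toy stand-ins: two receivers observing the same two chunks at the same
    # sky frequencies (values are made up for illustration)
    fsky_vals = np.array([230.5, 231.0, 230.5, 231.0])  # GHz
    chunk_vals = np.array([1, 2, 1, 2])                 # correlator chunk numbers
    rxa_mask = np.array([True, True, False, False])     # records from receiver A
    rxb_mask = ~rxa_mask                                # records from receiver B

    # Reduce each receiver to a set of (frequency, chunk) pairs
    loa_chunks = set(zip(fsky_vals[rxa_mask], chunk_vals[rxa_mask]))
    lob_chunks = set(zip(fsky_vals[rxb_mask], chunk_vals[rxb_mask]))

    # Matching (or nested) tunings mean the data can be concatenated across
    # the pol axis; otherwise it is treated as pol-split
    pol_split_tuning = not (
        loa_chunks.issubset(lob_chunks) or lob_chunks.issubset(loa_chunks)
    )
    print(pol_split_tuning)  # False -> concat across the pol axis

Relative to the removed gunnLO median comparison, this checks the per-chunk sky frequencies directly and tolerates the case where one receiver's windows are a subset of the other's, which appears to be the situation that arises in some older SMA data sets.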
@@ -419,9 +425,13 @@ def _init_from_mir_parser(

             # Make sure that something weird hasn't happened with the metadata (this
             # really should never happen, only one value should exist per window).
-            assert np.allclose(spw_fsky, mir_data.sp_data["fsky"][data_mask])
-            assert np.allclose(spw_fres, mir_data.sp_data["fres"][data_mask])
-            assert np.allclose(spw_nchan, mir_data.sp_data["nch"][data_mask])
+            for val, item in zip(
+                [spw_fsky, spw_fres, spw_nchan], ["fsky", "fres", "nch"]
+            ):
+                if not np.allclose(val, mir_data.sp_data[item][data_mask]):
+                    warnings.warn(
+                        "Discrepancy in %s for win %i sb %i pol %i." % (item, *spdx)
+                    )

             # Get the data in the right units and dtype
             spw_fsky = float(spw_fsky * 1e9)  # GHz -> Hz
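Since the hard asserts are now warnings, a read no longer aborts when a single spectral record disagrees with its window. As a hedged sketch (the spdx tuple below is made up), a caller who still wants a hard failure can promote just these warnings back into exceptions:

    import warnings

    # Hypothetical (window, sideband, pol) tuple of the kind used in the warning
    item, spdx = "fsky", (1, 1, -5)

    with warnings.catch_warnings():
        # Escalate only the metadata-discrepancy warnings into errors
        warnings.filterwarnings("error", message="Discrepancy in .*")
        try:
            warnings.warn("Discrepancy in %s for win %i sb %i pol %i." % (item, *spdx))
        except UserWarning as err:
            print("caught:", err)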
@@ -499,57 +509,6 @@ def _init_from_mir_parser(
         for key in spdx_dict:
             spdx_dict[key]["ch_slice"] = spw_dict[spdx_dict[key]["spw_id"]]["ch_slice"]

-        # Create arrays to plug visibilities and flags into. The array is shaped this
-        # way since when reading in a MIR file, we scan through the blt-axis the
-        # slowest and the freq-axis the fastest (i.e., the data is roughly ordered by
-        # blt, pol, freq).
-        self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64)
-        self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool)
-        self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32)
-
-        # Get a list of the current inhid values for later
-        inhid_list = mir_data.in_data["inhid"].copy()
-
-        # Store a backup of the selection masks
-        backup_masks = {}
-        for item, obj in mir_data._metadata_attrs.items():
-            backup_masks[item] = obj._mask.copy()
-
-        # If no data is loaded, we want to load subsets of data to start rather than
-        # the whole block in one go, since this will save us 2x in memory.
-        inhid_step = len(inhid_list)
-        if (mir_data.vis_data is None) and (mir_data.auto_data is None):
-            inhid_step = (inhid_step // 8) + 1
-
-        for start in range(0, len(inhid_list), inhid_step):
-            # If no data is loaded, load up a quarter of the data at a time. This
-            # keeps the "extra" memory usage below that for the nsample_array attr,
-            # which is generated and filled _after_ this step (thus no extra memory
-            # should be required)
-            if (mir_data.vis_data is None) and (mir_data.auto_data is None):
-                # Note that the masks are combined via and operation.
-                mir_data.select(
-                    where=("inhid", "eq", inhid_list[start : start + inhid_step])
-                )
-
-            # Call this convenience function in case we need to run the data filling
-            # multiple times (if we are loading up subsets of data)
-            self._prep_and_insert_data(
-                mir_data,
-                sphid_dict,
-                spdx_dict,
-                blhid_blt_order,
-                apply_flags=apply_flags,
-                apply_tsys=apply_tsys,
-                apply_dedoppler=apply_dedoppler,
-            )
-
-        for item, obj in mir_data._metadata_attrs.items():
-            # Because the select operation messes with the masks, we want to restore
-            # those in case we mucked with them earlier (so that subsequent selects
-            # behave as expected).
-            obj._mask = backup_masks[item].copy()
-
         # Now assign our flexible arrays to the object itself
         self.freq_array = freq_array
         self.Nfreqs = Nfreqs
@@ -684,7 +643,6 @@ def _init_from_mir_parser(
         for blt_key in blt_temp_dict.keys():
             temp_dict = blt_temp_dict[blt_key]
             integration_time[blt_key] = temp_dict["rinteg"]
-            # TODO: Using MIR V3 convention for lst, will need make it V2 compatible.
             lst_array[blt_key] = temp_dict["lst"] * (np.pi / 12.0)  # Hours -> rad
             mjd_array[blt_key] = temp_dict["mjd"]
             ant_1_array[blt_key] = temp_dict["iant1"]
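For reference, the lst conversion in the surrounding context lines is the usual hours-to-radians scaling: a full 24 h of LST corresponds to 2*pi rad, so the factor is pi/12. A one-line check (with a made-up LST value):

    import numpy as np

    lst_hours = 18.5                      # hypothetical LST from the MIR metadata
    lst_rad = lst_hours * (np.pi / 12.0)  # hours -> radians
    assert np.isclose(lst_rad, 2 * np.pi * lst_hours / 24.0)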
@@ -702,9 +660,22 @@ def _init_from_mir_parser(
         self.baseline_array = self.antnums_to_baseline(
             self.ant_1_array, self.ant_2_array, attempt256=False
         )
-        self.integration_time = integration_time
-        self.lst_array = lst_array
         self.time_array = Time(mjd_array, scale="tt", format="mjd").utc.jd
+        self.integration_time = integration_time
+
+        # There is a minor issue w/ MIR datasets where the LSTs are calculated via
+        # a polled average rather than calculated for the mid-point, which results
+        # in some imprecision (and some nuisance warnings). Fix this by calculating the
+        # LSTs here, checking to make sure that they agree to within the expected
+        # precision (sampling rate is 20 Hz, and the max error to worry about is half a
+        # sample: 25 ms, or in radians, 2*pi/(40 * 86400) = pi / 1728000).
+        # TODO: Re-evaluate this if/when MIR data writer stops calculating LST this way
+        self.set_lsts_from_time_array()
+        if not np.allclose(lst_array, self.lst_array, rtol=0, atol=np.pi / 1728000.0):
+            # If this check fails, it means that there's something off w/ the lst
+            # values (to a larger degree than expected), and we'll pass them back to
+            # the user, who can inspect them directly and decide what to do.
+            self.lst_array = lst_array

         self.polarization_array = polarization_array
         self.flex_spw_polarization_array = flex_pol

Review comment: This should get printed somewhere with suggestions on what to check.

Author reply: Okay, I've added a warning message here in aa4bd2f -- it's a little on the long side, although it hopefully gives enough guidance to the user on what to do and when to worry (it looks like the polling rate wasn't always 20 Hz, so this still triggers for some older data sets, but in most cases it's just a problem with the metadata, not actually the visibilities).
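The tolerance in the new check follows directly from the numbers in the comment; a quick standalone verification of the arithmetic:

    import numpy as np

    sample_rate = 20.0               # Hz, the LST polling rate cited above
    half_sample = 0.5 / sample_rate  # 25 ms, the worst-case timing error
    # 25 ms of Earth rotation, at 2*pi radians per 86400-second day
    tol_rad = half_sample * 2 * np.pi / 86400.0
    assert np.isclose(tol_rad, np.pi / 1728000.0)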
@@ -789,6 +760,55 @@ def _init_from_mir_parser(
         self.filename = [os.path.basename(basename)]
         self._filename.form = (1,)

+        # Finally, start the heavy lifting of loading the full data. We start this by
+        # creating arrays to plug visibilities and flags into. The array is shaped this
+        # way since when reading in a MIR file, we scan through the blt-axis the
+        # slowest and the freq-axis the fastest (i.e., the data is roughly ordered by
+        # blt, pol, freq).
+        self.data_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.complex64)
+        self.flag_array = np.ones((Nblts, Npols, Nfreqs), dtype=bool)
+        self.nsample_array = np.zeros((Nblts, Npols, Nfreqs), dtype=np.float32)
+
+        # Get a list of the current inhid values for later
+        inhid_list = mir_data.in_data["inhid"].copy()
+
+        # Store a backup of the selection masks
+        mir_data.save_mask("pre-select")
+
+        # If no data is loaded, we want to load subsets of data to start rather than
+        # the whole block in one go, since this will save us 2x in memory.
+        inhid_step = len(inhid_list)
+        if (mir_data.vis_data is None) and (mir_data.auto_data is None):
+            inhid_step = (inhid_step // 8) + 1
+
+        for start in range(0, len(inhid_list), inhid_step):
+            # If no data is loaded, load up a quarter of the data at a time. This
+            # keeps the "extra" memory usage below that for the nsample_array attr,
+            # which is generated and filled _after_ this step (thus no extra memory
+            # should be required)
+            if (mir_data.vis_data is None) and (mir_data.auto_data is None):
+                # Note that the masks are combined via and operation.
+                mir_data.select(
+                    where=("inhid", "eq", inhid_list[start : start + inhid_step])
+                )
+
+            # Call this convenience function in case we need to run the data filling
+            # multiple times (if we are loading up subsets of data)
+            self._prep_and_insert_data(
+                mir_data,
+                sphid_dict,
+                spdx_dict,
+                blhid_blt_order,
+                apply_flags=apply_flags,
+                apply_tsys=apply_tsys,
+                apply_dedoppler=apply_dedoppler,
+            )
+
+        # Because the select operation messes with the masks, we want to restore
+        # those in case we mucked with them earlier (so that subsequent selects
+        # behave as expected).
+        mir_data.restore_mask("pre-select")

         # We call transpose here since vis_data above is shape (Nblts, Npols, Nfreqs),
         # and we need to get it to (Nblts, Nfreqs, Npols) to match what UVData expects.
         self.data_array = np.transpose(self.data_array, (0, 2, 1))
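The chunked-loading logic added above splits the integration list into roughly eight pieces whenever no visibility or auto data is pre-loaded, so only a fraction of the raw data is resident at a time. A stripped-down sketch with hypothetical stand-ins (no pyuvdata objects involved):

    # Hypothetical integration header IDs standing in for mir_data.in_data["inhid"]
    inhid_list = list(range(100))

    inhid_step = len(inhid_list)            # default: process everything in one pass
    data_preloaded = False                  # stand-in for the vis_data/auto_data checks
    if not data_preloaded:
        inhid_step = (inhid_step // 8) + 1  # -> 13 IDs per batch for 100 integrations

    for start in range(0, len(inhid_list), inhid_step):
        batch = inhid_list[start : start + inhid_step]
        # In the real code: mir_data.select(where=("inhid", "eq", batch)), then
        # _prep_and_insert_data(...) fills the output arrays for this batch
        print(len(batch), "integrations in this batch")

The save_mask/restore_mask pair around the loop is what lets the repeated select calls run without permanently altering the selection state, so that subsequent selects behave as expected.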
Review comment: Suggest adding some guidance in the warning message on whether this is a small discrepancy vs. something critical with the data. Or would that be caught in the checks elsewhere?

Author reply: I extended the error message here in aa4bd2f -- the one time I saw this pop up (it used to throw an AssertionError), it was because a single record was bad, but it can also get triggered if there's a more significant issue to be checked. In the end I figured it's probably best to just point the user to what values should be looked at...