Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Init commit of IDEX packet parser #79

Merged
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0fdc769
Init commit of IDEX packet parser
bryan-harter Aug 25, 2023
30694b8
Committing test data
bryan-harter Aug 25, 2023
eadbad2
Adding xarray as a dependency
bryan-harter Aug 25, 2023
623fd94
Responding to code review
bryan-harter Aug 31, 2023
f04831c
Fixing the dependencies
bryan-harter Aug 31, 2023
e6ecfbe
updating poetry.lock to sync with main branch
bryan-harter Aug 31, 2023
f606807
Merge branch 'dev' into idex_decom
bryan-harter Aug 31, 2023
f1376b3
Getting rid of old import statement
bryan-harter Aug 31, 2023
03b14fa
Updating poetry lock file
bryan-harter Aug 31, 2023
5a63649
Fixing the lock file, for some reason bitstring is
bryan-harter Aug 31, 2023
e4d293f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 31, 2023
055e335
Updating to latest version of space packet parser
bryan-harter Aug 31, 2023
c2f2d61
Changing packet parser to use individual Dust
bryan-harter Sep 1, 2023
00c247a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 1, 2023
1c9b818
Changing the requirements in poetry for xarray
bryan-harter Sep 1, 2023
a3194f3
Responding to code review comments, adding a unit
bryan-harter Sep 5, 2023
9fcc75b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 5, 2023
a8edd9f
Updating poetry.lock
bryan-harter Sep 5, 2023
a588de6
Merge branch 'idex_decom' of https://github.com/bryan-harter/imap_pro…
bryan-harter Sep 5, 2023
10d257d
Logging an error if a new science type is received
bryan-harter Sep 5, 2023
69b985c
Determining High and Low sample now
bryan-harter Sep 5, 2023
fdde569
Add more comments and make the variables clearer
bryan-harter Sep 6, 2023
9d773ed
Fixing some of the variable names, adding comments
bryan-harter Sep 7, 2023
b2b6721
Bug fix, wasn't returning right thing in new func
bryan-harter Sep 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ repos:
rev: v4.4.0
hooks:
- id: check-added-large-files
args: ['--maxkb=1000']
- id: detect-aws-credentials
args: [--allow-missing-credentials]
- id: detect-private-key
- id: mixed-line-ending
- id: trailing-whitespace
- id: no-commit-to-branch
args: [--branch, main, --branch, dev]
args: [--branch, main]
- repo: https://github.com/psf/black
rev: 23.7.0
hooks:
Expand Down
Empty file.
366 changes: 366 additions & 0 deletions imap_processing/idex/idex_packet_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,366 @@
import logging

import bitstring
import numpy as np
import xarray as xr

# from lasp_packets import xtcedef, parser
from space_packet_parser import parser, xtcedef

from imap_processing import packet_definition_directory

TWENTY_MICROSECONDS = 20 * (10 ** (-6))

SCITYPE_MAPPING_TO_NAMES = {
2: "TOF_High",
4: "TOF_Low",
8: "TOF_Mid",
16: "Target_Low",
32: "Target_High",
64: "Ion_Grid",
}


class PacketParser:
"""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow numpydoc conventions according to the style guide: https://imap-processing.readthedocs.io/en/latest/development/style-guide/api-documentation.html

Parameters
------------

There is also no filename parameter, and I think parameters can be documented within the constructor below and put attributes here.

This class encapsulates the decom work needed to decom a daily file of IDEX data
received from the POC. The class is instantiated with a reference to a L0 file as
it exists on the local file system.

Attributes
----------
data (xarray.Dataset): An object containing all of the relevant data in the file

TODO
----
* Add method to generate l1a CDF
* Add method to generate quicklook

Examples
---------
>>> # Print out the data in a L0 file
>>> from imap_processing.idex.idex_packet_parser import PacketParser
>>> l0_file = "imap_processing/idex/tests/imap_idex_l0_20230725_v01-00.pkts"
>>> l0_data = PacketParser(l0_file)
>>> print(l0_data.data)

"""

def __init__(self, packet_file: str):
"""
This function takes in a local l0 pkts file and performs all of the decom work
directly in __init__().

Parameters
-----------
packet_file (str): The path and filename to the L0 file to read

Notes
-----
Currently assumes one L0 file will generate exactly one l1a file
"""

xtce_file = f"{packet_definition_directory}/idex_packet_definition.xml"
packet_definition = xtcedef.XtcePacketDefinition(xtce_document=xtce_file)
packet_parser = parser.PacketParser(packet_definition)

binary_data = bitstring.ConstBitStream(filename=packet_file)
packet_generator = packet_parser.generator(binary_data)

dust_events = {}

for packet in packet_generator:
if "IDX__SCI0TYPE" in packet.data:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indicate why this case matters with a comment? We aren't doing anything in an else block, do we want to log packets we didn't handle in an else-block?

scitype = packet.data["IDX__SCI0TYPE"].raw_value
event_number = packet.data["IDX__SCI0EVTNUM"].derived_value
if scitype == 1:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment here indicating this is the initial packet for the event and some others are coming in later that fill in the data?

# Initial packet for new dust event
# Further packets will fill in data
dust_events[event_number] = RawDustEvent(packet)
elif event_number not in dust_events:
raise KeyError(
f"Have not receive header information from event number\
{event_number}. Packets are possibly out of order!"
)
else:
# Populate the IDEXRawDustEvent with 1's and 0's
dust_events[event_number].parse_packet(packet)
else:
logging.warning(f"Unhandled packet received: {packet}")

processed_dust_impact_list = [
dust_event.process() for dust_event in dust_events.values()
]

self.data = xr.concat(processed_dust_impact_list, dim="Epoch")


class RawDustEvent:
HIGH_SAMPLE_FREQUENCY = 1 / 260
LOW_SAMPLE_FREQUENCY = 1 / 4.0625

def __init__(self, header_packet):
"""
This function initializes a raw dust event, with an FPGA Header Packet from IDEX

Parameters:
header_packet: The FPGA metadata event header

"""

# Calculate the impact time in seconds since Epoch
self.impact_time = (
header_packet.data["SHCOARSE"].derived_value
+ TWENTY_MICROSECONDS * header_packet.data["SHFINE"].derived_value
)

# Calculate the high sample and low sample trigger times
tof_delay = (
header_packet.data["IDX__TXHDRSAMPDELAY"].raw_value >> 2
) # First two bits are padding
mask = 0b1111111111
hgdelay = (tof_delay >> 20) & mask
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is hgdelay? can we spell out hg?

lspretrigblocks = (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add comment about why those bits are shifted? It helps to give a high level context to what is happening.

header_packet.data["IDX__TXHDRBLOCKS"].derived_value >> 16
) & 0b1111
hspretrigblocks = (
header_packet.data["IDX__TXHDRBLOCKS"].derived_value >> 6
) & 0b111111

self.lstriggertime = (
8 * self.LOW_SAMPLE_FREQUENCY * (lspretrigblocks + 1)
- self.HIGH_SAMPLE_FREQUENCY * hgdelay
)
self.hstriggertime = 512 * self.HIGH_SAMPLE_FREQUENCY * (hspretrigblocks + 1)

# Log the rest of the header
self._log_packet_info(header_packet)

# Initialize the binary data received from future packets
self.TOF_High_bits = ""
self.TOF_Mid_bits = ""
self.TOF_Low_bits = ""
self.Target_Low_bits = ""
self.Target_High_bits = ""
self.Ion_Grid_bits = ""

def _log_packet_info(self, packet):
"""
This function exists solely to log the parameters in the L0
FPGA header packet for new dust events, nothing here should affect the data
"""
event_num = packet.data["IDX__SCI0EVTNUM"].derived_value
logging.debug(f"^*****Event header {event_num}******^")
logging.debug(
f"Timestamp = {self.impact_time} seconds since epoch \
(Midnight January 1st, 2012)"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this IMAP specific epoch?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe so, it's just the epoch that they use to keep track time of in their header packets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see

)
# Extract the 17-22-bit integer (usually 8)
lspretrigblocks = (packet.data["IDX__TXHDRBLOCKS"].derived_value >> 16) & 0b1111
# Extract the next 4-bit integer (usually 8)
lsposttrigblocks = (
packet.data["IDX__TXHDRBLOCKS"].derived_value >> 12
) & 0b1111
# Extract the next 6 bits integer (usually 32)
hspretrigblocks = (
packet.data["IDX__TXHDRBLOCKS"].derived_value >> 6
) & 0b111111
# Extract the first 6 bits (usually 32)
hsposttrigblocks = (packet.data["IDX__TXHDRBLOCKS"].derived_value) & 0b111111
logging.debug("HS pre trig sampling blocks: " + str(hspretrigblocks))
logging.debug("LS pre trig sampling blocks: " + str(lspretrigblocks))
logging.debug("HS post trig sampling blocks: " + str(hsposttrigblocks))
logging.debug("LS post trig sampling blocks: " + str(lsposttrigblocks))
tof_delay = (
packet.data["IDX__TXHDRSAMPDELAY"].raw_value >> 2
) # First two bits are padding
mask = 0b1111111111
lgdelay = (tof_delay) & mask
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. hg means high gain. and mg means mid gain.

can we spell it out? I think it will help for readability.

mgdelay = (tof_delay >> 10) & mask
hgdelay = (tof_delay >> 20) & mask
logging.debug(f"High gain delay = {hgdelay} samples.")
logging.debug(f"Mid gain delay = {mgdelay} samples.")
logging.debug(f"Low gain delay = {lgdelay} samples.")
if (
packet.data["IDX__TXHDRLSTRIGMODE"].derived_value != 0
): # If this was a LS (Target Low Gain) trigger
self.Triggerorigin = "LS"
logging.debug("Low sampling trigger mode enabled.")
logging.debug(
"Packet low trigger mode = "
+ str(packet.data["IDX__TXHDRLGTRIGMODE"].derived_value)
)
logging.debug(
"Packet mid trigger mode = "
+ str(packet.data["IDX__TXHDRMGTRIGMODE"].derived_value)
)
logging.debug(
"Packet high trigger mode = "
+ str(packet.data["IDX__TXHDRHGTRIGMODE"].derived_value)
)
if packet.data["IDX__TXHDRLGTRIGMODE"].derived_value != 0:
logging.debug("Low gain TOF trigger mode enabled.")
self.Triggerorigin = "LG"
if packet.data["IDX__TXHDRMGTRIGMODE"].derived_value != 0:
logging.debug("Mid gain TOF trigger mode enabled.")
self.Triggerorigin = "MG"
if packet.data["IDX__TXHDRHGTRIGMODE"].derived_value != 0:
logging.debug("High gain trigger mode enabled.")
self.Triggerorigin = "HG"
logging.debug(
f"AID = {packet.data['IDX__SCI0AID'].derived_value}"
) # Instrument event number
logging.debug(
f"Event number = {packet.data['IDX__SCI0EVTNUM'].raw_value}"
) # Event number out of how many events constitute the file
logging.debug(
f"Rice compression enabled = {bool(packet.data['IDX__SCI0COMP'].raw_value)}"
)

def _parse_high_sample_waveform(self, waveform_raw: str):
"""
Parse a binary string representing a high sample waveform
Data arrives in 10 bit chunks
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to know!

"""
w = bitstring.ConstBitStream(bin=waveform_raw)
ints = []
while w.pos < len(w):
w.read("pad:2") # skip 2
ints += w.readlist(["uint:10"] * 3)
return ints[:-4]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In SWE, I was doing this "uint:8"] * 1260 because I had 1260 8-bit chunks. Relating that to here, you are skipping 2 bits and then reading three uint:10 at a time. So in total, you are reading 32-bits at a time in this while loop? Is that right?

Why do you take the last 4 and not the others? Is there reason for that?

Copy link
Member Author

@bryan-harter bryan-harter Sep 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes you're correct, 32-bits are being read at a time!

The last 4 numbers in the array are removed. I should leave a comment explaining it in the code, but basically it sounds like those last 4 numbers are not very useful. Sometimes they have data in them and sometimes they don't, but overall the instrument engineer said those last 4 numbers is not where the interesting data is. It's possible that we could add code in the future to consider those values (if they exist), but the sample code given to me just has them removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I had it wrong. You are taking everything except last four data. Can you add that in there? I think it will be helpful.


def _parse_low_sample_waveform(self, waveform_raw: str):
"""
Parse a binary string representing a low sample waveform
Data arrives in 12 bit chunks
"""
w = bitstring.ConstBitStream(bin=waveform_raw)
ints = []
while w.pos < len(w):
w.read("pad:8") # skip 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean # skip 8-bits here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes!

ints += w.readlist(["uint:12"] * 2)
return ints

def _calc_low_sample_resolution(self, num_samples: int):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a doc string for these two as well? It's been so helpful to follow!

Here, we are creating num_samples array where each element is equally spaced value between 0 to num_samples. Then we apply the formula, self.LOW_SAMPLE_FREQUENCY * time_low_sr_init - self.hstriggertime to each element. Is that right?

Do you know why we multiply by frequency and subtract by trigger time? I am asking out of curiosity.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly I have no idea why these times are calculated like this, that's just how Ethan was calculating these times in his initial analysis code. I'm going to ask him about it tomorrow when we tag up!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool.

time_low_sr_init = np.linspace(0, num_samples, num_samples)
time_low_sr_data = (
self.LOW_SAMPLE_FREQUENCY * time_low_sr_init - self.hstriggertime
)
return time_low_sr_data

def _calc_high_sample_resolution(self, num_samples: int):
time_high_sr_init = np.linspace(0, num_samples, num_samples)
time_high_sr_data = (
self.HIGH_SAMPLE_FREQUENCY * time_high_sr_init - self.lstriggertime
)
return time_high_sr_data

def parse_packet(self, packet):
"""
This function parses IDEX data packets to populate bit strings

Parameters
----------
packet: A single science data packet for one of the 6
IDEX observables
"""

scitype = packet.data["IDX__SCI0TYPE"].raw_value
raw_science_bits = packet.data["IDX__SCI0RAW"].raw_value
self._append_raw_data(scitype, raw_science_bits)

def _append_raw_data(self, scitype, bits):
"""
This function determines which variable to append the bits
to, given a specific scitype.
"""
if scitype == 2:
self.TOF_High_bits += bits
elif scitype == 4:
self.TOF_Low_bits += bits
elif scitype == 8:
self.TOF_Mid_bits += bits
elif scitype == 16:
self.Target_Low_bits += bits
elif scitype == 32:
self.Target_High_bits += bits
elif scitype == 64:
self.Ion_Grid_bits += bits
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.Ion_Grid_bits += bits
self.Ion_Grid_bits += bits
else:
logging.warning("Unknown science type received: [%s]", scitype)

Add some warnings/raises if we are getting into the edge cases?

else:
logging.warning("Unknown science type received: [%s]", scitype)

def process(self):
"""
To be called after all packets for the IDEX event have been parsed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
To be called after all packets for the IDEX event have been parsed
To be called after all packets for the IDEX event have been parsed.

Parses the binary data into numpy integer arrays, and combines them
into an xarray.Dataset object

Returns
-------
xarray.Dataset
A Dataset object containing the data from a single impact

"""

# Process the 6 primary data variables
tof_high_xr = xr.DataArray(
name="TOF_High",
data=[self._parse_high_sample_waveform(self.TOF_High_bits)],
dims=("Epoch", "Time_High_SR_dim"),
)
tof_low_xr = xr.DataArray(
name="TOF_Low",
data=[self._parse_high_sample_waveform(self.TOF_Low_bits)],
dims=("Epoch", "Time_High_SR_dim"),
)
tof_mid_xr = xr.DataArray(
name="TOF_Mid",
data=[self._parse_high_sample_waveform(self.TOF_Mid_bits)],
dims=("Epoch", "Time_High_SR_dim"),
)
target_high_xr = xr.DataArray(
name="Target_High",
data=[self._parse_low_sample_waveform(self.Target_High_bits)],
dims=("Epoch", "Time_Low_SR_dim"),
)
target_low_xr = xr.DataArray(
name="Target_Low",
data=[self._parse_low_sample_waveform(self.Target_Low_bits)],
dims=("Epoch", "Time_Low_SR_dim"),
)
ion_grid_xr = xr.DataArray(
name="Ion_Grid",
data=[self._parse_low_sample_waveform(self.Ion_Grid_bits)],
dims=("Epoch", "Time_Low_SR_dim"),
)
# Determine the 3 coordinate variables
epoch_xr = xr.DataArray(name="Epoch", data=[self.impact_time], dims=("Epoch"))

time_low_sr_xr = xr.DataArray(
name="Time_Low_SR",
data=[self._calc_low_sample_resolution(len(target_low_xr[0]))],
dims=("Epoch", "Time_Low_SR_dim"),
)

time_high_sr_xr = xr.DataArray(
name="Time_High_SR",
data=[self._calc_high_sample_resolution(len(tof_low_xr[0]))],
dims=("Epoch", "Time_High_SR_dim"),
)

# Combine to return a dataset object
return xr.Dataset(
data_vars={
"TOF_Low": tof_low_xr,
"TOF_High": tof_high_xr,
"TOF_Mid": tof_mid_xr,
"Target_High": target_high_xr,
"Target_Low": target_low_xr,
"Ion_Grid": ion_grid_xr,
},
coords={
"Epoch": epoch_xr,
"Time_Low_SR": time_low_sr_xr,
"Time_High_SR": time_high_sr_xr,
},
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really nice work!!!

Empty file.
Binary file not shown.
Loading