DUNE · jurenag · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/src/waffles/data_classes/Waveform.py b/src/waffles/data_classes/Waveform.py
@@ -15,6 +15,9 @@ class Waveform(WaveformAdcs):
     timestamp : int
         The timestamp value for this Waveform
     time_step_ns : float (inherited from WaveformAdcs)
+    daq_window_timestamp : int
+        The timestamp value for the DAQ window in which
+        this Waveform was acquired
     adcs : unidimensional numpy array of integers
     (inherited from WaveformAdcs)
     run_number : int
@@ -41,6 +44,7 @@ def __init__(
         self, 
         timestamp: int,
         time_step_ns: float,
+        daq_window_timestamp: int,
         adcs: np.ndarray,
         run_number: int,
         record_number: int,
@@ -56,6 +60,7 @@ def __init__(
         time_step_ns : float
             It is given to the 'time_step_ns' parameter of
             the base class initializer.
+        daq_window_timestamp : int
         adcs : unidimensional numpy array of integers
             It is given to the 'adcs' parameter of the base
             class initializer.
@@ -73,6 +78,7 @@ class initializer.
         # Shall we add add type checks here?
 
         self.__timestamp = timestamp
+        self.__daq_window_timestamp = daq_window_timestamp
         self.__run_number = run_number
         self.__record_number = record_number
         self.__endpoint = endpoint
@@ -89,6 +95,10 @@ class initializer.
     @property
     def timestamp(self):
         return self.__timestamp
+
+    @property
+    def daq_window_timestamp(self):
+        return self.__daq_window_timestamp
 
     @property
     def run_number(self):

diff --git a/src/waffles/input/input_utils.py b/src/waffles/input/input_utils.py
@@ -403,12 +403,11 @@ def __build_waveforms_list_from_root_file_using_uproot(
     bulk_data_tree (resp. meta_data_tree): uproot.TTree
         The tree from which the bulk data (resp. meta data)
         of the waveforms will be read. Branches whose name
-        start with 'adcs', 'channel', 'timestamp' and 'record'
-        (resp. 'run' and 'ticks_to_nsec') will be required.
+        start with 'adcs', 'channel', 'timestamp', 
+        'daq_timestamp' and 'record' (resp. 'run' and 
+        'ticks_to_nsec') will be required.
     set_offset_wrt_daq_window: bool
-        If True, then the bulk data tree must also have a
-        branch whose name starts with 'daq_timestamp'. In
-        this case, then the time_offset attribute of each
+        If True, then the time_offset attribute of each
         Waveform is set as the difference between its
         value for the 'timestamp' branch and the value
         for the 'daq_timestamp' branch, in such order,
@@ -484,6 +483,11 @@ def __build_waveforms_list_from_root_file_using_uproot(
         bulk_data_tree,
         'timestamp',
         'uproot')
+
+    daq_timestamp_branch, _ = find_tbranch_in_root_ttree(
+        bulk_data_tree,
+        'daq_timestamp',
+        'uproot')
 
     record_branch, _ = find_tbranch_in_root_ttree(
         bulk_data_tree,
@@ -521,6 +525,10 @@ def __build_waveforms_list_from_root_file_using_uproot(
             current_timestamp_array = timestamp_branch.array(
                 entry_start=branch_start,
                 entry_stop=branch_stop)
+
+            current_daq_timestamp_array = daq_timestamp_branch.array(
+                entry_start=branch_start,
+                entry_stop=branch_stop)
 
             current_record_array = record_branch.array(
                 entry_start=branch_start,
@@ -536,6 +544,7 @@ def __build_waveforms_list_from_root_file_using_uproot(
                     # 'time_to_nsec' value from the
                     # 'metadata' TTree is fixed
                     # meta_data[1],
+                    current_daq_timestamp_array[i],
                     np.array(current_adcs_array[i]),
                     meta_data[0],
                     current_record_array[i],
@@ -546,11 +555,6 @@ def __build_waveforms_list_from_root_file_using_uproot(
 
         raw_time_offsets = []
 
-        daq_timestamp_branch, _ = find_tbranch_in_root_ttree(
-            bulk_data_tree,
-            'daq_timestamp',
-            'uproot')
-
         for interval in clustered_idcs_to_retrieve:
 
             # Read the waveforms in contiguous blocks
@@ -568,12 +572,12 @@ def __build_waveforms_list_from_root_file_using_uproot(
             current_timestamp_array = timestamp_branch.array(
                 entry_start=branch_start,
                 entry_stop=branch_stop)
-
-            current_record_array = record_branch.array(
+            
+            current_daq_timestamp_array = daq_timestamp_branch.array(
                 entry_start=branch_start,
                 entry_stop=branch_stop)
 
-            current_daq_timestamp_array = daq_timestamp_branch.array(
+            current_record_array = record_branch.array(
                 entry_start=branch_start,
                 entry_stop=branch_stop)
 
@@ -586,6 +590,7 @@ def __build_waveforms_list_from_root_file_using_uproot(
                     current_timestamp_array[i],
                     16.,    # time_step_ns
                     # meta_data[1],
+                    current_daq_timestamp_array[i],
                     np.array(current_adcs_array[i]),
                     meta_data[0],
                     current_record_array[i],
@@ -632,17 +637,16 @@ def __build_waveforms_list_from_root_file_using_pyroot(
     bulk_data_tree (resp. meta_data_tree): ROOT.TTree
         The tree from which the bulk data (resp. meta data)
         of the waveforms will be read. Branches whose name
-        start with 'adcs', 'channel', 'timestamp' and 'record'
-        (resp. 'run' and 'ticks_to_nsec') will be required.
-        For more information on the expected data types for
-        these branches, check the WaveformSet_from_ROOT_file()
+        start with 'adcs', 'channel', 'timestamp', 
+        'daq_timestamp' and 'record' (resp. 'run' and 
+        'ticks_to_nsec') will be required. For more information 
+        on the expected data types for these branches, check 
+        the WaveformSet_from_root_file()
         function documentation.
     set_offset_wrt_daq_window: bool
-        If True, then the bulk data tree must also have a
-        branch whose name starts with 'daq_timestamp'. In
-        this case, then the time_offset attribute of each
-        Waveform is set as the difference between its
-        value for the 'timestamp' branch and the value
+        If True, then the time_offset attribute of each 
+        Waveform is set to the difference between its 
+        value for the 'timestamp' branch and its value
         for the 'daq_timestamp' branch, in such order,
         referenced to the minimum value of such difference
         among all the waveforms. This is useful to align
@@ -707,6 +711,19 @@ def __build_waveforms_list_from_root_file_using_pyroot(
     bulk_data_tree.SetBranchAddress(
         timestamp_branch_exact_name,
         timestamp_address)
+
+    _, daq_timestamp_branch_exact_name = find_tbranch_in_root_ttree(
+        bulk_data_tree,
+        'daq_timestamp',
+        'pyroot')
+
+    daq_timestamp_address = array.array(
+        root_to_array_type_code('l'),
+        [0])
+
+    bulk_data_tree.SetBranchAddress(
+        daq_timestamp_branch_exact_name,
+        daq_timestamp_address)
 
     _, record_branch_exact_name = find_tbranch_in_root_ttree(
         bulk_data_tree,
@@ -742,6 +759,7 @@ def __build_waveforms_list_from_root_file_using_pyroot(
                 # 'time_to_nsec' value from the
                 # 'metadata' TTree is fixed
                 # meta_data[1],   # time_step_ns
+                daq_timestamp_address[0],
                 np.array(adcs_address),
                 meta_data[0],
                 record_address[0],
@@ -752,16 +770,6 @@ def __build_waveforms_list_from_root_file_using_pyroot(
 
         raw_time_offsets = []
 
-        _, daq_timestamp_branch_exact_name = find_tbranch_in_root_ttree(
-            bulk_data_tree,
-            'daq_timestamp',
-            'pyroot')
-        daq_timestamp_address = array.array(root_to_array_type_code('l'),
-                                            [0])
-
-        bulk_data_tree.SetBranchAddress(daq_timestamp_branch_exact_name,
-                                        daq_timestamp_address)
-
         for idx in idcs_to_retrieve_:
 
             bulk_data_tree.GetEntry(int(idx))
@@ -773,6 +781,7 @@ def __build_waveforms_list_from_root_file_using_pyroot(
                 16.,
                 # time_step_ns
                 # meta_data[1],
+                daq_timestamp_address[0],
                 np.array(adcs_address),
                 meta_data[0],
                 record_address[0],

diff --git a/src/waffles/input/raw_hdf5_reader.py b/src/waffles/input/raw_hdf5_reader.py
@@ -171,6 +171,7 @@ def get_filepaths_from_rucio(rucio_filepath) -> list:
 
 def WaveformSet_from_hdf5_files(filepath_list : List[str] = [],
                                 read_full_streaming_data : bool = False,
+                                truncate_wfs_to_minimum : bool = False,
                                 folderpath : Optional[str] = None,
                                 nrecord_start_fraction : float = 0.0,
                                 nrecord_stop_fraction : float = 1.0,
@@ -190,6 +191,17 @@ def WaveformSet_from_hdf5_files(filepath_list : List[str] = [],
         If True (resp. False), then only the waveforms for which
         the 'is_fullstream' parameter in the fragment has a
         value equal to True (resp. False) will be considered.
+    truncate_wfs_to_minimum: bool
+        If True, then the waveforms will be truncated to
+        the minimum length among all the waveforms in the input 
+        file before being handed to the WaveformSet class
+        initializer. If False, then the waveforms will be
+        read and handed to the WaveformSet initializer as
+        they are. Note that WaveformSet.__init__() will raise 
+        an exception if the given waveforms are not homogeneous 
+        in length, so this parameter should be set to False 
+        only if the user is sure that all the waveforms in 
+        the input file have the same length.
     folderpath : str
         If given, then the value given to the 'filepath_list'
         parameter is ignored, and the list of filepaths to be
@@ -230,7 +242,17 @@ def WaveformSet_from_hdf5_files(filepath_list : List[str] = [],
     created=False
     for filepath in filepath_list:
         try:
-            aux = WaveformSet_from_hdf5_file(filepath, read_full_streaming_data, nrecord_start_fraction, nrecord_stop_fraction, subsample, wvfm_count, allowed_endpoints)
+            aux = WaveformSet_from_hdf5_file(
+                filepath,
+                read_full_streaming_data,
+                truncate_wfs_to_minimum,
+                nrecord_start_fraction,
+                nrecord_stop_fraction,
+                subsample,
+                wvfm_count,
+                allowed_endpoints,
+            )
+
         except Exception as error:
             print(error, "\n")
             print('Error reading file...')
@@ -245,6 +267,7 @@ def WaveformSet_from_hdf5_files(filepath_list : List[str] = [],
 
 def WaveformSet_from_hdf5_file(filepath : str, 
                                read_full_streaming_data : bool = False,
+                               truncate_wfs_to_minimum : bool = False,
                                nrecord_start_fraction : float = 0.0,
                                nrecord_stop_fraction : float = 1.0,
                                subsample : int = 1,
@@ -262,6 +285,17 @@ def WaveformSet_from_hdf5_file(filepath : str,
         If True (resp. False), then only the waveforms for which
         the 'is_fullstream' parameter in the fragment has a
         value equal to True (resp. False) will be considered.
+    truncate_wfs_to_minimum: bool
+        If True, then the waveforms will be truncated to
+        the minimum length among all the waveforms in the input 
+        file before being handed to the WaveformSet class
+        initializer. If False, then the waveforms will be
+        read and handed to the WaveformSet initializer as
+        they are. Note that WaveformSet.__init__() will raise 
+        an exception if the given waveforms are not homogeneous 
+        in length, so this parameter should be set to False 
+        only if the user is sure that all the waveforms in 
+        the input file have the same length.
     nrecord_start_fraction : float
         Used to select at which record to start reading.
         In particular floor(nrecord_start_fraction*(total records)) is the first record.
@@ -362,6 +396,7 @@ def WaveformSet_from_hdf5_file(filepath : str,
                     if not wvfm_index % subsample:
                         waveforms.append(Waveform(timestamps_frag[index],
                                                   16.,    # time_step_ns
+                                                  0,
                                                   np.array(adcs),
                                                   run_numb,
                                                   r[0],
@@ -370,6 +405,25 @@ def WaveformSet_from_hdf5_file(filepath : str,
                                                   time_offset=0))
                     wvfm_index += 1
                     if wvfm_index >= wvfm_count:
+
+                        if truncate_wfs_to_minimum:
+                            minimum_length = np.array(
+                                [len(wf.adcs) for wf in waveforms]
+                            ).min()
+
+                            for wf in waveforms:
+                                wf._WaveformAdcs__truncate_adcs(
+                                    minimum_length)
+
                         return WaveformSet(*waveforms)
+
+    if truncate_wfs_to_minimum:
+        minimum_length = np.array(
+            [len(wf.adcs) for wf in waveforms]
+        ).min()
+
+        for wf in waveforms:
+            wf._WaveformAdcs__truncate_adcs(
+                minimum_length)
 
     return WaveformSet(*waveforms)
diff --git a/src/waffles/input/raw_root_reader.py b/src/waffles/input/raw_root_reader.py
@@ -271,21 +271,22 @@ def WaveformSet_from_root_file(
             - 'ticks_to_nsec' 
 
         from which the values for the Waveform objects attributes 
-        RunNumber and TimeStep_ns will be taken respectively.
-            The bulk data TTree must have at least five branches,
+        run_number and time_step_ns will be taken respectively.
+            The bulk data TTree must have at least six branches,
         whose names should start with
 
             - 'adcs'
             - 'channel'
             - 'timestamp'
+            - 'daq_timestamp'
             - 'record'
             - 'is_fullstream'
 
         from which the values for the Waveform objects attributes
-        Adcs, Channel, Timestamp and RecordNumber will be taken 
-        respectively. The 'is_fullstream' branch is used to 
-        decide whether a certain waveform should be grabbed 
-        or not, depending on the value given to the
+        adcs, channel, timestamp, daq_window_timestamp, and 
+        record_number will be taken respectively. The 'is_fullstream' 
+        branch is used to decide whether a certain waveform should 
+        be grabbed or not, depending on the value given to the
         'read_full_streaming_data' parameter.
     library: str
         The library to be used to read the input ROOT file. 
@@ -303,15 +304,13 @@ def WaveformSet_from_root_file(
             - 'adcs'            : vector<short>
             - 'channel'         : 'S', i.e. a 16 bit signed integer
             - 'timestamp'       : 'l', i.e. a 64 bit unsigned integer
+            - 'daq_timestamp'   : 'l', i.e. a 64 bit unsigned integer
             - 'record'          : 'i', i.e. a 32 bit unsigned integer
             - 'is_fullstream'   : 'O', i.e. a boolean
 
-        Additionally, if set_offset_wrt_daq_window is True,
-        then the 'daq_timestamp' branch must be of type 'l',
-        i.e. a 64 bit unsigned integer. Type checks are not
-        implemented here. If these requirements are not met,
-        the read data may be corrupted or a a segmentation 
-        fault may occur in the reading process.
+        Type checks are not implemented here. If these 
+        requirements are not met, the read data may be corrupted 
+        or a segmentation fault may occur in the reading process.
     bulk_data_tree_name (resp. meta_data_tree_name): str
         Name of the bulk-data (meta-data) tree which will be 
         extracted from the given ROOT file. The first object 
@@ -320,10 +319,8 @@ def WaveformSet_from_root_file(
         will be identified as the bulk-data (resp. meta-data) 
         tree.
     set_offset_wrt_daq_window: bool
-        If True, then the bulk data tree must also have a
-        branch whose name starts with 'daq_timestamp'. In
-        this case, then the TimeOffset attribute of each
-        waveform is set as the difference between its
+        If True, then the time_offset attribute of each
+        waveform is set to the difference between its
         value for the 'timestamp' branch and the value
         for the 'daq_timestamp' branch, in such order,
         referenced to the minimum value of such difference
@@ -333,8 +330,7 @@ def WaveformSet_from_root_file(
         that the time overlap of every waveform is not 
         null, otherwise an exception will be eventually
         raised by the WaveformSet initializer. If False, 
-        then the 'daq_timestamp' branch is not queried 
-        and the TimeOffset attribute of each waveform 
+        then the time_offset attribute of each waveform 
         is set to 0.
     read_full_streaming_data: bool
         If True (resp. False), then only the waveforms for which 
@@ -480,7 +476,7 @@ def WaveformSet_from_root_file(
 
     if truncate_wfs_to_minimum:
 
-        minimum_length = np.array([len(wf.Adcs) for wf in waveforms]).min()
+        minimum_length = np.array([len(wf.adcs) for wf in waveforms]).min()
 
         for wf in waveforms:
             wf._WaveformAdcs__truncate_adcs(minimum_length)