Skip to content

Commit

Permalink
Merge pull request #49 from SuperDARN/release/1.2
Browse files Browse the repository at this point in the history
Release pyDARNio v1.2.0
  • Loading branch information
carleyjmartin authored Nov 29, 2022
2 parents 48d5941 + 3e17dee commit bd91eb2
Show file tree
Hide file tree
Showing 14 changed files with 1,631 additions and 88 deletions.
29 changes: 20 additions & 9 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
{
"creators":[
{
"name": "SuperDARN Data Standards Working Group"
"name": "SuperDARN Data Visualization Working Group"
},
{
"affiliation": "University of Saskatchewan",
"name": "Rohel, R.A."
"name": "Rohel, R.A.",
"orcid": "0000-0003-2208-1553"
},
{
"affiliation": "The University Centre in Svalbard",
"name": "Bland, E.C.",
"orcid": "0000-0002-0252-0400"
},
{
"affiliation": "University of Saskatchewan",
"name": "Martin, C.J.",
"orcid": "0000-0002-8278-9783"
},
{
"affiliation": "Johns Hopkins University",
Expand Down Expand Up @@ -34,15 +45,15 @@
"affiliation": "University of Saskatchewan",
"name": "Kotyk, K."
},
{
"affiliation": "University of Saskatchewan",
"name": "Martin, C.J.",
"orcid": "0000-0002-8278-9783"
},
{
"affiliation": "University of Saskatchewan",
"name": "Schmidt, M.T.",
"orcid": "0000-0002-3265-977X"
}
},
{
"affiliation": "Virginia Tech",
"name": "Shi, X.",
"orcid": "0000-0001-8425-8241"
},
]
}
}
14 changes: 6 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@ Python data IO library for the Super Dual Auroral Radar Network (SuperDARN).

## Changelog

## Version 1.1.1 - Release!

pyDARNio is released! This is a patch release to address the following issues:
- Bug fix with initializing empty arrays when converting HDF5 files from site- to array-structured
- Bug fixes with converting files from HDF5 to DMAP
- correctly check blanked_samples
- support multiple beams per record
- replacement of far-range lag0 data in rawacf conversion
## Version 1.2.0 - Release!

pyDARNio has a new release! This is a minor release which includes:
- Handling of extra fields in fitacf files coming in the next RST release, including the name of the ACF fitting algorithm used (`algorithm`), the value of TDIFF used to calculate the elevation angles (`tdiff`), and more descriptive field names for the elevation angle fields in FitACF 3.0 (`elv_error` & `elv_fitted`)
- More flexibility around optional fields for all file types
- Updates and speed improvements for Borealis file I/O

## Documentation

Expand Down
1 change: 1 addition & 0 deletions pydarnio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@
from .borealis.borealis import BorealisRead
from .borealis.borealis import BorealisWrite
from .borealis.borealis_convert import BorealisConvert
from .borealis.borealis_restructure import BorealisRestructure
179 changes: 176 additions & 3 deletions pydarnio/borealis/base_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@

from collections import OrderedDict
from datetime import datetime
from typing import Callable
from typing import Callable, List
import h5py

from pydarnio import borealis_exceptions

Expand Down Expand Up @@ -173,6 +174,14 @@ class BaseFormat():
array_array_dtypes: dict
fields in the array files that are made of numpy arrays, with their
given data type.
string_fields: list
List of all fields that contain string data.
single_string_fields: list
List of single element fields which are string types.
array_string_fields: list
List of array fields which are string types.
bool_types: list
List of single element fields that have boolean data types.
_site_to_array(data_dict): dict
Convert an OrderedDict of site data to array data using the information
provided for the specific data format.
Expand Down Expand Up @@ -437,6 +446,39 @@ def site_specific_fields_generate(cls):
"""
return {}

@classmethod
def array_specific_fields_iterative_generator(cls):
    """
    Retrieve generator functions for array-only fields that must be
    built by inspecting each record of the file.

    Returns
    -------
    dict
        Maps the name of each array-specific field to a function that
        takes a single site record (site data dictionary) and produces
        that field's value for the array-structured file. Used when
        restructuring from site to array style.

    Notes
    -----
    All possible fields = single_element_types + array_dtypes. Fields
    are classified into four restructuring categories: shared_fields
    (same value in every record), unshared_fields (unique value/array
    per record), array_specific_fields (fields unique to array files,
    mainly where dimensions vary between records so the count must be
    stored), and site_specific_fields (fields unique to site files,
    mainly where flattened-array dimensions must be stored).

    The base format defines no such fields, so an empty dictionary is
    returned; child classes with array-specific fields that depend on
    per-record information should override this method.
    """
    generators = {}
    return generators

# STATIC METHODS THAT VARY BY FORMAT
# i.e. methods used in restructuring that the format to/from site
# structure for interpreting site data. These formats
Expand Down Expand Up @@ -508,6 +550,81 @@ def flatten_site_arrays(records: OrderedDict) -> OrderedDict:
new_records = copy.deepcopy(records)
return new_records

@staticmethod
def site_get_max_dims(filename: str, unshared_parameters: List[str]):
    """
    Scan every record of a site file to find the maximum dimensions of
    each unshared parameter field.

    Used to pre-size arrays for site-to-array conversion so arrays do
    not need to be padded/regrown midway through conversion.

    Parameters
    ----------
    filename: str
        Name of the site file being checked.
    unshared_parameters: List[str]
        Field names that are not shared between records in the site
        file, i.e. fields that may have different dimensions per record.

    Returns
    -------
    fields_max_dims: dict
        Field name (str) -> maximum dimensions (tuple) required to hold
        that field across all records.
    max_num_sequences: int
        Maximum value of the 'num_sequences' attribute over all records.
    max_num_beams: int
        Maximum first-dimension length of 'beam_nums' over all records.

    Raises
    ------
    ValueError
        If the 'pulse_phase_offset' dataset has an unexpected size that
        is neither consistent with num_sequences * num_pulses nor the
        known deepdish empty-array special case.
    """
    # Start every field at an empty shape; scalar (attribute) fields
    # simply keep this empty tuple.
    fields_max_dims = {key: () for key in unshared_parameters}
    max_num_sequences = 0
    max_num_beams = 0

    # Open site file to read with h5py, iterate over all records in the
    # file, and iterate through all fields required to find max dims
    # needed for conversion to array file.
    with h5py.File(filename, 'r') as site_file:
        for rec_idx, record_name in enumerate(site_file):
            # NOTE(review): 'dims' is unused; only the field name is
            # needed inside the loop.
            for field, dims in fields_max_dims.items():
                try:
                    # TypeError on booleans (ie: scan_start_marker)
                    # KeyError if field is dataset instead of attribute
                    field_value = site_file[record_name].attrs[field]
                    if field == 'num_sequences':
                        max_num_sequences = max(max_num_sequences, field_value)
                # NOTE(review): 'e' is bound but unused.
                except (KeyError, TypeError) as e:
                    try:
                        # Raises KeyError if field DNE as dataset
                        field_shape = site_file[record_name][field].shape
                        if field == 'pulse_phase_offset':
                            # Borealis files are written with deepdish, and this field is sometimes written
                            # as an empty array. If read in by h5py, h5py reads the dimensions as the data
                            # so here we check to catch that case.
                            actual_size = site_file[record_name][field].size
                            num_sequences = site_file[record_name]['data_dimensions'][1]
                            num_pulses = site_file[record_name]['pulses'].size
                            if actual_size != num_sequences * num_pulses:
                                if actual_size == 1:  # This is the special case
                                    field_shape = (0,)
                                else:
                                    raise ValueError(f'Unexpected shape of field {field}: {field_shape}')
                    except KeyError:
                        # Field absent from this record entirely; skip it.
                        continue
                    # Initialize shape to first record's field dimensions
                    if rec_idx == 0:
                        fields_max_dims[field] = field_shape
                    # NOTE(review): indentation reconstructed from a scraped
                    # diff — 'beam_nums' appears to be tracked only through
                    # max_num_beams, while all other fields take the
                    # element-wise-max branch below; confirm against the
                    # upstream file.
                    if field == 'beam_nums':
                        max_num_beams = max(field_shape[0], max_num_beams)
                    else:
                        # Update dims to keep largest for all records
                        # (element-wise max of the stored and current shape).
                        new_shape = map(lambda dima, dimb: max(dima, dimb),
                                        fields_max_dims[field],
                                        field_shape)
                        fields_max_dims[field] = tuple(new_shape)
    return fields_max_dims, max_num_sequences, max_num_beams

# CLASS METHODS COMMON ACROSS FORMATS
# i.e. class methods that build off the other class methods so generally
# do not need to be overwritten by the formats.
Expand Down Expand Up @@ -836,6 +953,63 @@ def array_array_dtypes(cls):

return array_array_dtypes

@classmethod
def string_fields(cls):
    """
    Retrieve all fields of the format that hold string data in the
    records.

    Returns
    -------
    list
        Every string-typed field of the format: the single-element
        string fields followed by the array string fields.
    """
    scalar_strings = cls.single_string_fields()
    array_strings = cls.array_string_fields()
    return scalar_strings + array_strings

@classmethod
def single_string_fields(cls):
    """
    Retrieve the fields of the format whose single-element values are
    strings.

    Returns
    -------
    list
        All single-element string fields in records of the format.
    """
    scalar_types = cls.single_element_types()
    return [field for field in scalar_types if scalar_types[field] == np.str_]

@classmethod
def array_string_fields(cls):
    """
    Retrieve the fields of the format that hold arrays of strings in
    the records.

    Returns
    -------
    list
        All fields whose arrays contain strings in records of the
        format.
    """
    dtypes = cls.array_dtypes()
    return list(filter(lambda field: dtypes[field] == np.str_, dtypes))

@classmethod
def bool_types(cls):
    """
    Retrieve the fields of the format that hold boolean data in the
    records.

    Returns
    -------
    list
        All single-element boolean fields in records of the format.
    """
    boolean_fields = []
    for field, dtype in cls.single_element_types().items():
        if dtype == np.bool_:
            boolean_fields.append(field)
    return boolean_fields

@classmethod
def _site_to_array(cls, data_dict: OrderedDict) -> dict:
"""
Expand Down Expand Up @@ -952,7 +1126,7 @@ class methods used inside this method should be specific
# dims with a determined max value
data_buffer = data_dict[k][field]
buffer_shape = data_buffer.shape
index_slice = [slice(0, i) for i in buffer_shape]
index_slice = [slice(0, i) for i in buffer_shape if i != 0]
# insert record index at start of array's slice list
index_slice.insert(0, rec_idx)
index_slice = tuple(index_slice)
Expand Down Expand Up @@ -1162,7 +1336,6 @@ def find_max_field_len(records: OrderedDict) -> int:

return find_max_field_len


@staticmethod
def find_max_pulse_phase_offset(records: OrderedDict) -> int:
"""
Expand Down
2 changes: 2 additions & 0 deletions pydarnio/borealis/borealis_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def __init__(self, filename: str, borealis_filetype: str):
try:
version = dd.io.load(self.filename,
group='/borealis_git_hash').split('-')[0]
version = '.'.join(version.split('.')[:2]) # vX.Y, ignore patch revision
except ValueError as err:
raise borealis_exceptions.BorealisStructureError(
' {} Could not find the borealis_git_hash required to '
Expand Down Expand Up @@ -361,6 +362,7 @@ def __init__(self, filename: str, borealis_arrays: dict,
# 'vX.X'
try:
version = self._arrays['borealis_git_hash'].split('-')[0]
version = '.'.join(version.split('.')[:2]) # vX.Y, ignore patch revision
except KeyError as err:
raise borealis_exceptions.BorealisStructureError(
' {} Could not find the borealis_git_hash required to '
Expand Down
41 changes: 20 additions & 21 deletions pydarnio/borealis/borealis_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,12 +363,12 @@ def _is_convertible_to_iqdat(self) -> bool:
sample_spacing = int(record['tau_spacing'] /
record['tx_pulse_len'])

# Check to see if tagged version. If not, use 255.255
git_hash = record['borealis_git_hash'].split('-')[0]
major_version, minor_version = git_hash.split('.')
if major_version[0] == 'v':
borealis_major_revision = major_version[1:]
borealis_minor_revision = minor_version
# Borealis git tag version numbers. If not a tagged version,
# then use 255.255
if record['borealis_git_hash'][0] == 'v': # tagged version, non-tagged versions have hexadecimal
version = record['borealis_git_hash'].split('-')[0].split('.')
borealis_major_revision = version[0][1:] # strip off the 'v'
borealis_minor_revision = version[1]
else:
borealis_major_revision = 255
borealis_minor_revision = 255
Expand Down Expand Up @@ -439,12 +439,12 @@ def _is_convertible_to_rawacf(self) -> bool:
sample_spacing = int(record['tau_spacing'] /
record['tx_pulse_len'])

# Check to see if tagged version. If not, use 255.255
git_hash = record['borealis_git_hash'].split('-')[0]
major_version, minor_version = git_hash.split('.')
if major_version[0] == 'v':
borealis_major_revision = major_version[1:]
borealis_minor_revision = minor_version
# Borealis git tag version numbers. If not a tagged version,
# then use 255.255
if record['borealis_git_hash'][0] == 'v': # tagged version, non-tagged versions have hexadecimal
version = record['borealis_git_hash'].split('-')[0].split('.')
borealis_major_revision = version[0][1:] # strip off the 'v'
borealis_minor_revision = version[1]
else:
borealis_major_revision = 255
borealis_minor_revision = 255
Expand Down Expand Up @@ -569,11 +569,10 @@ def __convert_bfiq_record(borealis_slice_id: int,

# Borealis git tag version numbers. If not a tagged version,
# then use 255.255
if record_dict['borealis_git_hash'][0] == 'v' and \
record_dict['borealis_git_hash'][2] == '.':

borealis_major_revision = record_dict['borealis_git_hash'][1]
borealis_minor_revision = record_dict['borealis_git_hash'][3]
if record_dict['borealis_git_hash'][0] == 'v': # tagged version, non-tagged versions have hexadecimal
version = record_dict['borealis_git_hash'].split('-')[0].split('.')
borealis_major_revision = version[0][1:] # strip off the 'v'
borealis_minor_revision = version[1]
else:
borealis_major_revision = 255
borealis_minor_revision = 255
Expand Down Expand Up @@ -841,10 +840,10 @@ def __convert_rawacf_record(borealis_slice_id: int,

# Borealis git tag version numbers. If not a tagged version,
# then use 255.255
if record_dict['borealis_git_hash'][0] == 'v' and \
record_dict['borealis_git_hash'][2] == '.':
borealis_major_revision = record_dict['borealis_git_hash'][1]
borealis_minor_revision = record_dict['borealis_git_hash'][3]
if record_dict['borealis_git_hash'][0] == 'v': # tagged version, non-tagged versions are hexadecimal
version = record_dict['borealis_git_hash'].split('-')[0].split('.')
borealis_major_revision = version[0][1:] # strip off the 'v'
borealis_minor_revision = version[1]
else:
borealis_major_revision = 255
borealis_minor_revision = 255
Expand Down
Loading

0 comments on commit bd91eb2

Please sign in to comment.