Skip to content

Commit

Permalink
Merge pull request #125 from mraspaud/feature-eo-sip-support
Browse files Browse the repository at this point in the history
Support reading EO-SIP LAC data
  • Loading branch information
mraspaud committed Mar 19, 2024
2 parents be5f731 + 2753057 commit 2ef1e10
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
fail-fast: true
matrix:
os: ["ubuntu-latest"]
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.10", "3.11", "3.12"]
experimental: [false]

env:
Expand Down
45 changes: 30 additions & 15 deletions pygac/pod_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def read(self, filename, fileobj=None):
# choose the right header depending on the date
with file_opener(fileobj or filename) as fd_:
self.tbm_head, self.head = self.read_header(
filename, fileobj=fd_)
filename, fileobj=fd_, header_date=self.header_date)
if self.tbm_head:
tbm_offset = tbm_header.itemsize
else:
Expand All @@ -302,12 +302,14 @@ def read(self, filename, fileobj=None):
return self.head, self.scans

@classmethod
def read_header(cls, filename, fileobj=None):
def read_header(cls, filename, fileobj=None, header_date="auto"):
"""Read the file header.
Args:
filename (str): Path to GAC/LAC file
fileobj: An open file object to read from. (optional)
header_date: date to use to choose the header.
Defaults to "auto" to use the data to pick the header corresponding to the date of the file.
Returns:
archive_header (struct): archive header
Expand All @@ -332,19 +334,7 @@ def read_header(cls, filename, fileobj=None):
fd_.seek(0)
tbm_head = None
tbm_offset = 0
# read header
head0, = np.frombuffer(
fd_.read(header0.itemsize),
dtype=header0, count=1)
year, jday, _ = cls.decode_timestamps(head0["start_time"])
start_date = (datetime.date(year, 1, 1) +
datetime.timedelta(days=int(jday) - 1))
if start_date < datetime.date(1992, 9, 8):
header = header1
elif start_date <= datetime.date(1994, 11, 15):
header = header2
else:
header = header3
header = cls.choose_header_based_on_timestamp(header_date, fd_)
fd_.seek(tbm_offset, 0)
# need to copy frombuffer to have write access on head
head, = np.frombuffer(
Expand All @@ -354,6 +344,31 @@ def read_header(cls, filename, fileobj=None):
cls._validate_header(head)
return tbm_head, head

@classmethod
def choose_header_based_on_timestamp(cls, header_date, fd_):
    """Choose the header dtype based on the timestamp.

    Args:
        header_date: ``"auto"`` to decode the start date from the file
            itself, or an explicit ``datetime.date``. A
            ``datetime.datetime`` is accepted as well and is truncated to
            its calendar date.
        fd_: open file object; it is only read from (at its current
            position) when ``header_date`` is ``"auto"``.

    Returns:
        ``header1`` for dates before 1992-09-08, ``header2`` for dates up
        to and including 1994-11-15, ``header3`` otherwise.
    """
    if header_date == "auto":
        header_date = cls.get_start_date(fd_)
    elif isinstance(header_date, datetime.datetime):
        # Python refuses to order a datetime against a date (TypeError),
        # so reduce a full timestamp to its date before comparing.
        header_date = header_date.date()
    if header_date < datetime.date(1992, 9, 8):
        header = header1
    elif header_date <= datetime.date(1994, 11, 15):
        header = header2
    else:
        header = header3
    return header

@classmethod
def get_start_date(cls, fd_):
    """Return the start date decoded from the header in the filestream.

    Reads ``header0.itemsize`` bytes from the current position of ``fd_``,
    decodes the ``start_time`` field with ``decode_timestamps`` and turns
    the resulting year and day number into a ``datetime.date``.
    """
    raw_bytes = fd_.read(header0.itemsize)
    first_header = np.frombuffer(raw_bytes, dtype=header0, count=1)[0]
    year, jday, _ = cls.decode_timestamps(first_header["start_time"])
    # The day number is 1-based: day 1 is January 1st of that year.
    january_first = datetime.date(year, 1, 1)
    return january_first + datetime.timedelta(days=int(jday) - 1)

@classmethod
def _validate_header(cls, header):
"""Check if the header belongs to this reader."""
Expand Down
42 changes: 32 additions & 10 deletions pygac/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class NoTLEData(IndexError):
"""Raised if no TLE data available within time range."""


class DecodingError(ValueError):
    """Raised when decoding of some value fails.

    Derives from ``ValueError``, so handlers catching ``ValueError`` also
    catch this exception.
    """


class Reader(six.with_metaclass(ABCMeta)):
"""Reader for GAC and LAC format, POD and KLM platforms."""

Expand All @@ -95,7 +99,7 @@ class Reader(six.with_metaclass(ABCMeta)):

def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
tle_dir=None, tle_name=None, tle_thresh=7, creation_site=None,
custom_calibration=None, calibration_file=None):
custom_calibration=None, calibration_file=None, header_date="auto"):
"""Init the reader.
Args:
Expand All @@ -111,6 +115,7 @@ def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
custom_calibration: dictionary with a subset of user defined satellite specific
calibration coefficients
calibration_file: path to json file containing default calibrations
header_date: the date to use for pod header choice. Defaults to "auto".
"""
self.meta_data = {}
Expand All @@ -122,6 +127,7 @@ def __init__(self, interpolate_coords=True, adjust_clock_drift=True,
self.creation_site = (creation_site or 'NSS').encode('utf-8')
self.custom_calibration = custom_calibration
self.calibration_file = calibration_file
self.header_date = header_date
self.head = None
self.scans = None
self.spacecraft_name = None
Expand Down Expand Up @@ -205,22 +211,38 @@ def _correct_data_set_name(cls, header, filename):
filename (str): path to file
"""
filename = str(filename)
data_set_name = header['data_set_name'].decode(errors='ignore')
if not cls.data_set_pattern.match(data_set_name):
LOG.debug('The data_set_name in header %s does not match.'
' Use filename instead.' % header['data_set_name'])
for encoding in "utf-8", "cp500":
data_set_name = header['data_set_name']
try:
data_set_name = cls._decode_data_set_name(data_set_name, encoding)
except DecodingError as err:
LOG.debug(str(err))
else:
header["data_set_name"] = data_set_name
break
else:
LOG.debug(f'The data_set_name in header {header["data_set_name"]} does not match.'
' Use filename instead.')
match = cls.data_set_pattern.search(filename)
if match:
data_set_name = match.group()
LOG.debug("Set data_set_name, to filename %s"
% data_set_name)
LOG.debug(f"Set data_set_name, to filename {data_set_name}")
header['data_set_name'] = data_set_name.encode()
else:
LOG.debug("header['data_set_name']=%s; filename='%s'"
% (header['data_set_name'], filename))
LOG.debug(f"header['data_set_name']={header['data_set_name']}; filename='{filename}'")
raise ReaderError('Cannot determine data_set_name!')
return header

@classmethod
def _decode_data_set_name(cls, data_set_name, encoding):
    """Decode a raw data set name and validate it against the name pattern.

    Args:
        data_set_name (bytes): the name as stored in the header.
        encoding (str): codec used for decoding; bytes that cannot be
            decoded are dropped (``errors='ignore'``).

    Returns:
        bytes: the decoded name, re-encoded with the default codec.

    Raises:
        DecodingError: if the decoded name does not match
            ``cls.data_set_pattern``.
    """
    data_set_name = data_set_name.decode(encoding, errors='ignore')
    if cls.data_set_pattern.match(data_set_name):
        return data_set_name.encode()
    message = (f'The data_set_name in header {data_set_name} '
               f'does not seem correct using encoding {encoding}.')
    raise DecodingError(message)

@classmethod
def _validate_header(cls, header):
"""Check if the header belongs to this reader.
Expand Down Expand Up @@ -274,7 +296,7 @@ def _read_scanlines(self, buffer, count):
"Expected %d scan lines, but found %d!"
% (count, line_count))
warnings.warn("Unexpected number of scanlines!",
category=RuntimeWarning)
category=RuntimeWarning, stacklevel=2)
self.scans = np.frombuffer(
buffer, dtype=self.scanline_type, count=line_count)

Expand Down
6 changes: 3 additions & 3 deletions pygac/tests/test_klm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
class TestKLM:
"""Test the klm reader."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = GACKLMReader()

Expand Down Expand Up @@ -126,7 +126,7 @@ def test_quality_indicators(self):
class TestGACKLM:
"""Tests for gac klm."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = GACKLMReader()

Expand All @@ -150,7 +150,7 @@ def test_get_tsm_pixels(self, get_tsm_idx):
class TestLACKLM:
"""Tests for lac klm."""

def setup(self):
def setup_method(self):
"""Set up the tests."""
self.reader = LACKLMReader()
self.reader.scans = np.ones(100, dtype=scanline)
Expand Down
Loading

0 comments on commit 2ef1e10

Please sign in to comment.