Skip to content

Commit

Permalink
Empty list (instead of None) if file contains no format
Browse files Browse the repository at this point in the history
  • Loading branch information
hf-sheese committed Jan 31, 2024
1 parent 6d86e8c commit 1eb67ae
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 31 deletions.
41 changes: 20 additions & 21 deletions src/edi_energy_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,27 +303,6 @@ async def _download(
raise
return file_path

@staticmethod
def get_edifact_format(path: Path) -> Tuple[EdifactFormatVersion, List[Optional[EdifactFormat]]]:
    """
    Determines the edifact formats and the version of a given file.

    The version is derived from the date (``YYYYMMDD``) found in the last
    underscore-separated part of the file name stem; the date is interpreted
    as midnight in the Europe/Berlin time zone.
    A file can describe more than one format (for example APERAK and CONTRL).
    Therefore, a list of all formats whose name occurs in the file name is returned.
    If no format name occurs in the file name, the list is ``[None]``.

    :param path: path of the file; only its stem (name without suffix) is evaluated
    :return: tuple of the edifact format version and the list of matching formats
    :raises ValueError: if the last part of the file name is not a ``YYYYMMDD`` date
    """
    filename = path.stem
    date_string = filename.rsplit("_", 1)[-1]  # Assuming date is in the last part of filename
    naive_date = datetime.datetime.strptime(date_string, "%Y%m%d")
    # ``astimezone`` on a naive datetime would interpret it in the *system* time zone first,
    # which makes the result depend on the machine the code runs on (and can shift the date
    # across a format version boundary). ``localize`` attaches the Berlin zone directly.
    berlin_local_time = pytz.timezone("Europe/Berlin").localize(naive_date)
    version = get_edifact_format_version(berlin_local_time)
    edifactformat: List[Optional[EdifactFormat]] = [entry for entry in EdifactFormat if str(entry) in filename]
    if not edifactformat:
        # callers of this method expect a single ``None`` entry when nothing matched
        edifactformat = [None]
    return version, edifactformat

# pylint:disable=too-many-locals
async def mirror(self):
"""
Expand Down Expand Up @@ -368,3 +347,23 @@ async def mirror(self):
new_file_paths.add(download_result)
self.remove_no_longer_online_files(new_file_paths)
_logger.info("Finished mirroring")


def get_edifact_version_and_formats(path: Path) -> Tuple[EdifactFormatVersion, List[EdifactFormat]]:
    """
    Determines the edifact formats and the version of a given file.

    The version is derived from the date (``YYYYMMDD``) found in the last
    underscore-separated part of the file name stem; the date is interpreted
    as midnight in the Europe/Berlin time zone.
    A file can describe more than one format (for example APERAK and CONTRL).
    Therefore, a list of all formats whose name occurs in the file name is returned.
    The list is empty if the file name contains no format name.

    :param path: path of the file; only its stem (name without suffix) is evaluated
    :return: tuple of the edifact format version and the (possibly empty) list of formats
    :raises ValueError: if the last part of the file name is not a ``YYYYMMDD`` date
    """
    filename = path.stem
    date_string = filename.rsplit("_", 1)[-1]  # Assuming date is in the last part of filename
    naive_date = datetime.datetime.strptime(date_string, "%Y%m%d")
    # ``astimezone`` on a naive datetime would interpret it in the *system* time zone first,
    # which makes the result depend on the machine the code runs on (and can shift the date
    # across a format version boundary). ``localize`` attaches the Berlin zone directly.
    berlin_local_time = pytz.timezone("Europe/Berlin").localize(naive_date)
    version = get_edifact_format_version(berlin_local_time)
    list_of_edifactformats: List[EdifactFormat] = [entry for entry in EdifactFormat if str(entry) in filename]
    return version, list_of_edifactformats
17 changes: 7 additions & 10 deletions unittests/test_edienergyscraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from bs4 import BeautifulSoup
from maus.edifact import EdifactFormat, EdifactFormatVersion

from edi_energy_scraper import EdiEnergyScraper, Epoch
from edi_energy_scraper import EdiEnergyScraper, Epoch, get_edifact_version_and_formats


class TestEdiEnergyScraper:
Expand Down Expand Up @@ -496,25 +496,22 @@ async def test_mirroring(self, mocker, tmpdir_factory, datafiles, caplog):
),
),
pytest.param(
"CodelistedereuropäischenLändercodes1.0_99991231_20171001.pdf", (EdifactFormatVersion.FV2104, [None])
"CodelistedereuropäischenLändercodes1.0_99991231_20171001.pdf", (EdifactFormatVersion.FV2104, [])
),
pytest.param(
"CodelistederZeitreihentypen1.1d_99991231_20211001.pdf", (EdifactFormatVersion.FV2110, [None])
),
pytest.param("KostenblattFB1.0b_99991231_20230401.pdf", (EdifactFormatVersion.FV2304, [None])),
pytest.param("CodelistederZeitreihentypen1.1d_99991231_20211001.pdf", (EdifactFormatVersion.FV2110, [])),
pytest.param("KostenblattFB1.0b_99991231_20230401.pdf", (EdifactFormatVersion.FV2304, [])),
pytest.param("PARTINMIG1.0c_20240331_20240403.pdf", (EdifactFormatVersion.FV2404, [EdifactFormat.PARTIN])),
pytest.param("PARTINMIG1.0c_20240331_20241001.pdf", (EdifactFormatVersion.FV2410, [EdifactFormat.PARTIN])),
pytest.param("PARTINMIG1.0c_20240331_20250401.pdf", (EdifactFormatVersion.FV2504, [EdifactFormat.PARTIN])),
pytest.param("PARTINMIG1.0c_20240331_20251001.pdf", (EdifactFormatVersion.FV2510, [EdifactFormat.PARTIN])),
],
)
def test_get_edifact_format_parametrize(
self, input_filename: str, expected_result: Tuple[EdifactFormatVersion, List[Optional[EdifactFormat]]]
def test_get_edifact_version_and_formats(
self, input_filename: str, expected_result: Tuple[EdifactFormatVersion, List[EdifactFormat]]
):
"""
Tests the determination of the edifact format and version for given files
"""
ees = EdiEnergyScraper()
actual = ees.get_edifact_format(Path(input_filename))
actual = get_edifact_version_and_formats(Path(input_filename))

assert actual == expected_result

0 comments on commit 1eb67ae

Please sign in to comment.