Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 66 additions & 24 deletions src/virtualship/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import warnings
from datetime import timedelta
from functools import lru_cache
Expand Down Expand Up @@ -42,37 +43,33 @@ def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel:
return model.model_validate(yaml.safe_load(data))


def mfp_to_yaml(excel_file_path: str, yaml_output_path: str): # noqa: D417
"""
Generates a YAML file with spatial and temporal information based on instrument data from MFP excel file.
def load_coordinates(file_path):
"""Loads coordinates from a file based on its extension."""
if not os.path.isfile(file_path):
raise FileNotFoundError(f"File not found: {file_path}")

Parameters
----------
- excel_file_path (str): Path to the Excel file containing coordinate and instrument data.
ext = os.path.splitext(file_path)[-1].lower()

The function:
1. Reads instrument and location data from the Excel file.
2. Determines the maximum depth and buffer based on the instruments present.
3. Ensures longitude and latitude values remain valid after applying buffer adjustments.
4. returns the yaml information.
try:
if ext in [".xls", ".xlsx"]:
return pd.read_excel(file_path)

"""
# Importing Schedule and related models from expedition module
from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.schedule import Schedule
from virtualship.expedition.space_time_region import (
SpaceTimeRegion,
SpatialRange,
TimeRange,
)
from virtualship.expedition.waypoint import Location, Waypoint
if ext == ".csv":
return pd.read_csv(file_path)

raise ValueError(f"Unsupported file extension {ext}.")

except Exception as e:
raise RuntimeError(
"Could not read coordinates data from the provided file. "
"Ensure it is either a csv or excel file."
) from e


def validate_coordinates(coordinates_data):
# Expected column headers
expected_columns = {"Station Type", "Name", "Latitude", "Longitude", "Instrument"}

# Read data from Excel
coordinates_data = pd.read_excel(excel_file_path)

# Check if the headers match the expected ones
actual_columns = set(coordinates_data.columns)

Expand Down Expand Up @@ -104,6 +101,51 @@ def mfp_to_yaml(excel_file_path: str, yaml_output_path: str): # noqa: D417
# Continue with the rest of the function after validation...
coordinates_data = coordinates_data.dropna()

# Convert latitude and longitude to floats, replacing commas with dots
# Handles case when the latitude and longitude have decimals with commas
if coordinates_data["Latitude"].dtype in ["object", "string"]:
coordinates_data["Latitude"] = coordinates_data["Latitude"].apply(
lambda x: float(x.replace(",", "."))
)

if coordinates_data["Longitude"].dtype in ["object", "string"]:
coordinates_data["Longitude"] = coordinates_data["Longitude"].apply(
lambda x: float(x.replace(",", "."))
)

return coordinates_data


def mfp_to_yaml(coordinates_file_path: str, yaml_output_path: str): # noqa: D417
"""
Generates a YAML file with spatial and temporal information based on instrument data from MFP excel file.

Parameters
----------
- excel_file_path (str): Path to the Excel file containing coordinate and instrument data.

The function:
1. Reads instrument and location data from the Excel file.
2. Determines the maximum depth and buffer based on the instruments present.
3. Ensures longitude and latitude values remain valid after applying buffer adjustments.
4. returns the yaml information.

"""
# Importing Schedule and related models from expedition module
from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.schedule import Schedule
from virtualship.expedition.space_time_region import (
SpaceTimeRegion,
SpatialRange,
TimeRange,
)
from virtualship.expedition.waypoint import Location, Waypoint

# Read data from file
coordinates_data = load_coordinates(coordinates_file_path)

coordinates_data = validate_coordinates(coordinates_data)

# maximum depth (in meters), buffer (in degrees) for each instrument
instrument_max_depths = {
"XBT": 2000,
Expand Down
127 changes: 108 additions & 19 deletions tests/test_mfp_to_yaml.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import pandas as pd
import pytest

Expand All @@ -11,27 +13,71 @@ def valid_mfp_data():
{
"Station Type": ["A", "B", "C"],
"Name": ["Station1", "Station2", "Station3"],
"Latitude": [30, 31, 32],
"Longitude": [-44, -45, -46],
"Latitude": [30.8, 31.2, 32.5],
"Longitude": [-44.3, -45.1, -46.7],
"Instrument": ["CTD, DRIFTER", "ARGO_FLOAT", "XBT, CTD, DRIFTER"],
}
)


# Fixture for Excel file
@pytest.fixture
def valid_mfp_file(tmp_path):
def valid_excel_mfp_file(tmp_path):
path = tmp_path / "file.xlsx"
valid_mfp_data().to_excel(path, index=False)
yield path
return path


# Fixture for CSV file
@pytest.fixture
def valid_csv_mfp_file(tmp_path):
    """Write the valid MFP data to a temporary CSV file and return its path."""
    csv_path = tmp_path / "file.csv"
    frame = valid_mfp_data()
    frame.to_csv(csv_path, index=False)
    return csv_path


@pytest.fixture
def valid_csv_mfp_file_with_commas(tmp_path):
    """Temporary CSV file whose floats use a comma as the decimal separator."""
    csv_path = tmp_path / "file.csv"
    frame = valid_mfp_data()
    frame.to_csv(csv_path, decimal=",", index=False)
    return csv_path


@pytest.fixture
def invalid_mfp_file(tmp_path):
    """Temporary '.csv' file that is not parseable as comma-separated data.

    Written with a '|' separator and comma decimals so a plain
    ``pd.read_csv`` call cannot interpret it correctly.
    """
    csv_path = tmp_path / "file.csv"
    frame = valid_mfp_data()
    frame.to_csv(csv_path, decimal=",", sep="|", index=False)
    return csv_path


@pytest.fixture
def unsupported_extension_mfp_file(tmp_path):
    """Valid CSV content saved under an extension the loader does not accept."""
    odd_path = tmp_path / "file.unsupported"
    frame = valid_mfp_data()
    frame.to_csv(odd_path, index=False)
    return odd_path


@pytest.fixture
def nonexistent_mfp_file(tmp_path):
    """Path inside tmp_path that is never created on disk."""
    return tmp_path / "non_file.csv"


@pytest.fixture
def missing_instruments_column_mfp_file(tmp_path):
    """Excel file built from the valid data but lacking the 'Instrument' column."""
    xlsx_path = tmp_path / "file.xlsx"
    frame = valid_mfp_data().drop(columns=["Instrument"])
    frame.to_excel(xlsx_path, index=False)
    return xlsx_path


@pytest.fixture
def missing_columns_mfp_file(tmp_path):
path = tmp_path / "file.xlsx"
valid_mfp_data().drop(columns=["Longitude", "Instrument"]).to_excel(
path, index=False
)
yield path
valid_mfp_data().drop(columns=["Longitude"]).to_excel(path, index=False)
return path


@pytest.fixture
Expand All @@ -43,8 +89,14 @@ def unexpected_header_mfp_file(tmp_path):
yield path


def test_mfp_to_yaml_success(valid_mfp_file, tmp_path):
"""Test that mfp_to_yaml correctly processes a valid MFP Excel file."""
@pytest.mark.parametrize(
"fixture_name",
["valid_excel_mfp_file", "valid_csv_mfp_file", "valid_csv_mfp_file_with_commas"],
)
def test_mfp_to_yaml_success(request, fixture_name, tmp_path):
"""Test that mfp_to_yaml correctly processes a valid MFP file."""
valid_mfp_file = request.getfixturevalue(fixture_name)

yaml_output_path = tmp_path / "schedule.yaml"

# Run function (No need to mock open() for YAML, real file is created)
Expand All @@ -66,15 +118,52 @@ def test_mfp_to_yaml_success(valid_mfp_file, tmp_path):
]


def test_mfp_to_yaml_missing_headers(missing_columns_mfp_file, tmp_path):
"""Test that mfp_to_yaml raises an error when required columns are missing."""
@pytest.mark.parametrize(
"fixture_name,error,match",
[
pytest.param(
"nonexistent_mfp_file",
FileNotFoundError,
os.path.basename("/non_file.csv"),
id="FileNotFound",
),
pytest.param(
"unsupported_extension_mfp_file",
RuntimeError,
"Could not read coordinates data from the provided file. Ensure it is either a csv or excel file.",
id="UnsupportedExtension",
),
pytest.param(
"invalid_mfp_file",
RuntimeError,
"Could not read coordinates data from the provided file. Ensure it is either a csv or excel file.",
id="InvalidFile",
),
pytest.param(
"missing_instruments_column_mfp_file",
ValueError,
"Error: Missing column 'Instrument'. Have you added this column after exporting from MFP?",
id="MissingInstruments",
),
pytest.param(
"missing_columns_mfp_file",
ValueError,
(
r"Error: Found columns \[.*?('Station Type'| 'Name'| 'Latitude'| 'Instrument').*?\], "
r"but expected columns \[.*?('Station Type'| 'Name'| 'Latitude'| 'Instrument'| 'Longitude').*?\]."
),
id="MissingColumns",
),
],
)
def test_mfp_to_yaml_exceptions(request, fixture_name, error, match, tmp_path):
"""Test that mfp_to_yaml raises an error when input file is not valid."""
fixture = request.getfixturevalue(fixture_name)

yaml_output_path = tmp_path / "schedule.yaml"

with pytest.raises(
ValueError,
match="Error: Missing column 'Instrument'. Have you added this column after exporting from MFP?",
):
mfp_to_yaml(missing_columns_mfp_file, yaml_output_path)
with pytest.raises(error, match=match):
mfp_to_yaml(fixture, yaml_output_path)


def test_mfp_to_yaml_extra_headers(unexpected_header_mfp_file, tmp_path):
Expand All @@ -85,12 +174,12 @@ def test_mfp_to_yaml_extra_headers(unexpected_header_mfp_file, tmp_path):
mfp_to_yaml(unexpected_header_mfp_file, yaml_output_path)


def test_mfp_to_yaml_instrument_conversion(valid_mfp_file, tmp_path):
def test_mfp_to_yaml_instrument_conversion(valid_excel_mfp_file, tmp_path):
"""Test that instruments are correctly converted into InstrumentType enums."""
yaml_output_path = tmp_path / "schedule.yaml"

# Run function
mfp_to_yaml(valid_mfp_file, yaml_output_path)
mfp_to_yaml(valid_excel_mfp_file, yaml_output_path)

# Load the generated YAML
data = Schedule.from_yaml(yaml_output_path)
Expand Down