diff --git a/django_project/gap/admin.py b/django_project/gap/admin.py index bff7f07..8d531fd 100644 --- a/django_project/gap/admin.py +++ b/django_project/gap/admin.py @@ -8,10 +8,20 @@ from .models import ( Attribute, Country, Provider, Measurement, Station, IngestorSession, - Dataset, DatasetAttribute, NetCDFFile + Dataset, DatasetAttribute, NetCDFFile, DatasetType, Unit ) +@admin.register(Unit) +class UnitAdmin(admin.ModelAdmin): + """Unit admin.""" + + list_display = ( + 'name', 'description' + ) + search_fields = ('name',) + + @admin.register(Attribute) class AttributeAdmin(admin.ModelAdmin): """Attribute admin.""" @@ -42,6 +52,15 @@ class ProviderAdmin(admin.ModelAdmin): search_fields = ('name',) +@admin.register(DatasetType) +class DatasetTypeAdmin(admin.ModelAdmin): + """DatasetType admin.""" + + list_display = ( + 'name', 'type' + ) + + @admin.register(Dataset) class DatasetAdmin(admin.ModelAdmin): """Dataset admin.""" @@ -58,6 +77,7 @@ class DatasetAttributeAdmin(admin.ModelAdmin): list_display = ( 'dataset', 'attribute', 'source', 'source_unit', ) + list_filter = ('dataset',) @admin.register(Measurement) diff --git a/django_project/gap/factories.py b/django_project/gap/factories.py index 19f8b08..028cba3 100644 --- a/django_project/gap/factories.py +++ b/django_project/gap/factories.py @@ -9,6 +9,8 @@ from core.factories import BaseMetaFactory, BaseFactory from gap.models import ( + CastType, + DatasetType, Dataset, Provider, Unit, @@ -18,7 +20,6 @@ Station, Measurement, ObservationType, - DatasetType, DatasetTimeStep, DatasetStore, NetCDFFile @@ -37,6 +38,19 @@ class Meta: # noqa description = factory.Faker('text') +class DatasetTypeFactory( + BaseFactory[DatasetType], metaclass=BaseMetaFactory[DatasetType] +): + """Factory class for DatasetType model.""" + + class Meta: # noqa + model = DatasetType + + name = factory.Faker('company') + description = factory.Faker('text') + type = CastType.HISTORICAL + + class DatasetFactory( BaseFactory[Dataset], metaclass=BaseMetaFactory[Dataset] ): @@ -47,7 +61,7 @@ class Meta: # noqa name = factory.Faker('company') description = factory.Faker('text') - type = DatasetType.CLIMATE_REANALYSIS + type = factory.SubFactory(DatasetTypeFactory) time_step = DatasetTimeStep.DAILY store_type = DatasetStore.TABLE provider = factory.SubFactory(ProviderFactory) diff --git a/django_project/gap/fixtures/2.dataset_type.json b/django_project/gap/fixtures/2.dataset_type.json new file mode 100644 index 0000000..b65f867 --- /dev/null +++ b/django_project/gap/fixtures/2.dataset_type.json @@ -0,0 +1,47 @@ +[ +{ + "model": "gap.datasettype", + "pk": 1, + "fields": { + "name": "Climate Reanalysis", + "description": "", + "type": "historical" + } +}, +{ + "model": "gap.datasettype", + "pk": 2, + "fields": { + "name": "Short-term Forecast", + "description": "", + "type": "forecast" + } +}, +{ + "model": "gap.datasettype", + "pk": 3, + "fields": { + "name": "Seasonal Forecast", + "description": "", + "type": "forecast" + } +}, +{ + "model": "gap.datasettype", + "pk": 4, + "fields": { + "name": "Ground Observational", + "description": "", + "type": "historical" + } +}, +{ + "model": "gap.datasettype", + "pk": 5, + "fields": { + "name": "Airborne Observational", + "description": "", + "type": "historical" + } +} +] diff --git a/django_project/gap/fixtures/3.unit.json b/django_project/gap/fixtures/3.unit.json new file mode 100644 index 0000000..fb36ae0 --- /dev/null +++ b/django_project/gap/fixtures/3.unit.json @@ -0,0 +1,74 @@ +[ +{ + "model": "gap.unit", + "pk": 1, + 
"fields": { + "name": "mm", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 3, + "fields": { + "name": "MJ/sqm", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 4, + "fields": { + "name": "mm day-1", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 5, + "fields": { + "name": "atm", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 6, + "fields": { + "name": "g/m3", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 7, + "fields": { + "name": "W/m2", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 8, + "fields": { + "name": "°C", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 9, + "fields": { + "name": "Degrees from North", + "description": null + } +}, +{ + "model": "gap.unit", + "pk": 10, + "fields": { + "name": "m/s", + "description": null + } +} +] diff --git a/django_project/gap/ingestor/tahmo.py b/django_project/gap/ingestor/tahmo.py index d85f00d..3be2d3e 100644 --- a/django_project/gap/ingestor/tahmo.py +++ b/django_project/gap/ingestor/tahmo.py @@ -18,7 +18,7 @@ from gap.models import ( Provider, Station, ObservationType, Country, IngestorSession, Attribute, Measurement, Dataset, DatasetType, DatasetTimeStep, - DatasetStore, DatasetAttribute, Unit + DatasetStore, DatasetAttribute, Unit, CastType ) @@ -58,10 +58,16 @@ def __init__(self, session: IngestorSession): self.obs_type, _ = ObservationType.objects.get_or_create( name='Ground Observations' ) + self.dataset_type, _ = DatasetType.objects.get_or_create( + name='Ground Observational', + defaults={ + 'type': CastType.HISTORICAL + } + ) self.dataset, _ = Dataset.objects.get_or_create( - name='Tahmo', + name=f'Tahmo {self.dataset_type.name}', provider=self.provider, - type=DatasetType.GROUND_OBSERVATIONAL, + type=self.dataset_type, time_step=DatasetTimeStep.DAILY, store_type=DatasetStore.TABLE ) diff --git a/django_project/gap/migrations/0001_initial.py b/django_project/gap/migrations/0001_initial.py index f6db372..ffdb557 100644 --- a/django_project/gap/migrations/0001_initial.py +++ b/django_project/gap/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.7 on 2024-07-05 15:16 +# Generated by Django 4.2.7 on 2024-07-14 21:07 import django.contrib.gis.db.models.fields from django.db import migrations, models @@ -45,7 +45,6 @@ class Migration(migrations.Migration): ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('name', models.CharField(max_length=512)), ('description', models.TextField(blank=True, null=True)), - ('type', models.CharField(choices=[('Climate Reanalysis', 'Climate Reanalysis'), ('Short-term Forecast', 'Short-term Forecast'), ('Seasonal Forecast', 'Seasonal Forecast'), ('Ground Observational', 'Ground Observational'), ('Airborne Observational', 'Airborne Observational')], max_length=512)), ('time_step', models.CharField(choices=[('DAILY', 'DAILY'), ('HOURLY', 'HOURLY')], max_length=512)), ('store_type', models.CharField(choices=[('TABLE', 'TABLE'), ('NETCDF', 'NETCDF'), ('EXT_API', 'EXT_API')], max_length=512)), ], @@ -53,6 +52,18 @@ class Migration(migrations.Migration): 'abstract': False, }, ), + migrations.CreateModel( + name='DatasetType', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=512)), + ('description', models.TextField(blank=True, null=True)), + ('type', models.CharField(choices=[('historical', 'historical'), ('forecast', 
'forecast')], max_length=512)),
+            ],
+            options={
+                'abstract': False,
+            },
+        ),
         migrations.CreateModel(
             name='IngestorSession',
             fields=[
@@ -140,6 +151,11 @@ class Migration(migrations.Migration):
             name='provider',
             field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gap.provider'),
         ),
+        migrations.AddField(
+            model_name='dataset',
+            name='type',
+            field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gap.datasettype'),
+        ),
         migrations.AddField(
             model_name='attribute',
             name='unit',
diff --git a/django_project/gap/models/dataset.py b/django_project/gap/models/dataset.py
index 6e9ba56..0291acc 100644
--- a/django_project/gap/models/dataset.py
+++ b/django_project/gap/models/dataset.py
@@ -11,14 +11,23 @@
 from gap.models.common import Provider
 
 
-class DatasetType:
+class CastType:
+    """Cast type."""
+
+    HISTORICAL = 'historical'
+    FORECAST = 'forecast'
+
+
+class DatasetType(Definition):
     """Dataset type."""
 
-    CLIMATE_REANALYSIS = 'Climate Reanalysis'
-    SHORT_TERM_FORECAST = 'Short-term Forecast'
-    SEASONAL_FORECAST = 'Seasonal Forecast'
-    GROUND_OBSERVATIONAL = 'Ground Observational'
-    AIRBORNE_OBSERVATIONAL = 'Airborne Observational'
+    type = models.CharField(
+        choices=(
+            (CastType.HISTORICAL, CastType.HISTORICAL),
+            (CastType.FORECAST, CastType.FORECAST),
+        ),
+        max_length=512
+    )
 
 
 class DatasetStore:
@@ -42,21 +51,8 @@ class Dataset(Definition):
     provider = models.ForeignKey(
         Provider, on_delete=models.CASCADE
     )
-    type = models.CharField(
-        choices=(
-            (DatasetType.CLIMATE_REANALYSIS, DatasetType.CLIMATE_REANALYSIS),
-            (DatasetType.SHORT_TERM_FORECAST, DatasetType.SHORT_TERM_FORECAST),
-            (DatasetType.SEASONAL_FORECAST, DatasetType.SEASONAL_FORECAST),
-            (
-                DatasetType.GROUND_OBSERVATIONAL,
-                DatasetType.GROUND_OBSERVATIONAL
-            ),
-            (
-                DatasetType.AIRBORNE_OBSERVATIONAL,
-                DatasetType.AIRBORNE_OBSERVATIONAL
-            ),
-        ),
-        max_length=512
+    type = models.ForeignKey(
+        DatasetType, on_delete=models.CASCADE
     )
     time_step = models.CharField(
         choices=(
diff --git a/django_project/gap/providers/__init__.py b/django_project/gap/providers/__init__.py
new file mode 100644
index 0000000..613ad83
--- /dev/null
+++ b/django_project/gap/providers/__init__.py
@@ -0,0 +1,33 @@
+# coding=utf-8
+"""
+Tomorrow Now GAP.
+
+.. note:: Reader factory for dataset providers
+"""
+
+from gap.models import Dataset
+from gap.utils.netcdf import NetCDFProvider
+from gap.providers.cbam import CBAMNetCDFReader
+from gap.providers.salient import SalientNetCDFReader
+from gap.providers.tahmo import TahmoDatasetReader
+
+
+def get_reader_from_dataset(dataset: Dataset):
+    """Create a new Reader from given dataset.
+
+    :param dataset: Dataset to be read
+    :type dataset: Dataset
+    :raises TypeError: if the dataset provider is not supported
+    :return: Reader Class Type
+    :rtype: CBAMNetCDFReader|SalientNetCDFReader|TahmoDatasetReader
+    """
+    if dataset.provider.name == NetCDFProvider.CBAM:
+        return CBAMNetCDFReader
+    elif dataset.provider.name == NetCDFProvider.SALIENT:
+        return SalientNetCDFReader
+    elif dataset.provider.name == 'Tahmo':
+        return TahmoDatasetReader
+    else:
+        raise TypeError(
+            f'Unsupported provider name: {dataset.provider.name}'
+        )
diff --git a/django_project/gap/providers/cbam.py b/django_project/gap/providers/cbam.py
new file mode 100644
index 0000000..fc02c54
--- /dev/null
+++ b/django_project/gap/providers/cbam.py
@@ -0,0 +1,98 @@
+# coding=utf-8
+"""
+Tomorrow Now GAP.
+
+..
note:: CBAM Data Reader +""" + +from typing import List +from datetime import datetime +from django.contrib.gis.geos import Point +import numpy as np +import xarray as xr + +from gap.models import ( + Dataset, + DatasetAttribute, + NetCDFFile +) + +from gap.utils.reader import DatasetTimelineValue, DatasetReaderValue +from gap.utils.netcdf import ( + daterange_inc, + BaseNetCDFReader +) + + + +class CBAMNetCDFReader(BaseNetCDFReader): + """Class to read NetCDF file from CBAM provider.""" + + def __init__( + self, dataset: Dataset, attributes: List[DatasetAttribute], + point: Point, start_date: datetime, end_date: datetime) -> None: + """Initialize CBAMNetCDFReader class. + + :param dataset: Dataset from CBAM provider + :type dataset: Dataset + :param attributes: List of attributes to be queried + :type attributes: List[DatasetAttribute] + :param point: Location to be queried + :type point: Point + :param start_date: Start date time filter + :type start_date: datetime + :param end_date: End date time filter + :type end_date: datetime + """ + super().__init__(dataset, attributes, point, start_date, end_date) + + def read_historical_data(self, start_date: datetime, end_date: datetime): + """Read historical data from dataset. + + :param start_date: start date for reading historical data + :type start_date: datetime + :param end_date: end date for reading historical data + :type end_date: datetime + """ + self.setup_netcdf_reader() + self.xrDatasets = [] + for filter_date in daterange_inc(start_date, end_date): + netcdf_file = NetCDFFile.objects.filter( + dataset=self.dataset, + start_date_time__gte=filter_date, + end_date_time__lte=filter_date + ).first() + if netcdf_file is None: + continue + ds = self.open_dataset(netcdf_file) + val = self.read_variables(ds) + self.xrDatasets.append(val) + + def get_data_values(self) -> DatasetReaderValue: + """Fetch data values from list of xArray Dataset object. + + :return: Data Value. + :rtype: DatasetReaderValue + """ + results = [] + metadata = { + 'dataset': [self.dataset.name], + 'start_date': self.start_date.isoformat(), + 'end_date': self.end_date.isoformat() + } + if len(self.xrDatasets) == 0: + return DatasetReaderValue(metadata, results) + val = xr.combine_nested( + self.xrDatasets, concat_dim=[self.date_variable]) + for dt_idx, dt in enumerate(val[self.date_variable].values): + value_data = {} + for attribute in self.attributes: + v = val[attribute.source].values[dt_idx] + value_data[attribute.attribute.variable_name] = ( + v if not np.isnan(v) else None + ) + results.append(DatasetTimelineValue( + dt, + value_data + )) + return DatasetReaderValue(metadata, results) diff --git a/django_project/gap/providers/salient.py b/django_project/gap/providers/salient.py new file mode 100644 index 0000000..ad78e23 --- /dev/null +++ b/django_project/gap/providers/salient.py @@ -0,0 +1,128 @@ +# coding=utf-8 +""" +Tomorrow Now GAP. + +.. 
note:: Salient Data Reader
+"""
+
+from typing import List
+from datetime import datetime
+from django.contrib.gis.geos import Point
+import numpy as np
+from xarray.core.dataset import Dataset as xrDataset
+
+from gap.models import (
+    Dataset,
+    DatasetAttribute,
+    NetCDFFile
+)
+
+from gap.utils.reader import DatasetTimelineValue, DatasetReaderValue
+from gap.utils.netcdf import (
+    BaseNetCDFReader
+)
+
+
+class SalientNetCDFReader(BaseNetCDFReader):
+    """Class to read NetCDF file from Salient provider."""
+
+    date_variable = 'forecast_day'
+
+    def __init__(
+            self, dataset: Dataset, attributes: List[DatasetAttribute],
+            point: Point, start_date: datetime, end_date: datetime) -> None:
+        """Initialize SalientNetCDFReader class.
+
+        :param dataset: Dataset from Salient provider
+        :type dataset: Dataset
+        :param attributes: List of attributes to be queried
+        :type attributes: List[DatasetAttribute]
+        :param point: Location to be queried
+        :type point: Point
+        :param start_date: Start date time filter
+        :type start_date: datetime
+        :param end_date: End date time filter
+        :type end_date: datetime
+        """
+        super().__init__(dataset, attributes, point, start_date, end_date)
+
+    def read_forecast_data(self, start_date: datetime, end_date: datetime):
+        """Read forecast data from dataset.
+
+        :param start_date: start date for reading forecast data
+        :type start_date: datetime
+        :param end_date: end date for reading forecast data
+        :type end_date: datetime
+        """
+        self.setup_netcdf_reader()
+        self.xrDatasets = []
+        netcdf_file = NetCDFFile.objects.filter(
+            dataset=self.dataset
+        ).order_by('id').last()
+        if netcdf_file is None:
+            return
+        ds = self.open_dataset(netcdf_file)
+        val = self.read_variables(ds, start_date, end_date)
+        self.xrDatasets.append(val)
+
+    def read_variables(
+            self, dataset: xrDataset, start_date: datetime = None,
+            end_date: datetime = None) -> xrDataset:
+        """Read data from list variable with filter from given Point.
+
+        :param dataset: xArray Dataset object
+        :type dataset: xrDataset
+        :param start_date: start date for reading forecast data
+        :type start_date: datetime
+        :param end_date: end date for reading forecast data
+        :type end_date: datetime
+        :return: filtered xArray Dataset object
+        :rtype: xrDataset
+        """
+        start_dt = np.datetime64(start_date)
+        end_dt = np.datetime64(end_date)
+        variables = [a.source for a in self.attributes]
+        variables.append(self.date_variable)
+        val = dataset[variables].sel(
+            lat=self.point.y, lon=self.point.x,
+            method='nearest'
+        ).where(
+            (dataset[self.date_variable] >= start_dt) &
+            (dataset[self.date_variable] <= end_dt),
+            drop=True
+        )
+        return val
+
+    def get_data_values(self) -> DatasetReaderValue:
+        """Fetch data values from list of xArray Dataset object.
+
+        :return: Data Value.
+        :rtype: DatasetReaderValue
+        """
+        results = []
+        metadata = {
+            'dataset': [self.dataset.name],
+            'start_date': self.start_date.isoformat(),
+            'end_date': self.end_date.isoformat()
+        }
+        if len(self.xrDatasets) == 0:
+            return DatasetReaderValue(metadata, results)
+        # forecast will always use latest dataset
+        val = self.xrDatasets[0]
+        for dt_idx, dt in enumerate(val[self.date_variable].values):
+            value_data = {}
+            for attribute in self.attributes:
+                if 'ensemble' in val[attribute.source].dims:
+                    value_data[attribute.attribute.variable_name] = (
+                        val[attribute.source].values[:, dt_idx]
+                    )
+                else:
+                    v = val[attribute.source].values[dt_idx]
+                    value_data[attribute.attribute.variable_name] = (
+                        v if not np.isnan(v) else None
+                    )
+            results.append(DatasetTimelineValue(
+                dt,
+                value_data
+            ))
+        return DatasetReaderValue(metadata, results)
diff --git a/django_project/gap/providers/tahmo.py b/django_project/gap/providers/tahmo.py
new file mode 100644
index 0000000..f7a019a
--- /dev/null
+++ b/django_project/gap/providers/tahmo.py
@@ -0,0 +1,98 @@
+# coding=utf-8
+"""
+Tomorrow Now GAP.
+
+.. note:: Tahmo Data Reader
+"""
+
+from typing import List
+from datetime import datetime
+from django.contrib.gis.geos import Point
+from django.contrib.gis.db.models.functions import Distance
+
+from gap.models import (
+    Dataset,
+    DatasetAttribute,
+    Station,
+    Measurement
+)
+from gap.utils.reader import (
+    DatasetTimelineValue,
+    DatasetReaderValue,
+    BaseDatasetReader
+)
+
+
+class TahmoDatasetReader(BaseDatasetReader):
+    """Class to read Tahmo ground observation data."""
+
+    def __init__(
+            self, dataset: Dataset, attributes: List[DatasetAttribute],
+            point: Point, start_date: datetime, end_date: datetime) -> None:
+        """Initialize TahmoDatasetReader class.
+
+        :param dataset: Dataset from Tahmo provider
+        :type dataset: Dataset
+        :param attributes: List of attributes to be queried
+        :type attributes: List[DatasetAttribute]
+        :param point: Location to be queried
+        :type point: Point
+        :param start_date: Start date time filter
+        :type start_date: datetime
+        :param end_date: End date time filter
+        :type end_date: datetime
+        """
+        super().__init__(dataset, attributes, point, start_date, end_date)
+        self.results = []
+
+    def read_historical_data(self, start_date: datetime, end_date: datetime):
+        """Read historical data from dataset.
+ + :param start_date: start date for reading historical data + :type start_date: datetime + :param end_date: end date for reading historical data + :type end_date: datetime + """ + nearest_station = Station.objects.annotate( + distance=Distance('geometry', self.point) + ).filter( + provider=self.dataset.provider + ).order_by('distance').first() + if nearest_station is None: + return + measurements = Measurement.objects.select_related( + 'dataset_attribute', 'dataset_attribute__attribute' + ).filter( + date_time__gte=start_date, + date_time__lte=end_date, + dataset_attribute__in=self.attributes, + station=nearest_station + ).order_by('date_time') + curr_dt = None + measurement_dict = {} + for measurement in measurements: + if curr_dt is None: + curr_dt = measurement.date_time + elif curr_dt != measurement.date_time: + self.results.append( + DatasetTimelineValue(curr_dt, measurement_dict)) + curr_dt = measurement.date_time + measurement_dict = {} + measurement_dict[ + measurement.dataset_attribute.attribute.variable_name + ] = measurement.value + self.results.append( + DatasetTimelineValue(curr_dt, measurement_dict)) + + def get_data_values(self) -> DatasetReaderValue: + """Fetch results. + + :return: Data Value. + :rtype: DatasetReaderValue + """ + metadata = { + 'dataset': [self.dataset.name], + 'start_date': self.start_date.isoformat(), + 'end_date': self.end_date.isoformat() + } + return DatasetReaderValue(metadata, self.results) diff --git a/django_project/gap/tasks/netcdf_sync.py b/django_project/gap/tasks/netcdf_sync.py index 504432b..4f84dcd 100644 --- a/django_project/gap/tasks/netcdf_sync.py +++ b/django_project/gap/tasks/netcdf_sync.py @@ -23,7 +23,8 @@ DatasetStore, DatasetTimeStep, DatasetType, - Unit + Unit, + CastType ) from gap.utils.netcdf import ( NetCDFProvider, @@ -47,14 +48,24 @@ def initialize_provider(provider_name: str) -> Tuple[Provider, Dataset]: """ provider, _ = Provider.objects.get_or_create(name=provider_name) if provider.name == NetCDFProvider.CBAM: - dataset_type = DatasetType.CLIMATE_REANALYSIS + dataset_type, _ = DatasetType.objects.get_or_create( + name='Climate Reanalysis', + defaults={ + 'type': CastType.HISTORICAL + } + ) else: - dataset_type = DatasetType.SEASONAL_FORECAST + dataset_type, _ = DatasetType.objects.get_or_create( + name='Seasonal Forecast', + defaults={ + 'type': CastType.FORECAST + } + ) dataset, _ = Dataset.objects.get_or_create( - name=provider.name, + name=f'{provider.name} {dataset_type.name}', provider=provider, + type=dataset_type, defaults={ - 'type': dataset_type, 'time_step': DatasetTimeStep.DAILY, 'store_type': DatasetStore.NETCDF } diff --git a/django_project/gap/tests/providers/__init__.py b/django_project/gap/tests/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/django_project/gap/tests/providers/test_cbam.py b/django_project/gap/tests/providers/test_cbam.py new file mode 100644 index 0000000..868bec2 --- /dev/null +++ b/django_project/gap/tests/providers/test_cbam.py @@ -0,0 +1,84 @@ +# coding=utf-8 +""" +Tomorrow Now GAP. + +.. note:: Unit tests for CBAM Reader. 
+""" + +from django.test import TestCase +from datetime import datetime +import xarray as xr +from django.contrib.gis.geos import Point +from unittest.mock import Mock, patch + +from core.settings.utils import absolute_path +from gap.utils.netcdf import ( + NetCDFProvider, +) +from gap.providers import ( + CBAMNetCDFReader +) +from gap.factories import ( + ProviderFactory, + DatasetFactory, + DatasetAttributeFactory, + AttributeFactory, + NetCDFFileFactory +) + + +class TestCBAMNetCDFReader(TestCase): + """Unit test for CBAM NetCDFReader class.""" + + @patch('gap.utils.netcdf.daterange_inc', + return_value=[datetime(2023, 1, 1)]) + @patch('gap.models.NetCDFFile.objects.filter') + def test_read_historical_data_empty( + self, mock_filter, mock_daterange_inc): + """Test for reading historical data that returns empty.""" + dataset = Mock() + attributes = [] + point = Mock() + start_date = datetime(2023, 1, 1) + end_date = datetime(2023, 1, 2) + reader = CBAMNetCDFReader( + dataset, attributes, point, start_date, end_date) + mock_filter.return_value.first.return_value = None + reader.read_historical_data(start_date, end_date) + self.assertEqual(reader.xrDatasets, []) + + def test_read_historical_data(self): + """Test for reading historical data from CBAM sample.""" + dataset = DatasetFactory.create( + provider=ProviderFactory(name=NetCDFProvider.CBAM)) + attribute = AttributeFactory.create( + name='Max Total Temperature', + variable_name='max_total_temperature') + dataset_attr = DatasetAttributeFactory.create( + dataset=dataset, + attribute=attribute, + source='max_total_temperature' + ) + dt = datetime(2019, 11, 1, 0, 0, 0) + p = Point(x=26.97, y=-12.56) + NetCDFFileFactory.create( + dataset=dataset, + start_date_time=dt, + end_date_time=dt + ) + file_path = absolute_path( + 'gap', 'tests', 'netcdf', 'cbam.nc' + ) + with patch.object(CBAMNetCDFReader, 'open_dataset') as mock_open: + mock_open.return_value = ( + xr.open_dataset(file_path) + ) + reader = CBAMNetCDFReader(dataset, [dataset_attr], p, dt, dt) + reader.read_historical_data(dt, dt) + mock_open.assert_called_once() + self.assertEqual(len(reader.xrDatasets), 1) + data_value = reader.get_data_values() + self.assertEqual(len(data_value.results), 1) + self.assertEqual( + data_value.results[0].values['max_total_temperature'], + 33.371735) diff --git a/django_project/gap/tests/providers/test_salient.py b/django_project/gap/tests/providers/test_salient.py new file mode 100644 index 0000000..e320446 --- /dev/null +++ b/django_project/gap/tests/providers/test_salient.py @@ -0,0 +1,96 @@ +# coding=utf-8 +""" +Tomorrow Now GAP. + +.. note:: Unit tests for Salient Reader. 
+""" + +from django.test import TestCase +from datetime import datetime +import xarray as xr +from django.contrib.gis.geos import Point +from unittest.mock import Mock, patch + +from core.settings.utils import absolute_path +from gap.utils.netcdf import ( + NetCDFProvider, +) +from gap.providers import ( + SalientNetCDFReader +) +from gap.factories import ( + ProviderFactory, + DatasetFactory, + DatasetAttributeFactory, + AttributeFactory, + NetCDFFileFactory +) + + +class TestSalientNetCDFReader(TestCase): + """Unit test for Salient NetCDFReader class.""" + + @patch('gap.models.NetCDFFile.objects.filter') + @patch('xarray.open_dataset') + def test_read_forecast_data_empty(self, mock_open_dataset, mock_filter): + """Test for reading forecast data.""" + dataset = Mock() + attributes = [] + point = Mock() + start_date = datetime(2023, 1, 1) + end_date = datetime(2023, 1, 2) + reader = SalientNetCDFReader( + dataset, attributes, point, start_date, end_date) + mock_filter.return_value.order_by.return_value.last.return_value = ( + None + ) + reader.read_forecast_data(start_date, end_date) + self.assertEqual(reader.xrDatasets, []) + + def test_read_forecast_data(self): + """Test for reading forecast data from Salient sample.""" + dataset = DatasetFactory.create( + provider=ProviderFactory(name=NetCDFProvider.SALIENT)) + attribute1 = AttributeFactory.create( + name='Temperature Climatology', + variable_name='temp_clim') + dataset_attr1 = DatasetAttributeFactory.create( + dataset=dataset, + attribute=attribute1, + source='temp_clim' + ) + attribute2 = AttributeFactory.create( + name='Precipitation Anomaly', + variable_name='precip_anom') + dataset_attr2 = DatasetAttributeFactory.create( + dataset=dataset, + attribute=attribute2, + source='precip_anom' + ) + dt = datetime(2024, 3, 14, 0, 0, 0) + dt1 = datetime(2024, 3, 15, 0, 0, 0) + dt2 = datetime(2024, 3, 17, 0, 0, 0) + p = Point(x=29.12, y=-2.625) + NetCDFFileFactory.create( + dataset=dataset, + start_date_time=dt, + end_date_time=dt + ) + file_path = absolute_path( + 'gap', 'tests', 'netcdf', 'salient.nc' + ) + with patch.object(SalientNetCDFReader, 'open_dataset') as mock_open: + mock_open.return_value = ( + xr.open_dataset(file_path) + ) + reader = SalientNetCDFReader( + dataset, [dataset_attr1, dataset_attr2], p, dt1, dt2) + reader.read_forecast_data(dt1, dt2) + self.assertEqual(len(reader.xrDatasets), 1) + data_value = reader.get_data_values() + mock_open.assert_called_once() + self.assertEqual(len(data_value.results), 3) + self.assertEqual( + data_value.results[0].values['temp_clim'], 19.461235) + self.assertEqual( + len(data_value.results[0].values['precip_anom']), 50) diff --git a/django_project/gap/tests/providers/test_tahmo.py b/django_project/gap/tests/providers/test_tahmo.py new file mode 100644 index 0000000..3e4cca3 --- /dev/null +++ b/django_project/gap/tests/providers/test_tahmo.py @@ -0,0 +1,59 @@ +# coding=utf-8 +""" +Tomorrow Now GAP. + +.. note:: Unit tests for Tahmo Reader. 
+""" + +from django.test import TestCase +from datetime import datetime +from django.contrib.gis.geos import Point + +from gap.providers import ( + TahmoDatasetReader +) +from gap.factories import ( + ProviderFactory, + DatasetFactory, + DatasetAttributeFactory, + AttributeFactory, + StationFactory, + MeasurementFactory +) + + +class TestTahmoReader(TestCase): + """Unit test for Tahmo NetCDFReader class.""" + + def test_read_historical_data(self): + """Test for reading historical data from Tahmo.""" + dataset = DatasetFactory.create( + provider=ProviderFactory(name='Tahmo')) + attribute = AttributeFactory.create( + name='Surface air temperature', + variable_name='surface_air_temperature') + dataset_attr = DatasetAttributeFactory.create( + dataset=dataset, + attribute=attribute, + source='surface_air_temperature' + ) + dt = datetime(2019, 11, 1, 0, 0, 0) + p = Point(x=26.97, y=-12.56, srid=4326) + station = StationFactory.create( + geometry=p, + provider=dataset.provider + ) + MeasurementFactory.create( + station=station, + dataset_attribute=dataset_attr, + date_time=dt, + value=100 + ) + reader = TahmoDatasetReader( + dataset, [dataset_attr], p, dt, dt) + reader.read_historical_data(dt, dt) + data_value = reader.get_data_values() + self.assertEqual(len(data_value.results), 1) + self.assertEqual( + data_value.results[0].values['surface_air_temperature'], + 100) diff --git a/django_project/gap/tests/test_task_netcdf_sync.py b/django_project/gap/tests/test_task_netcdf_sync.py index 68f2410..622a072 100644 --- a/django_project/gap/tests/test_task_netcdf_sync.py +++ b/django_project/gap/tests/test_task_netcdf_sync.py @@ -9,13 +9,13 @@ from unittest.mock import patch, MagicMock from gap.models import ( - DatasetType, DatasetTimeStep, DatasetStore, Unit, Attribute, DatasetAttribute, - NetCDFFile + NetCDFFile, + CastType ) from gap.utils.netcdf import ( NetCDFProvider, NetCDFVariable, CBAM_VARIABLES, SALIENT_VARIABLES @@ -41,8 +41,9 @@ def test_initialize_provider_cbam(self): provider, dataset = initialize_provider('CBAM') self.assertEqual(provider.name, 'CBAM') - self.assertEqual(dataset.name, 'CBAM') - self.assertEqual(dataset.type, DatasetType.CLIMATE_REANALYSIS) + self.assertEqual(dataset.name, 'CBAM Climate Reanalysis') + self.assertEqual(dataset.type.name, 'Climate Reanalysis') + self.assertEqual(dataset.type.type, CastType.HISTORICAL) self.assertEqual(dataset.time_step, DatasetTimeStep.DAILY) self.assertEqual(dataset.store_type, DatasetStore.NETCDF) @@ -51,8 +52,9 @@ def test_initialize_provider_salient(self): provider, dataset = initialize_provider('Salient') self.assertEqual(provider.name, 'Salient') - self.assertEqual(dataset.name, 'Salient') - self.assertEqual(dataset.type, DatasetType.SEASONAL_FORECAST) + self.assertEqual(dataset.name, 'Salient Seasonal Forecast') + self.assertEqual(dataset.type.name, 'Seasonal Forecast') + self.assertEqual(dataset.type.type, CastType.FORECAST) self.assertEqual(dataset.time_step, DatasetTimeStep.DAILY) self.assertEqual(dataset.store_type, DatasetStore.NETCDF) diff --git a/django_project/gap/tests/test_utils_netcdf.py b/django_project/gap/tests/test_utils_netcdf.py index 2f12729..d962bb6 100644 --- a/django_project/gap/tests/test_utils_netcdf.py +++ b/django_project/gap/tests/test_utils_netcdf.py @@ -8,26 +8,28 @@ from django.test import TestCase from datetime import datetime import numpy as np -import xarray as xr from django.contrib.gis.geos import Point from unittest.mock import Mock, MagicMock, patch -from core.settings.utils import absolute_path 
+from gap.utils.reader import ( + DatasetTimelineValue, + DatasetReaderValue +) from gap.utils.netcdf import ( NetCDFProvider, daterange_inc, - DatasetTimelineValue, - DatasetReaderValue, BaseNetCDFReader, +) +from gap.providers import ( CBAMNetCDFReader, - SalientNetCDFReader + SalientNetCDFReader, + get_reader_from_dataset ) from gap.factories import ( ProviderFactory, DatasetFactory, DatasetAttributeFactory, - AttributeFactory, - NetCDFFileFactory + AttributeFactory ) @@ -155,163 +157,13 @@ def test_from_dataset(self): """Test for creating NetCDFReader from dataset.""" dataset1 = DatasetFactory.create( provider=ProviderFactory(name=NetCDFProvider.CBAM)) - reader = BaseNetCDFReader.from_dataset(dataset1) + reader = get_reader_from_dataset(dataset1) self.assertEqual(reader, CBAMNetCDFReader) dataset2 = DatasetFactory.create( provider=ProviderFactory(name=NetCDFProvider.SALIENT)) - reader = BaseNetCDFReader.from_dataset(dataset2) + reader = get_reader_from_dataset(dataset2) self.assertEqual(reader, SalientNetCDFReader) # invalid type dataset3 = DatasetFactory.create() with self.assertRaises(TypeError): - BaseNetCDFReader.from_dataset(dataset3) - - -class TestCBAMNetCDFReader(TestCase): - """Unit test for CBAM NetCDFReader class.""" - - @patch('gap.utils.netcdf.daterange_inc', - return_value=[datetime(2023, 1, 1)]) - @patch('gap.models.NetCDFFile.objects.filter') - def test_read_historical_data_empty( - self, mock_filter, mock_daterange_inc): - """Test for reading historical data that returns empty.""" - dataset = Mock() - attributes = [] - point = Mock() - start_date = datetime(2023, 1, 1) - end_date = datetime(2023, 1, 2) - reader = CBAMNetCDFReader( - dataset, attributes, point, start_date, end_date) - mock_filter.return_value.first.return_value = None - reader.read_historical_data() - self.assertEqual(reader.xrDatasets, []) - - def test_read_forecast_data_not_implemented(self): - """Test for reading forecast data.""" - dataset = Mock() - attributes = [] - point = Mock() - start_date = datetime(2023, 1, 1) - end_date = datetime(2023, 1, 2) - reader = CBAMNetCDFReader( - dataset, attributes, point, start_date, end_date) - with self.assertRaises(NotImplementedError): - reader.read_forecast_data() - - def test_read_historical_data(self): - """Test for reading historical data from CBAM sample.""" - dataset = DatasetFactory.create( - provider=ProviderFactory(name=NetCDFProvider.CBAM)) - attribute = AttributeFactory.create( - name='Max Total Temperature', - variable_name='max_total_temperature') - dataset_attr = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute, - source='max_total_temperature' - ) - dt = datetime(2019, 11, 1, 0, 0, 0) - p = Point(x=26.97, y=-12.56) - NetCDFFileFactory.create( - dataset=dataset, - start_date_time=dt, - end_date_time=dt - ) - file_path = absolute_path( - 'gap', 'tests', 'netcdf', 'cbam.nc' - ) - with patch.object(CBAMNetCDFReader, 'open_dataset') as mock_open: - mock_open.return_value = ( - xr.open_dataset(file_path) - ) - reader = CBAMNetCDFReader(dataset, [dataset_attr], p, dt, dt) - reader.read_historical_data() - mock_open.assert_called_once() - self.assertEqual(len(reader.xrDatasets), 1) - data_value = reader.get_data_values() - self.assertEqual(len(data_value.results), 1) - self.assertEqual( - data_value.results[0].values['max_total_temperature'], - 33.371735) - - -class TestSalientNetCDFReader(TestCase): - """Unit test for Salient NetCDFReader class.""" - - def test_read_historical_data_not_implemented(self): - """Test for reading 
historical data.""" - dataset = Mock() - attributes = [] - point = Mock() - start_date = datetime(2023, 1, 1) - end_date = datetime(2023, 1, 2) - reader = SalientNetCDFReader( - dataset, attributes, point, start_date, end_date) - with self.assertRaises(NotImplementedError): - reader.read_historical_data() - - @patch('gap.models.NetCDFFile.objects.filter') - @patch('xarray.open_dataset') - def test_read_forecast_data_empty(self, mock_open_dataset, mock_filter): - """Test for reading forecast data.""" - dataset = Mock() - attributes = [] - point = Mock() - start_date = datetime(2023, 1, 1) - end_date = datetime(2023, 1, 2) - reader = SalientNetCDFReader( - dataset, attributes, point, start_date, end_date) - mock_filter.return_value.order_by.return_value.last.return_value = ( - None - ) - reader.read_forecast_data() - self.assertEqual(reader.xrDatasets, []) - - def test_read_forecast_data(self): - """Test for reading forecast data from Salient sample.""" - dataset = DatasetFactory.create( - provider=ProviderFactory(name=NetCDFProvider.SALIENT)) - attribute1 = AttributeFactory.create( - name='Temperature Climatology', - variable_name='temp_clim') - dataset_attr1 = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute1, - source='temp_clim' - ) - attribute2 = AttributeFactory.create( - name='Precipitation Anomaly', - variable_name='precip_anom') - dataset_attr2 = DatasetAttributeFactory.create( - dataset=dataset, - attribute=attribute2, - source='precip_anom' - ) - dt = datetime(2024, 3, 14, 0, 0, 0) - dt1 = datetime(2024, 3, 15, 0, 0, 0) - dt2 = datetime(2024, 3, 17, 0, 0, 0) - p = Point(x=29.12, y=-2.625) - NetCDFFileFactory.create( - dataset=dataset, - start_date_time=dt, - end_date_time=dt - ) - file_path = absolute_path( - 'gap', 'tests', 'netcdf', 'salient.nc' - ) - with patch.object(SalientNetCDFReader, 'open_dataset') as mock_open: - mock_open.return_value = ( - xr.open_dataset(file_path) - ) - reader = SalientNetCDFReader( - dataset, [dataset_attr1, dataset_attr2], p, dt1, dt2) - reader.read_forecast_data() - self.assertEqual(len(reader.xrDatasets), 1) - data_value = reader.get_data_values() - mock_open.assert_called_once() - self.assertEqual(len(data_value.results), 3) - self.assertEqual( - data_value.results[0].values['temp_clim'], 19.461235) - self.assertEqual( - len(data_value.results[0].values['precip_anom']), 50) + get_reader_from_dataset(dataset3) diff --git a/django_project/gap/utils/netcdf.py b/django_project/gap/utils/netcdf.py index 6608c4b..651fdf7 100644 --- a/django_project/gap/utils/netcdf.py +++ b/django_project/gap/utils/netcdf.py @@ -9,7 +9,6 @@ from typing import List from datetime import datetime, timedelta from django.contrib.gis.geos import Point -import numpy as np import xarray as xr from xarray.core.dataset import Dataset as xrDataset import fsspec @@ -20,6 +19,9 @@ DatasetAttribute, NetCDFFile ) +from gap.utils.reader import ( + BaseDatasetReader +) class NetCDFProvider: @@ -86,11 +88,11 @@ def __init__(self, name, desc, unit=None) -> None: ), 'max_total_temperature': NetCDFVariable( 'Max Total Temperature', - 'Maximum temperature (0000:2300)', 'Deg C' + 'Maximum temperature (0000:2300)', '°C' ), 'max_night_temperature': NetCDFVariable( 'Max Night Temperature', - 'Maximum night-time temperature (1900:0500)', 'Deg C' + 'Maximum night-time temperature (1900:0500)', '°C' ), 'average_solar_irradiance': NetCDFVariable( 'Average Solar Irradiance', @@ -103,11 +105,11 @@ def __init__(self, name, desc, unit=None) -> None: ), 'min_night_temperature': 
NetCDFVariable(
         'Min Night Temperature',
-        'Minimum night-time temperature (1900:0500)', 'Deg C'
+        'Minimum night-time temperature (1900:0500)', '°C'
     ),
     'max_day_temperature': NetCDFVariable(
         'Max Day Temperature',
-        'Maximum day-time temperature (0600:1800)', 'Deg C'
+        'Maximum day-time temperature (0600:1800)', '°C'
     ),
     'total_rainfall': NetCDFVariable(
         'Total Rainfall',
@@ -115,11 +117,11 @@ def __init__(self, name, desc, unit=None) -> None:
     ),
     'min_day_temperature': NetCDFVariable(
         'Min Day Temperature',
-        'Minumum day-time temperature (0600:1800)', 'Deg C'
+        'Minimum day-time temperature (0600:1800)', '°C'
     ),
     'min_total_temperature': NetCDFVariable(
         'Min Total Temperature',
-        'Minumum temperature (0000:2300)', 'Deg C'
+        'Minimum temperature (0000:2300)', '°C'
     ),
 }
@@ -129,19 +131,19 @@ def __init__(self, name, desc, unit=None) -> None:
         'Precipitation Climatology', None, 'mm day-1'
     ),
     'temp_clim': NetCDFVariable(
-        'Temperature Climatology', None, 'Deg C'
+        'Temperature Climatology', None, '°C'
     ),
     'precip_anom': NetCDFVariable(
         'Precipitation Anomaly', None, 'mm day-1'
     ),
     'temp_anom': NetCDFVariable(
-        'Temperature Anomaly', None, 'Deg C'
+        'Temperature Anomaly', None, '°C'
     ),
     'precip': NetCDFVariable(
         'Precipitation', None, 'mm day-1'
    ),
     'temp': NetCDFVariable(
-        'Temperature', None, 'Deg C'
+        'Temperature', None, '°C'
     ),
 }
@@ -161,61 +163,7 @@ def daterange_inc(start_date: datetime, end_date: datetime):
         yield start_date + timedelta(n)
 
 
-class DatasetTimelineValue:
-    """Class representing data value for given datetime."""
-
-    def __init__(self, datetime: np.datetime64, values: dict) -> None:
-        """Initialize DatasetTimelineValue object.
-
-        :param datetime: datetime of data
-        :type datetime: np.datetime64
-        :param values: Dictionary of variable and its value
-        :type values: dict
-        """
-        self.datetime = datetime
-        self.values = values
-
-    def to_dict(self):
-        """Convert into dict.
-
-        :return: Dictionary of datetime and values
-        :rtype: dict
-        """
-        return {
-            'datetime': np.datetime_as_string(self.datetime, timezone='UTC'),
-            'values': self.values
-        }
-
-
-class DatasetReaderValue:
-    """Class representing all values from reader."""
-
-    def __init__(
-            self, metadata: dict,
-            results: List[DatasetTimelineValue]) -> None:
-        """Initialize DatasetReaderValue object.
-
-        :param metadata: Dictionary of metadata
-        :type metadata: dict
-        :param results: Data value list
-        :type results: List[DatasetTimelineValue]
-        """
-        self.metadata = metadata
-        self.results = results
-
-    def to_dict(self):
-        """Convert into dict.
-
-        :return: Dictionary of metadata and data
-        :rtype: dict
-        """
-        return {
-            'metadata': self.metadata,
-            'data': [result.to_dict() for result in self.results]
-        }
-
-
-class BaseNetCDFReader:
+class BaseNetCDFReader(BaseDatasetReader):
     """Base class for NetCDF File Reader."""
 
     date_variable = 'date'
@@ -236,23 +184,9 @@ def __init__(
         :param end_date: End date time filter
         :type end_date: datetime
         """
-        self.dataset = dataset
-        self.attributes = attributes
-        self.point = point
-        self.start_date = start_date
-        self.end_date = end_date
+        super().__init__(dataset, attributes, point, start_date, end_date)
         self.xrDatasets = []
 
-    def add_attribute(self, attribute: DatasetAttribute):
-        """Add a new attribuute to be read.
- - :param attribute: Dataset Attribute - :type attribute: DatasetAttribute - """ - is_existing = [a for a in self.attributes if a.id == attribute.id] - if len(is_existing) == 0: - self.attributes.append(attribute) - def setup_netcdf_reader(self): """Initialize s3fs.""" self.s3 = NetCDFProvider.get_s3_variables(self.dataset.provider) @@ -281,7 +215,9 @@ def open_dataset(self, netcdf_file: NetCDFFile) -> xrDataset: netcdf_url += f'{netcdf_file.name}' return xr.open_dataset(self.fs.open(netcdf_url)) - def read_variables(self, dataset: xrDataset) -> xrDataset: + def read_variables( + self, dataset: xrDataset, start_date: datetime = None, + end_date: datetime = None) -> xrDataset: """Read data from list variable with filter from given Point. :param dataset: xArray Dataset object @@ -293,202 +229,3 @@ def read_variables(self, dataset: xrDataset) -> xrDataset: variables.append(self.date_variable) return dataset[variables].sel( lat=self.point.y, lon=self.point.x, method='nearest') - - def get_data_values(self) -> DatasetReaderValue: - """Fetch data values from list of xArray Dataset object. - - :return: Data Value. - :rtype: DatasetReaderValue - """ - pass - - def read_historical_data(self): - """Read historical data from dataset.""" - pass - - def read_forecast_data(self): - """Read forecast data from dataset.""" - pass - - @classmethod - def from_dataset(cls, dataset: Dataset): - """Create a new Reader from given dataset. - - :param dataset: Dataset to be read - :type dataset: Dataset - :raises TypeError: if provider is neither CBAM or Salient - :return: Reader Class Type - :rtype: CBAMNetCDFReader|SalientNetCDFReader - """ - if dataset.provider.name == NetCDFProvider.CBAM: - return CBAMNetCDFReader - elif dataset.provider.name == NetCDFProvider.SALIENT: - return SalientNetCDFReader - else: - raise TypeError( - f'Unsupported provider name: {dataset.provider.name}' - ) - - -class CBAMNetCDFReader(BaseNetCDFReader): - """Class to read NetCDF file from CBAM provider.""" - - def __init__( - self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: - """Initialize CBAMNetCDFReader class. - - :param dataset: Dataset from CBAM provider - :type dataset: Dataset - :param attributes: List of attributes to be queried - :type attributes: List[DatasetAttribute] - :param point: Location to be queried - :type point: Point - :param start_date: Start date time filter - :type start_date: datetime - :param end_date: End date time filter - :type end_date: datetime - """ - super().__init__(dataset, attributes, point, start_date, end_date) - - def read_historical_data(self): - """Read historical data from dataset.""" - self.setup_netcdf_reader() - self.xrDatasets = [] - for filter_date in daterange_inc(self.start_date, self.end_date): - netcdf_file = NetCDFFile.objects.filter( - dataset=self.dataset, - start_date_time__gte=filter_date, - end_date_time__lte=filter_date - ).first() - if netcdf_file is None: - continue - ds = self.open_dataset(netcdf_file) - val = self.read_variables(ds) - self.xrDatasets.append(val) - - def read_forecast_data(self): - """Read forecast data from dataset.""" - raise NotImplementedError( - 'CBAM does not have forecast data implementation!') - - def get_data_values(self) -> DatasetReaderValue: - """Fetch data values from list of xArray Dataset object. - - :return: Data Value. 
- :rtype: DatasetReaderValue - """ - results = [] - val = xr.combine_nested( - self.xrDatasets, concat_dim=[self.date_variable]) - for dt_idx, dt in enumerate(val[self.date_variable].values): - value_data = {} - for attribute in self.attributes: - value_data[attribute.attribute.variable_name] = ( - val[attribute.source].values[dt_idx] - ) - results.append(DatasetTimelineValue( - dt, - value_data - )) - metadata = { - 'dataset': self.dataset.name, - 'start_date': self.start_date.isoformat(), - 'end_date': self.end_date.isoformat() - } - return DatasetReaderValue(metadata, results) - - -class SalientNetCDFReader(BaseNetCDFReader): - """Class to read NetCDF file from Salient provider.""" - - date_variable = 'forecast_day' - - def __init__( - self, dataset: Dataset, attributes: List[DatasetAttribute], - point: Point, start_date: datetime, end_date: datetime) -> None: - """Initialize CBAMNetCDFReader class. - - :param dataset: Dataset from Salient provider - :type dataset: Dataset - :param attributes: List of attributes to be queried - :type attributes: List[DatasetAttribute] - :param point: Location to be queried - :type point: Point - :param start_date: Start date time filter - :type start_date: datetime - :param end_date: End date time filter - :type end_date: datetime - """ - super().__init__(dataset, attributes, point, start_date, end_date) - - def read_historical_data(self): - """Read historical data from dataset.""" - raise NotImplementedError( - 'Salient does not have historical data implementation!') - - def read_forecast_data(self): - """Read forecast data from dataset.""" - self.setup_netcdf_reader() - self.xrDatasets = [] - netcdf_file = NetCDFFile.objects.filter( - dataset=self.dataset - ).order_by('id').last() - if netcdf_file is None: - return - ds = self.open_dataset(netcdf_file) - val = self.read_variables(ds) - self.xrDatasets.append(val) - - def read_variables(self, dataset: xrDataset) -> xrDataset: - """Read data from list variable with filter from given Point. - - :param dataset: xArray Dataset object - :type dataset: xrDataset - :return: filtered xArray Dataset object - :rtype: xrDataset - """ - start_dt = np.datetime64(self.start_date) - end_dt = np.datetime64(self.end_date) - variables = [a.source for a in self.attributes] - variables.append(self.date_variable) - val = dataset[variables].sel( - lat=self.point.y, lon=self.point.x, - method='nearest' - ).where( - (dataset[self.date_variable] >= start_dt) & - (dataset[self.date_variable] <= end_dt), - drop=True - ) - return val - - def get_data_values(self) -> DatasetReaderValue: - """Fetch data values from list of xArray Dataset object. - - :return: Data Value. 
- :rtype: DatasetReaderValue - """ - # forecast will always use latest dataset - val = self.xrDatasets[0] - results = [] - for dt_idx, dt in enumerate(val[self.date_variable].values): - value_data = {} - for attribute in self.attributes: - if 'ensemble' in val[attribute.source].dims: - value_data[attribute.attribute.variable_name] = ( - val[attribute.source].values[:, dt_idx] - ) - else: - value_data[attribute.attribute.variable_name] = ( - val[attribute.source].values[dt_idx] - ) - results.append(DatasetTimelineValue( - dt, - value_data - )) - metadata = { - 'dataset': self.dataset.name, - 'start_date': self.start_date.isoformat(), - 'end_date': self.end_date.isoformat() - } - return DatasetReaderValue(metadata, results) diff --git a/django_project/gap/utils/reader.py b/django_project/gap/utils/reader.py new file mode 100644 index 0000000..be1f948 --- /dev/null +++ b/django_project/gap/utils/reader.py @@ -0,0 +1,176 @@ +# coding=utf-8 +""" +Tomorrow Now GAP. + +.. note:: Helper for reading dataset +""" + +from typing import Union, List +import numpy as np +from datetime import datetime +import pytz +from django.contrib.gis.geos import Point + +from gap.models import ( + Dataset, + DatasetAttribute +) + + +class DatasetTimelineValue: + """Class representing data value for given datetime.""" + + def __init__( + self, datetime: Union[np.datetime64, datetime], + values: dict) -> None: + """Initialize DatasetTimelineValue object. + + :param datetime: datetime of data + :type datetime: np.datetime64 or datetime + :param values: Dictionary of variable and its value + :type values: dict + """ + self.datetime = datetime + self.values = values + + def _datetime_as_str(self): + """Convert datetime object to string.""" + if isinstance(self.datetime, np.datetime64): + return np.datetime_as_string( + self.datetime, unit='s', timezone='UTC') + return self.datetime.isoformat(timespec='seconds') + + def to_dict(self): + """Convert into dict. + + :return: Dictionary of datetime and values + :rtype: dict + """ + return { + 'datetime': self._datetime_as_str(), + 'values': self.values + } + + +class DatasetReaderValue: + """Class representing all values from reader.""" + + def __init__( + self, metadata: dict, + results: List[DatasetTimelineValue]) -> None: + """Initialize DatasetReaderValue object. + + :param metadata: Dictionary of metadata + :type metadata: dict + :param results: Data value list + :type results: List[DatasetTimelineValue] + """ + self.metadata = metadata + self.results = results + + def to_dict(self): + """Convert into dict. + + :return: Dictionary of metadata and data + :rtype: dict + """ + return { + 'metadata': self.metadata, + 'data': [result.to_dict() for result in self.results] + } + + +class BaseDatasetReader: + """Base class for Dataset Reader.""" + + def __init__( + self, dataset: Dataset, attributes: List[DatasetAttribute], + point: Point, start_date: datetime, end_date: datetime) -> None: + """Initialize BaseDatasetReader class. 
+
+        :param dataset: Dataset for reading
+        :type dataset: Dataset
+        :param attributes: List of attributes to be queried
+        :type attributes: List[DatasetAttribute]
+        :param point: Location to be queried
+        :type point: Point
+        :param start_date: Start date time filter
+        :type start_date: datetime
+        :param end_date: End date time filter
+        :type end_date: datetime
+        """
+        self.dataset = dataset
+        self.attributes = attributes
+        self.point = point
+        self.start_date = start_date
+        self.end_date = end_date
+
+    def add_attribute(self, attribute: DatasetAttribute):
+        """Add a new attribute to be read.
+
+        :param attribute: Dataset Attribute
+        :type attribute: DatasetAttribute
+        """
+        is_existing = [a for a in self.attributes if a.id == attribute.id]
+        if len(is_existing) == 0:
+            self.attributes.append(attribute)
+
+    def get_attributes_metadata(self) -> dict:
+        """Get attributes metadata (unit and desc).
+
+        :return: Dictionary of attribute and its metadata
+        :rtype: dict
+        """
+        results = {}
+        for attrib in self.attributes:
+            results[attrib.attribute.variable_name] = {
+                'units': attrib.attribute.unit.name,
+                'longname': attrib.attribute.name
+            }
+        return results
+
+    def read(self):
+        """Read values from dataset."""
+        today = datetime.now(tz=pytz.UTC)
+        if self.start_date < today:
+            self.read_historical_data(
+                self.start_date,
+                self.end_date if self.end_date < today else today
+            )
+            if self.end_date > today:
+                self.read_forecast_data(
+                    today, self.end_date
+                )
+        else:
+            self.read_forecast_data(
+                self.start_date, self.end_date
+            )
+
+    def get_data_values(self) -> DatasetReaderValue:
+        """Fetch data values from dataset.
+
+        :return: Data Value.
+        :rtype: DatasetReaderValue
+        """
+        pass
+
+
+    def read_historical_data(self, start_date: datetime, end_date: datetime):
+        """Read historical data from dataset.
+
+        :param start_date: start date for reading historical data
+        :type start_date: datetime
+        :param end_date: end date for reading historical data
+        :type end_date: datetime
+        """
+        pass
+
+    def read_forecast_data(self, start_date: datetime, end_date: datetime):
+        """Read forecast data from dataset.
+
+        :param start_date: start date for reading forecast data
+        :type start_date: datetime
+        :param end_date: end date for reading forecast data
+        :type end_date: datetime
+        """
+        pass
diff --git a/django_project/gap_api/api_views/measurement.py b/django_project/gap_api/api_views/measurement.py
index 64b4933..c7252d9 100644
--- a/django_project/gap_api/api_views/measurement.py
+++ b/django_project/gap_api/api_views/measurement.py
@@ -5,7 +5,7 @@
 ..
note:: Measurement APIs """ -from typing import Dict, List +from typing import Dict import pytz from datetime import date, datetime, time from drf_yasg.utils import swagger_auto_schema @@ -13,23 +13,24 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.views import APIView +from django.db.models.functions import Lower from django.contrib.gis.geos import Point from gap.models import ( Attribute, - DatasetAttribute, - DatasetStore, - DatasetType + DatasetAttribute ) -from gap.utils.netcdf import BaseNetCDFReader, DatasetReaderValue +from gap.utils.reader import DatasetReaderValue, BaseDatasetReader from gap_api.serializers.common import APIErrorSerializer from gap_api.utils.helper import ApiTag +from gap.providers import get_reader_from_dataset -class BaseMeasurementAPI(APIView): - """Base API class for Measurement.""" +class MeasurementAPI(APIView): + """API class for measurement.""" date_format = '%Y-%m-%d' + permission_classes = [IsAuthenticated] def _get_attribute_filter(self): """Get list of attributes in the query parameter. @@ -65,28 +66,32 @@ def _get_location_filter(self): lat = self.request.GET.get('lat', None) if lon is None or lat is None: return None - return Point(x=float(lon), y=float(lat)) + return Point(x=float(lon), y=float(lat), srid=4326) - def _get_dataset_types(self) -> List[DatasetType]: - """Get dataset types that the API will query. + def _get_provider_filter(self): + """Get provider name filter in the request parameters. - :return: List of DatasetType - :rtype: List[DatasetType] + :return: List of provider name lowercase + :rtype: List[str] """ - return [] + providers = self.request.GET.get('providers', None) + if providers is None: + return None + return providers.lower().split(',') - def _read_data(self, reader: BaseNetCDFReader) -> DatasetReaderValue: + def _read_data(self, reader: BaseDatasetReader) -> DatasetReaderValue: """Read data from given reader. - :param reader: NetCDF File Reader - :type reader: BaseNetCDFReader + :param reader: Dataset Reader + :type reader: BaseDatasetReader :return: data value :rtype: DatasetReaderValue """ - return DatasetReaderValue({}, []) + reader.read() + return reader.get_data_values() def get_response_data(self): - """Read data from NetCDF File. + """Read data from dataset. 
 
         :return: Dictionary of metadata and data
         :rtype: dict
@@ -99,53 +104,49 @@ def get_response_data(self):
         )
         end_dt = datetime.combine(
             self._get_date_filter('end_date'),
-            time.min, tzinfo=pytz.UTC
+            time.max, tzinfo=pytz.UTC
         )
         data = {}
         if location is None:
             return data
         dataset_attributes = DatasetAttribute.objects.filter(
-            attribute__in=attributes,
-            dataset__type__in=self._get_dataset_types()
+            attribute__in=attributes
         )
-        dataset_dict: Dict[int, BaseNetCDFReader] = {}
+        provider_filter = self._get_provider_filter()
+        if provider_filter:
+            dataset_attributes = dataset_attributes.annotate(
+                provider_name=Lower('dataset__provider__name')
+            ).filter(
+                provider_name__in=provider_filter
+            )
+        dataset_dict: Dict[int, BaseDatasetReader] = {}
         for da in dataset_attributes:
             if da.dataset.id in dataset_dict:
                 dataset_dict[da.dataset.id].add_attribute(da)
-            elif da.dataset.store_type == DatasetStore.NETCDF:
-                reader = BaseNetCDFReader.from_dataset(da.dataset)
+            else:
+                reader = get_reader_from_dataset(da.dataset)
                 dataset_dict[da.dataset.id] = reader(
                     da.dataset, [da], location, start_dt, end_dt)
         for reader in dataset_dict.values():
-            values = self._read_data(reader)
-            data.update(values.to_dict())
+            values = self._read_data(reader).to_dict()
+            if 'metadata' in data:
+                data['metadata']['dataset'].append(
+                    reader.dataset.name)
+                data['metadata']['attributes'].update(
+                    reader.get_attributes_metadata())
+            else:
+                data['metadata'] = values['metadata']
+                data['metadata']['attributes'] = (
+                    reader.get_attributes_metadata()
+                )
+            if 'data' in data:
+                data['data'][reader.dataset.name] = values['data']
+            else:
+                data['data'] = {
+                    reader.dataset.name: values['data']
+                }
         return data
 
-
-class HistoricalAPI(BaseMeasurementAPI):
-    """Fetch historical by attribute and date range."""
-
-    permission_classes = [IsAuthenticated]
-
-    def _get_dataset_types(self) -> List[DatasetType]:
-        """Get dataset types that the API will query.
-
-        :return: List of DatasetType
-        :rtype: List[DatasetType]
-        """
-        return [DatasetType.CLIMATE_REANALYSIS]
-
-    def _read_data(self, reader: BaseNetCDFReader) -> DatasetReaderValue:
-        """Read hitorical data from given reader.
-
-        :param reader: NetCDF File Reader
-        :type reader: BaseNetCDFReader
-        :return: data value
-        :rtype: DatasetReaderValue
-        """
-        reader.read_historical_data()
-        return reader.get_data_values()
-
@@ -173,79 +174,11 @@ def _read_data(self, reader: BaseNetCDFReader) -> DatasetReaderValue:
     @swagger_auto_schema(
         operation_id='get-measurement',
         tags=[ApiTag.Measurement],
         manual_parameters=[
             openapi.Parameter(
                 'attributes', openapi.IN_QUERY,
                 description='List of attribute name', type=openapi.TYPE_STRING
             ),
             openapi.Parameter(
                 'start_date', openapi.IN_QUERY,
                 description='Start Date',
                 type=openapi.TYPE_STRING
             ),
             openapi.Parameter(
                 'end_date', openapi.IN_QUERY,
                 description='End Date',
                 type=openapi.TYPE_STRING
             ),
             openapi.Parameter(
                 'lat', openapi.IN_QUERY,
                 description='Latitude',
                 type=openapi.TYPE_NUMBER
             ),
             openapi.Parameter(
                 'lon', openapi.IN_QUERY,
                 description='Longitude',
                 type=openapi.TYPE_NUMBER
-            )
-        ],
-        responses={
-            200: openapi.Schema(
-                description=(
-                    'Measurement data'
-                ),
-                type=openapi.TYPE_OBJECT,
-                properties={}
-            ),
-            400: APIErrorSerializer
-        }
-    )
-    def get(self, request, *args, **kwargs):
-        """Fetch historical data by attributes and date range filter."""
-        return Response(
-            status=200,
-            data=self.get_response_data()
-        )
-
-
-class ForecastAPI(BaseMeasurementAPI):
-    """Fetch forecast by attribute and date range."""
-
-    permission_classes = [IsAuthenticated]
-
-    def _get_dataset_types(self) -> List[DatasetType]:
-        """Get dataset types that the API will query.
-
-        :return: List of DatasetType
-        :rtype: List[DatasetType]
-        """
-        return [DatasetType.SEASONAL_FORECAST,
-                DatasetType.SHORT_TERM_FORECAST]
-
-    def _read_data(self, reader: BaseNetCDFReader) -> DatasetReaderValue:
-        """Read forecast data from given reader.
-
-        :param reader: NetCDF File Reader
-        :type reader: BaseNetCDFReader
-        :return: data value
-        :rtype: DatasetReaderValue
-        """
-        reader.read_forecast_data()
-        return reader.get_data_values()
-
-    @swagger_auto_schema(
-        operation_id='get-forecast',
-        tags=[ApiTag.Measurement],
-        manual_parameters=[
-            openapi.Parameter(
-                'attributes', openapi.IN_QUERY,
-                description='List of attribute name', type=openapi.TYPE_STRING
             ),
             openapi.Parameter(
-                'start_date', openapi.IN_QUERY,
-                description='Start Date',
+                'providers', openapi.IN_QUERY,
+                description='List of provider name',
                 type=openapi.TYPE_STRING
-            ),
-            openapi.Parameter(
-                'end_date', openapi.IN_QUERY,
-                description='End Date',
-                type=openapi.TYPE_STRING
-            ),
-            openapi.Parameter(
-                'lat', openapi.IN_QUERY,
-                description='Latitude',
-                type=openapi.TYPE_NUMBER
-            ),
-            openapi.Parameter(
-                'lon', openapi.IN_QUERY,
-                description='Longitude',
-                type=openapi.TYPE_NUMBER
             )
         ],
         responses={
@@ -260,7 +193,7 @@ def _read_data(self, reader: BaseNetCDFReader) -> DatasetReaderValue:
         }
     )
     def get(self, request, *args, **kwargs):
-        """Fetch forecast by attributes and date range filter."""
+        """Fetch measurement data by attributes and date range filter."""
         return Response(
             status=200,
             data=self.get_response_data()
         )
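Note: because get_reader_from_dataset is patched where the view looks it up, the view instantiates MockDatasetReader and the test can assert against a fully deterministic payload. Under that mock the response body has roughly the shape sketched below; only the keys the test asserts on are certain, and the datetime field format is an assumption about how DatasetTimelineValue serializes.

# Illustrative payload for a single mocked dataset; '<dataset name>'
# stands in for the factory-generated name.
expected = {
    'metadata': {
        'dataset': ['<dataset name>'],
        # filled from reader.get_attributes_metadata()
        'attributes': {}
    },
    'data': {
        '<dataset name>': [
            {
                'datetime': '2024-04-01T00:00:00+00:00',  # assumed format
                'values': {'test': 100}
            }
        ]
    }
}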
diff --git a/django_project/gap_api/tests/test_measurement_api.py b/django_project/gap_api/tests/test_measurement_api.py
index 5b7facf..067db68 100644
--- a/django_project/gap_api/tests/test_measurement_api.py
+++ b/django_project/gap_api/tests/test_measurement_api.py
@@ -5,10 +5,42 @@
 .. note:: Unit tests for User API.
 """
+from datetime import datetime
+from typing import List
 
+from django.contrib.gis.geos import Point
 from django.urls import reverse
+from unittest.mock import patch
 
 from core.tests.common import FakeResolverMatchV1, BaseAPIViewTest
-from gap_api.api_views.measurement import HistoricalAPI, ForecastAPI
+from gap.models import DatasetAttribute
+from gap.utils.reader import (
+    BaseDatasetReader,
+    DatasetReaderValue,
+    DatasetTimelineValue
+)
+from gap_api.api_views.measurement import MeasurementAPI
+from gap.factories import DatasetAttributeFactory
+
+
+class MockDatasetReader(BaseDatasetReader):
+    """Class to mock a dataset reader."""
+
+    def __init__(self, dataset, attributes: List[DatasetAttribute],
+                 point: Point, start_date: datetime,
+                 end_date: datetime) -> None:
+        """Initialize MockDatasetReader class."""
+        super().__init__(dataset, attributes, point, start_date, end_date)
+
+    def get_data_values(self) -> DatasetReaderValue:
+        """Override data values with a mock object."""
+        return DatasetReaderValue(
+            {
+                'dataset': [self.dataset.name]
+            },
+            [DatasetTimelineValue(self.start_date, {
+                'test': 100
+            })]
+        )
 
 
 class CommonMeasurementAPITest(BaseAPIViewTest):
@@ -16,7 +48,7 @@ class CommonMeasurementAPITest(BaseAPIViewTest):
 
     def _get_measurement_request(
             self, lat=-2.215, lon=29.125, attributes='max_total_temperature',
-            start_dt='2024-04-01', end_dt='2024-04-04'):
+            start_dt='2024-04-01', end_dt='2024-04-04', providers=None):
         """Get request for Measurement API.
 
         :param lat: latitude, defaults to -2.215
@@ -33,11 +65,15 @@ def _get_measurement_request(
         :return: Request object
         :rtype: WSGIRequest
         """
-        request = self.factory.get(
-            reverse('api:v1:user-info') +
+        request_params = (
             f'?lat={lat}&lon={lon}&attributes={attributes}'
             f'&start_date={start_dt}&end_date={end_dt}'
         )
+        if providers:
+            request_params = request_params + f'&providers={providers}'
+        request = self.factory.get(
+            reverse('api:v1:get-measurement') + request_params
+        )
         request.user = self.superuser
         request.resolver_match = FakeResolverMatchV1
         return request
@@ -48,12 +84,48 @@ class HistoricalAPITest(CommonMeasurementAPITest):
 
     def test_read_historical_data_empty(self):
         """Test read historical data that returns empty."""
-        view = HistoricalAPI.as_view()
+        view = MeasurementAPI.as_view()
         request = self._get_measurement_request()
        response = view(request)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.data, {})
 
+    @patch('gap_api.api_views.measurement.get_reader_from_dataset')
+    def test_read_historical_data(self, mocked_reader):
+        """Test read historical data."""
+        view = MeasurementAPI.as_view()
+        mocked_reader.return_value = MockDatasetReader
+        attribute1 = DatasetAttributeFactory.create()
+        attribute2 = DatasetAttributeFactory.create(
+            dataset=attribute1.dataset
+        )
+        attribs = [
+            attribute1.attribute.variable_name,
+            attribute2.attribute.variable_name
+        ]
+        request = self._get_measurement_request(
+            attributes=','.join(attribs)
+        )
+        response = view(request)
+        self.assertEqual(response.status_code, 200)
+        mocked_reader.assert_called_once_with(attribute1.dataset)
+        self.assertIn('metadata', response.data)
+        self.assertIn('data', response.data)
+        response_data = response.data['data']
+        self.assertIn(attribute1.dataset.name, response_data)
+        results = response_data[attribute1.dataset.name]
+        self.assertEqual(len(results), 1)
+        self.assertIn('values', results[0])
+        self.assertIn('test', results[0]['values'])
+        self.assertEqual(100, results[0]['values']['test'])
+        # with providers
+        request = self._get_measurement_request(
+            attributes=','.join(attribs),
+            providers='test_empty'
+        )
+        response = view(request)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.data, {})
+
 
 class ForecastAPITest(CommonMeasurementAPITest):
@@ -61,7 +133,7 @@ class ForecastAPITest(CommonMeasurementAPITest):
 
     def test_read_forecast_data_empty(self):
         """Test read forecast data that returns empty."""
-        view = ForecastAPI.as_view()
+        view = MeasurementAPI.as_view()
         request = self._get_measurement_request()
         response = view(request)
         self.assertEqual(response.status_code, 200)
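Note: with the historical/ and forecast/ routes collapsed into a single measurement/ route above, clients select datasets through the optional providers parameter rather than the URL. A sketch of a client call follows, assuming the v1 router is mounted under /api/v1/ and using a placeholder host; the authentication scheme depends on the deployment, so basic auth is shown only as an example.

import requests

response = requests.get(
    'https://example.org/api/v1/measurement/',  # placeholder host
    params={
        'lat': -2.215,
        'lon': 29.125,
        'attributes': 'max_total_temperature',
        # comma-separated; matched case-insensitively by the view
        'providers': 'Provider A,Provider B',
        'start_date': '2024-04-01',
        'end_date': '2024-04-04'
    },
    auth=('user', 'password')  # endpoint requires an authenticated user
)
print(response.json())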
diff --git a/django_project/gap_api/urls/v1.py b/django_project/gap_api/urls/v1.py
index 5f465e7..684d767 100644
--- a/django_project/gap_api/urls/v1.py
+++ b/django_project/gap_api/urls/v1.py
@@ -10,7 +10,7 @@
 from rest_framework import permissions, authentication
 
 from gap_api.api_views.user import UserInfo
-from gap_api.api_views.measurement import HistoricalAPI, ForecastAPI
+from gap_api.api_views.measurement import MeasurementAPI
 from gap_api.urls.schema import CustomSchemaGenerator
 
 schema_view_v1 = get_schema_view(
@@ -50,14 +50,9 @@
 # MEASUREMENT APIs
 measurement_urls = [
     path(
-        'historical/',
-        HistoricalAPI.as_view(),
-        name='get-historical'
-    ),
-    path(
-        'forecast/',
-        ForecastAPI.as_view(),
-        name='get-forecast'
+        'measurement/',
+        MeasurementAPI.as_view(),
+        name='get-measurement'
     )
 ]
diff --git a/docs/src/developer/diagram/ground-observations-database-design-1.png b/docs/src/developer/diagram/ground-observations-database-design-1.png
index 1dc3fd1..f5c15cd 100644
Binary files a/docs/src/developer/diagram/ground-observations-database-design-1.png and b/docs/src/developer/diagram/ground-observations-database-design-1.png differ
diff --git a/docs/src/developer/diagram/solution-design-app-0018.drawio b/docs/src/developer/diagram/solution-design-app-0018.drawio
index a4bea6a..ba5101d 100644
--- a/docs/src/developer/diagram/solution-design-app-0018.drawio
+++ b/docs/src/developer/diagram/solution-design-app-0018.drawio
[drawio XML hunks omitted: the diagram markup did not survive extraction; this change updates the solution design diagram alongside the revised database design image above]