diff --git a/django_project/gap/fixtures/8.dataset_attribute.json b/django_project/gap/fixtures/8.dataset_attribute.json index df8e01f..0491e9a 100755 --- a/django_project/gap/fixtures/8.dataset_attribute.json +++ b/django_project/gap/fixtures/8.dataset_attribute.json @@ -955,5 +955,93 @@ "source_unit": 5, "ensembles": false } + }, + { + "model": "gap.datasetattribute", + "pk": 88, + "fields": { + "dataset": 4, + "attribute": 15, + "source": "total_rainfall", + "source_unit": 1, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 89, + "fields": { + "dataset": 4, + "attribute": 8, + "source": "total_evapotranspiration_flux", + "source_unit": 1, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 90, + "fields": { + "dataset": 4, + "attribute": 9, + "source": "max_temperature", + "source_unit": 5, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 91, + "fields": { + "dataset": 4, + "attribute": 17, + "source": "min_temperature", + "source_unit": 5, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 92, + "fields": { + "dataset": 4, + "attribute": 36, + "source": "precipitation_probability", + "source_unit": 6, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 93, + "fields": { + "dataset": 4, + "attribute": 54, + "source": "humidity_maximum", + "source_unit": 6, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 94, + "fields": { + "dataset": 4, + "attribute": 55, + "source": "humidity_minimum", + "source_unit": 6, + "ensembles": false + } + }, + { + "model": "gap.datasetattribute", + "pk": 95, + "fields": { + "dataset": 4, + "attribute": 56, + "source": "wind_speed_avg", + "source_unit": 8, + "ensembles": false + } } ] \ No newline at end of file diff --git a/django_project/gap/providers/__init__.py b/django_project/gap/providers/__init__.py index cab582f..2b09115 100644 --- 
a/django_project/gap/providers/__init__.py +++ b/django_project/gap/providers/__init__.py @@ -5,14 +5,15 @@ .. note:: Helper for reading NetCDF File """ -from gap.models import Dataset +from gap.models import Dataset, DatasetStore from gap.utils.netcdf import NetCDFProvider from gap.providers.cbam import CBAMZarrReader, CBAMNetCDFReader # noqa from gap.providers.salient import SalientNetCDFReader, SalientZarrReader # noqa from gap.providers.tahmo import TahmoDatasetReader from gap.providers.tio import ( TomorrowIODatasetReader, - PROVIDER_NAME as TIO_PROVIDER + PROVIDER_NAME as TIO_PROVIDER, + TioZarrReader ) @@ -31,8 +32,16 @@ def get_reader_from_dataset(dataset: Dataset): return SalientZarrReader elif dataset.provider.name == 'Tahmo': return TahmoDatasetReader - elif dataset.provider.name == TIO_PROVIDER: + elif ( + dataset.provider.name == TIO_PROVIDER and + dataset.store_type == DatasetStore.EXT_API + ): return TomorrowIODatasetReader + elif ( + dataset.provider.name == TIO_PROVIDER and + dataset.store_type == DatasetStore.ZARR + ): + return TioZarrReader else: raise TypeError( f'Unsupported provider name: {dataset.provider.name}' diff --git a/django_project/gap/providers/tio.py b/django_project/gap/providers/tio.py index 43c9a73..a6fec84 100644 --- a/django_project/gap/providers/tio.py +++ b/django_project/gap/providers/tio.py @@ -13,6 +13,12 @@ import pytz import requests +import numpy as np +import pandas as pd +import regionmask +import xarray as xr +from shapely.geometry import shape +from xarray.core.dataset import Dataset as xrDataset from gap.models import ( Provider, @@ -21,7 +27,8 @@ Dataset, DatasetAttribute, DatasetTimeStep, - DatasetStore + DatasetStore, + DataSourceFile ) from gap.utils.reader import ( LocationInputType, @@ -31,6 +38,7 @@ DatasetReaderValue, BaseDatasetReader ) +from gap.utils.zarr import BaseZarrReader logger = logging.getLogger(__name__) PROVIDER_NAME = 'Tomorrow.io' @@ -488,3 +496,276 @@ def get_raw_results(self) -> 
class TioZarrReaderValue(DatasetReaderValue):
    """Convert a Tio Zarr xarray Dataset into timeline values.

    The raw dataset is indexed by ``forecast_day_idx`` (a day offset).
    After initialization the offset coordinate is renamed to
    ``forecast_day`` and holds actual dates computed from
    ``forecast_date``; data variables are renamed from their source
    name to the attribute variable name.
    """

    date_variable = 'forecast_day'

    def __init__(
            self, val: xrDataset | List[DatasetTimelineValue],
            location_input: DatasetReaderInput,
            attributes: List[DatasetAttribute],
            forecast_date: np.datetime64) -> None:
        """Initialize TioZarrReaderValue class.

        :param val: value that has been read
        :type val: xrDataset | List[DatasetTimelineValue]
        :param location_input: location input query
        :type location_input: DatasetReaderInput
        :param attributes: list of dataset attributes
        :type attributes: List[DatasetAttribute]
        :param forecast_date: date that forecast day offsets are
            relative to
        :type forecast_date: np.datetime64
        """
        # Set before calling the parent initializer because
        # _post_init reads it.
        # NOTE(review): presumably the parent __init__ triggers
        # _post_init - confirm against DatasetReaderValue.
        self.forecast_date = forecast_date
        super().__init__(val, location_input, attributes)

    def _post_init(self):
        """Rename variables and turn day offsets into actual dates.

        Does nothing when the value is empty or is not an xarray
        dataset.
        """
        if self.is_empty():
            return
        if not self._is_xr_dataset:
            return

        # rename attributes and the forecast_day
        renamed_dict = {
            'forecast_day_idx': 'forecast_day'
        }
        for attr in self.attributes:
            renamed_dict[attr.source] = attr.attribute.variable_name
        self._val = self._val.rename(renamed_dict)

        # replace forecast_day to actual dates:
        # forecast_date + offset (in days)
        initial_date = pd.Timestamp(self.forecast_date)
        forecast_day_timedelta = pd.to_timedelta(
            self._val.forecast_day, unit='D')
        forecast_day = initial_date + forecast_day_timedelta
        self._val = self._val.assign_coords(
            forecast_day=('forecast_day', forecast_day))

    def _xr_dataset_to_dict(self) -> dict:
        """Convert xArray Dataset to dictionary.

        Each entry of ``data`` holds the values of every attribute for
        one forecast day; NaN values are returned as None.

        :return: data dictionary with ``geometry`` and ``data`` keys
        :rtype: dict
        """
        geometry = json.loads(self.location_input.point.json)
        if self.is_empty():
            return {
                'geometry': geometry,
                'data': []
            }

        dataset = self.xr_dataset
        var_names = [
            attribute.attribute.variable_name
            for attribute in self.attributes
        ]
        # Materialise every variable once instead of once per
        # (date, attribute) pair; `.values` may load from the store.
        series = {name: dataset[name].values for name in var_names}

        results: List[DatasetTimelineValue] = []
        for dt_idx, dt in enumerate(dataset[self.date_variable].values):
            value_data = {}
            for name in var_names:
                v = series[name][dt_idx]
                value_data[name] = (
                    v if not np.isnan(v) else None
                )
            results.append(DatasetTimelineValue(
                dt,
                value_data,
                self.location_input.point
            ))
        return {
            'geometry': geometry,
            'data': [result.to_dict() for result in results]
        }


class TioZarrReader(BaseZarrReader):
    """Tio Zarr Reader.

    Reads the forecast with the latest ``forecast_date`` from the
    latest Zarr DataSourceFile of the dataset.
    """

    date_variable = 'forecast_day_idx'

    def __init__(
            self, dataset: Dataset, attributes: List[DatasetAttribute],
            location_input: DatasetReaderInput, start_date: datetime,
            end_date: datetime) -> None:
        """Initialize TioZarrReader class.

        :param dataset: dataset to be read
        :type dataset: Dataset
        :param attributes: list of dataset attributes to read
        :type attributes: List[DatasetAttribute]
        :param location_input: location input query
        :type location_input: DatasetReaderInput
        :param start_date: start date of the query
        :type start_date: datetime
        :param end_date: end date of the query
        :type end_date: datetime
        """
        super().__init__(
            dataset, attributes, location_input, start_date, end_date)
        # populated by read_forecast_data once the store is opened
        self.latest_forecast_date = None

    def read_forecast_data(self, start_date: datetime, end_date: datetime):
        """Read forecast data from dataset.

        Results are stored in ``self.xrDatasets``, which stays empty
        when there is no latest Zarr source file, when ``start_date``
        is before the latest forecast date, or when no value is read.

        :param start_date: start date for reading forecast data
        :type start_date: datetime
        :param end_date: end date for reading forecast data
        :type end_date: datetime
        """
        self.setup_reader()
        self.xrDatasets = []
        zarr_file = DataSourceFile.objects.filter(
            dataset=self.dataset,
            format=DatasetStore.ZARR,
            is_latest=True
        ).order_by('id').last()
        if zarr_file is None:
            return
        ds = self.open_dataset(zarr_file)
        # get latest forecast date
        self.latest_forecast_date = ds['forecast_date'][-1].values
        # the day offsets only cover dates from the forecast date on
        if np.datetime64(start_date) < self.latest_forecast_date:
            return
        val = self.read_variables(ds, start_date, end_date)
        if val is None:
            return
        self.xrDatasets.append(val)

    def get_data_values(self) -> DatasetReaderValue:
        """Fetch data values from dataset.

        :return: Data Value; wraps None when nothing has been read.
        :rtype: DatasetReaderValue
        """
        val = None
        if len(self.xrDatasets) > 0:
            val = self.xrDatasets[0]
        return TioZarrReaderValue(
            val, self.location_input, self.attributes,
            self.latest_forecast_date)

    def _get_forecast_day_idx(self, date: np.datetime64) -> int:
        """Return the day offset of a date from the latest forecast date.

        The difference is taken as an absolute value and truncated to
        whole days, so a date before the forecast date also yields a
        positive offset. read_forecast_data returns early when the
        start date is before the latest forecast date.

        :param date: date to be converted
        :type date: np.datetime64
        :return: offset in days
        :rtype: int
        """
        return int(
            abs((date - self.latest_forecast_date) / np.timedelta64(1, 'D'))
        )

    def _select_forecast_days(
            self, dataset: xrDataset, variables: List[str],
            start_dt: np.datetime64,
            end_dt: np.datetime64) -> xrDataset:
        """Select the latest forecast and the requested day window.

        :param dataset: Dataset to be read
        :type dataset: xrDataset
        :param variables: list of variable name
        :type variables: List[str]
        :param start_dt: start datetime
        :type start_dt: np.datetime64
        :param end_dt: end datetime
        :type end_dt: np.datetime64
        :return: Dataset filtered by forecast date and day offsets
        :rtype: xrDataset
        """
        min_idx = self._get_forecast_day_idx(start_dt)
        max_idx = self._get_forecast_day_idx(end_dt)
        return dataset[variables].sel(
            forecast_date=self.latest_forecast_date,
            **{self.date_variable: slice(min_idx, max_idx)}
        )

    def _read_variables_by_point(
            self, dataset: xrDataset, variables: List[str],
            start_dt: np.datetime64,
            end_dt: np.datetime64) -> xrDataset:
        """Read variables values from single point.

        :param dataset: Dataset to be read
        :type dataset: xrDataset
        :param variables: list of variable name
        :type variables: List[str]
        :param start_dt: start datetime
        :type start_dt: np.datetime64
        :param end_dt: end datetime
        :type end_dt: np.datetime64
        :return: Dataset that has been filtered
        :rtype: xrDataset
        """
        point = self.location_input.point
        return self._select_forecast_days(
            dataset, variables, start_dt, end_dt
        ).sel(
            lat=point.y,
            lon=point.x, method='nearest')

    def _read_variables_by_bbox(
            self, dataset: xrDataset, variables: List[str],
            start_dt: np.datetime64,
            end_dt: np.datetime64) -> xrDataset:
        """Read variables values from a bbox.

        The first point of the location input gives the minimum
        lat/lon and the second one the maximum.

        :param dataset: Dataset to be read
        :type dataset: xrDataset
        :param variables: list of variable name
        :type variables: List[str]
        :param start_dt: start datetime
        :type start_dt: np.datetime64
        :param end_dt: end datetime
        :type end_dt: np.datetime64
        :return: Dataset that has been filtered
        :rtype: xrDataset
        """
        points = self.location_input.points
        lat_min = points[0].y
        lat_max = points[1].y
        lon_min = points[0].x
        lon_max = points[1].x
        # output results is in two dimensional array
        return self._select_forecast_days(
            dataset, variables, start_dt, end_dt
        ).sel(
            lat=slice(lat_min, lat_max),
            lon=slice(lon_min, lon_max)
        )

    def _read_variables_by_polygon(
            self, dataset: xrDataset, variables: List[str],
            start_dt: np.datetime64,
            end_dt: np.datetime64) -> xrDataset:
        """Read variables values from a polygon.

        :param dataset: Dataset to be read
        :type dataset: xrDataset
        :param variables: list of variable name
        :type variables: List[str]
        :param start_dt: start datetime
        :type start_dt: np.datetime64
        :param end_dt: end datetime
        :type end_dt: np.datetime64
        :return: Dataset that has been filtered
        :rtype: xrDataset
        """
        # Convert the polygon to a format compatible with shapely
        shapely_multipolygon = shape(
            json.loads(self.location_input.polygon.geojson))

        # Create a mask using regionmask from the shapely polygon;
        # the single region gets number 0
        mask = regionmask.Regions([shapely_multipolygon]).mask(dataset)
        # Mask the dataset
        return self._select_forecast_days(
            dataset, variables, start_dt, end_dt
        ).where(
            mask == 0,
            drop=True
        )

    def _read_variables_by_points(
            self, dataset: xrDataset, variables: List[str],
            start_dt: np.datetime64,
            end_dt: np.datetime64) -> xrDataset:
        """Read variables values from a list of point.

        Each point is snapped to its nearest grid cell; cells that
        match no point are dropped or left as NaN.

        :param dataset: Dataset to be read
        :type dataset: xrDataset
        :param variables: list of variable name
        :type variables: List[str]
        :param start_dt: start datetime
        :type start_dt: np.datetime64
        :param end_dt: end datetime
        :type end_dt: np.datetime64
        :return: Dataset that has been filtered
        :rtype: xrDataset
        """
        # Coordinates are read once, outside of the loop.
        lat_values = dataset['lat'].values
        lon_values = dataset['lon'].values
        # Size the mask from the coordinates so it does not depend on
        # the dimension order of the first variable and no data slice
        # has to be read just to get a shape.
        mask = np.zeros((lat_values.size, lon_values.size), dtype=bool)

        # Iterate through the points and update the mask
        for lon, lat in self.location_input.points:
            # Find nearest lat and lon indices
            lat_idx = int(np.abs(lat_values - lat).argmin())
            lon_idx = int(np.abs(lon_values - lon).argmin())
            mask[lat_idx, lon_idx] = True
        mask_da = xr.DataArray(
            mask,
            coords={
                'lat': dataset['lat'], 'lon': dataset['lon']
            }, dims=['lat', 'lon']
        )
        # Apply the mask to the dataset
        return self._select_forecast_days(
            dataset, variables, start_dt, end_dt
        ).where(
            mask_da,
            drop=True
        )
# coding=utf-8
"""
Tomorrow Now GAP.

.. note:: Unit tests for Tio Zarr Reader.
"""

from django.test import TestCase
from datetime import datetime
import xarray as xr
import numpy as np
import pandas as pd
from django.contrib.gis.geos import Point, MultiPoint
from unittest.mock import Mock, patch

from gap.models import (
    DatasetAttribute, Dataset, DatasetStore,
    Attribute
)
from gap.utils.reader import (
    DatasetReaderInput,
    LocationInputType,
    DatasetReaderValue
)
from gap.providers.tio import (
    TioZarrReaderValue,
    TioZarrReader
)
from gap.factories import (
    DataSourceFileFactory
)
from gap.tests.ingestor.test_tio_shortterm_ingestor import (
    mock_open_zarr_dataset,
    LAT_METADATA,
    LON_METADATA
)


# Fixtures shared by every test case in this module.
TIO_FIXTURES = [
    '2.provider.json',
    '3.station_type.json',
    '4.dataset_type.json',
    '5.dataset.json',
    '6.unit.json',
    '7.attribute.json',
    '8.dataset_attribute.json'
]


class TestTioZarrReaderValue(TestCase):
    """Unit test for class TioZarrReaderValue."""

    fixtures = TIO_FIXTURES

    @staticmethod
    def _build_point_dataset(location):
        """Build a random dataset reduced to one point and two days."""
        day_offsets = np.array([0, 1, 2])
        lat_values = np.array([10, 20])
        lon_values = np.array([30, 40])
        grid = xr.Dataset(
            {
                "max_temperature": (
                    ["forecast_day_idx", "lat", "lon"],
                    np.random.rand(
                        len(day_offsets), len(lat_values), len(lon_values)
                    )
                ),
            },
            coords={
                "forecast_day_idx": day_offsets,
                "lat": lat_values,
                "lon": lon_values,
            }
        )
        day_idx = grid['forecast_day_idx']
        at_point = grid[['forecast_day_idx', 'max_temperature']].sel(
            lat=location.y,
            lon=location.x, method='nearest'
        )
        # keep only day offsets 0 and 1
        return at_point.where(
            (day_idx >= 0) & (day_idx <= 1),
            drop=True)

    def setUp(self):
        """Set TestTioZarrReaderValue class."""
        self.dataset = Dataset.objects.get(
            name='Tomorrow.io Short-term Forecast',
            store_type=DatasetStore.ZARR
        )
        # dataset attribute taken from the fixtures
        self.attribute = DatasetAttribute.objects.filter(
            dataset=self.dataset,
            attribute__variable_name='max_temperature'
        ).first()

        # location input for a single point
        location = Point(30, 10, srid=4326)
        self.mock_location_input = DatasetReaderInput.from_point(location)

        # filtered xarray dataset and its forecast date
        self.mock_xr_dataset = self._build_point_dataset(location)
        self.forecast_date = np.datetime64('2023-01-01')

        # TioZarrReaderValue initialization with xarray dataset
        self.tio_reader_value_xr = TioZarrReaderValue(
            val=self.mock_xr_dataset,
            location_input=self.mock_location_input,
            attributes=[self.attribute],
            forecast_date=self.forecast_date
        )

    def test_initialization(self):
        """Test initialization method."""
        reader_value = self.tio_reader_value_xr
        self.assertEqual(reader_value.forecast_date, self.forecast_date)
        self.assertTrue(reader_value._is_xr_dataset)

    def test_post_init(self):
        """Test post initialization method."""
        converted = self.tio_reader_value_xr.xr_dataset
        # the offset coordinate and the variable have been renamed
        self.assertIn('forecast_day', converted.coords)
        self.assertIn('max_temperature', converted.data_vars)
        self.assertNotIn('forecast_day_idx', converted.coords)

        # forecast_day now holds actual dates
        expected_days = pd.date_range('2023-01-01', periods=2)
        actual_days = pd.to_datetime(converted.forecast_day.values)
        pd.testing.assert_index_equal(
            pd.Index(actual_days), expected_days)

    def test_is_empty(self):
        """Test is_empty method."""
        self.assertFalse(self.tio_reader_value_xr.is_empty())

    def test_to_json_with_point_type(self):
        """Test convert to_json with point."""
        payload = self.tio_reader_value_xr.to_json()
        for key in ('geometry', 'data'):
            self.assertIn(key, payload)
        self.assertIsInstance(payload['data'], list)

    def test_to_json_with_non_point_type(self):
        """Test convert to_json with exception."""
        self.mock_location_input.type = 'polygon'
        with self.assertRaises(TypeError):
            self.tio_reader_value_xr.to_json()

    def test_xr_dataset_to_dict(self):
        """Test convert xarray dataset to dict."""
        payload = self.tio_reader_value_xr._xr_dataset_to_dict()
        for key in ('geometry', 'data'):
            self.assertIn(key, payload)
        self.assertIsInstance(payload['data'], list)


class TestTioZarrReader(TestCase):
    """Unit test for Tio Zarr Reader class."""

    fixtures = TIO_FIXTURES

    def setUp(self):
        """Set Test class for Tio Zarr Reader."""
        self.dataset = Dataset.objects.get(
            name='Tomorrow.io Short-term Forecast',
            store_type=DatasetStore.ZARR
        )
        self.zarr_source = DataSourceFileFactory.create(
            dataset=self.dataset,
            format=DatasetStore.ZARR,
            name='tio.zarr',
            is_latest=True
        )
        self.attribute1 = Attribute.objects.get(
            name='Max Temperature',
            variable_name='max_temperature')
        self.dataset_attr1 = DatasetAttribute.objects.get(
            dataset=self.dataset,
            attribute=self.attribute1,
            source='max_temperature'
        )
        self.attributes = [self.dataset_attr1]
        self.location_input = DatasetReaderInput.from_point(
            Point(LON_METADATA['min'], LAT_METADATA['min'])
        )
        self.start_date = datetime(2024, 10, 3)
        self.end_date = datetime(2024, 10, 5)
        self.reader = TioZarrReader(
            self.dataset, self.attributes, self.location_input,
            self.start_date, self.end_date
        )

    def _assert_max_temperature_dataset(self, data_value):
        """Check that the value wraps a dataset with max_temperature."""
        self.assertIsInstance(data_value, DatasetReaderValue)
        self.assertIsInstance(data_value._val, xr.Dataset)
        self.assertIn(
            'max_temperature', data_value.xr_dataset.data_vars)

    @patch('gap.models.DataSourceFile.objects.filter')
    def test_read_forecast_data_empty(self, mock_filter):
        """Test for reading forecast data that returns empty."""
        first_day = datetime(2023, 1, 1)
        second_day = datetime(2023, 1, 2)
        empty_reader = TioZarrReader(
            Mock(), [], Mock(), first_day, second_day)
        # no latest zarr source file is available
        mock_filter.return_value.order_by.return_value.last.return_value = (
            None
        )
        empty_reader.read_forecast_data(first_day, second_day)
        self.assertEqual(empty_reader.xrDatasets, [])

    def test_read_forecast_data(self):
        """Test for reading forecast data."""
        with patch.object(self.reader, 'open_dataset') as mocked_open:
            mocked_open.return_value = mock_open_zarr_dataset()
            self.reader.read_forecast_data(
                datetime(2024, 10, 3), datetime(2024, 10, 5))
            self.assertEqual(len(self.reader.xrDatasets), 1)
            payload = self.reader.get_data_values().to_json()
            mocked_open.assert_called_once()
            rows = payload['data']
            self.assertEqual(len(rows), 3)
            self.assertIn('max_temperature', rows[0]['values'])

    def test_read_from_bbox(self):
        """Test for reading forecast data using bbox."""
        lon_start = LON_METADATA['min']
        lat_start = LAT_METADATA['min']
        with patch.object(self.reader, 'open_dataset') as mocked_open:
            mocked_open.return_value = mock_open_zarr_dataset()
            self.reader.location_input = DatasetReaderInput.from_bbox(
                [
                    lon_start,
                    lat_start,
                    lon_start + LON_METADATA['inc'],
                    lat_start + LAT_METADATA['inc']
                ]
            )
            self.reader.read_forecast_data(
                datetime(2024, 10, 3), datetime(2024, 10, 5))
            self.assertEqual(len(self.reader.xrDatasets), 1)
            data_value = self.reader.get_data_values()
            mocked_open.assert_called_once()
            self._assert_max_temperature_dataset(data_value)

    def test_read_from_points(self):
        """Test for reading forecast data using points."""
        with patch.object(self.reader, 'open_dataset') as mocked_open:
            mocked_open.return_value = mock_open_zarr_dataset()
            first_point = Point(LON_METADATA['min'], LAT_METADATA['min'])
            second_point = Point(
                LON_METADATA['min'] + LON_METADATA['inc'],
                LAT_METADATA['min'] + LAT_METADATA['inc']
            )
            self.reader.location_input = DatasetReaderInput(
                MultiPoint([first_point, second_point]),
                LocationInputType.LIST_OF_POINT
            )
            self.reader.read_forecast_data(
                datetime(2024, 10, 3), datetime(2024, 10, 5))
            self.assertEqual(len(self.reader.xrDatasets), 1)
            data_value = self.reader.get_data_values()
            mocked_open.assert_called_once()
            self._assert_max_temperature_dataset(data_value)