From 31c5fb4c1d2f1e60cbd81e10d3e0424bb5f70816 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 24 Sep 2019 11:01:19 +0200 Subject: [PATCH 1/6] about to fix it! --- test/api/gen/default/test_gen.py | 66 +++++++++-- test/api/gen/test_config.py | 162 +++++++++++++++++++++++++++ test/util/test_config.py | 184 ------------------------------- test/util/test_dsio.py | 19 ++++ xcube/api/gen/config.py | 38 ++++--- xcube/api/gen/gen.py | 26 ++++- xcube/cli/gen.py | 31 +++--- xcube/util/dsio.py | 29 ++++- xcube/util/update.py | 19 ++-- 9 files changed, 332 insertions(+), 242 deletions(-) create mode 100644 test/api/gen/test_config.py diff --git a/test/api/gen/default/test_gen.py b/test/api/gen/default/test_gen.py index 2785c6621..6e6251b5d 100644 --- a/test/api/gen/default/test_gen.py +++ b/test/api/gen/default/test_gen.py @@ -1,6 +1,6 @@ import os import unittest -from typing import Tuple, Optional +from typing import Tuple, Optional, Dict, Any import numpy as np import xarray as xr @@ -32,6 +32,10 @@ def test_process_inputs_single(self): [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c-single.nc') self.assertEqual(True, status) self.assertTrue('\nstep 8 of 8: creating input slice in l2c-single.nc...\n' in output) + self.assert_cube_ok(xr.open_dataset('l2c-single.nc', autoclose=True), 1, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-01T12:00:00.000000000')) def test_process_inputs_append_multiple_nc(self): status, output = gen_cube_wrapper( @@ -40,6 +44,10 @@ def test_process_inputs_append_multiple_nc(self): self.assertEqual(True, status) self.assertTrue('\nstep 8 of 8: creating input slice in l2c.nc...\n' in output) self.assertTrue('\nstep 8 of 8: appending input slice to l2c.nc...\n' in output) + self.assert_cube_ok(xr.open_dataset('l2c.nc', autoclose=True), 3, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) def test_process_inputs_append_multiple_zarr(self): status, output = gen_cube_wrapper( @@ -48,6 +56,10 @@ def test_process_inputs_append_multiple_zarr(self): self.assertEqual(True, status) self.assertTrue('\nstep 8 of 8: creating input slice in l2c.zarr...\n' in output) self.assertTrue('\nstep 8 of 8: appending input slice to l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), 3, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) def test_process_inputs_insert_multiple_zarr(self): status, output = gen_cube_wrapper( @@ -59,6 +71,10 @@ def test_process_inputs_insert_multiple_zarr(self): self.assertTrue('\nstep 8 of 8: creating input slice in l2c.zarr...\n' in output) self.assertTrue('\nstep 8 of 8: appending input slice to l2c.zarr...\n' in output) self.assertTrue('\nstep 8 of 8: inserting input slice before index 0 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), 3, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) def test_process_inputs_replace_multiple_zarr(self): status, output = gen_cube_wrapper( @@ -71,6 +87,10 @@ def test_process_inputs_replace_multiple_zarr(self): self.assertTrue('\nstep 8 of 8: creating input slice in l2c.zarr...\n' in output) self.assertTrue('\nstep 8 of 8: appending input slice to l2c.zarr...\n' in output) self.assertTrue('\nstep 8 of 8: replacing 
input slice at index 1 in l2c.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), 3, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) def test_input_txt(self): f = open((os.path.join(os.path.dirname(__file__), 'inputdata', "input.txt")), "w+") @@ -81,6 +101,28 @@ def test_input_txt(self): f.close() status, output = gen_cube_wrapper([get_inputdata_path('input.txt')], 'l2c.zarr', sort_mode=True) self.assertEqual(True, status) + self.assert_cube_ok(xr.open_zarr('l2c.zarr'), 3, + dict(time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-03T12:00:00.000000000')) + + def assert_cube_ok(self, cube: xr.Dataset, expected_time_dim: int, expected_extra_attrs: Dict[str, Any]): + self.assertEqual({'lat': 180, 'lon': 320, 'bnds': 2, 'time': expected_time_dim}, cube.dims) + self.assertEqual({'lon', 'lat', 'time', 'lon_bnds', 'lat_bnds', 'time_bnds'}, set(cube.coords)) + self.assertEqual({'analysed_sst'}, set(cube.data_vars)) + expected_attrs = dict(date_modified=None, + geospatial_lon_min=-4.0, + geospatial_lon_max=12.0, + geospatial_lon_resolution=0.05, + geospatial_lon_units='degrees_east', + geospatial_lat_min=47.0, + geospatial_lat_max=56.0, + geospatial_lat_resolution=0.05, + geospatial_lat_units='degrees_north') + expected_attrs.update(expected_extra_attrs) + for k, v in expected_attrs.items(): + self.assertIn(k, cube.attrs) + if v is not None: + self.assertEqual(v, cube.attrs[k], msg=f'key {k!r}') def test_handle_360_lon(self): status, output = gen_cube_wrapper( @@ -117,13 +159,15 @@ def output_monitor(msg): else: output += msg + '\n' - config = get_config_dict(dict(input_paths=input_paths, output_path=output_path)) - return gen_cube(input_processor_name=input_processor_name, - output_size=(320, 180), - output_region=(-4., 47., 12., 56.), - output_resampling='Nearest', - output_variables=[('analysed_sst', dict(name='SST'))], - sort_mode=sort_mode, - dry_run=False, - monitor=output_monitor, - **config), output + config = get_config_dict( + input_paths=input_paths, + input_processor_name=input_processor_name, + output_path=output_path, + output_size='320,180', + output_region='-4,47,12,56', + output_resampling='Nearest', + output_variables='analysed_sst', + sort_mode=sort_mode, + ) + + return gen_cube(dry_run=False, monitor=output_monitor, **config), output diff --git a/test/api/gen/test_config.py b/test/api/gen/test_config.py new file mode 100644 index 000000000..1ca0855e8 --- /dev/null +++ b/test/api/gen/test_config.py @@ -0,0 +1,162 @@ +import os +import shutil +import unittest + +import yaml +from xcube.api.gen.config import get_config_dict + +TEMP_PATH_FOR_YAML = './temp_test_data_for_xcube_tests' + +CONFIG_1_NAME = 'config_1.json' +CONFIG_2_NAME = 'config_2.json' +CONFIG_1_FILE_LIST = [(os.path.join(TEMP_PATH_FOR_YAML, CONFIG_1_NAME)), + (os.path.join(TEMP_PATH_FOR_YAML, CONFIG_2_NAME))] +CONFIG_1_YAML = """ +output_size: [2000, 1000] +output_region: [0, 20, 20, 30] +output_variables: + - x + - y + - z* +""" + +CONFIG_3_NAME = 'config_3.json' +CONFIG_4_NAME = 'config_4.json' +CONFIG_2_FILE_LIST = [(os.path.join(TEMP_PATH_FOR_YAML, CONFIG_3_NAME)), + (os.path.join(TEMP_PATH_FOR_YAML, CONFIG_4_NAME))] +CONFIG_2_YAML = """ +: output_variables: + - x + 6-- +""" + + +def _create_temp_yaml(temp_path_for_yaml, config_file_name, config_yaml): + if not os.path.exists(TEMP_PATH_FOR_YAML): + try: + os.mkdir(os.path.join(temp_path_for_yaml)) + except OSError as e: + 
print(e)
+            print("Creation of the directory %s failed" % temp_path_for_yaml)
+        else:
+            print("Successfully created the directory %s " % temp_path_for_yaml)
+            yaml_path = os.path.join(temp_path_for_yaml, config_file_name)
+            with open(yaml_path, 'w') as outfile:
+                yaml.dump(yaml.full_load(config_yaml), outfile)
+            return yaml_path
+
+    else:
+        yaml_path = os.path.join(temp_path_for_yaml, config_file_name)
+        with open(yaml_path, 'w') as outfile:
+            yaml.dump(config_yaml, outfile, default_flow_style=False)
+        return yaml_path
+
+
+class GetConfigDictTest(unittest.TestCase):
+    def test_config_file_alone(self):
+        try:
+            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_2_NAME, CONFIG_1_YAML)
+            config_obj = dict(config_files=CONFIG_1_FILE_LIST)
+            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_1_NAME, config_obj)
+
+            config = get_config_dict(**config_obj)
+            self.assertIsNotNone(config)
+            self.assertEqual([2000, 1000], config['output_size'])
+            self.assertEqual([0, 20, 20, 30], config['output_region'])
+            self.assertEqual([('x', None), ('y', None), ('z*', None)], config['output_variables'])
+        finally:
+            if os.path.exists(TEMP_PATH_FOR_YAML):
+                shutil.rmtree(TEMP_PATH_FOR_YAML)
+                print('Successfully removed folder')
+
+    def test_config_file_overwritten_by_config_obj(self):
+        try:
+            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_2_NAME, CONFIG_1_YAML)
+            config_obj = dict(config_files=CONFIG_1_FILE_LIST,
+                              output_variables='a,b')
+            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_1_NAME, config_obj)
+            config = get_config_dict(**config_obj)
+            self.assertIn('output_variables', config)
+            self.assertEqual([('a', None), ('b', None)], config['output_variables'])
+        finally:
+            if os.path.exists(TEMP_PATH_FOR_YAML):
+                shutil.rmtree(TEMP_PATH_FOR_YAML)
+                print('Successfully removed folder')
+
+    def test_config_file_does_not_exist(self):
+        config_obj = dict(config_files=['bibo.yaml', ])
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual("Cannot find configuration 'bibo.yaml'",
+                         f'{cm.exception}')
+
+    def test_output_size_option(self):
+        config_obj = dict(output_size='120, 140')
+        config = get_config_dict(**config_obj)
+        self.assertIn('output_size', config)
+        self.assertEqual([120, 140], config['output_size'])
+
+        config_obj = dict(output_size='120,abc')
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual(
+            "output_size must have the form <width>,<height>, where both values must be positive integer numbers",
+            f'{cm.exception}')
+
+    def test_output_region_option(self):
+        config_obj = dict(output_region='-10.5, 5., 10.5, 25.')
+        config = get_config_dict(**config_obj)
+        self.assertIn('output_region', config)
+        self.assertEqual([-10.5, 5., 10.5, 25.], config['output_region'])
+
+        config_obj = dict(output_region='50,_2,55,21')
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual("output_region must have the form <lon-min>,<lat-min>,<lon-max>,<lat-max>,"
+                         " where all four numbers must be floating point numbers in degrees",
+                         f'{cm.exception}')
+
+        config_obj = dict(output_region='50, 20, 55')
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual("output_region must have the form <lon-min>,<lat-min>,<lon-max>,<lat-max>,"
+                         " where all four numbers must be floating point numbers in degrees",
+                         f'{cm.exception}')
+
+    def test_output_variables_option(self):
+        config_obj = dict(output_variables='hanni, nanni, pfanni')
+        config = get_config_dict(**config_obj)
+        self.assertIn('output_variables', config)
+        self.assertEqual([('hanni', None), ('nanni', None), ('pfanni',
None)],
+                         config['output_variables'])
+
+        config_obj = dict(output_variables='')
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual("output_variables must be a list of existing variable names",
+                         f'{cm.exception}')
+
+        config_obj = dict(output_variables='a*,')
+        with self.assertRaises(ValueError) as cm:
+            get_config_dict(**config_obj)
+        self.assertEqual("output_variables must be a list of existing variable names",
+                         f'{cm.exception}')
+
+    # This test is still not running correctly, needs to be fixed. TODO: AliceBalfanz
+    # def test_config_file_with_invalid_yaml(self):
+    #     try:
+    #         _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_4_NAME, CONFIG_2_YAML)
+    #         config_obj = dict(config_files=CONFIG_2_FILE_LIST)
+    #         _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_3_NAME, config_obj)
+    #
+    #         with self.assertRaises(ParserError) as cm:
+    #             get_config_dict(config_obj)
+    #         self.assertEqual('YAML in \'config_2.json\' is invalid: '
+    #                          'while parsing a block mapping\n'
+    #                          'expected <block end>, but found \':\'\n'
+    #                          '  in "<unicode string>", line 2, column 1',
+    #                          f'{cm.exception}')
+    #     finally:
+    #         if os.path.exists(TEMP_PATH_FOR_YAML):
+    #             shutil.rmtree(TEMP_PATH_FOR_YAML)
+    #             print('Successfully removed folder')
diff --git a/test/util/test_config.py b/test/util/test_config.py
index 51bff9e38..1fa0b6034 100644
--- a/test/util/test_config.py
+++ b/test/util/test_config.py
@@ -1,12 +1,7 @@
-import os
-import shutil
 import unittest
 from io import StringIO
-from typing import Dict
 
 import yaml
-
-from xcube.api.gen.config import get_config_dict
 from xcube.util.config import flatten_dict, to_name_dict_pair, to_name_dict_pairs, to_resolved_name_dict_pairs, \
     merge_config
 
@@ -291,182 +286,3 @@ def test_merge_dict_value(self):
         second_dict = {'a': dict(c=25)}
         actual_dict = merge_config(first_dict, second_dict)
         self.assertEqual({'a': dict(c=25), 'o': 105}, actual_dict)
-
-
-def _get_config_obj(config_file=None,
-                    input_files=None,
-                    input_processor=None,
-                    output_dir=None,
-                    output_name=None,
-                    output_writer=None,
-                    output_size=None,
-                    output_region=None,
-                    output_variables=None,
-                    output_resampling=None) -> Dict:
-    return dict(config_file=config_file,
-                input_files=input_files,
-                input_processor=input_processor,
-                output_dir=output_dir,
-                output_name=output_name,
-                output_writer=output_writer,
-                output_size=output_size,
-                output_region=output_region,
-                output_variables=output_variables,
-                output_resampling=output_resampling)
-
-
-TEMP_PATH_FOR_YAML = './temp_test_data_for_xcube_tests'
-
-CONFIG_1_NAME = 'config_1.json'
-CONFIG_2_NAME = 'config_2.json'
-CONFIG_1_FILE_LIST = [(os.path.join(TEMP_PATH_FOR_YAML, CONFIG_1_NAME)),
-                      (os.path.join(TEMP_PATH_FOR_YAML, CONFIG_2_NAME))]
-CONFIG_1_YAML = """
-output_size: [2000, 1000]
-output_region: [0, 20, 20, 30]
-output_variables:
-  - x
-  - y
-  - z*
-"""
-
-CONFIG_3_NAME = 'config_3.json'
-CONFIG_4_NAME = 'config_4.json'
-CONFIG_2_FILE_LIST = [(os.path.join(TEMP_PATH_FOR_YAML, CONFIG_3_NAME)),
-                      (os.path.join(TEMP_PATH_FOR_YAML, CONFIG_4_NAME))]
-CONFIG_2_YAML = """
-: output_variables:
-  - x
-  6--
-"""
-
-
-def _create_temp_yaml(temp_path_for_yaml, config_file_name, config_yaml):
-    if not os.path.exists(TEMP_PATH_FOR_YAML):
-        try:
-            os.mkdir(os.path.join(temp_path_for_yaml))
-        except OSError as e:
-            print(e)
-            print("Creation of the directory %s failed" % temp_path_for_yaml)
-        else:
-            print("Successfully created the directory %s " % temp_path_for_yaml)
-            yaml_path = os.path.join(temp_path_for_yaml, config_file_name)
-            with open(yaml_path, 'w') as outfile:
-                
yaml.dump(yaml.full_load(config_yaml), outfile)
-            return yaml_path
-
-    else:
-        yaml_path = os.path.join(temp_path_for_yaml, config_file_name)
-        with open(yaml_path, 'w') as outfile:
-            yaml.dump(config_yaml, outfile, default_flow_style=False)
-        return yaml_path
-
-
-class GetConfigDictTest(unittest.TestCase):
-    def test_config_file_alone(self):
-        try:
-            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_2_NAME, CONFIG_1_YAML)
-            config_obj = _get_config_obj(config_file=CONFIG_1_FILE_LIST)
-            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_1_NAME, config_obj)
-
-            config = get_config_dict(config_obj)
-            self.assertIsNotNone(config)
-            self.assertEqual([2000, 1000], config['output_size'])
-            self.assertEqual([0, 20, 20, 30], config['output_region'])
-            self.assertEqual([('x', None), ('y', None), ('z*', None)], config['output_variables'])
-        finally:
-            if os.path.exists(TEMP_PATH_FOR_YAML):
-                shutil.rmtree(TEMP_PATH_FOR_YAML)
-                print('Successfully removed folder')
-
-    def test_config_file_overwritten_by_config_obj(self):
-        try:
-            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_2_NAME, CONFIG_1_YAML)
-            config_obj = _get_config_obj(config_file=CONFIG_1_FILE_LIST,
-                                         output_variables='a,b')
-            _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_1_NAME, config_obj)
-            config = get_config_dict(config_obj)
-            self.assertIn('output_variables', config)
-            self.assertIsNotNone(['a', 'b'], config['output_variables'])
-        finally:
-            if os.path.exists(TEMP_PATH_FOR_YAML):
-                shutil.rmtree(TEMP_PATH_FOR_YAML)
-                print('Successfully removed folder')
-
-    def test_config_file_does_not_exist(self):
-        config_obj = _get_config_obj(config_file=['bibo.yaml', ])
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        self.assertEqual("Cannot find configuration 'bibo.yaml'",
-                         f'{cm.exception}')
-
-    def test_output_size_option(self):
-        config_obj = _get_config_obj(output_size='120, 140')
-        config = get_config_dict(config_obj)
-        self.assertIn('output_size', config)
-        self.assertEqual([120, 140], config['output_size'])
-
-        config_obj = _get_config_obj(output_size='120,abc')
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        self.assertEqual(
-            "output_size must have the form <width>,<height>, where both values must be positive integer numbers",
-            f'{cm.exception}')
-
-    def test_output_region_option(self):
-        config_obj = _get_config_obj(output_region='-10.5, 5., 10.5, 25.')
-        config = get_config_dict(config_obj)
-        self.assertIn('output_region', config)
-        self.assertEqual([-10.5, 5., 10.5, 25.], config['output_region'])
-
-        config_obj = _get_config_obj(output_region='50,_2,55,21')
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        self.assertEqual("output_region must have the form <lon-min>,<lat-min>,<lon-max>,<lat-max>,"
-                         " where all four numbers must be floating point numbers in degrees",
-                         f'{cm.exception}')
-
-        config_obj = _get_config_obj(output_region='50, 20, 55')
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        self.assertEqual("output_region must have the form <lon-min>,<lat-min>,<lon-max>,<lat-max>,"
-                         " where all four numbers must be floating point numbers in degrees",
-                         f'{cm.exception}')
-
-    def test_output_variables_option(self):
-        config_obj = _get_config_obj(output_variables='hanni, nanni, pfanni')
-        config = get_config_dict(config_obj)
-        self.assertIn('output_variables', config)
-        self.assertEqual([('hanni', None), ('nanni', None), ('pfanni', None)],
-                         config['output_variables'])
-
-        config_obj = _get_config_obj(output_variables='')
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        
self.assertEqual("output_variables must be a list of existing variable names",
-                         f'{cm.exception}')
-
-        config_obj = _get_config_obj(output_variables='a*,')
-        with self.assertRaises(ValueError) as cm:
-            get_config_dict(config_obj)
-        self.assertEqual("output_variables must be a list of existing variable names",
-                         f'{cm.exception}')
-
-    # This test is still not running correctly, needs to be fixed. TODO: AliceBalfanz
-    # def test_config_file_with_invalid_yaml(self):
-    #     try:
-    #         _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_4_NAME, CONFIG_2_YAML)
-    #         config_obj = _get_config_obj(config_file=CONFIG_2_FILE_LIST)
-    #         _create_temp_yaml(TEMP_PATH_FOR_YAML, CONFIG_3_NAME, config_obj)
-    #
-    #         with self.assertRaises(ParserError) as cm:
-    #             get_config_dict(config_obj)
-    #         self.assertEqual('YAML in \'config_2.json\' is invalid: '
-    #                          'while parsing a block mapping\n'
-    #                          'expected <block end>, but found \':\'\n'
-    #                          '  in "<unicode string>", line 2, column 1',
-    #                          f'{cm.exception}')
-    #     finally:
-    #         if os.path.exists(TEMP_PATH_FOR_YAML):
-    #             shutil.rmtree(TEMP_PATH_FOR_YAML)
-    #             print('Successfully removed folder')
diff --git a/test/util/test_dsio.py b/test/util/test_dsio.py
index 6e6621256..5b85d06be 100644
--- a/test/util/test_dsio.py
+++ b/test/util/test_dsio.py
@@ -52,6 +52,25 @@ def test_append_raises(self):
             # noinspection PyTypeChecker
             ds_io.append(None, 'test.nc')
 
+    def test_insert_raises(self):
+        ds_io = MyDatasetIO()
+        with self.assertRaises(NotImplementedError):
+            # noinspection PyTypeChecker
+            ds_io.insert(None, 0, 'test.nc')
+
+    def test_replace_raises(self):
+        ds_io = MyDatasetIO()
+        with self.assertRaises(NotImplementedError):
+            # noinspection PyTypeChecker
+            ds_io.replace(None, 0, 'test.nc')
+
+    def test_update_raises(self):
+        ds_io = MyDatasetIO()
+        with self.assertRaises(NotImplementedError):
+            # noinspection PyTypeChecker
+            ds_io.update(None, 'test.nc')
+
 
 class MemDatasetIOTest(unittest.TestCase):
 
diff --git a/xcube/api/gen/config.py b/xcube/api/gen/config.py
index 1bbf74b37..831d8f900 100644
--- a/xcube/api/gen/config.py
+++ b/xcube/api/gen/config.py
@@ -18,33 +18,31 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-
-from typing import Dict, Union
+from typing import Sequence
 
 from ...util.config import flatten_dict, load_configs, to_name_dict_pairs
 
 
-def get_config_dict(config_obj: Dict[str, Union[str, bool, int, float, list, dict, tuple]]):
+def get_config_dict(config_files: Sequence[str] = None,
+                    input_paths: Sequence[str] = None,
+                    input_processor_name: str = None,
+                    output_path: str = None,
+                    output_writer_name: str = None,
+                    output_size: str = None,
+                    output_region: str = None,
+                    output_variables: str = None,
+                    output_resampling: str = None,
+                    append_mode: bool = True,
+                    profile_mode: bool = False,
+                    sort_mode: bool = False):
     """
-    Get configuration dictionary.
+    Get a configuration dictionary from given (command-line) arguments.
 
-    :param config_obj: A configuration object.
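+    :param config_files: sequence of configuration file paths (YAML format, merged in the order given)
+    :param input_paths: input paths, may contain wildcards '?', '*', and '**'
+    :param input_processor_name: name of a registered input processor
+    :param output_path: output file or directory path
+    :param output_writer_name: name of a registered dataset writer (output format)
+    :param output_size: output size in pixels using format "<width>,<height>"
+    :param output_region: output region using format "<lon-min>,<lat-min>,<lon-max>,<lat-max>"
+    :param output_variables: comma-separated names of variables to include, may contain wildcards
+    :param output_resampling: fallback spatial resampling algorithm
+    :param append_mode: deprecated, time slices are now always inserted, replaced, or appended
+    :param profile_mode: whether to collect profiling information
+    :param sort_mode: whether to sort the input file list before processing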
:return: Configuration dictionary :raise OSError, ValueError """ - config_file = config_obj.get("config_file") - input_paths = config_obj.get("input_paths") - input_processor_name = config_obj.get("input_processor_name") - output_path = config_obj.get("output_path") - output_writer_name = config_obj.get("output_writer_name") - output_size = config_obj.get("output_size") - output_region = config_obj.get("output_region") - output_variables = config_obj.get("output_variables") - output_resampling = config_obj.get("output_resampling") - append_mode = config_obj.get("append_mode") - sort_mode = config_obj.get("sort_mode") - - config = load_configs(*config_file) if config_file else {} + + config = load_configs(*config_files) if config_files else {} # preserve backward compatibility for old names if 'input_processor' in config: @@ -99,6 +97,9 @@ def get_config_dict(config_obj: Dict[str, Union[str, bool, int, float, list, dic raise ValueError('output_variables must be a list of existing variable names') config['output_variables'] = output_variables + if profile_mode is not None and config.get('profile_mode') is None: + config['profile_mode'] = profile_mode + if append_mode is not None and config.get('append_mode') is None: config['append_mode'] = append_mode @@ -116,4 +117,5 @@ def get_config_dict(config_obj: Dict[str, Union[str, bool, int, float, list, dic output_metadata = config.get('output_metadata') if output_metadata: config['output_metadata'] = flatten_dict(output_metadata) + return config diff --git a/xcube/api/gen/gen.py b/xcube/api/gen/gen.py index 16b93ff98..59590cb73 100644 --- a/xcube/api/gen/gen.py +++ b/xcube/api/gen/gen.py @@ -266,7 +266,8 @@ def step8(input_slice): if not dry_run: rimraf(output_path) output_writer.write(input_slice, output_path, **output_writer_params) - return update_dataset_attrs(input_slice, global_attrs=output_metadata, update_existing=True) + _update_cube_attrs(output_writer, output_path, global_attrs=output_metadata, temporal_only=False) + return input_slice steps.append((step8, f'creating input slice in {output_path}')) @@ -274,7 +275,8 @@ def step8(input_slice): def step8(input_slice): if not dry_run: output_writer.append(input_slice, output_path, **output_writer_params) - return update_dataset_temporal_attrs(input_slice, update_existing=True) + _update_cube_attrs(output_writer, output_path, temporal_only=True) + return input_slice steps.append((step8, f'appending input slice to {output_path}')) @@ -282,7 +284,8 @@ def step8(input_slice): def step8(input_slice): if not dry_run: output_writer.insert(input_slice, time_index, output_path) - return update_dataset_temporal_attrs(input_slice, update_existing=True) + _update_cube_attrs(output_writer, output_path, temporal_only=True) + return input_slice steps.append((step8, f'inserting input slice before index {time_index} in {output_path}')) @@ -290,7 +293,8 @@ def step8(input_slice): def step8(input_slice): if not dry_run: output_writer.replace(input_slice, time_index, output_path) - return update_dataset_temporal_attrs(input_slice, update_existing=True) + _update_cube_attrs(output_writer, output_path, temporal_only=True) + return input_slice steps.append((step8, f'replacing input slice at index {time_index} in {output_path}')) @@ -327,3 +331,17 @@ def step8(input_slice): print(s.getvalue()) return True + + +def _update_cube_attrs(output_writer: DatasetIO, output_path: str, + global_attrs: Dict = None, + temporal_only: bool = False): + cube = output_writer.read(output_path) + if temporal_only: + cube = 
update_dataset_temporal_attrs(cube, update_existing=True, in_place=True)
+    else:
+        cube = update_dataset_attrs(cube, update_existing=True, in_place=True)
+    global_attrs = dict(global_attrs) if global_attrs else {}
+    global_attrs.update(cube.attrs)
+    cube.close()
+    output_writer.update(output_path, global_attrs=global_attrs)
diff --git a/xcube/cli/gen.py b/xcube/cli/gen.py
index cdaee5a60..f65f8e75a 100644
--- a/xcube/cli/gen.py
+++ b/xcube/cli/gen.py
@@ -18,6 +18,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+from typing import Sequence
 
 import click
 
@@ -68,9 +69,9 @@
               help='Displays additional information about format options or about input processors.')
 @click.option('--dry_run', is_flag=True,
               help='Just read and process inputs, but don\'t produce any outputs.')
-def gen(input: str,
+def gen(input: Sequence[str],
         proc: str,
-        config: tuple,
+        config: Sequence[str],
         output: str,
         format: str,
         size: str,
@@ -89,19 +90,8 @@ def gen(input: str,
     The input paths can also be passed as lines of a text file. To do so, provide exactly one input file with
     ".txt" extension which contains the actual input paths to be used.
     """
-    input_paths = input
-    input_processor_name = proc
-    config_file = config
-    output_path = output
-    output_writer_name = format
-    output_size = size
-    output_region = region
-    output_variables = variables
-    output_resampling = resampling
-    profile_mode = prof
     dry_run = dry_run
     info_mode = info
-    sort_mode = sort
 
     from xcube.api.gen.config import get_config_dict
     from xcube.api.gen.gen import gen_cube
@@ -112,7 +102,20 @@
         print(_format_info())
         return 0
 
-    config = get_config_dict(locals())
+    config = get_config_dict(
+        input_paths=input,
+        input_processor_name=proc,
+        config_files=config,
+        output_path=output,
+        output_writer_name=format,
+        output_size=size,
+        output_region=region,
+        output_variables=variables,
+        output_resampling=resampling,
+        profile_mode=prof,
+        append_mode=append,
+        sort_mode=sort,
+    )
 
     gen_cube(dry_run=dry_run,
              monitor=print,
diff --git a/xcube/util/dsio.py b/xcube/util/dsio.py
index 493d3026b..d0d10ed1a 100644
--- a/xcube/util/dsio.py
+++ b/xcube/util/dsio.py
@@ -23,7 +23,7 @@
 import shutil
 import warnings
 from abc import ABCMeta, abstractmethod
-from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple
+from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Any
 
 import pandas as pd
 import s3fs
 import xarray as xr
 import zarr
 
 from .constants import FORMAT_NAME_MEM, FORMAT_NAME_NETCDF4, FORMAT_NAME_ZARR
-from .timeslice import append_time_slice, insert_time_slice, replace_time_slice
 from .objreg import get_obj_registry
+from .timeslice import append_time_slice, insert_time_slice, replace_time_slice
 
 FORMAT_NAME_EXCEL = "excel"
 FORMAT_NAME_CSV = "csv"
@@ -102,6 +102,10 @@ def replace(self, dataset: xr.Dataset, index: int, output_path: str, **kwargs):
         """"Replace *dataset* at *index* in existing *output_path* using format-specific write parameters *kwargs*."""
         raise NotImplementedError()
 
+    def update(self, output_path: str, global_attrs: Dict[str, Any] = None, **kwargs):
+        """Update the dataset at *output_path*, e.g. set new *global_attrs*, using format-specific parameters *kwargs*."""
+        raise NotImplementedError()
+
 
 def register_dataset_io(dataset_io: DatasetIO):
     # noinspection PyTypeChecker
@@ -238,6 +242,10 @@ def append(self, dataset: xr.Dataset, path: str, **kwargs):
         else:
             self.datasets[path] = dataset.copy()
 
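+    # Merge *global_attrs* into the attributes of the dataset held in memory;
+    # this is a no-op when *global_attrs* is empty or None.
+    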
def update(self, output_path: str, global_attrs: Dict[str, Any] = None, **kwargs): + if global_attrs: + ds = self.datasets[output_path] + ds.attrs.update(global_attrs) class Netcdf4DatasetIO(DatasetIO): """ @@ -293,6 +301,13 @@ def append(self, dataset: xr.Dataset, output_path: str, **kwargs): old_ds.close() rimraf(temp_path) + def update(self, output_path: str, global_attrs: Dict[str, Any] = None, **kwargs): + if global_attrs: + import netCDF4 + ds = netCDF4.Dataset(output_path, 'r+') + ds.setncatts(global_attrs) + ds.close() + class ZarrDatasetIO(DatasetIO): """ @@ -372,7 +387,8 @@ def read(self, path: str, **kwargs) -> xr.Dataset: root = root[1:] if endpoint_url and root is not None: - s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(endpoint_url=endpoint_url, region_name=region_name)) + s3 = s3fs.S3FileSystem(anon=True, + client_kwargs=dict(endpoint_url=endpoint_url, region_name=region_name)) path_or_store = s3fs.S3Map(root=root, s3=s3, check=False) if 'max_cache_size' in kwargs: max_cache_size = kwargs.pop('max_cache_size') @@ -428,6 +444,13 @@ def insert(self, dataset: xr.Dataset, index: int, output_path: str, **kwargs): def replace(self, dataset: xr.Dataset, index: int, output_path: str, **kwargs): replace_time_slice(output_path, index, dataset) + def update(self, output_path: str, global_attrs: Dict[str, Any] = None, **kwargs): + if global_attrs: + import zarr + ds = zarr.open_group(output_path, mode='r+', **kwargs) + ds.attrs.update(global_attrs) + + # noinspection PyAbstractClass class CsvDatasetIO(DatasetIO): """ diff --git a/xcube/util/update.py b/xcube/util/update.py index 967697dc1..f1b18be24 100644 --- a/xcube/util/update.py +++ b/xcube/util/update.py @@ -58,7 +58,7 @@ def update_dataset_attrs(dataset: xr.Dataset, dataset.attrs.update(global_attrs) return _update_dataset_attrs(dataset, [_LON_ATTRS_DATA, _LAT_ATTRS_DATA, _TIME_ATTRS_DATA], - update_existing=update_existing, in_place=False) + update_existing=update_existing, in_place=True) def update_dataset_spatial_attrs(dataset: xr.Dataset, @@ -111,7 +111,7 @@ def _update_dataset_attrs(dataset: xr.Dataset, coord_bnds_name = coord.attrs.get('bounds', coord_bnds_name) if coord_bnds_name in dataset: coord_bnds = dataset[coord_bnds_name] - if coord_bnds is not None and coord_bnds.ndim == 2 and coord_bnds.shape[0] > 1 and coord_bnds.shape[1] == 2: + if coord_bnds is not None and coord_bnds.ndim == 2 and coord_bnds.shape[1] == 2: coord_v1 = coord_bnds[0][0] coord_v2 = coord_bnds[-1][1] coord_res = (coord_v2 - coord_v1) / coord_bnds.shape[0] @@ -119,14 +119,17 @@ def _update_dataset_attrs(dataset: xr.Dataset, coord_min, coord_max = (coord_v1, coord_v2) if coord_res > 0 else (coord_v2, coord_v1) dataset.attrs[coord_min_attr_name] = cast(coord_min.values) dataset.attrs[coord_max_attr_name] = cast(coord_max.values) - elif coord is not None and coord.ndim == 1 and coord.shape[0] > 1: + elif coord is not None and coord.ndim == 1: coord_v1 = coord[0] coord_v2 = coord[-1] - coord_res = (coord_v2 - coord_v1) / (coord.shape[0] - 1) - coord_v1 -= coord_res / 2 - coord_v2 += coord_res / 2 - coord_res = float(coord_res.values) - coord_min, coord_max = (coord_v1, coord_v2) if coord_res > 0 else (coord_v2, coord_v1) + if coord.shape[0] > 1: + coord_res = (coord_v2 - coord_v1) / (coord.shape[0] - 1) + coord_v1 -= coord_res / 2 + coord_v2 += coord_res / 2 + coord_res = float(coord_res.values) + coord_min, coord_max = (coord_v1, coord_v2) if coord_res > 0 else (coord_v2, coord_v1) + else: + coord_min, coord_max = coord_v1, coord_v2 
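+            # Note: a cell resolution can only be derived when the coordinate has
+            # more than one element; the min/max attributes are set in either case.
 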
dataset.attrs[coord_min_attr_name] = cast(coord_min.values) dataset.attrs[coord_max_attr_name] = cast(coord_max.values) if coord_units_attr_name is not None and coord_units is not None: From d019273676f944cf23822c7773b9e79a7552fa9e Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 24 Sep 2019 19:54:26 +0200 Subject: [PATCH 2/6] closes #181 --- CHANGES.md | 1 + test/api/gen/default/test_gen.py | 25 +++++++++++++++++++++---- xcube/api/gen/gen.py | 1 - xcube/util/timeslice.py | 13 +++++++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index e74be1b32..728b37160 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -39,6 +39,7 @@ ### Fixes +* `xcube gen` CLI now updates metadata correctly. (#181) * It was no longer possible to use the `xcube gen` CLI with `--proc` option. (#120) * `totalCount` attribute of time series returned by Web API `ts/{dataset}/{variable}/{geom-type}` now contains the correct number of possible observations. Was always `1` before. diff --git a/test/api/gen/default/test_gen.py b/test/api/gen/default/test_gen.py index 6e6251b5d..ed05156ff 100644 --- a/test/api/gen/default/test_gen.py +++ b/test/api/gen/default/test_gen.py @@ -12,7 +12,7 @@ def clean_up(): - files = ['l2c-single.nc', 'l2c.nc', 'l2c.zarr', 'l2c-single.zarr'] + files = ['l2c-single.nc', 'l2c-single.zarr', 'l2c.nc', 'l2c.zarr'] for file in files: rimraf(file) rimraf(file + '.temp.nc') # May remain from Netcdf4DatasetIO.append() @@ -27,7 +27,7 @@ def setUp(self): def tearDown(self): clean_up() - def test_process_inputs_single(self): + def test_process_inputs_single_nc(self): status, output = gen_cube_wrapper( [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c-single.nc') self.assertEqual(True, status) @@ -49,6 +49,16 @@ def test_process_inputs_append_multiple_nc(self): time_coverage_start='2016-12-31T12:00:00.000000000', time_coverage_end='2017-01-03T12:00:00.000000000')) + def test_process_inputs_single_zarr(self): + status, output = gen_cube_wrapper( + [get_inputdata_path('20170101-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c-single.zarr') + self.assertEqual(True, status) + self.assertTrue('\nstep 8 of 8: creating input slice in l2c-single.zarr...\n' in output) + self.assert_cube_ok(xr.open_zarr('l2c-single.zarr'), 1, + dict(date_modified=None, + time_coverage_start='2016-12-31T12:00:00.000000000', + time_coverage_end='2017-01-01T12:00:00.000000000')) + def test_process_inputs_append_multiple_zarr(self): status, output = gen_cube_wrapper( [get_inputdata_path('201701??-IFR-L4_GHRSST-SSTfnd-ODYSSEA-NWE_002-v2.0-fv1.0.nc')], 'l2c.zarr', @@ -109,7 +119,9 @@ def assert_cube_ok(self, cube: xr.Dataset, expected_time_dim: int, expected_extr self.assertEqual({'lat': 180, 'lon': 320, 'bnds': 2, 'time': expected_time_dim}, cube.dims) self.assertEqual({'lon', 'lat', 'time', 'lon_bnds', 'lat_bnds', 'time_bnds'}, set(cube.coords)) self.assertEqual({'analysed_sst'}, set(cube.data_vars)) - expected_attrs = dict(date_modified=None, + expected_attrs = dict(title='Test Cube', + project='xcube', + date_modified=None, geospatial_lon_min=-4.0, geospatial_lon_max=12.0, geospatial_lon_resolution=0.05, @@ -170,4 +182,9 @@ def output_monitor(msg): sort_mode=sort_mode, ) - return gen_cube(dry_run=False, monitor=output_monitor, **config), output + output_metadata = dict( + title='Test Cube', + project='xcube', + ) + + return gen_cube(dry_run=False, monitor=output_monitor, output_metadata=output_metadata, **config), output diff --git 
a/xcube/api/gen/gen.py b/xcube/api/gen/gen.py index 59590cb73..9552e4e07 100644 --- a/xcube/api/gen/gen.py +++ b/xcube/api/gen/gen.py @@ -261,7 +261,6 @@ def step7(input_slice): steps.append((step7, 'post-processing input slice')) if update_mode == 'create': - def step8(input_slice): if not dry_run: rimraf(output_path) diff --git a/xcube/util/timeslice.py b/xcube/util/timeslice.py index caf7d8167..128341ab9 100644 --- a/xcube/util/timeslice.py +++ b/xcube/util/timeslice.py @@ -77,6 +77,19 @@ def append_time_slice(store: Union[str, MutableMapping], """ if chunk_sizes: time_slice = chunk_dataset(time_slice, chunk_sizes, format_name='zarr') + + # Unfortunately time_slice.to_zarr(store, mode='a', append_dim='time') will replace global attributes of store + # with attributes of time_slice (xarray bug?), which are usually empty in our case. + # Hence, we must save our old attributes in a copy of time_slice. + ds = zarr.open_group(store, mode='r') + time_slice = time_slice.copy() + time_slice.attrs.update(ds.attrs) + if 'coordinates' in time_slice.attrs: + # Remove 'coordinates', otherwise we get + # ValueError: cannot serialize coordinates because the global attribute 'coordinates' already exists + # from next time_slice.to_zarr(...) call. + time_slice.attrs.pop('coordinates') + time_slice.to_zarr(store, mode='a', append_dim='time') unchunk_dataset(store, coords_only=True) From a754fdcce9fb264056b82a3be552ecf491a579f1 Mon Sep 17 00:00:00 2001 From: AliceBalfanz Date: Wed, 25 Sep 2019 08:56:34 +0200 Subject: [PATCH 3/6] updating output of xcube gen example section to include the metadata of the generated cube --- docs/source/examples/xcube_gen.rst | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/source/examples/xcube_gen.rst b/docs/source/examples/xcube_gen.rst index aa783cff9..e652b9ce2 100644 --- a/docs/source/examples/xcube_gen.rst +++ b/docs/source/examples/xcube_gen.rst @@ -195,6 +195,41 @@ The metadata of the xcube dataset can be viewed with :doc:`cli/xcube dump` as we Dimensions without coordinates: bnds Data variables: analysed_sst (time, lat, lon) float64 dask.array + Attributes: + acknowledgment: Data Cube produced based on data provided by ... + comment: + contributor_name: + contributor_role: + creator_email: info@brockmann-consult.de + creator_name: Brockmann Consult GmbH + creator_url: https://www.brockmann-consult.de + date_modified: 2019-09-25T08:50:32.169031 + geospatial_lat_max: 62.666666666666664 + geospatial_lat_min: 48.0 + geospatial_lat_resolution: 0.002604166666666666 + geospatial_lat_units: degrees_north + geospatial_lon_max: 10.666666666666664 + geospatial_lon_min: -16.0 + geospatial_lon_resolution: 0.0026041666666666665 + geospatial_lon_units: degrees_east + history: xcube/reproj-snap-nc + id: demo-bc-sst-sns-l2c-v1 + institution: Brockmann Consult GmbH + keywords: + license: terms and conditions of the DCS4COP data dist... 
+ naming_authority: bc
+ processing_level: L2C
+ project: xcube
+ publisher_email: info@brockmann-consult.de
+ publisher_name: Brockmann Consult GmbH
+ publisher_url: https://www.brockmann-consult.de
+ references: https://dcs4cop.eu/
+ source: CMEMS Global SST & Sea Ice Anomaly Data Cube
+ standard_name_vocabulary:
+ summary:
+ time_coverage_end: 2017-06-08T00:00:00.000000000
+ time_coverage_start: 2017-06-05T00:00:00.000000000
+ title: CMEMS Global SST Anomaly Data Cube
 
 The metadata for the variable analysed_sst can be viewed:

From d707e7ae62e3dda2d9a1f07d064578a29aa83d38 Mon Sep 17 00:00:00 2001
From: Norman Fomferra
Date: Wed, 25 Sep 2019 10:33:35 +0200
Subject: [PATCH 4/6] updates

---
 CHANGES.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 728b37160..dd9f12be3 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,14 +1,13 @@
 ## Changes in 0.2.0.dev2 (in dev)
 
-* Reorganisation of the Documentation and Examples Section (partly addressing #106)
-* Loosened python conda environment to satisfy conda-forge requirements
-
 ### New
 
 * Added first version of the [xcube documentation](https://xcube.readthedocs.io/) generated from `./docs` folder.
 
 ### Enhancements
 
+* Reorganisation of the Documentation and Examples Section (partly addressing #106)
+* Loosened python conda environment to satisfy conda-forge requirements
 * Made CLI parameters consistent and removed or changed parameter abbreviations where the same
   abbreviation had been used for different parameters. (partly addressing #91)
   Every CLI command that generates an output requires a path, provided via the option `-o`, `--output`.
   If not provided by the user, a default output path is generated.
   The following CLI parameters have changed, and their abbreviations are no longer supported:

From c84a0ae36744edc8d76f512bc981359746c7e530 Mon Sep 17 00:00:00 2001
From: Norman Fomferra
Date: Wed, 25 Sep 2019 11:22:57 +0200
Subject: [PATCH 5/6] updates

---
 docs/source/cli/xcube_gen.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/cli/xcube_gen.rst b/docs/source/cli/xcube_gen.rst
index 31d70400f..eb6ffdf8a 100644
--- a/docs/source/cli/xcube_gen.rst
+++ b/docs/source/cli/xcube_gen.rst
@@ -108,7 +108,7 @@ Configuration File
 ==================
 
 Configuration files passed to ``xcube gen`` via the ``-c, --config`` option use `YAML format`_.
-Multiple configuration files may be given. In this case all configuration are merged into a single one.
+Multiple configuration files may be given. In this case all configurations are merged into a single one.
 Parameter values will be overwritten by subsequent configurations if they are scalars. If they
 are objects / mappings, their values will be deeply merged.

From 8387e82110fe474b3ec5d6cedfa9a3f7c83d7640 Mon Sep 17 00:00:00 2001
From: Norman Fomferra
Date: Wed, 25 Sep 2019 11:41:04 +0200
Subject: [PATCH 6/6] by default, input_processor_name = None

---
 docs/source/cli/xcube_gen.rst    | 32 +++++++++++++++++---------------
 test/api/gen/default/test_gen.py |  4 ++--
 xcube/api/gen/gen.py             |  6 ++++--
 xcube/cli/gen.py                 |  6 ++++--
 4 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/docs/source/cli/xcube_gen.rst b/docs/source/cli/xcube_gen.rst
index eb6ffdf8a..08916d0e6 100644
--- a/docs/source/cli/xcube_gen.rst
+++ b/docs/source/cli/xcube_gen.rst
@@ -19,14 +19,16 @@ Generate xcube dataset.
 ::
 
     Usage: xcube gen [OPTIONS] [INPUT]...
-
-      Generate xcube dataset. Data cubes may be created in one go or successively in
-      append mode, input by input. 
The input paths may be one or more input
-      files or a pattern that may contain wildcards '?', '*', and '**'. The
-      input paths can also be passed as lines of a text file. To do so, provide
-      exactly one input file with ".txt" extension which contains the actual
-      input paths to be used.
-
+
+      Generate xcube dataset. Data cubes may be created in one go or
+      successively for all given inputs. Each input is expected to provide a
+      single time slice which may be appended, inserted or which may replace an
+      existing time slice in the output dataset. The input paths may be one or
+      more input files or a pattern that may contain wildcards '?', '*', and
+      '**'. The input paths can also be passed as lines of a text file. To do
+      so, provide exactly one input file with ".txt" extension which contains
+      the actual input paths to be used.
+
     Options:
      -P, --proc INPUT-PROCESSOR      Input processor name. The available input
                                      processor names and additional information
                                      about input processors can be accessed by
                                      calling xcube gen --info. Defaults to
                                      "default", an input processor that can deal
                                      with simple datasets whose variables have
                                      dimensions ("lat", "lon") and conform with
                                      the CF conventions.
-     -c, --config CONFIG             xcube dataset configuration file in YAML format.
-                                     More than one config input file is
+     -c, --config CONFIG             xcube dataset configuration file in YAML
+                                     format. More than one config input file is
                                      allowed. When passing several config files,
                                      they are merged considering the order passed
                                      via command line.
      -o, --output OUTPUT             Output path. Defaults to 'out.zarr'
      -f, --format FORMAT             Output format. Information about output
                                      formats can be accessed by calling xcube gen
                                      --info. If omitted, the format will be
                                      guessed from the given output path.
      -S, --size SIZE                 Output size in pixels using format
                                      "<width>,<height>".
      -R, --region REGION             Output region using format
                                      "<lon-min>,<lat-min>,<lon-max>,<lat-max>"
-     --variables, --vars VARIABLES
-                                     Variables to be included in output. Comma-
+     --variables, --vars VARIABLES   Variables to be included in output. Comma-
                                      separated list of names which may contain
                                      wildcard characters "*" and "?".
      --resampling [Average|Bilinear|Cubic|CubicSpline|Lanczos|Max|Median|Min|Mode|Nearest|Q1|Q3]
                                      Fallback spatial resampling algorithm to be
                                      used for all variables. Defaults to
                                      'Nearest'.
      --prof                          Collect profiling information and dump
                                      results after processing.
-     --sort                          The input file list will be sorted before
-                                     creating the xcube dataset. If --sort parameter
-                                     is not passed, order of input list will be
-                                     kept.
+     --sort                          The input file list will be sorted before
+                                     creating the xcube dataset. If --sort
+                                     parameter is not passed, order of input list
+                                     will be kept.
      -I, --info                      Displays additional information about format
                                      options or about input processors.
      --dry_run                       Just read and process inputs, but don't
                                      produce any outputs.
      --help                          Show this message and exit.
 
+
 Below is the output of a ``xcube gen --info`` call showing five input processors installed via plugins.
::

diff --git a/test/api/gen/default/test_gen.py b/test/api/gen/default/test_gen.py
index ed05156ff..2ae937ca6 100644
--- a/test/api/gen/default/test_gen.py
+++ b/test/api/gen/default/test_gen.py
@@ -150,7 +150,7 @@ def test_illegal_proc(self):
             gen_cube_wrapper(
                 [get_inputdata_path('20170101120000-UKMO-L4_GHRSST-SSTfnd-OSTIAanom-GLOB-v02.0-fv02.0.nc')],
                 'l2c-single.zarr', sort_mode=True, input_processor_name="")
-        self.assertEqual('Missing input_processor_name', f'{e.exception}')
+        self.assertEqual('input_processor_name must not be empty', f'{e.exception}')
 
         with self.assertRaises(ValueError) as e:
             gen_cube_wrapper(
@@ -160,7 +160,7 @@
 
 
 # noinspection PyShadowingBuiltins
-def gen_cube_wrapper(input_paths, output_path, sort_mode=False, input_processor_name='default') \
+def gen_cube_wrapper(input_paths, output_path, sort_mode=False, input_processor_name=None) \
         -> Tuple[bool, Optional[str]]:
     output = None
 
diff --git a/xcube/api/gen/gen.py b/xcube/api/gen/gen.py
index 9552e4e07..3541d955a 100644
--- a/xcube/api/gen/gen.py
+++ b/xcube/api/gen/gen.py
@@ -92,8 +92,10 @@ def gen_cube(input_paths: Sequence[str] = None,
         warnings.warn('append_mode in gen_cube() is deprecated, '
                       'time slices will now always be inserted, replaced, or appended.')
 
-    if not input_processor_name:
-        raise ValueError('Missing input_processor_name')
+    if input_processor_name is None:
+        input_processor_name = 'default'
+    elif input_processor_name == '':
+        raise ValueError('input_processor_name must not be empty')
 
     input_processor = get_input_processor(input_processor_name)
     if not input_processor:
diff --git a/xcube/cli/gen.py b/xcube/cli/gen.py
index f65f8e75a..e8ebf94c8 100644
--- a/xcube/cli/gen.py
+++ b/xcube/cli/gen.py
@@ -31,7 +31,7 @@
 # noinspection PyShadowingBuiltins
 @click.command(name='gen', context_settings={"ignore_unknown_options": True})
 @click.argument('input', nargs=-1)
-@click.option('--proc', '-P', metavar='INPUT-PROCESSOR', default='default',
+@click.option('--proc', '-P', metavar='INPUT-PROCESSOR', default=None,
               help=f'Input processor name. '
                    f'The available input processor names and additional information about input processors '
                    'can be accessed by calling xcube gen --info. Defaults to "default", an input processor '
                    'that can deal with simple datasets whose variables have dimensions ("lat", "lon") and '
                    'conform with the CF conventions.')
@@ -85,7 +85,9 @@ def gen(input: Sequence[str],
         sort: bool):
     """
     Generate xcube dataset.
-    Data cubes may be created in one go or successively in append mode, input by input.
+    Data cubes may be created in one go or successively for all given inputs.
+    Each input is expected to provide a single time slice which may be appended, inserted or which may replace an
+    existing time slice in the output dataset.
     The input paths may be one or more input files or a pattern that may contain wildcards '?', '*', and '**'.
     The input paths can also be passed as lines of a text file. To do so, provide exactly one input file with
     ".txt" extension which contains the actual input paths to be used.
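
For reference, the keyword-argument call pattern that this series converges on, shown as a minimal
sketch (it mirrors gen_cube_wrapper() in test_gen.py above; the input and output paths are
illustrative only, not part of the patches)::

    from xcube.api.gen.config import get_config_dict
    from xcube.api.gen.gen import gen_cube

    config = get_config_dict(
        input_paths=['inputdata/*.nc'],   # may contain wildcards '?', '*', and '**'
        output_path='l2c.zarr',
        output_size='320,180',            # "<width>,<height>"
        output_region='-4,47,12,56',      # "<lon-min>,<lat-min>,<lon-max>,<lat-max>"
        output_resampling='Nearest',
        output_variables='analysed_sst',
        sort_mode=True,
    )

    # input_processor_name may be omitted: since PATCH 6/6, gen_cube()
    # falls back to the 'default' input processor.
    gen_cube(dry_run=False, monitor=print, **config)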