Skip to content

Commit 61815c3

Browse files
authored
Merge pull request #527 from dcs4cop/toniof-516-unify-datasetio
Toniof 516 unify datasetio
2 parents 0b9c41c + 9f49034 commit 61815c3

File tree

8 files changed: +455 additions, -96 deletions

examples/serve/demo/config-with-stores.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ DataStores:
3636
# client_kwargs:
3737
# endpoint_url: https://s3.eu-central-1.amazonaws.com
3838
Datasets:
39-
- Identifier: "*.zarr"
39+
- Path: "*.zarr"
4040
Style: "default"
4141
# ChunkCacheSize: 1G
4242

test/core/store/test_storepool.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,34 @@ def test_multi_stores_with_params(self):
326326
self.assertIsInstance(pool, DataStorePool)
327327
self.assertEqual(["local-1", "local-2", "ram-1", "ram-2"], pool.store_instance_ids)
328328
for instance_id in pool.store_instance_ids:
329-
self.assertTrue(pool.has_store_config(instance_id))
329+
self.assertTrue(pool.has_store_instance(instance_id))
330330
self.assertIsInstance(pool.get_store_config(instance_id), DataStoreConfig)
331331
self.assertIsInstance(pool.get_store(instance_id), DataStore)
332+
333+
def test_get_store_instance_id(self):
    """Exercise ``DataStorePool.get_store_instance_id()`` in both modes.

    A pool holding the single instance ``'dir-1'`` is queried with
    the registered configuration itself, with a configuration that
    uses different store parameters, and with a configuration that
    shares store id and parameters but additionally carries a title.
    """
    registered_params = {
        "root": "./bibo"
    }
    other_params = {
        "root": "./babo"
    }

    registered_config = DataStoreConfig(store_id='file',
                                        store_params=registered_params)
    other_config = DataStoreConfig(store_id='file',
                                   store_params=other_params)
    # Same store id and parameters as the registered one, plus a title.
    titled_config = DataStoreConfig(store_id='file',
                                    store_params=registered_params,
                                    title='A third configuration')

    pool = DataStorePool({'dir-1': registered_config})

    # The registered configuration is found with and without strictness.
    found = pool.get_store_instance_id(registered_config)
    self.assertEqual('dir-1', found)
    found = pool.get_store_instance_id(registered_config,
                                       strict_check=True)
    self.assertEqual('dir-1', found)

    # Different store parameters: no instance is reported.
    found = pool.get_store_instance_id(other_config)
    self.assertIsNone(found)

    # The titled configuration is only found by the non-strict lookup.
    found = pool.get_store_instance_id(titled_config)
    self.assertEqual('dir-1', found)
    found = pool.get_store_instance_id(titled_config,
                                       strict_check=True)
    self.assertIsNone(found)

test/webapi/test_config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def test_from_dict(self):
6060
},
6161
"Datasets": [
6262
{
63-
"Identifier": "*.zarr",
63+
"Path": "*.zarr",
6464
"Style": "default"
6565
}
6666
]

test/webapi/test_context.py

+243
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,246 @@ def test_interpolates_vars(self):
211211
normalize_prefix('/${name}'))
212212
self.assertEqual(f'/xcube/v{version}',
213213
normalize_prefix('/${name}/v${version}'))
214+
215+
216+
class MaybeAssignStoreInstanceIdsTest(unittest.TestCase):
    """Checks the dataset configurations reported by a test service
    context after raw configurations were placed under its
    ``'Datasets'`` configuration key.

    Each test installs one or more dataset configuration dictionaries
    and inspects what ``get_dataset_configs()`` returns for them, in
    particular the ``'StoreInstanceId'`` and ``'Path'`` entries.
    """

    @staticmethod
    def _adjusted_configs(*dataset_configs):
        """Install *dataset_configs* in a fresh test service context and
        return the result of its ``get_dataset_configs()``."""
        service_ctx = new_test_service_context()
        service_ctx.config['Datasets'] = list(dataset_configs)
        return service_ctx.get_dataset_configs()

    def test_find_common_store(self):
        """Several datasets on two file systems: the expected result
        carries store instance ids and shortened paths."""
        # (Identifier, FileSystem, Path) as configured.
        configured = [
            ('z_0', 'local', '/one/path/abc.zarr'),
            ('z_1', 'local', '/one/path/def.zarr'),
            ('z_4', 'obs', '/one/path/mno.zarr'),
            ('z_2', 'local', '/another/path/ghi.zarr'),
            ('z_3', 'local', '/one/more/path/jkl.zarr'),
            ('z_5', 'obs', '/one/path/pqr.zarr'),
            ('z_6', 'local', '/one/path/stu.zarr'),
            ('z_7', 'local', '/one/more/path/vwx.zarr'),
        ]
        # (Identifier, FileSystem, Path, StoreInstanceId) as expected.
        adjusted = [
            ('z_0', 'local', 'path/abc.zarr', 'local_2'),
            ('z_1', 'local', 'path/def.zarr', 'local_2'),
            ('z_4', 'obs', 'mno.zarr', 'obs_1'),
            ('z_2', 'local', 'ghi.zarr', 'local_1'),
            ('z_3', 'local', 'more/path/jkl.zarr', 'local_2'),
            ('z_5', 'obs', 'pqr.zarr', 'obs_1'),
            ('z_6', 'local', 'path/stu.zarr', 'local_2'),
            ('z_7', 'local', 'more/path/vwx.zarr', 'local_2'),
        ]

        dataset_configs = [
            {'Identifier': ds_id, 'FileSystem': file_system, 'Path': path}
            for ds_id, file_system, path in configured
        ]
        expected_dataset_configs = [
            {'Identifier': ds_id,
             'FileSystem': file_system,
             'Path': path,
             'StoreInstanceId': store_instance_id}
            for ds_id, file_system, path, store_instance_id in adjusted
        ]

        adjusted_dataset_configs = self._adjusted_configs(*dataset_configs)
        self.assertEqual(expected_dataset_configs, adjusted_dataset_configs)

    def test_with_instance_id(self):
        """A configuration that already names a store instance is
        expected to come back equal to what was put in."""
        configured = {'Identifier': 'zero',
                      'Title': 'Test 0',
                      'FileSystem': 'local',
                      'StoreInstanceId': 'some_id'}
        # Snapshot taken before the context sees the dictionary.
        snapshot = configured.copy()

        reported = self._adjusted_configs(configured)[0]

        self.assertEqual(snapshot, reported)

    def test_local(self):
        """A local dataset is expected to gain ``'StoreInstanceId'``
        as its last key, with all other entries as configured."""
        reported = self._adjusted_configs({'Identifier': 'one',
                                           'Title': 'Test 1',
                                           'FileSystem': 'local',
                                           'Path': 'cube-1-250-250.zarr'})[0]

        self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Path',
                          'StoreInstanceId'],
                         list(reported.keys()))
        expected_entries = [
            ('Identifier', 'one'),
            ('Title', 'Test 1'),
            ('FileSystem', 'local'),
            ('Path', 'cube-1-250-250.zarr'),
            ('StoreInstanceId', 'local_1'),
        ]
        for key, expected_value in expected_entries:
            self.assertEqual(expected_value, reported[key])

    def test_s3(self):
        """An object-storage dataset: the expected path no longer
        contains its leading ``xcube-examples/`` component."""
        reported = self._adjusted_configs({
            'Identifier': 'two',
            'Title': 'Test 2',
            'FileSystem': 'obs',
            'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
            'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-2.zarr',
            'Region': 'eu-central-1',
        })[0]

        self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Endpoint',
                          'Path', 'Region', 'StoreInstanceId'],
                         list(reported.keys()))
        expected_entries = [
            ('Identifier', 'two'),
            ('Title', 'Test 2'),
            ('FileSystem', 'obs'),
            ('Endpoint', 'https://s3.eu-central-1.amazonaws.com'),
            ('Path', 'OLCI-SNS-RAW-CUBE-2.zarr'),
            ('Region', 'eu-central-1'),
            ('StoreInstanceId', 'obs_1'),
        ]
        for key, expected_value in expected_entries:
            self.assertEqual(expected_value, reported[key])

    def test_memory(self):
        """A ``'memory'`` dataset is expected to come back equal to
        what was put in."""
        configured = {'Identifier': 'three',
                      'Title': 'Test 3',
                      'FileSystem': 'memory'}
        # Snapshot taken before the context sees the dictionary.
        snapshot = configured.copy()

        reported = self._adjusted_configs(configured)[0]

        self.assertEqual(snapshot, reported)

    def test_missing_file_system(self):
        """Without a ``'FileSystem'`` entry the dataset is expected to
        receive the store instance id ``'local_1'``."""
        reported = self._adjusted_configs({'Identifier': 'five',
                                           'Title': 'Test 5',
                                           'Path': 'cube-1-250-250.zarr'})[0]

        self.assertEqual(['Identifier', 'Title', 'Path', 'StoreInstanceId'],
                         list(reported.keys()))
        expected_entries = [
            ('Identifier', 'five'),
            ('Title', 'Test 5'),
            ('Path', 'cube-1-250-250.zarr'),
            ('StoreInstanceId', 'local_1'),
        ]
        for key, expected_value in expected_entries:
            self.assertEqual(expected_value, reported[key])

    def test_invalid_file_system(self):
        """With an unknown ``'FileSystem'`` value no
        ``'StoreInstanceId'`` key is expected in the result."""
        reported = self._adjusted_configs({'Identifier': 'five',
                                           'Title': 'Test 5a',
                                           'FileSystem': 'invalid',
                                           'Path': 'cube-1-250-250.zarr'})[0]

        self.assertEqual(['Identifier', 'Title', 'FileSystem', 'Path'],
                         list(reported.keys()))
        expected_entries = [
            ('Identifier', 'five'),
            ('Title', 'Test 5a'),
            ('FileSystem', 'invalid'),
            ('Path', 'cube-1-250-250.zarr'),
        ]
        for key, expected_value in expected_entries:
            self.assertEqual(expected_value, reported[key])

    def test_local_store_already_existing(self):
        """Two local datasets are expected to end up with the same
        store instance id."""
        first, second = self._adjusted_configs(
            {'Identifier': 'six',
             'Title': 'Test 6',
             'FileSystem': 'local',
             'Path': 'cube-1-250-250.zarr'},
            {'Identifier': 'six_a',
             'Title': 'Test 6 a',
             'FileSystem': 'local',
             'Path': 'cube-5-100-200.zarr'},
        )[:2]

        self.assertEqual(first['StoreInstanceId'],
                         second['StoreInstanceId'])

    def test_s3_store_already_existing(self):
        """Two object-storage datasets with the same endpoint and
        region are expected to end up with the same store instance
        id."""
        first, second = self._adjusted_configs(
            {'Identifier': 'seven',
             'Title': 'Test 7',
             'FileSystem': 'obs',
             'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
             'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-2.zarr',
             'Region': 'eu-central-1'},
            {'Identifier': 'seven_a',
             'Title': 'Test 7 a',
             'FileSystem': 'obs',
             'Endpoint': 'https://s3.eu-central-1.amazonaws.com',
             'Path': 'xcube-examples/OLCI-SNS-RAW-CUBE-3.zarr',
             'Region': 'eu-central-1'},
        )[:2]

        self.assertEqual(first['StoreInstanceId'],
                         second['StoreInstanceId'])

xcube/core/store/storepool.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,25 @@ def store_instance_ids(self) -> List[str]:
262262
def store_configs(self) -> List[DataStoreConfig]:
263263
return [v.store_config for k, v in self._instances.items()]
264264

265-
def has_store_config(self, store_instance_id: str) -> bool:
265+
def get_store_instance_id(self,
                          store_config: DataStoreConfig,
                          strict_check: bool = False) -> Optional[str]:
    """Look up the id of the store instance configured like
    *store_config*.

    The instances are scanned in the iteration order of the pool's
    internal instance mapping and the first match wins.

    :param store_config: The store configuration to search for.
    :param strict_check: If True, an instance matches only if its
        whole configuration compares equal to *store_config*.
        If False (the default), equal ``store_id`` and
        ``store_params`` are sufficient.
    :return: The id of the first matching store instance,
        or None if no instance matches.
    """
    assert_instance(store_config, DataStoreConfig, 'store_config')
    # "instance_id" rather than "id", which would shadow the builtin.
    for instance_id, instance in self._instances.items():
        candidate = instance.store_config
        if strict_check:
            is_match = candidate == store_config
        else:
            is_match = (candidate.store_id == store_config.store_id
                        and candidate.store_params
                        == store_config.store_params)
        if is_match:
            return instance_id
    return None
279+
280+
def has_store_config(self, store_config: DataStoreConfig) -> bool:
    """Tell whether ``get_store_instance_id()``, called with its
    default arguments, finds a store instance for *store_config*.

    :param store_config: The store configuration to search for.
    :return: True if an instance id was found, False otherwise.
    """
    matching_instance_id = self.get_store_instance_id(store_config)
    return matching_instance_id is not None
282+
283+
def has_store_instance(self, store_instance_id: str) -> bool:
    """Tell whether the pool holds an instance registered under
    *store_instance_id*.

    :param store_instance_id: The store instance id to look for;
        passed to ``assert_instance()`` together with ``str`` first.
    :return: True if the id is a key of the pool's instance mapping.
    """
    assert_instance(store_instance_id, str, 'store_instance_id')
    registered_instances = self._instances
    return store_instance_id in registered_instances
268286

xcube/webapi/config.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def get_schema(cls) -> JsonObjectSchema:
9191
factory=DatasetConfig,
9292
required=[
9393
'Identifier',
94+
'Path'
9495
],
9596
properties=dict(
9697
Identifier=IdentifierSchema,
@@ -181,10 +182,11 @@ def get_schema(cls) -> JsonObjectSchema:
181182
return JsonObjectSchema(
182183
factory=DataStoreDatasetConfig,
183184
required=[
184-
'Identifier'
185+
'Path'
185186
],
186187
properties=dict(
187188
Identifier=IdentifierSchema,
189+
Path=PathSchema,
188190
StoreInstanceId=IdentifierSchema, # will be set by server
189191
StoreOpenParams=JsonObjectSchema(additional_properties=True),
190192
**_get_common_dataset_properties()

0 commit comments

Comments
 (0)