From b2705b5620d5644cd00301e14c63500a4ad482bd Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Wed, 24 Jul 2024 15:03:57 -0700 Subject: [PATCH] [u r] Remove support for indexing TDR datasets (#6426) --- OPERATOR.rst | 2 +- UPGRADING.rst | 10 +++ deployments/anvilbox/environment.py | 2 +- deployments/anvildev/environment.py | 2 +- deployments/anvilprod/environment.py | 2 +- deployments/dev/environment.py | 2 +- deployments/hammerbox/environment.py | 2 +- deployments/prod/environment.py | 2 +- deployments/sandbox/environment.py | 2 +- deployments/tempdev/environment.py | 2 +- environment.py | 11 +-- scripts/recan_bundle_tdr.py | 6 +- .../plugins/repository/tdr_hca/__init__.py | 61 +++++-------- src/azul/terra.py | 86 +++++-------------- test/azul_test_case.py | 4 +- ...2-e274-affe-aabc-eb3db63ad068.results.json | 32 +++---- test/indexer/test_tdr.py | 2 +- test/service/test_manifest.py | 6 +- test/service/test_repository_files.py | 2 +- test/service/test_response_anvil.py | 16 ++-- 20 files changed, 98 insertions(+), 156 deletions(-) diff --git a/OPERATOR.rst b/OPERATOR.rst index d097043b27..0b528751a8 100644 --- a/OPERATOR.rst +++ b/OPERATOR.rst @@ -278,7 +278,7 @@ To specify a catalog to be reindexed, set ``Key`` to ``azul_current_catalog`` and ``Value`` to the name of the catalog, for example, ``dcp3``. To specify the sources to be reindexed, set ``Key`` to ``azul_current_sources`` and ``Value`` to a space-separated list of sources globs, e.g. -``*:snapshot/hca_dev_* *:snapshot/lungmap_dev_*``. Check the inputs you just +``*:hca_dev_* *:lungmap_dev_*``. Check the inputs you just made. Start the ``reindex`` job by clicking on ``Run job``. Wait until the job has completed. diff --git a/UPGRADING.rst b/UPGRADING.rst index 3f7dfc4860..89eec7bcc4 100644 --- a/UPGRADING.rst +++ b/UPGRADING.rst @@ -20,6 +20,16 @@ reverted. This is all fairly informal and loosely defined. Hopefully we won't have too many entries in this file. 
+#6426 Clean-up and generalize TDR source specs +============================================== + +The "snapshot/" string has been removed from TDR source specs. + +Update the ``mksrc`` function in ``environment.py`` for each of your personal +deployments. As always, use the sandbox deployment's ``environment.py`` as a +model when upgrading personal deployments. + + #6381 Update Terraform to 1.9.x =============================== diff --git a/deployments/anvilbox/environment.py b/deployments/anvilbox/environment.py index 7a4d466c27..8574ed750d 100644 --- a/deployments/anvilbox/environment.py +++ b/deployments/anvilbox/environment.py @@ -41,7 +41,7 @@ def mksrc(google_project, source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, prefix + '/0' ]) return project, source diff --git a/deployments/anvildev/environment.py b/deployments/anvildev/environment.py index a9adc0269e..72911af2e2 100644 --- a/deployments/anvildev/environment.py +++ b/deployments/anvildev/environment.py @@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, '/' + str(partition_prefix_length(subgraphs)) ]) return project, source diff --git a/deployments/anvilprod/environment.py b/deployments/anvilprod/environment.py index 6f1aed7168..bbf6ad4b7c 100644 --- a/deployments/anvilprod/environment.py +++ b/deployments/anvilprod/environment.py @@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, '/' + str(partition_prefix_length(subgraphs)) ]) return project, source diff --git a/deployments/dev/environment.py b/deployments/dev/environment.py index 97f8b53dba..926a953b28 100644 --- a/deployments/dev/environment.py +++ b/deployments/dev/environment.py @@ -28,7 
+28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, '/' + str(partition_prefix_length(subgraphs)) ]) return project, source diff --git a/deployments/hammerbox/environment.py b/deployments/hammerbox/environment.py index d5de18020e..71693f0a31 100644 --- a/deployments/hammerbox/environment.py +++ b/deployments/hammerbox/environment.py @@ -41,7 +41,7 @@ def mksrc(google_project, source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, prefix + '/0' ]) return project, source diff --git a/deployments/prod/environment.py b/deployments/prod/environment.py index 35721cf04b..7449a9d6cd 100644 --- a/deployments/prod/environment.py +++ b/deployments/prod/environment.py @@ -30,7 +30,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, '/' + str(partition_prefix_length(subgraphs)) ]) return project, source diff --git a/deployments/sandbox/environment.py b/deployments/sandbox/environment.py index 01a45c4d89..bb5fdc4685 100644 --- a/deployments/sandbox/environment.py +++ b/deployments/sandbox/environment.py @@ -41,7 +41,7 @@ def mksrc(google_project, source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, prefix + '/0' ]) return project, source diff --git a/deployments/tempdev/environment.py b/deployments/tempdev/environment.py index e90ddb6928..41748e54cc 100644 --- a/deployments/tempdev/environment.py +++ b/deployments/tempdev/environment.py @@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str source = None if flags & pop else ':'.join([ 'tdr', google_project, - 'snapshot/' + snapshot, + snapshot, '/' + str(partition_prefix_length(subgraphs)) ]) return 
project, source diff --git a/environment.py b/environment.py index a1ff3cc8a7..cfea7373ec 100644 --- a/environment.py +++ b/environment.py @@ -70,15 +70,13 @@ def env() -> Mapping[str, Optional[str]]: # # The first catalog listed is the default catalog. # - # A source represents a TDR dataset, TDR snapshot, or canned staging - # area to index. Each source is a string matching the following EBNF - # grammar: + # A source represents a TDR snapshot or canned staging area to index. + # Each source is a string matching the following EBNF grammar: # # source = TDR source | canned source ; # # TDR source = 'tdr:', Google Cloud project name, - # ':', ( 'dataset' | 'snapshot' ), - # '/', TDR dataset or snapshot name, + # ':', TDR snapshot name, # ':', [ prefix ], # '/', partition prefix length ; # @@ -116,8 +114,7 @@ def env() -> Mapping[str, Optional[str]]: # # Examples: # - # tdr:broad-jade-dev-data:snapshot/hca_mvp:/1 - # tdr:broad-jade-dev-data:dataset/hca_mvp:2/1 + # tdr:broad-jade-dev-data:hca_mvp:/1 # https://github.com/HumanCellAtlas/schema-test-data/tree/de355ca/tests:2 # # This variable tends to be large.
If you get `Argument list too long` diff --git a/scripts/recan_bundle_tdr.py b/scripts/recan_bundle_tdr.py index f9e6a18088..372504a2cb 100644 --- a/scripts/recan_bundle_tdr.py +++ b/scripts/recan_bundle_tdr.py @@ -240,7 +240,6 @@ def __init__(self, bundle: TDRHCABundle, file_name: str): assert self.concrete_type.endswith('_file') self.file_manifest_entry = one(e for e in bundle.manifest if e['name'] == self.metadata['file_core']['file_name']) - assert bundle.fqid.source.spec.is_snapshot assert self.file_manifest_entry['drs_path'] is not None def to_json_row(self) -> JSON: @@ -368,7 +367,7 @@ def main(argv): help='The UUID of the existing DCP/1 canned bundle.') parser.add_argument('--source-id', '-s', default=TestTDRHCAPlugin.source.id, - help='The UUID of the snapshot/dataset to contain the canned DCP/2 bundle.') + help='The UUID of the snapshot to contain the canned DCP/2 bundle.') parser.add_argument('--version', '-v', default=TestTDRHCAPlugin.bundle_fqid.version, help='The version for any mock entities synthesized by the script.') @@ -401,8 +400,7 @@ def main(argv): tdr_source = TDRSourceRef(id=args.source_id, spec=TDRSourceSpec(prefix=Prefix.of_everything, subdomain='test_project', - name='test_name', - is_snapshot=True)) + name='test_name')) tdr_bundle = dss_bundle_to_tdr(dss_bundle, tdr_source) add_supp_files(tdr_bundle, diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py index a46b555225..df9b259862 100644 --- a/src/azul/plugins/repository/tdr_hca/__init__.py +++ b/src/azul/plugins/repository/tdr_hca/__init__.py @@ -5,7 +5,6 @@ ThreadPoolExecutor, ) from itertools import ( - groupby, islice, ) import json @@ -39,7 +38,6 @@ ) from azul.bigquery import ( BigQueryRow, - BigQueryRows, backtick, ) from azul.drs import ( @@ -276,27 +274,21 @@ def _parse_drs_uri(self, file_id: Optional[str], descriptor: JSON ) -> Optional[str]: - # The file_id column is present for datasets, but is usually null, may - # 
contain unexpected/unusable values, and NEVER produces usable DRS URLs, - # so we avoid parsing the column altogether for datasets. - if self.fqid.source.spec.is_snapshot: - if file_id is None: - try: - external_drs_uri = descriptor['drs_uri'] - except KeyError: - raise RequirementError('`file_id` is null and `drs_uri` ' - 'is not set in file descriptor', descriptor) - else: - # FIXME: Support non-null DRS URIs in file descriptors - # https://github.com/DataBiosphere/azul/issues/3631 - if external_drs_uri is not None: - log.warning('Non-null `drs_uri` in file descriptor (%s)', external_drs_uri) - external_drs_uri = None - return external_drs_uri + if file_id is None: + try: + external_drs_uri = descriptor['drs_uri'] + except KeyError: + raise RequirementError('`file_id` is null and `drs_uri` ' + 'is not set in file descriptor', descriptor) else: - return file_id + # FIXME: Support non-null DRS URIs in file descriptors + # https://github.com/DataBiosphere/azul/issues/3631 + if external_drs_uri is not None: + log.warning('Non-null `drs_uri` in file descriptor (%s)', external_drs_uri) + external_drs_uri = None + return external_drs_uri else: - return None + return file_id class Plugin(TDRPlugin[TDRHCABundle, TDRSourceSpec, TDRSourceRef, TDRBundleFQID]): @@ -324,7 +316,7 @@ def _list_bundles(self, ) -> list[TDRBundleFQID]: source_prefix = source.spec.prefix.common validate_uuid_prefix(source_prefix + prefix) - current_bundles = self._query_latest_version(source.spec, f''' + current_bundles = self._query_unique_sorted(f''' SELECT links_id, version FROM {backtick(self._full_table_name(source.spec, 'links'))} WHERE STARTS_WITH(links_id, '{source_prefix + prefix}') @@ -336,24 +328,15 @@ def _list_bundles(self, for row in current_bundles ] - def _query_latest_version(self, - source: TDRSourceSpec, - query: str, - group_by: str - ) -> list[BigQueryRow]: + def _query_unique_sorted(self, + query: str, + group_by: str + ) -> list[BigQueryRow]: iter_rows = self._run_sql(query) 
key = itemgetter(group_by) - groups = groupby(sorted(iter_rows, key=key), key=key) - return [self._choose_one_version(source, group) for _, group in groups] - - def _choose_one_version(self, - source: TDRSourceSpec, - versioned_items: BigQueryRows - ) -> BigQueryRow: - if source.is_snapshot: - return one(versioned_items) - else: - return max(versioned_items, key=itemgetter('version')) + rows = sorted(iter_rows, key=key) + require(len(set(map(key, rows))) == len(rows), 'Expected unique keys', group_by) + return rows def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle: bundle = TDRHCABundle(fqid=bundle_fqid, @@ -514,7 +497,7 @@ def quote(s): WHERE {self._in(where_columns, where_values)} ''' log.debug('Retrieving %i entities of type %r ...', len(entity_ids), entity_type) - rows = self._query_latest_version(source, query, group_by=pk_column) + rows = self._query_unique_sorted(query, group_by=pk_column) log.debug('Retrieved %i entities of type %r', len(rows), entity_type) missing = expected - {row[pk_column] for row in rows} require(not missing, diff --git a/src/azul/terra.py b/src/azul/terra.py index 787b2699c3..93739a25d1 100644 --- a/src/azul/terra.py +++ b/src/azul/terra.py @@ -95,49 +95,24 @@ class TDRSourceSpec(SourceSpec): subdomain: str name: str - is_snapshot: bool - - _type_dataset = 'dataset' - - _type_snapshot = 'snapshot' @classmethod def parse(cls, spec: str) -> 'TDRSourceSpec': """ Construct an instance from its string representation, using the syntax - 'tdr:{subdomain}:{type}/{name}:{prefix}' ending with an optional + 'tdr:{subdomain}:{name}:{prefix}' ending with an optional '/{partition_prefix_length}'. 
- >>> s = TDRSourceSpec.parse('tdr:foo:snapshot/bar:/0') + >>> s = TDRSourceSpec.parse('tdr:foo:bar:/0') >>> s # doctest: +NORMALIZE_WHITESPACE TDRSourceSpec(prefix=Prefix(common='', partition=0), subdomain='foo', - name='bar', - is_snapshot=True) - - >>> s.bq_name - 'bar' + name='bar') >>> str(s) - 'tdr:foo:snapshot/bar:/0' - - >>> d = TDRSourceSpec.parse('tdr:foo:dataset/bar:42/2') - >>> d # doctest: +NORMALIZE_WHITESPACE - TDRSourceSpec(prefix=Prefix(common='42', partition=2), - subdomain='foo', - name='bar', - is_snapshot=False) - >>> d.bq_name - 'datarepo_bar' - >>> str(d) - 'tdr:foo:dataset/bar:42/2' - - >>> TDRSourceSpec.parse('tdr:foo:baz/bar:42/0') - Traceback (most recent call last): - ... - AssertionError: baz + 'tdr:foo:bar:/0' - >>> TDRSourceSpec.parse('tdr:foo:snapshot/bar:n32/0') + >>> TDRSourceSpec.parse('tdr:foo:bar:n32/0') Traceback (most recent call last): ... azul.uuids.InvalidUUIDPrefixError: 'n32' is not a valid UUID prefix. @@ -145,76 +120,55 @@ def parse(cls, spec: str) -> 'TDRSourceSpec': rest, prefix = cls._parse(spec) # BigQuery (and by extension the TDR) does not allow : or / in dataset names service, subdomain, name = rest.split(':') - type, name = name.split('/') assert service == 'tdr', service - if type == cls._type_snapshot: - is_snapshot = True - elif type == cls._type_dataset: - is_snapshot = False - else: - assert False, type self = cls(prefix=prefix, subdomain=subdomain, - name=name, - is_snapshot=is_snapshot) + name=name) assert spec == str(self), spec return self - @property - def bq_name(self): - return self.name if self.is_snapshot else f'datarepo_{self.name}' - def __str__(self) -> str: """ The inverse of :meth:`parse`. 
- >>> s = 'tdr:foo:snapshot/bar:/0' + >>> s = 'tdr:foo:bar:/0' >>> s == str(TDRSourceSpec.parse(s)) True - >>> s = 'tdr:foo:snapshot/bar:22/0' + >>> s = 'tdr:foo:bar:22/0' >>> s == str(TDRSourceSpec.parse(s)) True - >>> s = 'tdr:foo:snapshot/bar:22/2' + >>> s = 'tdr:foo:bar:22/2' >>> s == str(TDRSourceSpec.parse(s)) True """ - source_type = self._type_snapshot if self.is_snapshot else self._type_dataset return ':'.join([ 'tdr', self.subdomain, - f'{source_type}/{self.name}', + self.name, str(self.prefix) ]) - @property - def type_name(self): - return self._type_snapshot if self.is_snapshot else self._type_dataset - def qualify_table(self, table_name: str) -> str: - return '.'.join((self.subdomain, self.bq_name, table_name)) + return '.'.join((self.subdomain, self.name, table_name)) def contains(self, other: 'SourceSpec') -> bool: """ >>> p = TDRSourceSpec.parse - >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:snapshot/bar:/0')) + >>> p('tdr:foo:bar:/0').contains(p('tdr:foo:bar:/0')) True - >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:bar:snapshot/bar:/0')) + >>> p('tdr:foo:bar:/0').contains(p('tdr:bar:bar:/0')) False - >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:dataset/bar:/0')) - False - - >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:snapshot/baz:/0')) + >>> p('tdr:foo:bar:/0').contains(p('tdr:foo:baz:/0')) False """ return ( isinstance(other, TDRSourceSpec) and super().contains(other) - and self.is_snapshot == other.is_snapshot and self.subdomain == other.subdomain and self.name == other.name ) @@ -431,7 +385,7 @@ def lookup_source(self, source_spec: TDRSourceSpec) -> TDRSource: location=storage['region']) def _retrieve_source(self, source: SourceRef) -> MutableJSON: - endpoint = self._repository_endpoint(source.spec.type_name + 's', source.id) + endpoint = self._repository_endpoint('snapshots', source.id) response = self._request('GET', endpoint) response = self._check_response(endpoint, response) require(source.spec.name == 
response['name'], @@ -439,8 +393,8 @@ def _retrieve_source(self, source: SourceRef) -> MutableJSON: return response def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON: - endpoint = self._repository_endpoint(source.type_name + 's') - endpoint.set(args=dict(filter=source.bq_name, limit='2')) + endpoint = self._repository_endpoint('snapshots') + endpoint.set(args=dict(filter=source.name, limit='2')) response = self._request('GET', endpoint) response = self._check_response(endpoint, response) total = response['filteredTotal'] @@ -450,17 +404,17 @@ def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON: source_id = one(response['items'])['id'] return self._retrieve_source(SourceRef(id=source_id, spec=source)) else: - raise TerraNameConflictException(endpoint, source.bq_name, response) + raise TerraNameConflictException(endpoint, source.name, response) def check_bigquery_access(self, source: TDRSourceSpec): """ Verify that the client is authorized to read from TDR BigQuery tables. 
""" - resource = f'BigQuery dataset {source.bq_name!r} in Google Cloud project {source.subdomain!r}' + resource = f'BigQuery dataset {source.name!r} in Google Cloud project {source.subdomain!r}' try: self.run_sql(f''' SELECT * - FROM `{source.subdomain}.{source.bq_name}.INFORMATION_SCHEMA.TABLES` + FROM `{source.subdomain}.{source.name}.INFORMATION_SCHEMA.TABLES` LIMIT 1 ''') except Forbidden: diff --git a/test/azul_test_case.py b/test/azul_test_case.py index 2244650685..64f9a7ab63 100644 --- a/test/azul_test_case.py +++ b/test/azul_test_case.py @@ -502,7 +502,7 @@ def _patch_source_cache(cls): class DCP2TestCase(TDRTestCase): source = TDRSourceRef(id='d8c20944-739f-4e7d-9161-b720953432ce', - spec=TDRSourceSpec.parse('tdr:test_hca_project:snapshot/hca_snapshot:/2')) + spec=TDRSourceSpec.parse('tdr:test_hca_project:hca_snapshot:/2')) @classmethod def catalog_config(cls) -> dict[CatalogName, config.Catalog]: @@ -518,7 +518,7 @@ def catalog_config(cls) -> dict[CatalogName, config.Catalog]: class AnvilTestCase(TDRTestCase): source = TDRSourceRef(id='6c87f0e1-509d-46a4-b845-7584df39263b', - spec=TDRSourceSpec.parse('tdr:test_anvil_project:snapshot/anvil_snapshot:/2')) + spec=TDRSourceSpec.parse('tdr:test_anvil_project:anvil_snapshot:/2')) @classmethod def catalog_config(cls) -> dict[CatalogName, config.Catalog]: diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json index 7ec62da8a3..dd7a18da64 100644 --- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json +++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json @@ -230,7 +230,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -421,7 +421,7 @@ "document_id": 
"1509ef40-d1ba-440d-b298-16b7c173dcd4_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -664,7 +664,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -855,7 +855,7 @@ "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -1226,7 +1226,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -1462,7 +1462,7 @@ "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -1709,7 +1709,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -1932,7 +1932,7 @@ "document_id": 
"3b17377b-16b1-431c-9967-e5d01fc5923f_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -2170,7 +2170,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -2391,7 +2391,7 @@ "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -2686,7 +2686,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -2922,7 +2922,7 @@ "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -3266,7 +3266,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -3502,7 +3502,7 @@ "document_id": 
"826dea02-e274-affe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", @@ -3839,7 +3839,7 @@ "sources": [ { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" } ], "bundles": [ @@ -4075,7 +4075,7 @@ "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists", "source": { "id": "6c87f0e1-509d-46a4-b845-7584df39263b", - "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2" + "spec": "tdr:test_anvil_project:anvil_snapshot:/2" }, "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068", "bundle_version": "2022-06-01T00:00:00.000000Z", diff --git a/test/indexer/test_tdr.py b/test/indexer/test_tdr.py index bd0216e01a..175a49e189 100644 --- a/test/indexer/test_tdr.py +++ b/test/indexer/test_tdr.py @@ -194,7 +194,7 @@ def setUpClass(cls): '--log-level=debug', '--port=9050', '--project=' + cls.source.spec.subdomain, - '--dataset=' + cls.source.spec.bq_name + '--dataset=' + cls.source.spec.name ]) def _make_mock_tdr_tables(self, diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index 5f2b06b770..e03e2376bc 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -1680,9 +1680,9 @@ def test_compact_manifest(self): ), ( 'source_spec', - 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', - 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', - 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2' + 'tdr:test_anvil_project:anvil_snapshot:/2', + 'tdr:test_anvil_project:anvil_snapshot:/2', + 'tdr:test_anvil_project:anvil_snapshot:/2' ), ( 
'datasets.document_id', diff --git a/test/service/test_repository_files.py b/test/service/test_repository_files.py index b9976c3c2d..97ef72bc8c 100644 --- a/test/service/test_repository_files.py +++ b/test/service/test_repository_files.py @@ -176,7 +176,7 @@ def test_repository_files(self, mock_get_cached_sources): self.assertEqual(response.status, 404) mock_source_names = ['mock_snapshot_1', 'mock_snapshot_2'] - make_mock_source_spec = 'tdr:mock:snapshot/{}:/2'.format + make_mock_source_spec = 'tdr:mock:{}:/2'.format @classmethod def _sources(cls): diff --git a/test/service/test_response_anvil.py b/test/service/test_response_anvil.py index f0a7a5fd66..3072f6b6da 100644 --- a/test/service/test_response_anvil.py +++ b/test/service/test_response_anvil.py @@ -48,7 +48,7 @@ def test_entity_indices(self): 'entryId': '1509ef40-d1ba-440d-b298-16b7c173dcd4', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -169,7 +169,7 @@ def test_entity_indices(self): 'entryId': '816e364e-1193-4e5b-a91a-14e4b009157c', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -524,7 +524,7 @@ def test_entity_indices(self): 'entryId': '826dea02-e274-4ffe-aabc-eb3db63ad068', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -953,7 +953,7 @@ def test_entity_indices(self): 'sources': [ { 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b', - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2' + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2' } ] } @@ -1088,7 +1088,7 @@ def test_entity_indices(self): 'entryId': 
'2370f948-2783-4eb6-afea-e022897f4dcf', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -1469,7 +1469,7 @@ def test_entity_indices(self): 'entryId': 'bfd991f2-2797-4083-972a-da7c6d7f1b2e', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -1845,7 +1845,7 @@ def test_entity_indices(self): 'entryId': '15b76f9c-6b46-433f-851d-34e89f1b9ba6', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ], @@ -1974,7 +1974,7 @@ def test_entity_indices(self): 'entryId': '3b17377b-16b1-431c-9967-e5d01fc5923f', 'sources': [ { - 'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2', + 'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2', 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b' } ],