From b2705b5620d5644cd00301e14c63500a4ad482bd Mon Sep 17 00:00:00 2001
From: Noa Aviel Dove <nadove@ucsc.edu>
Date: Wed, 24 Jul 2024 15:03:57 -0700
Subject: [PATCH] [u r] Remove support for indexing TDR datasets (#6426)

---
 OPERATOR.rst                                  |  2 +-
 UPGRADING.rst                                 | 10 +++
 deployments/anvilbox/environment.py           |  2 +-
 deployments/anvildev/environment.py           |  2 +-
 deployments/anvilprod/environment.py          |  2 +-
 deployments/dev/environment.py                |  2 +-
 deployments/hammerbox/environment.py          |  2 +-
 deployments/prod/environment.py               |  2 +-
 deployments/sandbox/environment.py            |  2 +-
 deployments/tempdev/environment.py            |  2 +-
 environment.py                                | 11 +--
 scripts/recan_bundle_tdr.py                   |  6 +-
 .../plugins/repository/tdr_hca/__init__.py    | 61 +++++--------
 src/azul/terra.py                             | 86 +++++--------------
 test/azul_test_case.py                        |  4 +-
 ...2-e274-affe-aabc-eb3db63ad068.results.json | 32 +++----
 test/indexer/test_tdr.py                      |  2 +-
 test/service/test_manifest.py                 |  6 +-
 test/service/test_repository_files.py         |  2 +-
 test/service/test_response_anvil.py           | 16 ++--
 20 files changed, 98 insertions(+), 156 deletions(-)

diff --git a/OPERATOR.rst b/OPERATOR.rst
index d097043b27..0b528751a8 100644
--- a/OPERATOR.rst
+++ b/OPERATOR.rst
@@ -278,7 +278,7 @@ To specify a catalog to be reindexed, set ``Key`` to ``azul_current_catalog``
 and ``Value`` to the name of the catalog, for example, ``dcp3``. To specify the
 sources to be reindexed, set ``Key`` to ``azul_current_sources`` and
 ``Value`` to a space-separated list of sources globs, e.g.
-``*:snapshot/hca_dev_* *:snapshot/lungmap_dev_*``. Check the inputs you just
+``*:hca_dev_* *:lungmap_dev_*``. Check the inputs you just
 made. Start the ``reindex`` job by clicking on ``Run job``. Wait until the job
 has completed.
 
diff --git a/UPGRADING.rst b/UPGRADING.rst
index 3f7dfc4860..89eec7bcc4 100644
--- a/UPGRADING.rst
+++ b/UPGRADING.rst
@@ -20,6 +20,16 @@ reverted. This is all fairly informal and loosely defined. Hopefully we won't
 have too many entries in this file.
 
 
+#6426 Clean-up and generalize TDR source specs
+==============================================
+
+The "snapshot/" string has been removed from TDR source specs.
+
+Update the ``mksrc`` function in ``environment.py`` for each of your personal
+deployments. As always, use the sandbox deployment's ``environment.py`` as a
+model when upgrading personal deployments.
+
+
 #6381 Update Terraform to 1.9.x
 ===============================
 
diff --git a/deployments/anvilbox/environment.py b/deployments/anvilbox/environment.py
index 7a4d466c27..8574ed750d 100644
--- a/deployments/anvilbox/environment.py
+++ b/deployments/anvilbox/environment.py
@@ -41,7 +41,7 @@ def mksrc(google_project,
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         prefix + '/0'
     ])
     return project, source
diff --git a/deployments/anvildev/environment.py b/deployments/anvildev/environment.py
index a9adc0269e..72911af2e2 100644
--- a/deployments/anvildev/environment.py
+++ b/deployments/anvildev/environment.py
@@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         '/' + str(partition_prefix_length(subgraphs))
     ])
     return project, source
diff --git a/deployments/anvilprod/environment.py b/deployments/anvilprod/environment.py
index 6f1aed7168..bbf6ad4b7c 100644
--- a/deployments/anvilprod/environment.py
+++ b/deployments/anvilprod/environment.py
@@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         '/' + str(partition_prefix_length(subgraphs))
     ])
     return project, source
diff --git a/deployments/dev/environment.py b/deployments/dev/environment.py
index 97f8b53dba..926a953b28 100644
--- a/deployments/dev/environment.py
+++ b/deployments/dev/environment.py
@@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         '/' + str(partition_prefix_length(subgraphs))
     ])
     return project, source
diff --git a/deployments/hammerbox/environment.py b/deployments/hammerbox/environment.py
index d5de18020e..71693f0a31 100644
--- a/deployments/hammerbox/environment.py
+++ b/deployments/hammerbox/environment.py
@@ -41,7 +41,7 @@ def mksrc(google_project,
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         prefix + '/0'
     ])
     return project, source
diff --git a/deployments/prod/environment.py b/deployments/prod/environment.py
index 35721cf04b..7449a9d6cd 100644
--- a/deployments/prod/environment.py
+++ b/deployments/prod/environment.py
@@ -30,7 +30,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         '/' + str(partition_prefix_length(subgraphs))
     ])
     return project, source
diff --git a/deployments/sandbox/environment.py b/deployments/sandbox/environment.py
index 01a45c4d89..bb5fdc4685 100644
--- a/deployments/sandbox/environment.py
+++ b/deployments/sandbox/environment.py
@@ -41,7 +41,7 @@ def mksrc(google_project,
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         prefix + '/0'
     ])
     return project, source
diff --git a/deployments/tempdev/environment.py b/deployments/tempdev/environment.py
index e90ddb6928..41748e54cc 100644
--- a/deployments/tempdev/environment.py
+++ b/deployments/tempdev/environment.py
@@ -28,7 +28,7 @@ def mksrc(google_project, snapshot, subgraphs, flags: int = 0) -> tuple[str, str
     source = None if flags & pop else ':'.join([
         'tdr',
         google_project,
-        'snapshot/' + snapshot,
+        snapshot,
         '/' + str(partition_prefix_length(subgraphs))
     ])
     return project, source
diff --git a/environment.py b/environment.py
index a1ff3cc8a7..cfea7373ec 100644
--- a/environment.py
+++ b/environment.py
@@ -70,15 +70,13 @@ def env() -> Mapping[str, Optional[str]]:
         #
         # The first catalog listed is the default catalog.
         #
-        # A source represents a TDR dataset, TDR snapshot, or canned staging
-        # area to index. Each source is a string matching the following EBNF
-        # grammar:
+        # A source represents a TDR snapshot or canned staging area to index.
+        # Each source is a string matching the following EBNF grammar:
         #
         # source = TDR source | canned source ;
         #
         # TDR source = 'tdr:', Google Cloud project name,
-        #              ':', ( 'dataset' | 'snapshot' ),
-        #              '/', TDR dataset or snapshot name,
+        #              ':', TDR snapshot name,
         #              ':', [ prefix ],
         #              '/', partition prefix length ;
         #
@@ -116,8 +114,7 @@ def env() -> Mapping[str, Optional[str]]:
         #
         # Examples:
         #
-        # tdr:broad-jade-dev-data:snapshot/hca_mvp:/1
-        # tdr:broad-jade-dev-data:dataset/hca_mvp:2/1
+        # tdr:broad-jade-dev-data:hca_mvp:/1
         # https://github.com/HumanCellAtlas/schema-test-data/tree/de355ca/tests:2
         #
         # This variable tends to be large. If you get `Argument list too long`
diff --git a/scripts/recan_bundle_tdr.py b/scripts/recan_bundle_tdr.py
index f9e6a18088..372504a2cb 100644
--- a/scripts/recan_bundle_tdr.py
+++ b/scripts/recan_bundle_tdr.py
@@ -240,7 +240,6 @@ def __init__(self, bundle: TDRHCABundle, file_name: str):
         assert self.concrete_type.endswith('_file')
         self.file_manifest_entry = one(e for e in bundle.manifest
                                        if e['name'] == self.metadata['file_core']['file_name'])
-        assert bundle.fqid.source.spec.is_snapshot
         assert self.file_manifest_entry['drs_path'] is not None
 
     def to_json_row(self) -> JSON:
@@ -368,7 +367,7 @@ def main(argv):
                         help='The UUID of the existing DCP/1 canned bundle.')
     parser.add_argument('--source-id', '-s',
                         default=TestTDRHCAPlugin.source.id,
-                        help='The UUID of the snapshot/dataset to contain the canned DCP/2 bundle.')
+                        help='The UUID of the snapshot to contain the canned DCP/2 bundle.')
     parser.add_argument('--version', '-v',
                         default=TestTDRHCAPlugin.bundle_fqid.version,
                         help='The version for any mock entities synthesized by the script.')
@@ -401,8 +400,7 @@ def main(argv):
     tdr_source = TDRSourceRef(id=args.source_id,
                               spec=TDRSourceSpec(prefix=Prefix.of_everything,
                                                  subdomain='test_project',
-                                                 name='test_name',
-                                                 is_snapshot=True))
+                                                 name='test_name'))
     tdr_bundle = dss_bundle_to_tdr(dss_bundle, tdr_source)
 
     add_supp_files(tdr_bundle,
diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py
index a46b555225..df9b259862 100644
--- a/src/azul/plugins/repository/tdr_hca/__init__.py
+++ b/src/azul/plugins/repository/tdr_hca/__init__.py
@@ -5,7 +5,6 @@
     ThreadPoolExecutor,
 )
 from itertools import (
-    groupby,
     islice,
 )
 import json
@@ -39,7 +38,6 @@
 )
 from azul.bigquery import (
     BigQueryRow,
-    BigQueryRows,
     backtick,
 )
 from azul.drs import (
@@ -276,27 +274,21 @@ def _parse_drs_uri(self,
                        file_id: Optional[str],
                        descriptor: JSON
                        ) -> Optional[str]:
-        # The file_id column is present for datasets, but is usually null, may
-        # contain unexpected/unusable values, and NEVER produces usable DRS URLs,
-        # so we avoid parsing the column altogether for datasets.
-        if self.fqid.source.spec.is_snapshot:
-            if file_id is None:
-                try:
-                    external_drs_uri = descriptor['drs_uri']
-                except KeyError:
-                    raise RequirementError('`file_id` is null and `drs_uri` '
-                                           'is not set in file descriptor', descriptor)
-                else:
-                    # FIXME: Support non-null DRS URIs in file descriptors
-                    #        https://github.com/DataBiosphere/azul/issues/3631
-                    if external_drs_uri is not None:
-                        log.warning('Non-null `drs_uri` in file descriptor (%s)', external_drs_uri)
-                        external_drs_uri = None
-                    return external_drs_uri
+        if file_id is None:
+            try:
+                external_drs_uri = descriptor['drs_uri']
+            except KeyError:
+                raise RequirementError('`file_id` is null and `drs_uri` '
+                                       'is not set in file descriptor', descriptor)
             else:
-                return file_id
+                # FIXME: Support non-null DRS URIs in file descriptors
+                #        https://github.com/DataBiosphere/azul/issues/3631
+                if external_drs_uri is not None:
+                    log.warning('Non-null `drs_uri` in file descriptor (%s)', external_drs_uri)
+                    external_drs_uri = None
+                return external_drs_uri
         else:
-            return None
+            return file_id
 
 
 class Plugin(TDRPlugin[TDRHCABundle, TDRSourceSpec, TDRSourceRef, TDRBundleFQID]):
@@ -324,7 +316,7 @@ def _list_bundles(self,
                       ) -> list[TDRBundleFQID]:
         source_prefix = source.spec.prefix.common
         validate_uuid_prefix(source_prefix + prefix)
-        current_bundles = self._query_latest_version(source.spec, f'''
+        current_bundles = self._query_unique_sorted(f'''
             SELECT links_id, version
             FROM {backtick(self._full_table_name(source.spec, 'links'))}
             WHERE STARTS_WITH(links_id, '{source_prefix + prefix}')
@@ -336,24 +328,15 @@ def _list_bundles(self,
             for row in current_bundles
         ]
 
-    def _query_latest_version(self,
-                              source: TDRSourceSpec,
-                              query: str,
-                              group_by: str
-                              ) -> list[BigQueryRow]:
+    def _query_unique_sorted(self,
+                             query: str,
+                             group_by: str
+                             ) -> list[BigQueryRow]:
         iter_rows = self._run_sql(query)
         key = itemgetter(group_by)
-        groups = groupby(sorted(iter_rows, key=key), key=key)
-        return [self._choose_one_version(source, group) for _, group in groups]
-
-    def _choose_one_version(self,
-                            source: TDRSourceSpec,
-                            versioned_items: BigQueryRows
-                            ) -> BigQueryRow:
-        if source.is_snapshot:
-            return one(versioned_items)
-        else:
-            return max(versioned_items, key=itemgetter('version'))
+        rows = sorted(iter_rows, key=key)
+        require(len(set(map(key, rows))) == len(rows), 'Expected unique keys', group_by)
+        return rows
 
     def _emulate_bundle(self, bundle_fqid: TDRBundleFQID) -> TDRHCABundle:
         bundle = TDRHCABundle(fqid=bundle_fqid,
@@ -514,7 +497,7 @@ def quote(s):
             WHERE {self._in(where_columns, where_values)}
         '''
         log.debug('Retrieving %i entities of type %r ...', len(entity_ids), entity_type)
-        rows = self._query_latest_version(source, query, group_by=pk_column)
+        rows = self._query_unique_sorted(query, group_by=pk_column)
         log.debug('Retrieved %i entities of type %r', len(rows), entity_type)
         missing = expected - {row[pk_column] for row in rows}
         require(not missing,
diff --git a/src/azul/terra.py b/src/azul/terra.py
index 787b2699c3..93739a25d1 100644
--- a/src/azul/terra.py
+++ b/src/azul/terra.py
@@ -95,49 +95,24 @@
 class TDRSourceSpec(SourceSpec):
     subdomain: str
     name: str
-    is_snapshot: bool
-
-    _type_dataset = 'dataset'
-
-    _type_snapshot = 'snapshot'
 
     @classmethod
     def parse(cls, spec: str) -> 'TDRSourceSpec':
         """
         Construct an instance from its string representation, using the syntax
-        'tdr:{subdomain}:{type}/{name}:{prefix}' ending with an optional
+        'tdr:{subdomain}:{name}:{prefix}' ending with an optional
         '/{partition_prefix_length}'.
 
-        >>> s = TDRSourceSpec.parse('tdr:foo:snapshot/bar:/0')
+        >>> s = TDRSourceSpec.parse('tdr:foo:bar:/0')
         >>> s # doctest: +NORMALIZE_WHITESPACE
         TDRSourceSpec(prefix=Prefix(common='', partition=0),
                       subdomain='foo',
-                      name='bar',
-                      is_snapshot=True)
-
-        >>> s.bq_name
-        'bar'
+                      name='bar')
 
         >>> str(s)
-        'tdr:foo:snapshot/bar:/0'
-
-        >>> d = TDRSourceSpec.parse('tdr:foo:dataset/bar:42/2')
-        >>> d # doctest: +NORMALIZE_WHITESPACE
-        TDRSourceSpec(prefix=Prefix(common='42', partition=2),
-                      subdomain='foo',
-                      name='bar',
-                      is_snapshot=False)
-        >>> d.bq_name
-        'datarepo_bar'
-        >>> str(d)
-        'tdr:foo:dataset/bar:42/2'
-
-        >>> TDRSourceSpec.parse('tdr:foo:baz/bar:42/0')
-        Traceback (most recent call last):
-        ...
-        AssertionError: baz
+        'tdr:foo:bar:/0'
 
-        >>> TDRSourceSpec.parse('tdr:foo:snapshot/bar:n32/0')
+        >>> TDRSourceSpec.parse('tdr:foo:bar:n32/0')
         Traceback (most recent call last):
         ...
         azul.uuids.InvalidUUIDPrefixError: 'n32' is not a valid UUID prefix.
@@ -145,76 +120,55 @@ def parse(cls, spec: str) -> 'TDRSourceSpec':
         rest, prefix = cls._parse(spec)
         # BigQuery (and by extension the TDR) does not allow : or / in dataset names
         service, subdomain, name = rest.split(':')
-        type, name = name.split('/')
         assert service == 'tdr', service
-        if type == cls._type_snapshot:
-            is_snapshot = True
-        elif type == cls._type_dataset:
-            is_snapshot = False
-        else:
-            assert False, type
         self = cls(prefix=prefix,
                    subdomain=subdomain,
-                   name=name,
-                   is_snapshot=is_snapshot)
+                   name=name)
         assert spec == str(self), spec
         return self
 
-    @property
-    def bq_name(self):
-        return self.name if self.is_snapshot else f'datarepo_{self.name}'
-
     def __str__(self) -> str:
         """
         The inverse of :meth:`parse`.
 
-        >>> s = 'tdr:foo:snapshot/bar:/0'
+        >>> s = 'tdr:foo:bar:/0'
         >>> s == str(TDRSourceSpec.parse(s))
         True
 
-        >>> s = 'tdr:foo:snapshot/bar:22/0'
+        >>> s = 'tdr:foo:bar:22/0'
         >>> s == str(TDRSourceSpec.parse(s))
         True
 
-        >>> s = 'tdr:foo:snapshot/bar:22/2'
+        >>> s = 'tdr:foo:bar:22/2'
         >>> s == str(TDRSourceSpec.parse(s))
         True
         """
-        source_type = self._type_snapshot if self.is_snapshot else self._type_dataset
         return ':'.join([
             'tdr',
             self.subdomain,
-            f'{source_type}/{self.name}',
+            self.name,
             str(self.prefix)
         ])
 
-    @property
-    def type_name(self):
-        return self._type_snapshot if self.is_snapshot else self._type_dataset
-
     def qualify_table(self, table_name: str) -> str:
-        return '.'.join((self.subdomain, self.bq_name, table_name))
+        return '.'.join((self.subdomain, self.name, table_name))
 
     def contains(self, other: 'SourceSpec') -> bool:
         """
         >>> p = TDRSourceSpec.parse
 
-        >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:snapshot/bar:/0'))
+        >>> p('tdr:foo:bar:/0').contains(p('tdr:foo:bar:/0'))
         True
 
-        >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:bar:snapshot/bar:/0'))
+        >>> p('tdr:foo:bar:/0').contains(p('tdr:bar:bar:/0'))
         False
 
-        >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:dataset/bar:/0'))
-        False
-
-        >>> p('tdr:foo:snapshot/bar:/0').contains(p('tdr:foo:snapshot/baz:/0'))
+        >>> p('tdr:foo:bar:/0').contains(p('tdr:foo:baz:/0'))
         False
         """
         return (
             isinstance(other, TDRSourceSpec)
             and super().contains(other)
-            and self.is_snapshot == other.is_snapshot
             and self.subdomain == other.subdomain
             and self.name == other.name
         )
@@ -431,7 +385,7 @@ def lookup_source(self, source_spec: TDRSourceSpec) -> TDRSource:
                               location=storage['region'])
 
     def _retrieve_source(self, source: SourceRef) -> MutableJSON:
-        endpoint = self._repository_endpoint(source.spec.type_name + 's', source.id)
+        endpoint = self._repository_endpoint('snapshots', source.id)
         response = self._request('GET', endpoint)
         response = self._check_response(endpoint, response)
         require(source.spec.name == response['name'],
@@ -439,8 +393,8 @@ def _retrieve_source(self, source: SourceRef) -> MutableJSON:
         return response
 
     def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON:
-        endpoint = self._repository_endpoint(source.type_name + 's')
-        endpoint.set(args=dict(filter=source.bq_name, limit='2'))
+        endpoint = self._repository_endpoint('snapshots')
+        endpoint.set(args=dict(filter=source.name, limit='2'))
         response = self._request('GET', endpoint)
         response = self._check_response(endpoint, response)
         total = response['filteredTotal']
@@ -450,17 +404,17 @@ def _lookup_source(self, source: TDRSourceSpec) -> MutableJSON:
             source_id = one(response['items'])['id']
             return self._retrieve_source(SourceRef(id=source_id, spec=source))
         else:
-            raise TerraNameConflictException(endpoint, source.bq_name, response)
+            raise TerraNameConflictException(endpoint, source.name, response)
 
     def check_bigquery_access(self, source: TDRSourceSpec):
         """
         Verify that the client is authorized to read from TDR BigQuery tables.
         """
-        resource = f'BigQuery dataset {source.bq_name!r} in Google Cloud project {source.subdomain!r}'
+        resource = f'BigQuery dataset {source.name!r} in Google Cloud project {source.subdomain!r}'
         try:
             self.run_sql(f'''
                 SELECT *
-                FROM `{source.subdomain}.{source.bq_name}.INFORMATION_SCHEMA.TABLES`
+                FROM `{source.subdomain}.{source.name}.INFORMATION_SCHEMA.TABLES`
                 LIMIT 1
             ''')
         except Forbidden:
diff --git a/test/azul_test_case.py b/test/azul_test_case.py
index 2244650685..64f9a7ab63 100644
--- a/test/azul_test_case.py
+++ b/test/azul_test_case.py
@@ -502,7 +502,7 @@ def _patch_source_cache(cls):
 
 class DCP2TestCase(TDRTestCase):
     source = TDRSourceRef(id='d8c20944-739f-4e7d-9161-b720953432ce',
-                          spec=TDRSourceSpec.parse('tdr:test_hca_project:snapshot/hca_snapshot:/2'))
+                          spec=TDRSourceSpec.parse('tdr:test_hca_project:hca_snapshot:/2'))
 
     @classmethod
     def catalog_config(cls) -> dict[CatalogName, config.Catalog]:
@@ -518,7 +518,7 @@ def catalog_config(cls) -> dict[CatalogName, config.Catalog]:
 
 class AnvilTestCase(TDRTestCase):
     source = TDRSourceRef(id='6c87f0e1-509d-46a4-b845-7584df39263b',
-                          spec=TDRSourceSpec.parse('tdr:test_anvil_project:snapshot/anvil_snapshot:/2'))
+                          spec=TDRSourceSpec.parse('tdr:test_anvil_project:anvil_snapshot:/2'))
 
     @classmethod
     def catalog_config(cls) -> dict[CatalogName, config.Catalog]:
diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json
index 7ec62da8a3..dd7a18da64 100644
--- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json
+++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json
@@ -230,7 +230,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -421,7 +421,7 @@
             "document_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -664,7 +664,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -855,7 +855,7 @@
             "document_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -1226,7 +1226,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -1462,7 +1462,7 @@
             "document_id": "2370f948-2783-4eb6-afea-e022897f4dcf_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -1709,7 +1709,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -1932,7 +1932,7 @@
             "document_id": "3b17377b-16b1-431c-9967-e5d01fc5923f_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -2170,7 +2170,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -2391,7 +2391,7 @@
             "document_id": "816e364e-1193-4e5b-a91a-14e4b009157c_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -2686,7 +2686,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -2922,7 +2922,7 @@
             "document_id": "826dea02-e274-4ffe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -3266,7 +3266,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -3502,7 +3502,7 @@
             "document_id": "826dea02-e274-affe-aabc-eb3db63ad068_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
@@ -3839,7 +3839,7 @@
             "sources": [
                 {
                     "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                    "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                    "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
                 }
             ],
             "bundles": [
@@ -4075,7 +4075,7 @@
             "document_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e_826dea02-e274-affe-aabc-eb3db63ad068_2022-06-01T00:00:00.000000Z_exists",
             "source": {
                 "id": "6c87f0e1-509d-46a4-b845-7584df39263b",
-                "spec": "tdr:test_anvil_project:snapshot/anvil_snapshot:/2"
+                "spec": "tdr:test_anvil_project:anvil_snapshot:/2"
             },
             "bundle_uuid": "826dea02-e274-affe-aabc-eb3db63ad068",
             "bundle_version": "2022-06-01T00:00:00.000000Z",
diff --git a/test/indexer/test_tdr.py b/test/indexer/test_tdr.py
index bd0216e01a..175a49e189 100644
--- a/test/indexer/test_tdr.py
+++ b/test/indexer/test_tdr.py
@@ -194,7 +194,7 @@ def setUpClass(cls):
                                                '--log-level=debug',
                                                '--port=9050',
                                                '--project=' + cls.source.spec.subdomain,
-                                               '--dataset=' + cls.source.spec.bq_name
+                                               '--dataset=' + cls.source.spec.name
                                            ])
 
     def _make_mock_tdr_tables(self,
diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py
index 5f2b06b770..e03e2376bc 100644
--- a/test/service/test_manifest.py
+++ b/test/service/test_manifest.py
@@ -1680,9 +1680,9 @@ def test_compact_manifest(self):
             ),
             (
                 'source_spec',
-                'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
-                'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
-                'tdr:test_anvil_project:snapshot/anvil_snapshot:/2'
+                'tdr:test_anvil_project:anvil_snapshot:/2',
+                'tdr:test_anvil_project:anvil_snapshot:/2',
+                'tdr:test_anvil_project:anvil_snapshot:/2'
             ),
             (
                 'datasets.document_id',
diff --git a/test/service/test_repository_files.py b/test/service/test_repository_files.py
index b9976c3c2d..97ef72bc8c 100644
--- a/test/service/test_repository_files.py
+++ b/test/service/test_repository_files.py
@@ -176,7 +176,7 @@ def test_repository_files(self, mock_get_cached_sources):
             self.assertEqual(response.status, 404)
 
     mock_source_names = ['mock_snapshot_1', 'mock_snapshot_2']
-    make_mock_source_spec = 'tdr:mock:snapshot/{}:/2'.format
+    make_mock_source_spec = 'tdr:mock:{}:/2'.format
 
     @classmethod
     def _sources(cls):
diff --git a/test/service/test_response_anvil.py b/test/service/test_response_anvil.py
index f0a7a5fd66..3072f6b6da 100644
--- a/test/service/test_response_anvil.py
+++ b/test/service/test_response_anvil.py
@@ -48,7 +48,7 @@ def test_entity_indices(self):
                         'entryId': '1509ef40-d1ba-440d-b298-16b7c173dcd4',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -169,7 +169,7 @@ def test_entity_indices(self):
                         'entryId': '816e364e-1193-4e5b-a91a-14e4b009157c',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -524,7 +524,7 @@ def test_entity_indices(self):
                         'entryId': '826dea02-e274-4ffe-aabc-eb3db63ad068',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -953,7 +953,7 @@ def test_entity_indices(self):
                         'sources': [
                             {
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b',
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2'
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2'
                             }
                         ]
                     }
@@ -1088,7 +1088,7 @@ def test_entity_indices(self):
                         'entryId': '2370f948-2783-4eb6-afea-e022897f4dcf',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -1469,7 +1469,7 @@ def test_entity_indices(self):
                         'entryId': 'bfd991f2-2797-4083-972a-da7c6d7f1b2e',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -1845,7 +1845,7 @@ def test_entity_indices(self):
                         'entryId': '15b76f9c-6b46-433f-851d-34e89f1b9ba6',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],
@@ -1974,7 +1974,7 @@ def test_entity_indices(self):
                         'entryId': '3b17377b-16b1-431c-9967-e5d01fc5923f',
                         'sources': [
                             {
-                                'source_spec': 'tdr:test_anvil_project:snapshot/anvil_snapshot:/2',
+                                'source_spec': 'tdr:test_anvil_project:anvil_snapshot:/2',
                                 'source_id': '6c87f0e1-509d-46a4-b845-7584df39263b'
                             }
                         ],