From 4dfb3fd4e3531ca9f2052d82bf1c73fed5cae049 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Wed, 24 Jul 2024 15:09:55 -0700 Subject: [PATCH] Rename TDRSourceSpec.project to subdomain (#6426) --- scripts/post_deploy_tdr.py | 4 ++-- scripts/recan_bundle_tdr.py | 2 +- scripts/update_subgraph_counts.py | 6 +++--- src/azul/terra.py | 24 +++++++++++++----------- test/indexer/test_tdr.py | 6 +++--- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/scripts/post_deploy_tdr.py b/scripts/post_deploy_tdr.py index f1fd34abdf..fe3565c10b 100644 --- a/scripts/post_deploy_tdr.py +++ b/scripts/post_deploy_tdr.py @@ -93,9 +93,9 @@ def verify_source(self, ) -> None: source = self.tdr.lookup_source(source_spec) log.info('TDR client is authorized for API access to %s.', source_spec) - require(source.project == source_spec.project, + require(source.project == source_spec.subdomain, 'Actual Google project of TDR source differs from configured one', - source.project, source_spec.project) + source.project, source_spec.subdomain) # Uppercase is standard for multi-regions in the documentation but TDR # returns 'us' in lowercase require(source.location.lower() == config.tdr_source_location.lower(), diff --git a/scripts/recan_bundle_tdr.py b/scripts/recan_bundle_tdr.py index 6c34afc9c0..f9e6a18088 100644 --- a/scripts/recan_bundle_tdr.py +++ b/scripts/recan_bundle_tdr.py @@ -400,7 +400,7 @@ def main(argv): tdr_source = TDRSourceRef(id=args.source_id, spec=TDRSourceSpec(prefix=Prefix.of_everything, - project='test_project', + subdomain='test_project', name='test_name', is_snapshot=True)) tdr_bundle = dss_bundle_to_tdr(dss_bundle, tdr_source) diff --git a/scripts/update_subgraph_counts.py b/scripts/update_subgraph_counts.py index d0b7fae106..2b28d54c6e 100644 --- a/scripts/update_subgraph_counts.py +++ b/scripts/update_subgraph_counts.py @@ -104,13 +104,13 @@ def ideal_common_prefix(self) -> str: @attr.s(auto_attribs=True, frozen=True, kw_only=True) class SourceSpecArgs: - project: str + subdomain: str name: str subgraph_count: int explicit_prefix: Optional[str] def __str__(self) -> str: - params = f'{self.project!r}, {self.name!r}, {self.subgraph_count!r}' + params = f'{self.subdomain!r}, {self.name!r}, {self.subgraph_count!r}' if self.explicit_prefix is not None: params += f', prefix={self.explicit_prefix!r}' return f'mksrc({params})' @@ -140,7 +140,7 @@ def generate_source(source: TDRSourceRef) -> SourceSpecArgs: prefixed_counter = SubgraphCounter.for_source(plugin, source, counter_prefix) if default_prefix not in prefixed_counter.partition_sizes: explicit_prefix = prefixed_counter.ideal_common_prefix() - return SourceSpecArgs(project=source.spec.project, + return SourceSpecArgs(subdomain=source.spec.subdomain, name=source.spec.name, subgraph_count=counter.count, explicit_prefix=explicit_prefix) diff --git a/src/azul/terra.py b/src/azul/terra.py index ca980a2f1f..787b2699c3 100644 --- a/src/azul/terra.py +++ b/src/azul/terra.py @@ -93,7 +93,7 @@ @attrs.frozen(kw_only=True) class TDRSourceSpec(SourceSpec): - project: str + subdomain: str name: str is_snapshot: bool @@ -105,24 +105,26 @@ class TDRSourceSpec(SourceSpec): def parse(cls, spec: str) -> 'TDRSourceSpec': """ Construct an instance from its string representation, using the syntax - 'tdr:{project}:{type}/{name}:{prefix}' ending with an optional + 'tdr:{subdomain}:{type}/{name}:{prefix}' ending with an optional '/{partition_prefix_length}'. >>> s = TDRSourceSpec.parse('tdr:foo:snapshot/bar:/0') >>> s # doctest: +NORMALIZE_WHITESPACE TDRSourceSpec(prefix=Prefix(common='', partition=0), - project='foo', + subdomain='foo', name='bar', is_snapshot=True) + >>> s.bq_name 'bar' + >>> str(s) 'tdr:foo:snapshot/bar:/0' >>> d = TDRSourceSpec.parse('tdr:foo:dataset/bar:42/2') >>> d # doctest: +NORMALIZE_WHITESPACE TDRSourceSpec(prefix=Prefix(common='42', partition=2), - project='foo', + subdomain='foo', name='bar', is_snapshot=False) >>> d.bq_name @@ -142,7 +144,7 @@ def parse(cls, spec: str) -> 'TDRSourceSpec': """ rest, prefix = cls._parse(spec) # BigQuery (and by extension the TDR) does not allow : or / in dataset names - service, project, name = rest.split(':') + service, subdomain, name = rest.split(':') type, name = name.split('/') assert service == 'tdr', service if type == cls._type_snapshot: @@ -152,7 +154,7 @@ def parse(cls, spec: str) -> 'TDRSourceSpec': else: assert False, type self = cls(prefix=prefix, - project=project, + subdomain=subdomain, name=name, is_snapshot=is_snapshot) assert spec == str(self), spec @@ -181,7 +183,7 @@ def __str__(self) -> str: source_type = self._type_snapshot if self.is_snapshot else self._type_dataset return ':'.join([ 'tdr', - self.project, + self.subdomain, f'{source_type}/{self.name}', str(self.prefix) ]) @@ -191,7 +193,7 @@ def type_name(self): return self._type_snapshot if self.is_snapshot else self._type_dataset def qualify_table(self, table_name: str) -> str: - return '.'.join((self.project, self.bq_name, table_name)) + return '.'.join((self.subdomain, self.bq_name, table_name)) def contains(self, other: 'SourceSpec') -> bool: """ @@ -213,7 +215,7 @@ def contains(self, other: 'SourceSpec') -> bool: isinstance(other, TDRSourceSpec) and super().contains(other) and self.is_snapshot == other.is_snapshot - and self.project == other.project + and self.subdomain == other.subdomain and self.name == other.name ) @@ -454,11 +456,11 @@ def check_bigquery_access(self, source: TDRSourceSpec): """ Verify that the client is authorized to read from TDR BigQuery tables. """ - resource = f'BigQuery dataset {source.bq_name!r} in Google Cloud project {source.project!r}' + resource = f'BigQuery dataset {source.bq_name!r} in Google Cloud project {source.subdomain!r}' try: self.run_sql(f''' SELECT * - FROM `{source.project}.{source.bq_name}.INFORMATION_SCHEMA.TABLES` + FROM `{source.subdomain}.{source.bq_name}.INFORMATION_SCHEMA.TABLES` LIMIT 1 ''') except Forbidden: diff --git a/test/indexer/test_tdr.py b/test/indexer/test_tdr.py index 8c6005598e..bd0216e01a 100644 --- a/test/indexer/test_tdr.py +++ b/test/indexer/test_tdr.py @@ -177,7 +177,7 @@ def plugin_for_source_spec(self, source_spec) -> TDR_PLUGIN: # noinspection PyAbstractClass class Plugin(MockPlugin, self._plugin_cls()): netloc = self.netloc - project_id = self.source.spec.project + project_id = self.source.spec.subdomain return Plugin(sources={source_spec}) @@ -193,7 +193,7 @@ def setUpClass(cls): command=[ '--log-level=debug', '--port=9050', - '--project=' + cls.source.spec.project, + '--project=' + cls.source.spec.subdomain, '--dataset=' + cls.source.spec.bq_name ]) @@ -228,7 +228,7 @@ def dump_row(row: JSON) -> JSON: } plugin = self.plugin_for_source_spec(source) - bq = plugin.tdr._bigquery(source.project) + bq = plugin.tdr._bigquery(source.subdomain) table_name = plugin._full_table_name(source, table_name) # https://youtrack.jetbrains.com/issue/PY-50178 # noinspection PyTypeChecker