ci: Detect ambiguous issue references
And fix found references
def- committed Sep 20, 2024
1 parent fbd069a commit 8042afc
Showing 28 changed files with 104 additions and 32 deletions.
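The substance of the change is in `misc/python/materialize/cli/ci_closed_issues_detect.py` below: a bare `#NNNN` reference no longer defaults to `MaterializeInc/materialize` but falls into a new `ambiguous` capture group, and CI asks the author to qualify it as `materialize#NNNN` or `database-issues#NNNN`. The following is a minimal sketch of that idea, not the exact code from the diff; the regex and repository map are simplified, and the sample strings echo references touched elsewhere in this commit.

```python
import re

# Simplified sketch: qualified references resolve to a repository, while a
# bare "#NNNN" is captured as ambiguous and cannot be resolved.
ISSUE_RE = re.compile(
    r"""
    ( materialize\# (?P<materialize>[0-9]+)
    | database-issues\# (?P<databaseissues>[0-9]+)
    | \# (?P<ambiguous>[0-9]+)
    )
    """,
    re.VERBOSE,
)

REPOSITORY = {
    "materialize": "MaterializeInc/materialize",
    "databaseissues": "MaterializeInc/database-issues",
    "ambiguous": None,  # no way to tell which repository is meant
}


def classify(text: str) -> list[tuple[str | None, int]]:
    """Return (repository, issue_id) pairs; repository is None for ambiguous refs."""
    refs = []
    for match in ISSUE_RE.finditer(text):
        for group, issue_id in match.groupdict().items():
            if issue_id is not None:
                refs.append((REPOSITORY[group], int(issue_id)))
    return refs


print(classify("see materialize#19322"))  # [('MaterializeInc/materialize', 19322)]
print(classify("due to #20743"))          # [(None, 20743)] -> flagged by CI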
2 changes: 1 addition & 1 deletion doc/developer/design/20231027_refresh_mvs.md
@@ -57,7 +57,7 @@ Criteria:
We are targeting the new refresh options only to materialized views, but not indexes for now. Note that one can create an index that is refreshed at a specified interval simply by creating a normal index on top of a `REFRESH EVERY <interval>` materialized view.

We are not aiming to cover the entire range of possible freshness-cost trade-off settings. In particular:
- We are not aiming here to make results more fresh at a higher cost than what Materialize can provide by default. For efforts in that direction, see [#19322: Re-enable TIMESTAMP INTERVAL source option](https://github.com/MaterializeInc/materialize/issues/19322).
- We are not aiming here to make results more fresh at a higher cost than what Materialize can provide by default. For efforts in that direction, see [materialize#19322: Re-enable TIMESTAMP INTERVAL source option](https://github.com/MaterializeInc/materialize/issues/19322).
- A more subtle scoping consideration is that we are also not targeting the range of settings that would require incremental computation with on-disk computation state. Instead, we now focus only on the easy case that is the range of freshness where redoing the entire computation periodically yields a lower cost than having a continuously running dataflow. This means that setting a refresh interval of less than 1 hour will not work well in most cases. It might be that even a few-hour refresh interval won't make using this feature worthwhile. Note that for one important user, a 1-day refresh interval is ok. Later, we might be able to cover this missing intermediate range of freshness by entirely different approaches:
- When we have robust [out-of-core in Compute](https://materializeinc.slack.com/archives/C04UK7BNVL7/p1700084826815849) (probably with auto-scaling), we might be able to trade off freshness vs. cost by simply using a much smaller replica than the cluster's total memory needs, but supplying input data at a much larger tick interval than the default 1 sec. For example, we might supply new input data at every 10 mins, and hope that even with a lot of swapping, we'll be able to process the update in 10 mins.
- We could suspend replica processes and save their entire state to S3 and a local disk cache, and restore it when a refresh is needed. [There are existing techniques for saving process state, e.g., CRIU](https://faun.pub/kubernetes-checkpointing-a-definitive-guide-33dd1a0310f6).
@@ -12,7 +12,7 @@
from materialize.checks.checks import Check, disabled


@disabled("due to #20743")
@disabled("due to materialize#20743")
class PeekCancellation(Check):
def initialize(self) -> Testdrive:
return Testdrive(
38 changes: 32 additions & 6 deletions misc/python/materialize/cli/ci_closed_issues_detect.py
@@ -24,9 +24,11 @@
ISSUE_RE = re.compile(
r"""
( TimelyDataflow/timely-dataflow\#(?P<timelydataflow>[0-9]+)
| ( materialize\# | materialize/issues/ | \# ) (?P<materialize>[0-9]+)
| ( materialize\# | materialize/issues/ ) (?P<materialize>[0-9]+)
| ( cloud\# | cloud/issues/ ) (?P<cloud>[0-9]+)
| ( incidents-and-escalations\# | incidents-and-escalations/issues/ ) (?P<incidentsandescalations>[0-9]+)
| ( database-issues\# | database-issues/issues/ ) (?P<databaseissues>[0-9]+)
| \# (?P<ambiguous>[0-9]+)
)
""",
re.VERBOSE,
@@ -37,6 +39,8 @@
"materialize": "MaterializeInc/materialize",
"cloud": "MaterializeInc/cloud",
"incidentsandescalations": "MaterializeInc/incidents-and-escalations",
"databaseissues": "MaterializeInc/database-issues",
"ambiguous": None,
}

REFERENCE_RE = re.compile(
@@ -99,12 +103,12 @@
re.VERBOSE,
)

FILENAME_REFERENCE_RE = re.compile(r".*\.(td|slt|test)\.gh(?P<materialize>[0-9]+)")
FILENAME_REFERENCE_RE = re.compile(r".*\.(td|slt|test)\.gh(?P<ambiguous>[0-9]+)")


@dataclass
class IssueRef:
repository: str
repository: str | None
issue_id: int
filename: str
line_number: int
@@ -168,7 +172,7 @@ def detect_referenced_issues(filename: str) -> list[IssueRef]:

# Explain plans can look like issue references
if (
group == "materialize"
group == "ambiguous"
and int(issue_id) < 100
and not is_referenced_with_url
):
@@ -187,7 +191,8 @@ def detect_referenced_issues(filename: str) -> list[IssueRef]:
return issue_refs


def is_issue_closed_on_github(repository: str, issue_id: int) -> bool:
def is_issue_closed_on_github(repository: str | None, issue_id: int) -> bool:
assert repository
headers = {
"Accept": "application/vnd.github+json",
"X-GitHub-Api-Version": "2022-11-28",
@@ -231,6 +236,14 @@ def filter_changed_lines(issue_refs: list[IssueRef]) -> list[IssueRef]:
]


def filter_ambiguous_issues(
issue_refs: list[IssueRef],
) -> tuple[list[IssueRef], list[IssueRef]]:
return [issue_ref for issue_ref in issue_refs if issue_ref.repository], [
issue_ref for issue_ref in issue_refs if not issue_ref.repository
]


def filter_closed_issues(issue_refs: list[IssueRef]) -> list[IssueRef]:
issues = {(issue_ref.repository, issue_ref.issue_id) for issue_ref in issue_refs}
closed_issues = {
@@ -289,11 +302,24 @@ def main() -> int:
):
issue_refs.extend(detect_referenced_issues(filename))

issue_refs, ambiguous_refs = filter_ambiguous_issues(issue_refs)

if args.changed_lines_only:
issue_refs = filter_changed_lines(issue_refs)

issue_refs = filter_closed_issues(issue_refs)

for issue_ref in ambiguous_refs:
print(f"--- Ambiguous issue reference: #{issue_ref.issue_id}")
if issue_ref.text is not None:
print(f"{issue_ref.filename}:{issue_ref.line_number}:")
print(issue_ref.text)
else:
print(f"{issue_ref.filename} (filename)")
print(
f"Use database-issues#{issue_ref.issue_id} or materialize#{issue_ref.issue_id} instead to have an unambiguous reference"
)

for issue_ref in issue_refs:
url = buildkite.inline_link(
f"https://github.com/{issue_ref.repository}/issues/{issue_ref.issue_id}",
@@ -306,7 +332,7 @@ def main() -> int:
else:
print(f"{issue_ref.filename} (filename)")

return 1 if len(issue_refs) else 0
return 1 if issue_refs + ambiguous_refs else 0


if __name__ == "__main__":
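The new `filter_ambiguous_issues` helper splits the detected references into resolvable and ambiguous ones before the closed-issue check runs, and `main()` then reports each ambiguous reference with a suggested fix. A rough illustration of that flow follows; the `IssueRef` fields are approximated from the diff, and the sample filenames and line numbers are made up.

```python
from dataclasses import dataclass


@dataclass
class IssueRef:
    repository: str | None  # None when the reference could not be resolved
    issue_id: int
    filename: str
    line_number: int
    text: str | None = None


def filter_ambiguous_issues(
    issue_refs: list[IssueRef],
) -> tuple[list[IssueRef], list[IssueRef]]:
    # Same shape as the helper in the diff: references with a known repository
    # first, ambiguous references (repository is None) second.
    return [r for r in issue_refs if r.repository], [
        r for r in issue_refs if not r.repository
    ]


refs = [
    IssueRef("MaterializeInc/materialize", 19322, "design.md", 60),
    IssueRef(None, 20743, "some_check.py", 15, '@disabled("due to #20743")'),
]
issue_refs, ambiguous_refs = filter_ambiguous_issues(refs)
for ref in ambiguous_refs:
    print(f"--- Ambiguous issue reference: #{ref.issue_id}")
    print(f"{ref.filename}:{ref.line_number}:")
    print(ref.text)
    print(
        f"Use database-issues#{ref.issue_id} or materialize#{ref.issue_id} "
        "instead to have an unambiguous reference"
    )
# As in the diff, the check fails whenever closed or ambiguous references
# remain: `return 1 if issue_refs + ambiguous_refs else 0`.
```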
4 changes: 2 additions & 2 deletions src/adapter/src/catalog/builtin_table_updates/notice.rs
@@ -128,7 +128,7 @@ impl CatalogState {

// push `id` column
packer.push(Datum::String(id.as_str()));
// push `notice_type` column (TODO(materialize#21513): encode as int?)
// push `notice_type` column (TODO: encode as int?)
packer.push(Datum::String(notice.kind.as_str()));
// push `message` column
packer.push(Datum::String(&notice.message));
@@ -159,7 +159,7 @@ impl CatalogState {
},
None => Datum::Null,
});
// push `action_type` column (TODO(materialize#21513): encode as int?)
// push `action_type` column (TODO: encode as int?)
packer.push(match &notice.action {
Action::None => Datum::Null,
action => Datum::String(action.kind().as_str()),
2 changes: 1 addition & 1 deletion src/catalog/src/durable/upgrade.rs
@@ -92,7 +92,7 @@ macro_rules! objects {
fn from(value: StateUpdateKind) -> Self {
let kind = value.kind.expect("kind should be set");
// TODO: This requires that the json->proto->json roundtrips
// exactly, see #23908.
// exactly, see materialize#23908.
StateUpdateKindJson::from_serde(&kind)
}
}
2 changes: 1 addition & 1 deletion src/compute/src/compute_state.rs
@@ -1480,7 +1480,7 @@ pub struct CollectionState {
///
/// The compute protocol does not allow `Frontiers` responses for subscribe and copy-to
/// collections, so we need to be able to recognize them. This is something we would like to
/// change in the future (#16274).
/// change in the future (materialize#16274).
pub is_subscribe_or_copy: bool,
/// The collection's initial as-of frontier.
///
2 changes: 1 addition & 1 deletion src/compute/src/logging/initialize.rs
@@ -142,7 +142,7 @@ impl<A: Allocate + 'static> LoggingContext<'_, A> {

// TODO(vmarcos): If we introduce introspection sources that would match
// type specialization for keys, we'd need to ensure that type specialized
// variants reach the map below (issue #22398).
// variants reach the map below (issue materialize#22398).
collections
.into_iter()
.map(|(log, collection)| {
7 changes: 4 additions & 3 deletions src/compute/src/render/reduce.rs
@@ -205,7 +205,7 @@
S: Scope<Timestamp = G::Timestamp>,
{
// TODO(vmarcos): Arrangement specialization here could eventually be extended to keys,
// not only values (#22103).
// not only values (materialize#22103).
let arrangement = match plan {
// If we have no aggregations or just a single type of reduction, we
// can go ahead and render them directly.
@@ -1923,8 +1923,9 @@ fn finalize_accum<'a>(aggr_func: &'a AggregateFunc, accum: &'a Accum, total: Dif
| (AggregateFunc::SumUInt32, Accum::SimpleNumber { accum, .. }) => {
if !accum.is_negative() {
// Our semantics of overflow are not clearly articulated wrt.
// unsigned vs. signed types (#17758). We adopt an unsigned
// wrapping behavior to match what we do above for signed types.
// unsigned vs. signed types (materialize#17758). We adopt an
// unsigned wrapping behavior to match what we do above for
// signed types.
// TODO(vmarcos): remove potentially dangerous usage of `as`.
#[allow(clippy::as_conversions)]
Datum::UInt64(*accum as u64)
3 changes: 2 additions & 1 deletion src/compute/src/render/top_k.rs
@@ -67,7 +67,8 @@
// TODO(vmarcos): We evaluate the limit expression below for each input update. There
// is an opportunity to do so for every group key instead if the error handling is
// integrated with: 1. The intra-timestamp thinning step in monotonic top-k, e.g., by
// adding an error output there; 2. The validating reduction on basic top-k (#23687).
// adding an error output there; 2. The validating reduction on basic top-k
// (materialize#23687).
let limit_err = match &top_k_plan {
TopKPlan::MonotonicTop1(MonotonicTop1Plan { .. }) => None,
TopKPlan::MonotonicTopK(MonotonicTopKPlan { limit, .. }) => Some(limit),
2 changes: 1 addition & 1 deletion src/environmentd/tests/pgwire.rs
@@ -112,7 +112,7 @@ fn test_bind_params() {
.query_one("CREATE VIEW v AS SELECT $3", &[])
.unwrap_db_error();
// TODO(benesch): this should be `UNDEFINED_PARAMETER`, but blocked
// on #3147.
// on materialize#3147.
assert_eq!(err.message(), "there is no parameter $3");
assert_eq!(err.code(), &SqlState::INTERNAL_ERROR);

8 changes: 4 additions & 4 deletions src/sql/src/catalog.rs
@@ -691,10 +691,10 @@ impl CatalogItemType {
///
/// We don't presently construct types that mirror relational objects,
/// though we likely will need to in the future for full PostgreSQL
/// compatibility (see #23789). For now, we use this method to prevent
/// creating types and relational objects that have the same name, so that
/// it is a backwards compatible change in the future to introduce a type
/// named after each relational object in the system.
/// compatibility (see materialize#23789). For now, we use this method to
/// prevent creating types and relational objects that have the same name, so
/// that it is a backwards compatible change in the future to introduce a
/// type named after each relational object in the system.
pub fn conflicts_with_type(&self) -> bool {
match self {
CatalogItemType::Table => true,
4 changes: 2 additions & 2 deletions test/cloudtest/test_compute.py
@@ -52,7 +52,7 @@ def test_cluster_sizing(mz: MaterializeApplication) -> None:
"failpoint",
["", "after_catalog_drop_replica=panic", "after_sequencer_drop_replica=panic"],
)
@pytest.mark.skip(reason="Failpoints mess up the Mz intance #18000")
@pytest.mark.skip(reason="Failpoints mess up the Mz instance materialize#18000")
def test_cluster_shutdown(mz: MaterializeApplication, failpoint: str) -> None:
"""Test that dropping a cluster or replica causes the associated clusterds to shut down."""

@@ -163,7 +163,7 @@ def test_disk_label(mz: MaterializeApplication) -> None:
)


@pytest.mark.skip(reason="Keeps flaking, see #28327")
@pytest.mark.skip(reason="Keeps flaking, see materialize#28327")
def test_cluster_replica_sizes(mz: MaterializeApplication) -> None:
"""Test that --cluster-replica-sizes mapping is respected"""
# Some time for existing cluster drops to complete so we don't try to spin them up again
2 changes: 1 addition & 1 deletion test/cloudtest/test_metrics.py
@@ -14,7 +14,7 @@
from materialize.cloudtest.app.materialize_application import MaterializeApplication


@pytest.mark.skip(reason="Fails occasionally, see #29171")
@pytest.mark.skip(reason="Fails occasionally, see materialize#29171")
def test_replica_metrics(mz: MaterializeApplication) -> None:
mz.testdrive.run(
input=dedent(
4 changes: 2 additions & 2 deletions test/cloudtest/test_replica_restart.py
@@ -49,7 +49,7 @@ def assert_notice(conn: Connection, contains: bytes) -> None:

# Test that an OOMing cluster replica generates expected entries in
# `mz_cluster_replica_statuses`
@pytest.mark.skip(reason="Now fails after a Buildkite upgrade #20948")
@pytest.mark.skip(reason="Now fails after a Buildkite upgrade materialize#20948")
def test_oom_clusterd(mz: MaterializeApplication) -> None:
def verify_cluster_oomed() -> None:
with mz.environmentd.sql_cursor(autocommit=False) as cur:
@@ -98,7 +98,7 @@ def verify_cluster_oomed() -> None:

# Test that a crashed (and restarted) cluster replica generates expected notice
# events.
@pytest.mark.skip(reason="Hangs occasionally, see #28235")
@pytest.mark.skip(reason="Hangs occasionally, see materialize#28235")
def test_crash_clusterd(mz: MaterializeApplication) -> None:
mz.environmentd.sql("DROP TABLE IF EXISTS t1 CASCADE")
mz.environmentd.sql("CREATE TABLE t1 (f1 TEXT)")
4 changes: 2 additions & 2 deletions test/cloudtest/test_secrets.py
@@ -62,7 +62,7 @@ def test_secrets(mz: MaterializeApplication) -> None:

# Tests that secrets deleted from the catalog but not from k8s are cleaned up on
# envd startup.
@pytest.mark.skip(reason="Failpoints mess up the Mz intance #18000")
@pytest.mark.skip(reason="Failpoints mess up the Mz intance materialize#18000")
def test_orphaned_secrets(mz: MaterializeApplication) -> None:
# Use two separate failpoints. One that crashes after modifying the catalog
# (drop_secrets), and one that fails during bootstrap (orphan_secrets) so
@@ -95,7 +95,7 @@ def test_orphaned_secrets(mz: MaterializeApplication) -> None:
wait(condition="delete", resource=f"secret/{secret}")


@pytest.mark.skip(reason="Flaky, see #29072")
@pytest.mark.skip(reason="Flaky, see materialize#29072")
def test_missing_secret(mz: MaterializeApplication) -> None:
"""Test that Mz does not panic if a secret goes missing from K8s"""
mz.testdrive.run(
@@ -11,6 +11,10 @@
# Test adding a primary key over an existing column
#

# TODO: Reenable when materialize#6570 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_add_primary_key (f1 INTEGER);
INSERT INTO alter_add_primary_key VALUES (123), (234);
@@ -11,6 +11,10 @@
# Test that dropping the primary key is handled correctly
#

# TODO: Reenable when materialize#6521 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_drop_primary_key (f1 INTEGER PRIMARY KEY);
INSERT INTO alter_drop_primary_key VALUES (123);
@@ -12,6 +12,10 @@
# Currently rejected by the schema registry on the Debezium side. Replication stops
#

# TODO: Reenable when materialize#6570 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_add_column_primary_key (f1 INTEGER);
INSERT INTO alter_add_column_primary_key VALUES (123);
@@ -12,6 +12,10 @@
# Currently, this throws an error on the Debezium side and replication stops
#

# TODO: Reenable when materialize#6570 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_extend_primary_key (f1 INTEGER, f2 INTEGER, PRIMARY KEY (f1));
ALTER TABLE alter_extend_primary_key REPLICA IDENTITY FULL;
@@ -12,6 +12,10 @@
# Currently, this throws an error on the Debezium side and replication stops
#

# TODO: Reenable when materialize#6570 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_shrink_primary_key (f1 INTEGER DEFAULT 1, f2 INTEGER DEFAULT 2, PRIMARY KEY (f1, f2));
ALTER TABLE alter_shrink_primary_key REPLICA IDENTITY FULL;
@@ -11,6 +11,10 @@
# Change the definition of a column to be NOT NULL
#

# TODO: Reenable when materialize#6570 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE alter_remove_nullability (f1 INTEGER);
ALTER TABLE alter_remove_nullability REPLICA IDENTITY FULL;
@@ -11,6 +11,10 @@
# Make sure that temporal types are properly replicated, including sub-second precision
#

# TODO: Reenable when materialize#6535 is fixed
$ skip-if
SELECT true

$ postgres-execute connection=postgres://postgres:postgres@postgres
CREATE TABLE temporal_types (date_col DATE, time_col TIME, timestamp_col TIMESTAMP);
ALTER TABLE temporal_types REPLICA IDENTITY FULL;
@@ -18,6 +18,10 @@
# Check out https://www.postgresql.org/docs/13/auth-pg-hba-conf.html
# for more details.

# TODO: Reenable when materialize#14037 is fixed
$ skip-if
SELECT true

> CREATE SECRET ssl_ca AS '${arg.ssl-ca}'
> CREATE SECRET ssl_cert AS '${arg.ssl-cert}'
> CREATE SECRET ssl_key AS '${arg.ssl-key}'
@@ -14,6 +14,10 @@
# IDENTITY DEFAULT is the same as USING INDEX (t1_pkey)
#

# TODO: Reenable when materialize#14802 is fixed
$ skip-if
SELECT true

> CREATE SECRET pgpass AS 'postgres'
> CREATE CONNECTION pg TO POSTGRES (
HOST postgres,
@@ -18,6 +18,10 @@
# Check out https://www.postgresql.org/docs/13/auth-pg-hba-conf.html
# for more details.

# TODO: Reenable when materialize#14037 is fixed
$ skip-if
SELECT true

> CREATE SECRET ssl_ca AS '${arg.ssl-ca}'
> CREATE SECRET ssl_cert AS '${arg.ssl-cert}'
> CREATE SECRET ssl_key AS '${arg.ssl-key}'
