This patch results in a nonsensical error from the BigQuery emulator. If the query is copied into the BigQuery console and the mock snapshots are replaced with real ones, it succeeds.
Subject: [PATCH] diff
---
Index: src/azul/plugins/repository/tdr_anvil/__init__.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py
--- a/src/azul/plugins/repository/tdr_anvil/__init__.py	(revision 17dadfabbcd151db21a50883da101ab7ab9dd0c0)
+++ b/src/azul/plugins/repository/tdr_anvil/__init__.py	(date 1731467652092)
@@ -350,9 +350,7 @@
table_names = sorted(filter(BundleType.is_batched, table_names))
log.info('Calculating batch prefix lengths for partition %r of %d tables '
'in source %s', prefix, len(table_names), source)
- # The extraneous outer 'SELECT *' works around a bug in BigQuery emulator
query = ' UNION ALL '.join(f'''(
- SELECT * FROM (
SELECT
{table_name!r} AS table_name,
{prefix_len} + LENGTH(CONCAT(
@@ -371,7 +369,6 @@
GROUP BY batch_prefix_length
ORDER BY ABS({target_size} - average_batch_size)
LIMIT 1
- )
)''' for table_name in table_names)
def result(row):
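For context, here is a minimal sketch (not the plugin code itself) of the two branch shapes the diff toggles between. With the workaround, every UNION ALL branch is wrapped in an extraneous SELECT * FROM (...); the patch drops that wrapper, leaving a parenthesized branch whose LIMIT immediately precedes the next UNION ALL, which is what the emulator then complains about. The table names and columns below are placeholders, not the real schema.

    # Minimal sketch, not the plugin code; placeholder tables and columns.
    table_names = ['anvil_activity', 'anvil_file']

    # Patched form: each branch ends in 'LIMIT 1' right before 'UNION ALL'.
    patched = ' UNION ALL '.join(f'''(
        SELECT {name!r} AS table_name, COUNT(*) AS num_rows
        FROM test_anvil_project.anvil_snapshot.{name}
        ORDER BY num_rows
        LIMIT 1
    )''' for name in table_names)

    # Workaround form: the same branch wrapped in an extra 'SELECT * FROM (...)',
    # so the emulator never sees 'LIMIT ... ) UNION ALL' directly.
    workaround = ' UNION ALL '.join(f'''(
        SELECT * FROM (
            SELECT {name!r} AS table_name, COUNT(*) AS num_rows
            FROM test_anvil_project.anvil_snapshot.{name}
            ORDER BY num_rows
            LIMIT 1
        )
    )''' for name in table_names)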
$ (cd test && python3 -m unittest indexer.test_anvil.TestAnvilIndexer.test_list_and_fetch_bundles)
...
Traceback (most recent call last):
File "/home/nadove/PycharmProjects/azul-sc/src/azul/terra.py", line 493, in run_sql
result = job.result()
^^^^^^^^^^^^
File "/home/nadove/PycharmProjects/azul-sc/.venv/lib/python3.11/site-packages/google/cloud/bigquery/job/query.py", line 1681, in result
while not is_job_done():
^^^^^^^^^^^^^
File "/home/nadove/PycharmProjects/azul-sc/.venv/lib/python3.11/site-packages/google/api_core/retry.py", line 366, in retry_wrapped_func
return retry_target(
^^^^^^^^^^^^^
File "/home/nadove/PycharmProjects/azul-sc/.venv/lib/python3.11/site-packages/google/api_core/retry.py", line 204, in retry_target
return target()
^^^^^^^^
File "/home/nadove/PycharmProjects/azul-sc/.venv/lib/python3.11/site-packages/google/cloud/bigquery/job/query.py", line 1630, in is_job_done
raise job_failed_exception
google.api_core.exceptions.InternalServerError: 500 failed to query (
SELECT
'anvil_activity' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_activity
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_alignmentactivity' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_alignmentactivity
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_assayactivity' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_assayactivity
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_diagnosis' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_diagnosis
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_donor' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_donor
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_file' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_file
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_sequencingactivity' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_sequencingactivity
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'anvil_variantcallingactivity' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.anvil_variantcallingactivity
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
) UNION ALL (
SELECT
'non_schema_orphan_table' AS table_name,
0 + LENGTH(CONCAT(
IFNULL(p1, ""), IFNULL(p2, ""), IFNULL(p3, ""), IFNULL(p4, "")
)) AS batch_prefix_length,
AVG(num_rows) AS average_batch_size,
COUNT(*) AS num_batches
FROM (
SELECT
SUBSTR(datarepo_row_id, 0 + 1, 1) AS p1, SUBSTR(datarepo_row_id, 0 + 2, 1) AS p2, SUBSTR(datarepo_row_id, 0 + 3, 1) AS p3, SUBSTR(datarepo_row_id, 0 + 4, 1) AS p4,
COUNT(*) AS num_rows
FROM test_anvil_project.anvil_snapshot.non_schema_orphan_table
WHERE STARTS_WITH(datarepo_row_id, '')
GROUP BY ROLLUP (p1, p2, p3, p4)
)
GROUP BY batch_prefix_length
ORDER BY ABS(256 - average_batch_size)
LIMIT 1
): LIMIT clause should come after UNION ALL not before; message: failed to query (
...
): LIMIT clause should come after UNION ALL not before, reason: jobInternalError
Location: None
Job ID: 8a1f5049-c267-48dd-b181-95c22d032973
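A sketch of how one might reproduce the discrepancy directly against a locally running emulator with the BigQuery Python client, stripped of the plugin and mock-snapshot setup. The endpoint, port, and anonymous credentials are assumptions about a typical local emulator setup, not something taken from this issue; only the query shape (parenthesized branches whose LIMIT precedes the next UNION ALL) mirrors the failing query.

    from google.api_core.client_options import ClientOptions
    from google.auth.credentials import AnonymousCredentials
    from google.cloud import bigquery

    # Point the client at a locally running emulator instead of real BigQuery.
    # The endpoint and port are assumptions about a typical local setup.
    client = bigquery.Client(
        project='test_anvil_project',
        client_options=ClientOptions(api_endpoint='http://localhost:9050'),
        credentials=AnonymousCredentials(),
    )

    # Minimal query with the same shape as the failing one: parenthesized
    # branches whose LIMIT precedes the next UNION ALL. The BigQuery console
    # accepts this shape; per this issue, the emulator rejects it with
    # 'LIMIT clause should come after UNION ALL not before'.
    query = '''
    (SELECT 1 AS x ORDER BY x LIMIT 1)
    UNION ALL
    (SELECT 2 AS x ORDER BY x LIMIT 1)
    '''
    print(list(client.query(query).result()))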