From 6a7d6f660815d03217d2e9a0a724e632ab26f1ab Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 6 Nov 2024 10:01:27 +0000 Subject: [PATCH 01/13] chore(firestore-bigquery-changetracker): bump version --- firestore-bigquery-export/scripts/import/src/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/firestore-bigquery-export/scripts/import/src/index.ts b/firestore-bigquery-export/scripts/import/src/index.ts index daafc9aa7..e0ea43e05 100644 --- a/firestore-bigquery-export/scripts/import/src/index.ts +++ b/firestore-bigquery-export/scripts/import/src/index.ts @@ -80,6 +80,7 @@ const run = async (): Promise => { datasetLocation, wildcardIds: queryCollectionGroup, useNewSnapshotQuerySyntax, + clustering: null, }); await initializeDataSink(dataSink, config); From 4500c29afe1545df0ea788c4bac8af7c861df6dd Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 6 Nov 2024 10:24:14 +0000 Subject: [PATCH 02/13] fix(firestore-bigquery-export): added ts-expect-error and TODOs in the import script --- firestore-bigquery-export/scripts/import/src/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/firestore-bigquery-export/scripts/import/src/index.ts b/firestore-bigquery-export/scripts/import/src/index.ts index e0ea43e05..daafc9aa7 100644 --- a/firestore-bigquery-export/scripts/import/src/index.ts +++ b/firestore-bigquery-export/scripts/import/src/index.ts @@ -80,7 +80,6 @@ const run = async (): Promise => { datasetLocation, wildcardIds: queryCollectionGroup, useNewSnapshotQuerySyntax, - clustering: null, }); await initializeDataSink(dataSink, config); From 9d08baed54f30dad467eac05af5f45c02ac5a962 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 11:28:57 +0100 Subject: [PATCH 03/13] feat: try to immediately write to bq first --- .../functions/src/index.ts | 212 ++++++++++++------ .../functions/src/logs.ts | 24 +- 2 files changed, 155 insertions(+), 81 deletions(-) diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index 76f2399a8..b78bc8528 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -19,7 +19,7 @@ import * as functions from "firebase-functions"; import * as admin from "firebase-admin"; import { getExtensions } from "firebase-admin/extensions"; import { getFunctions } from "firebase-admin/functions"; -import { getFirestore } from "firebase-admin/firestore"; +import { DocumentSnapshot } from "firebase-admin/firestore"; import { ChangeType, @@ -67,29 +67,42 @@ export const syncBigQuery = functions.tasks .taskQueue() .onDispatch( async ({ context, changeType, documentId, data, oldData }, ctx) => { - const update = { - timestamp: context.timestamp, // This is a Cloud Firestore commit timestamp with microsecond precision. - operation: changeType, - documentName: context.resource.name, - documentId: documentId, - pathParams: config.wildcardIds ? context.params : null, - eventId: context.eventId, - data, - oldData, - }; - - /** Record the chnages in the change tracker */ - await eventTracker.record([{ ...update }]); - - /** Send an event Arc update , if configured */ - await events.recordSuccessEvent({ - subject: documentId, - data: { - ...update, - }, - }); - - logs.complete(); + try { + // Use the shared function to write the event to BigQuery + await writeEventToBigQuery( + changeType, + documentId, + data, + oldData, + context + ); + + // Record a success event in EventArc, if configured + await events.recordSuccessEvent({ + subject: documentId, + data: { + timestamp: context.timestamp, + operation: changeType, + documentName: context.resource.name, + documentId, + pathParams: config.wildcardIds ? context.params : null, + eventId: context.eventId, + data, + oldData, + }, + }); + + logs.complete(); + } catch (err) { + logs.error(true, "Failed to process syncBigQuery task", err, { + context, + changeType, + documentId, + data, + oldData, + }); + throw err; + } } ); @@ -109,10 +122,6 @@ export const fsexportbigquery = functions const oldData = isCreated || config.excludeOldData ? undefined : change.before?.data(); - /** - * Serialize early before queueing in cloud task - * Cloud tasks currently have a limit of 1mb, this also ensures payloads are kept to a minimum - */ let serializedData: any; let serializedOldData: any; @@ -120,65 +129,130 @@ export const fsexportbigquery = functions serializedData = eventTracker.serializeData(data); serializedOldData = eventTracker.serializeData(oldData); } catch (err) { - logs.error(false, "Failed to serialize data", err, null, null); + logs.error(true, "Failed to serialize data", err, { data, oldData }); throw err; } try { - await events.recordStartEvent({ - documentId, - changeType, - before: { data: change.before.data() }, - after: { data: change.after.data() }, - context: context.resource, - }); + await recordEventArcStartEvent(documentId, changeType, change, context); } catch (err) { - logs.error(false, "Failed to record start event", err, null, null); + logs.error(false, "Failed to record start event", err); throw err; } try { - const queue = getFunctions().taskQueue( - `locations/${config.location}/functions/syncBigQuery`, - config.instanceId + await writeEventToBigQuery( + changeType, + documentId, + serializedData, + serializedOldData, + context ); - - await queue.enqueue({ + } catch (err) { + await handleEnqueueError( + err, context, changeType, documentId, - data: serializedData, - oldData: serializedOldData, - }); - } catch (err) { - const event = { - timestamp: context.timestamp, // This is a Cloud Firestore commit timestamp with microsecond precision. - operation: changeType, - documentName: context.resource.name, - documentId: documentId, - pathParams: config.wildcardIds ? context.params : null, - eventId: context.eventId, - data: serializedData, - oldData: serializedOldData, - }; - - await events.recordErrorEvent(err as Error); - // Only log the error once here - if (!err.logged) { - logs.error( - config.logFailedExportData, - "Failed to enqueue task to syncBigQuery", - err, - event, - eventTrackerConfig - ); - } - return; + serializedData, + serializedOldData + ); } logs.complete(); }); +/** + * Record the start event for tracking purposes. + */ +async function recordEventArcStartEvent( + documentId: string, + changeType: string, + change: functions.Change, + context: functions.EventContext +) { + await events.recordStartEvent({ + documentId, + changeType, + before: { data: change.before.data() }, + after: { data: change.after.data() }, + context: context.resource, + }); +} + +/** + * Record the event to the event tracker. + */ +async function writeEventToBigQuery( + changeType: string, + documentId: string, + serializedData: any, + serializedOldData: any, + context: functions.EventContext +) { + const event = { + timestamp: context.timestamp, + operation: changeType, + documentName: context.resource.name, + documentId, + pathParams: config.wildcardIds ? context.params : null, + eventId: context.eventId, + data: serializedData, + oldData: serializedOldData, + }; + + eventTracker.record([event]); +} + +/** + * Handle errors when enqueueing tasks to sync BigQuery. + */ +async function handleEnqueueError( + err: Error, + context: functions.EventContext, + changeType: string, + documentId: string, + serializedData: any, + serializedOldData: any +) { + try { + const queue = getFunctions().taskQueue( + `locations/${config.location}/functions/syncBigQuery`, + config.instanceId + ); + + await queue.enqueue({ + context, + changeType, + documentId, + data: serializedData, + oldData: serializedOldData, + }); + } catch (enqueueErr) { + const event = { + timestamp: context.timestamp, + operation: changeType, + documentName: context.resource.name, + documentId, + pathParams: config.wildcardIds ? context.params : null, + eventId: context.eventId, + data: serializedData, + oldData: serializedOldData, + }; + + await events.recordErrorEvent(enqueueErr as Error); + + if (!enqueueErr.logged) { + logs.error( + true, + "Failed to enqueue task to syncBigQuery", + enqueueErr, + event + ); + } + } +} + export const setupBigQuerySync = functions.tasks .taskQueue() .onDispatch(async () => { diff --git a/firestore-bigquery-export/functions/src/logs.ts b/firestore-bigquery-export/functions/src/logs.ts index c312cecdf..d81ee7fb0 100644 --- a/firestore-bigquery-export/functions/src/logs.ts +++ b/firestore-bigquery-export/functions/src/logs.ts @@ -153,20 +153,20 @@ export const error = ( includeEvent: boolean, message: string, err: Error, - event: any, - eventTrackerConfig: any + event?: any, // Made optional, as it is not always required + eventTrackerConfig?: any // Made optional, as it is not always required ) => { - if (includeEvent) { - logger.error(`Error when mirroring data to BigQuery: ${message}`, { - error: err, - event, - eventTrackerConfig, - }); - } else { - logger.error(`Error when mirroring data to BigQuery: ${message}`, { - error: err, - }); + const logDetails: Record = { error: err }; + + if (includeEvent && event) { + logDetails.event = event; + } + + if (includeEvent && eventTrackerConfig) { + logDetails.eventTrackerConfig = eventTrackerConfig; } + + logger.error(`Error when mirroring data to BigQuery: ${message}`, logDetails); }; export const init = () => { From f87e3bd30c6987b6a01f93b73a514f32b8f60b9b Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 11:33:46 +0100 Subject: [PATCH 04/13] chore: remove legacy backfill code --- firestore-bigquery-export/README.md | 4 +- firestore-bigquery-export/extension.yaml | 68 --------------- .../functions/src/config.ts | 3 - .../functions/src/index.ts | 84 ------------------- 4 files changed, 1 insertion(+), 158 deletions(-) diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index 932e71881..1a6fb9eb9 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -126,7 +126,7 @@ To install an extension, your project must be on the [Blaze (pay as you go) plan * Collection path: What is the path of the collection that you would like to export? You may use `{wildcard}` notation to match a subcollection of all documents in a collection (for example: `chatrooms/{chatid}/posts`). Parent Firestore Document IDs from `{wildcards}` can be returned in `path_params` as a JSON formatted string. -* Enable logging failed exports: If enabled, the extension will log event exports that failed to enqueue to Cloud Logging, to mitigate data loss. +* Enable logging failed exports: If enabled, the extension will export what failed to enqueue to the Firebase console, to mitigate data loss. * Enable Wildcard Column field with Parent Firestore Document IDs: If enabled, creates a column containing a JSON object of all wildcard ids from a documents path. @@ -158,8 +158,6 @@ essential for the script to insert data into an already partitioned table.) * Exclude old data payloads: If enabled, table rows will never contain old data (document snapshot before the Firestore onDocumentUpdate event: `change.before.data()`). The reduction in data should be more performant, and avoid potential resource limitations. -* Use Collection Group query: Do you want to use a [collection group](https://firebase.google.com/docs/firestore/query-data/queries#collection-group-query) query for importing existing documents? You have to enable collectionGroup query if your import path contains subcollections. Warning: A collectionGroup query will target every collection in your Firestore project that matches the 'Existing documents collection'. For example, if you have 10,000 documents with a subcollection named: landmarks, this will query every document in 10,000 landmarks collections. - * Cloud KMS key name: Instead of Google managing the key encryption keys that protect your data, you control and manage key encryption keys in Cloud KMS. If this parameter is set, the extension will specify the KMS key name when creating the BQ table. See the PREINSTALL.md for more details. diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index c8045fe97..05ab49d52 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -409,74 +409,6 @@ params: - label: No value: no - # - param: DO_BACKFILL - # label: Import existing Firestore documents into BigQuery? - # description: >- - # Do you want to import existing documents from your Firestore collection - # into BigQuery? These documents will have each have a special changelog - # with the operation of `IMPORT` and the timestamp of epoch. This ensures - # that any operation on an imported document supersedes the import record. - # type: select - # required: true - # default: no - # options: - # - label: Yes - # value: yes - # - label: No - # value: no - - # - param: IMPORT_COLLECTION_PATH - # label: Existing Documents Collection - # description: >- - # Specify the path of the Cloud Firestore Collection you would like to - # import from. This may or may not be the same Collection for which you plan - # to mirror changes. If you want to use a collectionGroup query, provide the - # collection name value here, and set 'Use Collection Group query' to true. - # You may use `{wildcard}` notation with an enabled collectionGroup query to - # match a subcollection of all documents in a collection (e.g., - # `chatrooms/{chatid}/posts`). - # type: string - # validationRegex: "^[^/]+(/[^/]+/[^/]+)*$" - # validationErrorMessage: - # Firestore collection paths must be an odd number of segments separated by - # slashes, e.g. "path/to/collection". - # example: posts - # required: false - - - param: USE_COLLECTION_GROUP_QUERY - label: Use Collection Group query - description: >- - Do you want to use a [collection - group](https://firebase.google.com/docs/firestore/query-data/queries#collection-group-query) - query for importing existing documents? You have to enable collectionGroup - query if your import path contains subcollections. Warning: A - collectionGroup query will target every collection in your Firestore - project that matches the 'Existing documents collection'. For example, if - you have 10,000 documents with a subcollection named: landmarks, this will - query every document in 10,000 landmarks collections. - type: select - default: no - options: - - label: Yes - value: yes - - label: No - value: no - - # - param: DOCS_PER_BACKFILL - # label: Docs per backfill - # description: >- - # When importing existing documents, how many should be imported at once? - # The default value of 200 should be ok for most users. If you are using a - # transform function or have very large documents, you may need to set this - # to a lower number. If the lifecycle event function times out, lower this - # value. - # type: string - # example: 200 - # validationRegex: "^[1-9][0-9]*$" - # validationErrorMessage: Must be a postive integer. - # default: 200 - # required: true - - param: KMS_KEY_NAME label: Cloud KMS key name description: >- diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index 3adb5d05d..bc5178dd0 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -37,8 +37,6 @@ export default { databaseId: "(default)", collectionPath: process.env.COLLECTION_PATH, datasetId: process.env.DATASET_ID, - doBackfill: process.env.DO_BACKFILL === "yes", - docsPerBackfill: parseInt(process.env.DOCS_PER_BACKFILL) || 200, tableId: process.env.TABLE_ID, location: process.env.LOCATION, initialized: false, @@ -63,5 +61,4 @@ export default { process.env.MAX_DISPATCHES_PER_SECOND || "10" ), kmsKeyName: process.env.KMS_KEY_NAME, - useCollectionGroupQuery: process.env.USE_COLLECTION_GROUP_QUERY === "yes", }; diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index b78bc8528..f1f7422e8 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -277,93 +277,9 @@ export const initBigQuerySync = functions.tasks /** Init the BigQuery sync */ await eventTracker.initialize(); - /** Run Backfill */ - if (false) { - await getFunctions() - .taskQueue( - `locations/${config.location}/functions/fsimportexistingdocs`, - config.instanceId - ) - .enqueue({ offset: 0, docsCount: 0 }); - return; - } - await runtime.setProcessingState( "PROCESSING_COMPLETE", "Sync setup completed" ); return; }); - -exports.fsimportexistingdocs = functions.tasks - .taskQueue() - .onDispatch(async (data, context) => { - const runtime = getExtensions().runtime(); - await runtime.setProcessingState( - "PROCESSING_COMPLETE", - "Completed. No existing documents imported into BigQuery." - ); - return; - - // if (!config.doBackfill || !config.importCollectionPath) { - // await runtime.setProcessingState( - // "PROCESSING_COMPLETE", - // "Completed. No existing documents imported into BigQuery." - // ); - // return; - // } - - // const offset = (data["offset"] as number) ?? 0; - // const docsCount = (data["docsCount"] as number) ?? 0; - - // const query = config.useCollectionGroupQuery - // ? getFirestore(config.databaseId).collectionGroup( - // config.importCollectionPath.split("/")[ - // config.importCollectionPath.split("/").length - 1 - // ] - // ) - // : getFirestore(config.databaseId).collection(config.importCollectionPath); - - // const snapshot = await query - // .offset(offset) - // .limit(config.docsPerBackfill) - // .get(); - - // const rows = snapshot.docs.map((d) => { - // return { - // timestamp: new Date().toISOString(), - // operation: ChangeType.IMPORT, - // documentName: `projects/${config.bqProjectId}/databases/(default)/documents/${d.ref.path}`, - // documentId: d.id, - // eventId: "", - // pathParams: resolveWildcardIds(config.importCollectionPath, d.ref.path), - // data: eventTracker.serializeData(d.data()), - // }; - // }); - // try { - // await eventTracker.record(rows); - // } catch (err: any) { - // /** If configured, event tracker wil handle failed rows in a backup collection */ - // functions.logger.log(err); - // } - // if (rows.length == config.docsPerBackfill) { - // // There are more documents to import - enqueue another task to continue the backfill. - // const queue = getFunctions().taskQueue( - // `locations/${config.location}/functions/fsimportexistingdocs`, - // config.instanceId - // ); - // await queue.enqueue({ - // offset: offset + config.docsPerBackfill, - // docsCount: docsCount + rows.length, - // }); - // } else { - // // We are finished, set the processing state to report back how many docs were imported. - // runtime.setProcessingState( - // "PROCESSING_COMPLETE", - // `Successfully imported ${ - // docsCount + rows.length - // } documents into BigQuery` - // ); - // } - // await events.recordCompletionEvent({ context }); - }); From c69fb84eac03fbdc336aab1a8ab11f24547fb23e Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 11:55:17 +0100 Subject: [PATCH 05/13] feat: add max enqueue attempts param --- firestore-bigquery-export/README.md | 2 + firestore-bigquery-export/extension.yaml | 12 ++ .../__snapshots__/config.test.ts.snap | 4 +- .../functions/src/config.ts | 1 + .../functions/src/index.ts | 149 ++++++++++++------ .../functions/src/util.ts | 10 ++ 6 files changed, 131 insertions(+), 47 deletions(-) diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index 1a6fb9eb9..d97f1d128 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -160,6 +160,8 @@ essential for the script to insert data into an already partitioned table.) * Cloud KMS key name: Instead of Google managing the key encryption keys that protect your data, you control and manage key encryption keys in Cloud KMS. If this parameter is set, the extension will specify the KMS key name when creating the BQ table. See the PREINSTALL.md for more details. +* Maximum number of enqueue attempts: This parameter will set the maximum number of attempts to enqueue a document to cloud tasks for export to BigQuery. If the maximum number of attempts is reached, the failed export will be handled according to the `LOG_FAILED_EXPORTS` parameter. + **Cloud Functions:** diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 05ab49d52..36096a578 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -423,6 +423,18 @@ params: 'projects/PROJECT_NAME/locations/KEY_RING_LOCATION/keyRings/KEY_RING_ID/cryptoKeys/KEY_ID'. required: false + - param: MAX_ENQUEUE_ATTEMPTS + label: Maximum number of enqueue attempts + description: >- + This parameter will set the maximum number of attempts to enqueue a + document to cloud tasks for export to BigQuery. If the maximum number of + attempts is reached, the failed export will be handled according to the + `LOG_FAILED_EXPORTS` parameter. + type: string + validationRegex: ^(10|[1-9])$ + validationErrorMessage: Please select an integer between 1 and 10 + default: 3 + events: - type: firebase.extensions.firestore-counter.v1.onStart description: diff --git a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap index d5a48cdab..a58287104 100644 --- a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap +++ b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap @@ -12,8 +12,6 @@ Object { "databaseId": "(default)", "datasetId": "my_dataset", "datasetLocation": undefined, - "doBackfill": false, - "docsPerBackfill": 200, "excludeOldData": false, "importCollectionPath": undefined, "initialized": false, @@ -22,13 +20,13 @@ Object { "location": "us-central1", "logFailedExportData": false, "maxDispatchesPerSecond": 10, + "maxEnqueueAttempts": 3, "tableId": "my_table", "timePartitioning": null, "timePartitioningField": undefined, "timePartitioningFieldType": undefined, "timePartitioningFirestoreField": undefined, "transformFunction": "", - "useCollectionGroupQuery": false, "useNewSnapshotQuerySyntax": false, "wildcardIds": false, } diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index bc5178dd0..c8436f0f3 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -61,4 +61,5 @@ export default { process.env.MAX_DISPATCHES_PER_SECOND || "10" ), kmsKeyName: process.env.KMS_KEY_NAME, + maxEnqueueAttempts: parseInt(process.env.MAX_ENQUEUE_ATTEMPTS || "3"), }; diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index f1f7422e8..4dcb2c60e 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -19,7 +19,6 @@ import * as functions from "firebase-functions"; import * as admin from "firebase-admin"; import { getExtensions } from "firebase-admin/extensions"; import { getFunctions } from "firebase-admin/functions"; -import { DocumentSnapshot } from "firebase-admin/firestore"; import { ChangeType, @@ -27,11 +26,11 @@ import { FirestoreEventHistoryTracker, } from "@firebaseextensions/firestore-bigquery-change-tracker"; -import { getEventarc } from "firebase-admin/eventarc"; import * as logs from "./logs"; import * as events from "./events"; import { getChangeType, getDocumentId, resolveWildcardIds } from "./util"; +// Configuration for the Firestore Event History Tracker. const eventTrackerConfig = { tableId: config.tableId, datasetId: config.datasetId, @@ -42,34 +41,42 @@ const eventTrackerConfig = { timePartitioningField: config.timePartitioningField, timePartitioningFieldType: config.timePartitioningFieldType, timePartitioningFirestoreField: config.timePartitioningFirestoreField, + // Database related configurations databaseId: config.databaseId, clustering: config.clustering, wildcardIds: config.wildcardIds, bqProjectId: config.bqProjectId, + // Optional configurations useNewSnapshotQuerySyntax: config.useNewSnapshotQuerySyntax, skipInit: true, kmsKeyName: config.kmsKeyName, }; -const eventTracker: FirestoreBigQueryEventHistoryTracker = +// Initialize the Firestore Event History Tracker with the given configuration. +const eventTracker: FirestoreEventHistoryTracker = new FirestoreBigQueryEventHistoryTracker(eventTrackerConfig); +// Initialize logging. logs.init(); -/** Init app, if not already initialized */ +/** Initialize Firebase Admin SDK if not already initialized */ if (admin.apps.length === 0) { admin.initializeApp(); } +// Setup the event channel for EventArc. events.setupEventChannel(); +/** + * Cloud Function to handle enqueued tasks to synchronize Firestore changes to BigQuery. + */ export const syncBigQuery = functions.tasks .taskQueue() .onDispatch( async ({ context, changeType, documentId, data, oldData }, ctx) => { try { // Use the shared function to write the event to BigQuery - await writeEventToBigQuery( + await recordEventToBigQuery( changeType, documentId, data, @@ -92,8 +99,10 @@ export const syncBigQuery = functions.tasks }, }); + // Log completion of the task. logs.complete(); } catch (err) { + // Log error and throw it to handle in the calling function. logs.error(true, "Failed to process syncBigQuery task", err, { context, changeType, @@ -106,18 +115,26 @@ export const syncBigQuery = functions.tasks } ); +/** + * Cloud Function triggered on Firestore document changes to export data to BigQuery. + */ export const fsexportbigquery = functions .runWith({ failurePolicy: true }) .firestore.database(config.databaseId) .document(config.collectionPath) .onWrite(async (change, context) => { + // Start logging the function execution. logs.start(); + + // Determine the type of change (CREATE, UPDATE, DELETE). const changeType = getChangeType(change); const documentId = getDocumentId(change); + // Check if the document is newly created or deleted. const isCreated = changeType === ChangeType.CREATE; const isDeleted = changeType === ChangeType.DELETE; + // Get the new data (after change) and old data (before change). const data = isDeleted ? undefined : change.after?.data(); const oldData = isCreated || config.excludeOldData ? undefined : change.before?.data(); @@ -126,22 +143,33 @@ export const fsexportbigquery = functions let serializedOldData: any; try { + // Serialize the data before processing. serializedData = eventTracker.serializeData(data); serializedOldData = eventTracker.serializeData(oldData); } catch (err) { + // Log serialization error and throw it. logs.error(true, "Failed to serialize data", err, { data, oldData }); throw err; } try { - await recordEventArcStartEvent(documentId, changeType, change, context); + // Record the start event for the change in EventArc, if configured. + await events.recordStartEvent({ + documentId, + changeType, + before: { data: change.before.data() }, + after: { data: change.after.data() }, + context: context.resource, + }); } catch (err) { + // Log the error if recording start event fails and throw it. logs.error(false, "Failed to record start event", err); throw err; } try { - await writeEventToBigQuery( + // Write the change event to BigQuery. + await recordEventToBigQuery( changeType, documentId, serializedData, @@ -149,6 +177,7 @@ export const fsexportbigquery = functions context ); } catch (err) { + // Handle enqueue errors with retries. await handleEnqueueError( err, context, @@ -159,31 +188,20 @@ export const fsexportbigquery = functions ); } + // Log the successful completion of the function. logs.complete(); }); /** - * Record the start event for tracking purposes. - */ -async function recordEventArcStartEvent( - documentId: string, - changeType: string, - change: functions.Change, - context: functions.EventContext -) { - await events.recordStartEvent({ - documentId, - changeType, - before: { data: change.before.data() }, - after: { data: change.after.data() }, - context: context.resource, - }); -} - -/** - * Record the event to the event tracker. + * Record the event to the Firestore Event History Tracker and BigQuery. + * + * @param changeType - The type of change (CREATE, UPDATE, DELETE). + * @param documentId - The ID of the Firestore document. + * @param serializedData - The serialized new data of the document. + * @param serializedOldData - The serialized old data of the document. + * @param context - The event context from Firestore. */ -async function writeEventToBigQuery( +async function recordEventToBigQuery( changeType: string, documentId: string, serializedData: any, @@ -191,21 +209,29 @@ async function writeEventToBigQuery( context: functions.EventContext ) { const event = { - timestamp: context.timestamp, - operation: changeType, - documentName: context.resource.name, - documentId, - pathParams: config.wildcardIds ? context.params : null, - eventId: context.eventId, - data: serializedData, - oldData: serializedOldData, + timestamp: context.timestamp, // Cloud Firestore commit timestamp + operation: changeType, // The type of operation performed + documentName: context.resource.name, // The document name + documentId, // The document ID + pathParams: config.wildcardIds ? context.params : null, // Path parameters, if any + eventId: context.eventId, // The event ID from Firestore + data: serializedData, // Serialized new data + oldData: serializedOldData, // Serialized old data }; + // Record the event in the Firestore Event History Tracker and BigQuery. eventTracker.record([event]); } /** * Handle errors when enqueueing tasks to sync BigQuery. + * + * @param err - The error object. + * @param context - The event context from Firestore. + * @param changeType - The type of change (CREATE, UPDATE, DELETE). + * @param documentId - The ID of the Firestore document. + * @param serializedData - The serialized new data of the document. + * @param serializedOldData - The serialized old data of the document. */ async function handleEnqueueError( err: Error, @@ -221,14 +247,39 @@ async function handleEnqueueError( config.instanceId ); - await queue.enqueue({ - context, - changeType, - documentId, - data: serializedData, - oldData: serializedOldData, - }); + let attempts = 0; + const jitter = Math.random() * 100; // Adding jitter to avoid collision + + // Exponential backoff formula with a maximum of 5 + jitter seconds + const backoff = (attempt: number) => + Math.min(Math.pow(2, attempt) * 100, 5000) + jitter; + + while (attempts < config.maxEnqueueAttempts) { + if (attempts > 0) { + // Wait before retrying to enqueue the task. + await new Promise((resolve) => setTimeout(resolve, backoff(attempts))); + } + + attempts++; + try { + // Attempt to enqueue the task to the queue. + await queue.enqueue({ + context, + changeType, + documentId, + data: serializedData, + oldData: serializedOldData, + }); + break; // Break the loop if enqueuing is successful. + } catch (enqueueErr) { + // Throw the error if max attempts are reached. + if (attempts === config.maxEnqueueAttempts) { + throw enqueueErr; + } + } + } } catch (enqueueErr) { + // Prepare the event object for error logging. const event = { timestamp: context.timestamp, operation: changeType, @@ -240,8 +291,10 @@ async function handleEnqueueError( oldData: serializedOldData, }; + // Record the error event. await events.recordErrorEvent(enqueueErr as Error); + // Log the error if it has not been logged already. if (!enqueueErr.logged) { logs.error( true, @@ -253,30 +306,38 @@ async function handleEnqueueError( } } +/** + * Cloud Function to set up BigQuery sync by initializing the event tracker. + */ export const setupBigQuerySync = functions.tasks .taskQueue() .onDispatch(async () => { /** Setup runtime environment */ const runtime = getExtensions().runtime(); - /** Init the BigQuery sync */ + // Initialize the BigQuery sync. await eventTracker.initialize(); + // Update the processing state. await runtime.setProcessingState( "PROCESSING_COMPLETE", "Sync setup completed" ); }); +/** + * Cloud Function to initialize BigQuery sync. + */ export const initBigQuerySync = functions.tasks .taskQueue() .onDispatch(async () => { /** Setup runtime environment */ const runtime = getExtensions().runtime(); - /** Init the BigQuery sync */ + // Initialize the BigQuery sync. await eventTracker.initialize(); + // Update the processing state. await runtime.setProcessingState( "PROCESSING_COMPLETE", "Sync setup completed" diff --git a/firestore-bigquery-export/functions/src/util.ts b/firestore-bigquery-export/functions/src/util.ts index c93ab45f1..c128ee56f 100644 --- a/firestore-bigquery-export/functions/src/util.ts +++ b/firestore-bigquery-export/functions/src/util.ts @@ -19,6 +19,11 @@ import { Change } from "firebase-functions"; import { ChangeType } from "@firebaseextensions/firestore-bigquery-change-tracker"; +/** + * Get the change type (CREATE, UPDATE, DELETE) from the Firestore change. + * @param change Firestore document change object. + * @returns {ChangeType} The type of change. + */ export function getChangeType(change: Change): ChangeType { if (!change.after.exists) { return ChangeType.DELETE; @@ -29,6 +34,11 @@ export function getChangeType(change: Change): ChangeType { return ChangeType.UPDATE; } +/** + * Get the document ID from the Firestore change. + * @param change Firestore document change object. + * @returns {string} The document ID. + */ export function getDocumentId(change: Change): string { if (change.after.exists) { return change.after.id; From 15628fa5a45edcc0d7ed01eed013cbf7576ba6e6 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 12:50:18 +0100 Subject: [PATCH 06/13] test: add flags to test, remove unused resource --- firestore-bigquery-export/README.md | 2 -- firestore-bigquery-export/extension.yaml | 13 ------------- .../functions/__tests__/e2e.test.ts | 6 +++--- firestore-bigquery-export/functions/package.json | 14 +++++++------- 4 files changed, 10 insertions(+), 25 deletions(-) diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index d97f1d128..936e931f0 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -168,8 +168,6 @@ essential for the script to insert data into an already partitioned table.) * **fsexportbigquery:** Listens for document changes in your specified Cloud Firestore collection, then exports the changes into BigQuery. -* **fsimportexistingdocs:** Imports existing documents from the specified collection into BigQuery. Imported documents will have a special changelog with the operation of `IMPORT` and the timestamp of epoch. - * **syncBigQuery:** A task-triggered function that gets called on BigQuery sync * **initBigQuerySync:** Runs configuration for sycning with BigQuery diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 36096a578..49b7af636 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -60,19 +60,6 @@ resources: eventType: providers/cloud.firestore/eventTypes/document.write resource: projects/${param:PROJECT_ID}/databases/(default)/documents/${param:COLLECTION_PATH}/{documentId} - - name: fsimportexistingdocs - type: firebaseextensions.v1beta.function - description: - Imports existing documents from the specified collection into BigQuery. - Imported documents will have a special changelog with the operation of - `IMPORT` and the timestamp of epoch. - properties: - runtime: nodejs18 - taskQueueTrigger: - retryConfig: - maxAttempts: 15 - minBackoffSeconds: 60 - - name: syncBigQuery type: firebaseextensions.v1beta.function description: >- diff --git a/firestore-bigquery-export/functions/__tests__/e2e.test.ts b/firestore-bigquery-export/functions/__tests__/e2e.test.ts index 4b6ff773e..48851c722 100644 --- a/firestore-bigquery-export/functions/__tests__/e2e.test.ts +++ b/firestore-bigquery-export/functions/__tests__/e2e.test.ts @@ -2,9 +2,9 @@ import * as admin from "firebase-admin"; import { BigQuery } from "@google-cloud/bigquery"; /** Set defaults */ -const bqProjectId = "dev-extensions-testing"; -const datasetId = "firestore_export"; -const tableId = "bq_e2e_test_raw_changelog"; +const bqProjectId = process.env.BQ_PROJECT_ID || "dev-extensions-testing"; +const datasetId = process.env.DATASET_ID || "firestore_export_e2e"; +const tableId = process.env.TABLE_ID || "posts_raw_changelog"; /** Init resources */ admin.initializeApp({ projectId: bqProjectId }); diff --git a/firestore-bigquery-export/functions/package.json b/firestore-bigquery-export/functions/package.json index fca7e29df..614c66a61 100644 --- a/firestore-bigquery-export/functions/package.json +++ b/firestore-bigquery-export/functions/package.json @@ -17,6 +17,7 @@ "@google-cloud/bigquery": "^7.6.0", "@types/chai": "^4.1.6", "@types/express-serve-static-core": "4.17.30", + "@types/jest": "29.5.0", "@types/node": "^20.4.4", "chai": "^4.2.0", "firebase-admin": "^12.0.0", @@ -24,20 +25,19 @@ "firebase-functions-test": "^0.3.3", "generate-schema": "^2.6.0", "inquirer": "^6.4.0", + "jest": "29.5.0", + "jest-config": "29.5.0", "lodash": "^4.17.14", "nyc": "^14.0.0", "rimraf": "^2.6.3", "sql-formatter": "^2.3.3", + "ts-jest": "29.1.2", "ts-node": "^9.0.0", - "typescript": "^4.8.4", - "@types/jest": "29.5.0", - "jest": "29.5.0", - "jest-config": "29.5.0", - "ts-jest": "29.1.2" + "typescript": "^4.8.4" }, "private": true, "devDependencies": { - "mocked-env": "^1.3.2", - "faker": "^5.1.0" + "faker": "^5.1.0", + "mocked-env": "^1.3.2" } } From 2d5bacb81e381617659bb71a2791bfc3daf4399d Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 15:46:17 +0100 Subject: [PATCH 07/13] feat: add backup to gcs --- firestore-bigquery-export/README.md | 6 ++ firestore-bigquery-export/extension.yaml | 23 +++++ .../__snapshots__/config.test.ts.snap | 2 + .../functions/src/cloud_storage_backups.ts | 96 +++++++++++++++++++ .../functions/src/config.ts | 7 ++ .../functions/src/index.ts | 27 +++++- 6 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 firestore-bigquery-export/functions/src/cloud_storage_backups.ts diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index 936e931f0..b77bdfc69 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -162,6 +162,10 @@ essential for the script to insert data into an already partitioned table.) * Maximum number of enqueue attempts: This parameter will set the maximum number of attempts to enqueue a document to cloud tasks for export to BigQuery. If the maximum number of attempts is reached, the failed export will be handled according to the `LOG_FAILED_EXPORTS` parameter. +* Backup to GCS: If enabled, failed BigQuery updates will be written to a GCS bucket. + +* Backup GCS Bucket Name: This (optional) parameter will allow you to specify a GCS bucket for which failed BigQuery updates will be written to, if this feature is enabled. + **Cloud Functions:** @@ -191,3 +195,5 @@ This extension will operate with the following project IAM roles: * bigquery.dataEditor (Reason: Allows the extension to configure and export data into BigQuery.) * datastore.user (Reason: Allows the extension to write updates to the database.) + +* storage.objectAdmin (Reason: Allows the extension to create objects in the storage bucket.) diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 49b7af636..23694257f 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -48,6 +48,9 @@ roles: - role: datastore.user reason: Allows the extension to write updates to the database. + - role: storage.objectAdmin + reason: Allows the extension to create objects in the storage bucket. + resources: - name: fsexportbigquery type: firebaseextensions.v1beta.function @@ -422,6 +425,26 @@ params: validationErrorMessage: Please select an integer between 1 and 10 default: 3 + - param: BACKUP_TO_GCS + label: Backup to GCS + description: >- + If enabled, failed BigQuery updates will be written to a GCS bucket. + type: select + options: + - label: Yes + value: yes + - label: No + value: no + default: no + required: true + + - param: BACKUP_GCS_BUCKET + label: Backup GCS Bucket Name + description: >- + This (optional) parameter will allow you to specify a GCS bucket for which + failed BigQuery updates will be written to, if this feature is enabled. + type: string + events: - type: firebase.extensions.firestore-counter.v1.onStart description: diff --git a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap index a58287104..73a6a8cc1 100644 --- a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap +++ b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap @@ -2,7 +2,9 @@ exports[`extension config config loaded from environment variables 1`] = ` Object { + "backupBucketName": "undefined.appspot.com", "backupCollectionId": undefined, + "backupDir": "_firestore-bigquery-export", "bqProjectId": undefined, "clustering": Array [ "data", diff --git a/firestore-bigquery-export/functions/src/cloud_storage_backups.ts b/firestore-bigquery-export/functions/src/cloud_storage_backups.ts new file mode 100644 index 000000000..7ffa5c712 --- /dev/null +++ b/firestore-bigquery-export/functions/src/cloud_storage_backups.ts @@ -0,0 +1,96 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import * as functions from "firebase-functions"; +import { Storage } from "@google-cloud/storage"; +import * as logs from "./logs"; +import * as path from "path"; +import * as fs from "fs"; +import { promisify } from "util"; + +// Promisify the writeFile function to use async/await +const writeFileAsync = promisify(fs.writeFile); + +// Initialize Google Cloud Storage client +const storage = new Storage(); + +/** + * Backs up the event data to Google Cloud Storage as a CSV file. + * + * @param bucketName - The name of the GCS bucket. + * @param dirName - The directory path inside the bucket where the file will be stored. + * @param event - The event data containing changeType, documentId, serializedData, serializedOldData, context. + */ +export async function backupToGCS( + bucketName: string, + dirName: string, + { + changeType, + documentId, + serializedData, + serializedOldData, + context, + }: { + changeType: string; + documentId: string; + serializedData: any; + serializedOldData: any; + context: functions.EventContext; + } +) { + // Create a timestamp for the backup file + const timestamp = new Date().toISOString(); + + // Define the filename using documentId and timestamp to ensure uniqueness + const fileName = `${dirName}/${documentId}_${timestamp}.csv`; + + // Create a CSV string from the event data + const csvData = ` +timestamp,event_id,document_name,operation,data,old_data,document_id +"${context.timestamp}","${context.eventId}","${context.resource.name}", +"${changeType}","${JSON.stringify(serializedData)}","${JSON.stringify( + serializedOldData + )}","${documentId}" +`.trim(); + + try { + // Write the CSV data to a temporary local file + const tempFilePath = path.join("/tmp", `${documentId}_${timestamp}.csv`); + await writeFileAsync(tempFilePath, csvData, "utf8"); + + // Upload the file to Google Cloud Storage + await storage.bucket(bucketName).upload(tempFilePath, { + destination: fileName, + contentType: "text/csv", + }); + + // Log the successful backup + functions.logger.info( + `Successfully backed up event for document ${documentId} to ${fileName}` + ); + + // Remove the temporary file after successful upload + fs.unlinkSync(tempFilePath); + } catch (err) { + // Log any errors that occur during the backup process + logs.error( + false, + `Failed to back up event for document ${documentId}`, + err + ); + throw err; // Rethrow the error to be handled by the calling function + } +} diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index c8436f0f3..b92f5bdbe 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -14,6 +14,8 @@ * limitations under the License. */ +import { backupToGCS } from "./cloud_storage_backups"; + function timePartitioning(type) { if ( type === "HOUR" || @@ -62,4 +64,9 @@ export default { ), kmsKeyName: process.env.KMS_KEY_NAME, maxEnqueueAttempts: parseInt(process.env.MAX_ENQUEUE_ATTEMPTS || "3"), + // backup bucket defaults to default firebase cloud storage bucket + backupToGCS: process.env.BACKUP_TO_GCS === "yes" ? true : false, + backupBucketName: + process.env.BACKUP_GCS_BUCKET || `${process.env.PROJECT_ID}.appspot.com`, + backupDir: `_${process.env.INSTANCE_ID || "firestore-bigquery-export"}`, }; diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index 4dcb2c60e..5ce946193 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -19,7 +19,6 @@ import * as functions from "firebase-functions"; import * as admin from "firebase-admin"; import { getExtensions } from "firebase-admin/extensions"; import { getFunctions } from "firebase-admin/functions"; - import { ChangeType, FirestoreBigQueryEventHistoryTracker, @@ -29,6 +28,7 @@ import { import * as logs from "./logs"; import * as events from "./events"; import { getChangeType, getDocumentId, resolveWildcardIds } from "./util"; +import { backupToGCS } from "./cloud_storage_backups"; // Configuration for the Firestore Event History Tracker. const eventTrackerConfig = { @@ -110,6 +110,18 @@ export const syncBigQuery = functions.tasks data, oldData, }); + + if (config.backupToGCS) { + // Backup to Google Cloud Storage as a last resort. + await backupToGCS(config.backupBucketName, config.backupDir, { + changeType, + documentId, + serializedData: data, + serializedOldData: oldData, + context, + }); + } + throw err; } } @@ -177,7 +189,7 @@ export const fsexportbigquery = functions context ); } catch (err) { - // Handle enqueue errors with retries. + // Handle enqueue errors with retries and backup to GCS. await handleEnqueueError( err, context, @@ -303,6 +315,17 @@ async function handleEnqueueError( event ); } + + if (config.backupToGCS) { + // Backup to Google Cloud Storage as a last resort. + await backupToGCS(config.backupBucketName, config.backupDir, { + changeType, + documentId, + serializedData, + serializedOldData, + context, + }); + } } } From 190e0e75a3dd089745f9045d74c3180f30d734ed Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Fri, 27 Sep 2024 17:21:46 +0100 Subject: [PATCH 08/13] chore(firestore-bigquery-export): temporarily disable GCS --- firestore-bigquery-export/README.md | 4 - firestore-bigquery-export/extension.yaml | 38 +++---- .../__snapshots__/config.test.ts.snap | 1 + ...restore-bigquery-change-tracker-1.1.37.tgz | Bin 0 -> 14193 bytes .../functions/package-lock.json | 5 +- .../functions/package.json | 2 +- .../functions/src/cloud_storage_backups.ts | 16 +-- .../functions/src/config.ts | 4 +- .../functions/src/index.ts | 55 ++++----- .../functions/stress_test/count.js | 33 ++++++ .../functions/stress_test/main.js | 104 ++++++++++++++++++ .../functions/stress_test/worker.js | 59 ++++++++++ 12 files changed, 260 insertions(+), 61 deletions(-) create mode 100644 firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz create mode 100644 firestore-bigquery-export/functions/stress_test/count.js create mode 100644 firestore-bigquery-export/functions/stress_test/main.js create mode 100644 firestore-bigquery-export/functions/stress_test/worker.js diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index b77bdfc69..6e5c6612d 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -162,10 +162,6 @@ essential for the script to insert data into an already partitioned table.) * Maximum number of enqueue attempts: This parameter will set the maximum number of attempts to enqueue a document to cloud tasks for export to BigQuery. If the maximum number of attempts is reached, the failed export will be handled according to the `LOG_FAILED_EXPORTS` parameter. -* Backup to GCS: If enabled, failed BigQuery updates will be written to a GCS bucket. - -* Backup GCS Bucket Name: This (optional) parameter will allow you to specify a GCS bucket for which failed BigQuery updates will be written to, if this feature is enabled. - **Cloud Functions:** diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 23694257f..0fbaf7561 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -425,25 +425,25 @@ params: validationErrorMessage: Please select an integer between 1 and 10 default: 3 - - param: BACKUP_TO_GCS - label: Backup to GCS - description: >- - If enabled, failed BigQuery updates will be written to a GCS bucket. - type: select - options: - - label: Yes - value: yes - - label: No - value: no - default: no - required: true - - - param: BACKUP_GCS_BUCKET - label: Backup GCS Bucket Name - description: >- - This (optional) parameter will allow you to specify a GCS bucket for which - failed BigQuery updates will be written to, if this feature is enabled. - type: string + # - param: BACKUP_TO_GCS + # label: Backup to GCS + # description: >- + # If enabled, failed BigQuery updates will be written to a GCS bucket. + # type: select + # options: + # - label: Yes + # value: yes + # - label: No + # value: no + # default: no + # required: true + + # - param: BACKUP_GCS_BUCKET + # label: Backup GCS Bucket Name + # description: >- + # This (optional) parameter will allow you to specify a GCS bucket for which + # failed BigQuery updates will be written to, if this feature is enabled. + # type: string events: - type: firebase.extensions.firestore-counter.v1.onStart diff --git a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap index 73a6a8cc1..b9cb5f541 100644 --- a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap +++ b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap @@ -5,6 +5,7 @@ Object { "backupBucketName": "undefined.appspot.com", "backupCollectionId": undefined, "backupDir": "_firestore-bigquery-export", + "backupToGCS": false, "bqProjectId": undefined, "clustering": Array [ "data", diff --git a/firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz b/firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz new file mode 100644 index 0000000000000000000000000000000000000000..44c71a95366d6d134b9781af0fed5332dedf7aee GIT binary patch literal 14193 zcmV-%H;%|3iwFP!00002|LuMKR~tF9=={uIq4?%x+F8@)BOh;I*(?KQhP?@7fy}%) z*<5JcGVV#c+iA5ST;l)zohp5+)omNd%;qheITL6}rBX>M)t5@+sQ)p#pxrb%@17?Y zpC+`p?)HbY|MA^8j!MeH-`RI}{8?F9dGYKS@qJ%eS$Veha)o@i{`|$t>gtOZE6>Px zE3405tgVvoR=)cKekQDpitkod?qQHCD=S3(`;GjxCX5nR7D>NsZ7v<2|4#d57}G(L z(YHlDrbT%j(9h$%C|QTJPERR&lgE>kwmRg7Tt?}HZjiE=P%>?AE{TfaWkRox=%-0i zP{x-L*&?#AjLuV9DZR{-n5=9r_4ACCWE>S`QYLwpWEZEaWQ!EMe$WcLW?8GfxfHSM ztWO4$tPdZF2YhhDzjjDJ&j!gwhs1e*GNM_zAMfO8?{mUnbmOQTzKx1##Eg%58t+DB zWPF*n$;}cWgmj;hV_Hte1h)$#PrLY;kKl%kXc@&(8IdX3;{9)mW>3iuv>^R-!b(~s z*@fMfW4Xgi$rky?@!>%TbCg{qgX@47hq`rp(`bx6)wY#L8LeN1gM_9rBVWFd_a6kT zWIzHNjYnHs7VI|GgP($ylOn@a;EwPrN#lN0#ELio_t7IU0$I8rGX(FMfVp~+vq_Se z^z(Ew%B=1@lRX#MJdICr0tHNpO)b#w>X-|zxq$77w@S81Mz5Tm5WY434CHFYX|WbT z!hVU;Bv!S_7KyH+#O@(OKP`QIZdE<-o5Ylhh^=7OWrY9uH}&8f<*~}uZQxuZ>dSd!{k-SKY#f`TCGi|#V}&8=|$AP{-V#wVl+BbxY z#Ty)EMd&8eO$&sC9$HZJA9igRv%oXfBM$ssg}>)X7PLC8c6-yueBT!NC}A`#D9h7J zCAJ|q-n2h-i21glC!dWvmDSC#Q?O18v)HRXh1#Py03F69qsL9;#$%q@)zOU#;EZF9*_ zR|5;)4__tq(^(NiZ)|Sx{sULzH6b=_3ChsM=UL+@_*7(3h%zAn18z(9T&w-a zi7Ax#a1@P$;4!ZL$6*$YsEx)3dH=!fEip-9@v3l;q_8Rkz(*P{Emc1@nGtG;COENH z+s;-r=h9#jcGCs99h^SqvRzt$^ixChw7ANS!MFcod|f0L!;-A6to}fL%JYkqlGm?ymI#)=PWm)sG$xZQrUh#L z+vBJ|q_U(#exU^e`^;LnLISAJ5=E$hKPA`sgp8tV)La1rV60@2q?FLleL4o_MEdz? zoF-A$r{pRrhuDvR5(13h1dRN=jFOCiL1TPP@_|{AM5RC)4x6$ZvyE=|>gp8Os1DG@!C1q6Nr^>jX zqwala6#Ze8T~K~PoLrAhe}Y-`CXcJm1W$!PooF3${s}%xWdeT3Ll10OJz$8z8MY*k zZ&afxT3F&_fP#oPx5+4vX-IxdF1Q^E{@fssZ#ewvCOPI}ko>Ouo1WE-RAB3V@(oN7 z#U2=Zlmmm`y#w2*j!Xa+<0`Mm=D6TbJ}v;M$0fh(?rO&+kaa&jSatqQ>_5XOi&MH6 zB`J+hiYQ|dTH9|c8q7HVSDvl9=l{#qXAkH9w`c#6T1-LDBSuX#kNmbA#iJx^S#?>N z7u5Fh+SR+N7Bob4OI8S@@z_;1R9TWGWfG;y|I+R8SSpLEQ~?dzA)_Un+AO#s$wihI z^j*ej5DOwClo{jHN<&Ns(IhRk9iqsunD#qS!yLf}9G?T1Ug?-44=-rBC;w~*mM8B# zD*MCjqKLq;@jgBe@k7uS+FVLY0tn(EypvBdZq`}RrC5JIi|J?m9gut#4)UTG^@joA z?3hh1qk-jwDf1Hd_8Q+iFVVw5?Mwn!b7UjNa=F#ZC*NA)o zdItp^bR6|C48tP7>R8`UDLIX486_#(AT%6N#-a_`y|g^=QNKgbf2dU5+ZNX<;Jh8-SiPt=!51 z1L}dAV~~6`0w!wQpoL{&KOc>fGH9E|k$TJ3^x;VUPdxt!p*UIvko(^c)AKLKY`!aslb-B{y zhPv^uMlIo>uhbLYb|i$1?4_n zQ|+M8`(H6{x*tzN4(o(d?I^kumJHYe+yS--`2Q*-HDBjE?hXwxq7xwR26+wRD=`pe zQP8|8fZ-tb2PDd_37ecV`e_2>MB4L-AOn!Hk*HTmnv$q2>1bTS%oJ3sQ{1%2{q=M4 z`Z`a3#dY88WIWX<#AHiG4_Q^tKZ(Fe$?ijmm+37ORl{+S!_L`{RgdBMr?=|kmoH>o z3BW7jb~w zfSI5xXt0mTct1-@w|~RAt%(_U{bvr>{uz&oVve{}1CNS}6I4$)*>MEiYl*+t24$#@ zegR>FY*i2f?fI=$NkH86K)W$QK|7#t>flUt@M25z!8ca~=x9ssyNk`ZNPG#X1N>O;p{A1j?EvpaFGvMAWOqUxc`N>vBi? zyuAuYo`?|be1~uuKVG;5&Ba?=%?b`OV~n3XsqMM4oS!@)HyDt7I>rwb;_A9>GWF1M zYJW1e#5#5#RO(_fdsVgl03Ny~U+qQz5+_c(1(Q=r_8>BVPS=p*i%XuDO zSLX6Y>>uR0zR4WYD5eG5AUCA7lV>H(%4OJ{tqsx=eojEB3;e28nR+jSl|=XlGj1Ko zk0AhZBci%pa*`7^8KVHwkJ1z+C@F!mj*$IOga=TQly7w(Q#QMQ74pijur_)1$|}W= z4PhfdG;@J@w!RxVgzM{iO!EOzKTg0E+G^LGX*%G{oqcw3W>YV(6Emon(^Ou#;8oP6ks(Ewe1t+aIQr$`#}%5gxW7w!$AXJLc1GGIvkA$Nd>tqw_rjJ}~PHHHL)4s5D*^uUg21!Tcv3l@x4lyjgYRm~JQh;F0_FjP}HgRdqcD z)S6QhxLHusiJ~FYk$xrQE>>$*(j;hM#Z+l>6&PuaK4{@$4%lA7&FpgOVO^*ZYKV3H z+S{gv%G26|zjACE@M^drnm8EVM#={zn0KO&6xAMJAl#ONx0rxAM;B2^6))T-!zdk) zh%nmEvzWv*jjn;8aSk}9g^&`ih7d}Ib|Y~tN)Y`#E0b&zr70MPmc`ZGNmeE))-U84 z5)+}GBQBhmozs3kq9nSE5)@KG56e0QiDxe__N8`>(;ieKj3syxRV?il!a@~Pr0Kr2 zxEU*x*au=GM~n|1rz2 zxH9IiS43CBSn7JO9gI8PVEW?P%T(uI56)$8@Y3^2JnlVcS{ZH6tqo``j@Rr0X)hxx zHhs1uewhR7%ZcK{ZH7(b8qkKIi+=2Y`XpCA?NUK&5)6E-PA-}n2Irs(u#;MR-K&p| zmdZ(T<692?6~^6XVqrV|OkGu}_qk6gFuQCrI;RC}y8^Lg9m3$6Ws*txQpasni22B- zrZ=p^Lq)CD*87*VlFJEUbO7-%GlqAht;`@oLKJYsI46%Oh?P9tR4n|2K{RDDNaVHB z1+2wt+Rp`_I?;=XOA`T=LeQoIyvVNz_1kGpv0u0)Y-T7O5;~??3JO9DL`mN@@_UTx zQ++I=M6SIgNp_ikjJPQwDUtOtl347XM2t;F;@&o+23k=8{$McvgSJw`sI}qPG~ibl zn=-hVQ&F=$Cq>q@HmbmDt=Tmk(}pKm4HplqkU4)1KeKpIioE>V)6TncUe{cd8XbM9hDB0F;GJ z5OFob&`aPlYf0jqvh4ekL@77u@N^!{Y+G^2J1ZscMFXBE>Wk)^v)8VV9XBoC=GE&tyDIDWcFOYY_JCgrvoNUc z8?5B+8m?MZ_sp%bLs^2inIftt;Q<Ht3#B|goW}i(WZ&jq z)GJ%LXo$x5I$j&a@d3TsL4+F3A|2v|%N}`*Sy1U+z9tM5}$`+_F#M~$uSfd z(FY5UQ@b|4#!wx{Nwc}(=~MDfq{!ktT%~MG2KvsglOaSz;tumk8mkOd+Av}Vq55*# zwSy|fcs?7SRxn5|tPC{OO1|87l}f%`cT1ia89Vn~qrV>(i%ACGN&phWQR+*1SY@tO za>6K%&ExDO=Ru}{pJ98}67((7ojJ3MmKT0Lv#=f=ez-8>aH-+&c0!*dyHE|9SFhhA zGuNcgJ@Io-{_F+dTLjbjWbdjBpk(YiImy^-GU8vSWO^Tlq1yY^xM;B`KDarW+yI~% ze9X?UvLHK~`VLX;oFIcDp;??>A7CV&MFq*%W}*La#QvKi03Y_Ev-d5U^^5BYos8%;^bUxc>kZ{DzXXxzAU?m<;_*5qwe#InH;Y$}Mt?j-NApaH4_UfljrfMO)i z(rZ~!h0ZVuEgz65O?B{z+@B1+h_za1v!-cU_wH;e!QlNmSDo2YRau0luC55*YE?xi zKLx6|OTX@dd>(Se7%ha2Sg2`H?Q2ra^NF&HUz)gc+B zF%#iz=`|T7pWW^2sDZ&kdSLtC>k#4UmCO=mrvJjbp5K%0%3M07SrZ&zWEs-0l~tT==Gz zi~jDLe(?=|>&;%b(T$6K8`TSe)%y3r6sGOXX1U6CJykZ5Jq*cqGw6hy1TH zIk&nwe&@VF3-WrIW(cyu$V1jm8Dpjvnl<2gx?MOki$-j&VcjNg%q?@q7?}O(`DXWI1&@g7pChrnv6ul&Fi2-tXe(pSR5V zjLeBz4;!0iC4AHDAgEu+4?ffDxZ2a^dv_G;u0djHy~US)pPQ?Ct;djT+kxCii#{a<4M{16B*x|PiTCPml{yCfouaPn`FG_4qnC_C)vdr z&#v7x>dH4G*R7w1fxb90-$>H@A~=)n)ioyL;-OA@=^$K-gTJ5&b-qCEfV*`E>$b>q zENV(>&{ymKcF4OpJ9oGi8G^#Z``tx<#%>Sh-3ZjYNLWaC`r&FBV|&JLetXcLfyq48 z+`-M{eF1ly-DaYEacVrGWaD4i;~V^~Zl-49HD5({xW3~c$mVBe_5k|=hVWkF_#WQ? zxaq&Lnt}N+s4a8R_a1qHa`vi_vV+@>h{t}T0*NTZt4yC@(O9^8R6`FVA0k|LRJ+znKYw%YB`0em-!WQf)lQEcMfPl@oD>S^{ zy||m&#qE&S5bjKU6B&O&(-`iuf(#J%H44uv(k1RVnl(q24fF22Xu@c*ERu6-WBubB zvxmCTH6uAkt$9bySL2-qZ5|q5W6ahXt*_*c*b!_%Ai4AKWO@q;tZpcNk)}bIaSSI4 z+YVV+7eEN>>Ei%D$~QcrBH?;)hLQ3u{jb)@u?TR!iEvnNnVvQ z-&hT4;5JLFUKm(ptyLuf<7+L}&RV-CF<@c5t81%H1aQoox5B(0&$*6&2L<0}AJ*2{ ze;c%$j{Zd`?mPFb?+w`BVZAgS`nSNpx@M{ezF}w&7QnT&BiiA+yo7$H{R!TBw2?BP z%U@;3j^Q8D+2Nafix5n;e|#gGPa$1qz}j}gOi4wDW)2SKpH(!q@8VXt%I4_D1bA~J zO#;ko2(vlisSU-Y9oDWLk4CogtvWKXWN$wM0N~D3z_GCYHc_=*PUb_*1Np#lM^#+N z(!A%hF~e(>!mo%>Ln2fCst29%4d#w|D@ocD)zi$*DD43_U*t%ovkkIoEo3DWiL91Q{xo8r7>SX;r?gt4woJ9>9b$S?vM#qm(RcI z%1Opb8pW=lQirbhyK+|8{!p%{iRE+~>EwH%kWIfgtzwZ{S<~;uzIB}wHXi1m2C6mA z-LHnVy65fqU>ZhIgJMld<(a^GA9rBAp+l^_FB(TH9$_SqS1@5E(1X@bWZp;BzCd zxsiw+vw}@nnU7R@TEIkJyM3gF137|buU$nFGU5zx16SDT+wG$h3}f0q_~~?SzxR6g z%<2InfA6`6)05xc_Ka@a88Gwx-&?;&z2lR^qnQpb*`9aQ4(m_7SV%XQ7Ey)A4 z7)^PyVR5Rpyc@7J*KVm<;`3Mb8Yi9bq`-VwVj(_h2powL;H1F8vp>iQF#DMCLdEQj zcXaqGFwkv$j%mZZpgyxLe?|h{W7m!Nw}#gW$%>Kz>&9+5Y_LQaU)2+m7Kqd>^UY+E4f5d303bjLhaSCx^in0BX7ZM;I0=|5;`)%O ze3O{_0__$ylvj`!Pzf4SYi;AczAN99dfXg1$Z=k1+w34jL%gcK=_8O2b-It~sr;5y zLKSA9*i^i^q^!?1F_eLEjki7b7^RvcuKSXG(9jJ?Q-1mi+~914%rL_u&mdhfj&Esp zaA!?Pbx?NTXfXdc4syZ=!skUjUV?9;s8>qJu?RY{awB<1Q3+x7em6$Y93#8HP%6vL z5cZ)HJBJ*|qR?kZ_VNP7RcUhQU<|UV%paN=!pjJ8l%&@kGRm`Z2*2b7xuy|hjJYVH zY?4MrQeMwRteJtq%@wK(z{n#d+2g!)dzVRxEwa*A3Goul9Eu~7ILELw+|Iy-tmg-i z;NDeXv7*X1rIP#Ntfl%VFE}rLfcK*bTt(4{7dsx@24DUh zhIo7kZm%P{xsgyRti!t&5r2Voz}jK)X6%g0N50U3B^7T4)H$v?xyZeQ(#`|AuFVC`%pY_orw~5^`y8To!7<-QKYzc6M`Er@? zXLh-r50i}Z!>Cn#6B_bYfyt2D3(>~G0{8XMkvfX%6WI9j&nt4**AI+@I-7e2wi50;A0#`=!fnqe;dKD_8bz9%GHs75A zI0;j=UMC+VmR^F`jjPy$9Mvt85i98^8h23CjzP5I4^#8ejEg*;aBYA%v%-h;F2|DM=kdO*QREB%Ib{mLBni$v!c#Y zfH!APP9?dM@l>562kxOAw8S@&qpNrT0s-+gDs_+}Jygx)%5VyeTNZ~OP<_J=gdoCWmr(Rq@I``c(DL9E6_ewoBH zHab?6JWSLBS2VVOKK-=Lb~fYK)=>aW&8 zspN1cFuB4KZ#AEQyqe0&?(phUnkhK8eLKO{g7)&l zo<>`WyR8^U?pu!YQCB{`EO*CPZ|~|BG5W@MPxBpB4|i~tF{5xEbj@6+Go)s7j{13a z$sh2h7=p*$Fu%A8>K)EGO~;6#!ei)O&YU$Lc*4pQ+7U@m9Lf{S!T)2pd zG2pPw_m2;S0jAv!FL_L&kAXNt`U=BYa45CI2-)PEn}{HD_3FyptK`j3Y!lb=9m`mbk}A*l`b_$ z5eE&8rJb$HGwTeI4e-s^VFY~DcF&wFQGAl0__$OxU%I4~_XP!uesU4n$7mBe+J3p< zYhpm0feXe*68}v5F&%jDn(?Tanp7}3$5FwJs{RSR-J(O`e^wlFAa>wlA?Th-cQ&1n zj8J&uy>T^6J)`vggDbO&vkXrYErWA|g>g<)WW*YYs!5ts#OEg+^vHX#R5s6!r&ebtX0p( z9$C_{tLxta2N_Q1M4`=%LXCia2Tb%?%~#I+)Tr9I?gb4HL2ieHGz#7e8eD5^%{by_ zcwg!;sXk9-W9E5Dsuoijfu3h-Wl)9PBm}-K(+~xKtzT2Mu(cq4nMZ zm6HZ7eCA4PG+x|+2^ppqsRPvDQiaNsVr!tPp~-TiMrK<%l;rUuI#J;9WHIJsE>7;q zvI3?{E){fQ2M6vZoGg*^5Gn)+$`@@O-{^3BpDE9)tnznDAr#&Mi#RX&p5(@-o8;R$ z7qmQ}SGH8qHXW+wD>#**;YGdeIHnE)`p)Fc)izbPG06U|YGK8ayKA87QCGha?T<5` zZPs+pgMZ9RyKcy~x98eJdV#K+&2>(J#DWEgkSun}Rt$Ob$Oet*%nH_Ud8yddo}dm; z8Lr39`_xq@sh?x=7iICeXY`80omrWf6&N($1W)J{b2(avC#p}FFfk2iR89)Wr5@o` z9K1z|>@b{r%-%D%Y2XzxDh$KA`c;`raORm9&5*FnSF;=h?Jz&-1h8xwn1DnC0?EMi z#bdaZY8_oN9H1H|$Zd~SNCGc?w6(><=8TfoDWm;7i&+cqRLW>99U^Ezf3NF);5}uN z(K#(zHoPp#@nmX@8Z!@2X6E8u# zK&uZWdkV!X+eS&6XBSC1iD{)ED!t!J%Qa%+HKs-68!j6Bt38_@fJ&J$K>I$9yu~j| zF?o0g1rfL`8%u8izeE_wTZHh1%ZN~C1OE?Bz1G3I*RQvKeBBc^I)t_)MVrnN%xbqw zPKK0GL+@aifd<#ldBhSX_wiW~U7d-n4Y28!d;_wE5VW8O0{V_mj`k0Jg5MNQ#**?; z%LOE=oWdY%h)2DDzS}?Q?V1psX>ovI>1h(rhEth$2a+nE4XU!X98fw)w?pixNE4cO zzM_Hx>y8!k(6I$S(fn z`?1i4^h&3g%7@(%UAWFG!!Qa8M+VLCnumO6o@WdamJ3gEnIvX5_6|?A@Xpitl<(0y z(|7Ek_TwVlvLHs+@!IpGospl{}p!qvWW z)Psxzsv#3o$Hk{#sf-xm$LTeMxQGx-h>(FKxP5UOy@Tc#49UmOJcohID+kcPIy$68 zQU|HUkJg#zo5`v-$a|&D*%kybDF!F7iv6==s;e6o=dg;a6V}g1qr^;@hZB`B5s?GC zV+XER>O~fkeLk1gJng@Edw6sr>@i0AJx1s_?+<}&G-Jv(6J^&%doE%nBcLCRjYTpU z(}K~MGVGA|5gKEfcAAN(dHmuuv{rgO0rz+1+8*VsT!!S1z-a9!S41cm4!tZAu+I`1~?74;E3yN0XjGb=T{>JZhszn#%aQbucx$wBcPun z$nDyZ`CeGJpQn%qE6KC2WWTH%h;7&I%I2&KsnA&$q5~EfzHvZ*m7yPIIAG3(I^^A3 zp0#iB^=fe8uQojm+_^s~3TyQumtKg{_EGX@4C2;DjDMIlq6)9@!8&8TTF-qleI)}v zF#TII{bm|2%=EuKMt?5^_%<2+orm7Y=rM!UZtr#Pr1uT+e1}~|gHo7{?jn$LDmVi9 zf5I8ExSDdiBd6OXKhtzf3;i4n^kClXQAd%$N0^stEO#Q&iQ%5Q?S*j+!(}znQJesFV)YxnpewpU%8p4lCqxjw|7M6Q!9~H4IzD>7jUQf{XgRE!F%!m zHT!>7U#`EX{&ycCkeeo|tf0NU|q;UGy%S_Jw*Tx)*9B#Y_i!+|W` z+9J!V#%&~%sOqClTyav1Myl-iQ>vog&2>CunwyG^tg+2=RUx z2q;p0@!+HuChU-%n%}Cui&8teU6EfAeyd5n$pY8$`D!)ZEqG)H1_J{NAdy>NgU<)f z@Z767{U%f@=2E5j1}lWSbPx71yc8 z(RG?fv1?W2x|E|~X(*Zta~zx9b$bwv(6dWAZKTMy;SP{o+nw*>a^NWzISx^h6Gqq^-PGTcVdrEesZBN+3Zwr2jrAz zd;3SnC#Sz`zkb&X9^dHFsmWsxf9V|s>WvHX<8L-LcLbjw-*{NSl26zy91i>XC!P6C1^! zjt<|w^#EL8JrhCvhP5o&C1;OWp8W(h4VlS|m5|XK%1raYYz#nag)OUQ6vxM?XDbtg z`y`b-7c@+LC6w3o-qk-VL;3X7S>-aVJvbMh1@09&+dlYBZ8#3G4ZKK!x9V?@Gg)9& z+FUXsw&jI=5Q8)#b)%mPX%GBQf6Uo-kEh!m&bP06!rj*ycaNtWLDf@E+V-(Hg5)7Y zM$zZM&v0<@@^+`Hykqkg$)oR?GuE*(ENr`HsJ=aKoT0Zo!#$@s*H&<^*<2_XwA=O> zf3K8)$tn@(KMwZ~)}j@V~#TthCx~I3Oh?(N%R}%ms)~9zVadP5hB_{(jAaOe&RqoY6-va}D~|q{_oi z@K#5udSz(`8=RAxPjJkzd^8o$V+JrlY^SH-2q#A1U0eBP;Mf`C+rE zkvM3e{^|!q^J{Fx1xjqRhK~0E`9cCO0dJ;otG~+s zA+G!0x&JKwzw%;j&E@~A&(~KT`2V;6{)>_7(b~1u3fAte7Ry~+EqUKk+p1;kr@D$f z!LzK4f7Lh85pLK*|B2hrJIV$UnA_3Y*4UPR?)}e6nWXo~2%7i*^JlQ2~zSfczqRd7<93)!5)of^kzDd8b%_j!WYEFtTAPC}MAp`uw?#fm82u~egZbN(Y=bcZzHl=a zy#AMhZBL2mA~)55B;v72hQW#j#h^@45a*O--^`llAfMz@kiT+nV&7tT%KOU7T#ABv%ukW$^>ubjre{KelHW^0mD z{kJW15!DT4!Qt>B!72-2335Mp%i-K$vx`YSDMLsB?rb?oszw&wbz!SrgV1yaMeV=% zgKP4kbk9Pbj}m;%jr2jrxr|Psf^_lX`-)hPtb6Nhkk)n`e<^6(sv^Xb!g}9@4fNx( zGbd73ZP(?vop#9cl@;?@_@!Bgxc9pexKa@e`0R9~ahsS?82nv6thTA*P5jii(!O=c zvCPNIYva@=*~OBRcT!GfX!XJgK*d`JGKCO_RF^`CmwHnO3D}tJhk9`RSs=#?8PE2< z{dOM@pB#-i;%!}VsimN}qaa1sCFR2x8P(as1GF&Ni_-Kw66vt6hEyG>jAX)`6lGS2 zBok+}T)X@k%L$mucnuFmXHCof@r_&wugDpRCwL=SeVl?tzSWwBXXZ%MLxf_C(?&V8 z^6fu6_mEfr!z^)--HrR7?y3pSu>Y^ER`UNmUtfRrp#Ocd`k%9YD%#x*5hZLVO~4sF zt$$`$J}tgHin8c}2jq625}v}3(WvMTC9rLtlsg~cizbXH{bd?*w+2Wf=Ttq5PLd37 z!GSalDLO&1;s$tn3Es7W_VXf!jGc@YC71R20ICBNQgmWQRKNiyUJH;hxMt8qpO1Y@ z%J&9A0xLj9US@-PE?69T#x0v*^aKjTfz=umKnchVgcRSyvGO;=L6=mr66(SD-juSq zv8Za@VDT&q`^!TbxIc~mi+{u4S)To|_W$+Om6uif|FZ}G?;r958@z?VavP-ecbUpj zT@Pkmm8WJ|iU-SFOJ5e@0(e=iRnT$Hk}@x@TN{FFv@qxW25DU+rMS`7niQ${GMt=; z{e0AwU3PV!q0t~t3fdQ7TOM3EZO(#nC#k%0-NJHcP$Z)w8UU)QNYsSu{;V&*LcqudKH>}ozf-0tJ}&4uDyZ2z zM@N)j&{GhM zpO^b93A?Q5cTZ2DCObXt4wBEHQLrxily+t3GS=l-;eaN`kbO$^OhY^Y5S1rEXaU`y zN}7wmmB@<)OL*OtkXAG)hj{@5`bU(JU$5E!UT5rMLjHOse-%OsEaAWLNfcoO(1=Gv zKpcTwTVtBVH0vkSm}Fbdk(faH|9Kg{5N#{&8cFTg&?kHjyXjs`l9;mYPrZZQ(e_F2^mymz-ka^yU-o;y9*3j2O`afK-kG2@Y9r@jBXe|)lk@YCt;;m*4^y@M0PE?ipLFBt)$l#D0EIOlPQ7{e$X z3gn4+X$1$Oq+0IUdBPJGN-;sCu8", "license": "Apache-2.0", "dependencies": { - "@firebaseextensions/firestore-bigquery-change-tracker": "^1.1.37", + "@firebaseextensions/firestore-bigquery-change-tracker": "file:firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz", "@google-cloud/bigquery": "^7.6.0", "@types/chai": "^4.1.6", "@types/express-serve-static-core": "4.17.30", diff --git a/firestore-bigquery-export/functions/src/cloud_storage_backups.ts b/firestore-bigquery-export/functions/src/cloud_storage_backups.ts index 7ffa5c712..bb1a11e57 100644 --- a/firestore-bigquery-export/functions/src/cloud_storage_backups.ts +++ b/firestore-bigquery-export/functions/src/cloud_storage_backups.ts @@ -21,8 +21,8 @@ import * as path from "path"; import * as fs from "fs"; import { promisify } from "util"; -// Promisify the writeFile function to use async/await -const writeFileAsync = promisify(fs.writeFile); +// TODO: we dont need to promisify in node 18+ +const writeFile = promisify(fs.writeFile); // Initialize Google Cloud Storage client const storage = new Storage(); @@ -51,11 +51,8 @@ export async function backupToGCS( context: functions.EventContext; } ) { - // Create a timestamp for the backup file - const timestamp = new Date().toISOString(); - // Define the filename using documentId and timestamp to ensure uniqueness - const fileName = `${dirName}/${documentId}_${timestamp}.csv`; + const fileName = `${dirName}/${documentId}_${context.eventId}.csv`; // Create a CSV string from the event data const csvData = ` @@ -68,8 +65,11 @@ timestamp,event_id,document_name,operation,data,old_data,document_id try { // Write the CSV data to a temporary local file - const tempFilePath = path.join("/tmp", `${documentId}_${timestamp}.csv`); - await writeFileAsync(tempFilePath, csvData, "utf8"); + const tempFilePath = path.join( + "/tmp", + `${documentId}_${context.eventId}.csv` + ); + await writeFile(tempFilePath, csvData, "utf8"); // Upload the file to Google Cloud Storage await storage.bucket(bucketName).upload(tempFilePath, { diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index b92f5bdbe..15134fc69 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -63,7 +63,9 @@ export default { process.env.MAX_DISPATCHES_PER_SECOND || "10" ), kmsKeyName: process.env.KMS_KEY_NAME, - maxEnqueueAttempts: parseInt(process.env.MAX_ENQUEUE_ATTEMPTS || "3"), + maxEnqueueAttempts: isNaN(parseInt(process.env.MAX_ENQUEUE_ATTEMPTS)) + ? 3 + : parseInt(process.env.MAX_ENQUEUE_ATTEMPTS), // backup bucket defaults to default firebase cloud storage bucket backupToGCS: process.env.BACKUP_TO_GCS === "yes" ? true : false, backupBucketName: diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index 5ce946193..70e82c2ce 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -111,16 +111,16 @@ export const syncBigQuery = functions.tasks oldData, }); - if (config.backupToGCS) { - // Backup to Google Cloud Storage as a last resort. - await backupToGCS(config.backupBucketName, config.backupDir, { - changeType, - documentId, - serializedData: data, - serializedOldData: oldData, - context, - }); - } + // if (config.backupToGCS) { + // // Backup to Google Cloud Storage as a last resort. + // await backupToGCS(config.backupBucketName, config.backupDir, { + // changeType, + // documentId, + // serializedData: data, + // serializedOldData: oldData, + // context, + // }); + // } throw err; } @@ -130,9 +130,8 @@ export const syncBigQuery = functions.tasks /** * Cloud Function triggered on Firestore document changes to export data to BigQuery. */ -export const fsexportbigquery = functions - .runWith({ failurePolicy: true }) - .firestore.database(config.databaseId) +export const fsexportbigquery = functions.firestore + .database(config.databaseId) .document(config.collectionPath) .onWrite(async (change, context) => { // Start logging the function execution. @@ -189,8 +188,12 @@ export const fsexportbigquery = functions context ); } catch (err) { + functions.logger.warn( + "Failed to write event to BigQuery Immediately. Will attempt to Enqueue to Cloud Tasks.", + err + ); // Handle enqueue errors with retries and backup to GCS. - await handleEnqueueError( + await attemptToEnqueue( err, context, changeType, @@ -232,7 +235,7 @@ async function recordEventToBigQuery( }; // Record the event in the Firestore Event History Tracker and BigQuery. - eventTracker.record([event]); + await eventTracker.record([event]); } /** @@ -245,7 +248,7 @@ async function recordEventToBigQuery( * @param serializedData - The serialized new data of the document. * @param serializedOldData - The serialized old data of the document. */ -async function handleEnqueueError( +async function attemptToEnqueue( err: Error, context: functions.EventContext, changeType: string, @@ -316,16 +319,16 @@ async function handleEnqueueError( ); } - if (config.backupToGCS) { - // Backup to Google Cloud Storage as a last resort. - await backupToGCS(config.backupBucketName, config.backupDir, { - changeType, - documentId, - serializedData, - serializedOldData, - context, - }); - } + // if (config.backupToGCS) { + // // Backup to Google Cloud Storage as a last resort. + // await backupToGCS(config.backupBucketName, config.backupDir, { + // changeType, + // documentId, + // serializedData, + // serializedOldData, + // context, + // }); + // } } } diff --git a/firestore-bigquery-export/functions/stress_test/count.js b/firestore-bigquery-export/functions/stress_test/count.js new file mode 100644 index 000000000..6f9ea64d9 --- /dev/null +++ b/firestore-bigquery-export/functions/stress_test/count.js @@ -0,0 +1,33 @@ +const admin = require("firebase-admin"); + +// Initialize Firebase Admin with your credentials +// Make sure you've already set up your Firebase Admin SDK +admin.initializeApp({ + projectId: "vertex-testing-1efc3", +}); + +const firestore = admin.firestore(); + +async function countDocuments(collectionPath) { + try { + const collectionRef = firestore.collection(collectionPath); + + // Perform an aggregate query to count the documents + const snapshot = await collectionRef.count().get(); + + // Access the count from the snapshot + const docCount = snapshot.data().count; + + console.log( + `Number of documents in collection '${collectionPath}':`, + docCount + ); + return docCount; + } catch (error) { + console.error("Error counting documents:", error); + throw error; + } +} + +// Call the function and pass the collection path +countDocuments("posts_2"); diff --git a/firestore-bigquery-export/functions/stress_test/main.js b/firestore-bigquery-export/functions/stress_test/main.js new file mode 100644 index 000000000..dc1289ac8 --- /dev/null +++ b/firestore-bigquery-export/functions/stress_test/main.js @@ -0,0 +1,104 @@ +const { Worker } = require("worker_threads"); +const { performance } = require("perf_hooks"); +const path = require("path"); + +const totalDocs = 10000000; // Total number of documents to write +const maxThreads = 20; // Maximum number of worker threads +const batchSize = 500; // Documents per batch +const rampUpDelay = 2000; // 5 seconds delay between ramp-ups +const rampUps = 20; // Number of ramp-ups (planned) + +const docsPerRampUp = Math.ceil(totalDocs / rampUps); // Documents per ramp-up + +// Start measuring total execution time +const totalStartTime = performance.now(); + +const workerJsPath = path.resolve(__dirname, "worker.js"); + +// Function to spawn worker threads for a specific ramp-up +const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => { + console.log(`Spawning ${activeThreads} worker(s)...`); + let promises = []; + const docsPerThread = Math.ceil(docsPerRampUp / activeThreads); + + for (let i = 0; i < activeThreads; i++) { + const docsForThisThread = Math.min(docsPerThread, docsPerRampUp); + const start = startDoc + i * docsPerThread; + const end = Math.min(start + docsForThisThread, startDoc + docsPerRampUp); + + promises.push( + new Promise((resolve, reject) => { + const worker = new Worker(workerJsPath, { + workerData: { + start, + end, + batchSize, + }, + }); + + worker.on("message", (message) => { + console.log(`Worker ${i + 1}: ${message}`); + }); + + worker.on("error", (err) => { + console.error(`Worker ${i + 1} error: ${err}`); + reject(err); + }); + + worker.on("exit", (code) => { + if (code !== 0) { + reject(new Error(`Worker ${i + 1} stopped with exit code ${code}`)); + } else { + resolve(); + } + }); + }) + ); + } + + try { + await Promise.all(promises); + } catch (error) { + console.error("Error in worker threads: ", error); + throw error; + } +}; + +// Function to execute ramp-ups +const executeRampUps = async () => { + let activeThreads = 1; + let startDoc = 0; + + for (let i = 0; i < rampUps; i++) { + await spawnWorkers(activeThreads, startDoc, docsPerRampUp); + startDoc += docsPerRampUp; + + if (activeThreads < maxThreads) { + activeThreads++; // Increase the number of threads for next ramp-up + } + + if (i < rampUps - 1) { + console.log( + `Ramping up to ${activeThreads} worker(s) in ${ + rampUpDelay / 1000 + } seconds...` + ); + await new Promise((resolve) => setTimeout(resolve, rampUpDelay)); + } + } +}; + +// Run the ramp-ups +executeRampUps() + .then(() => { + const totalEndTime = performance.now(); + const totalDuration = (totalEndTime - totalStartTime) / 1000; // Convert to seconds + console.log( + `Successfully written ${totalDocs} documents to the collection in ${totalDuration.toFixed( + 2 + )} seconds.` + ); + }) + .catch((error) => { + console.error("Error in worker threads: ", error); + }); diff --git a/firestore-bigquery-export/functions/stress_test/worker.js b/firestore-bigquery-export/functions/stress_test/worker.js new file mode 100644 index 000000000..baea4c3e1 --- /dev/null +++ b/firestore-bigquery-export/functions/stress_test/worker.js @@ -0,0 +1,59 @@ +const { parentPort, workerData } = require("worker_threads"); +const admin = require("firebase-admin"); +const { v4: uuidv4 } = require("uuid"); +const { performance } = require("perf_hooks"); + +// Initialize Firebase Admin SDK +admin.initializeApp({ + projectId: "vertex-testing-1efc3", +}); + +// Get a reference to the Firestore service +const db = admin.firestore(); +const collectionName = "posts_2"; + +// Generate a random document +const generateRandomDocument = () => { + return { + id: uuidv4(), + name: `Name_${Math.random().toString(36).substring(7)}`, + age: Math.floor(Math.random() * 60) + 18, // Random age between 18 and 78 + email: `user_${Math.random().toString(36).substring(7)}@example.com`, + isActive: Math.random() > 0.5, // Random boolean value + createdAt: admin.firestore.Timestamp.now(), + }; +}; + +// Write a batch of documents to Firestore +const writeBatch = async (start, end, batchSize) => { + let count = start; + while (count < end) { + const batchStartTime = performance.now(); + + let batch = db.batch(); + for (let i = 0; i < batchSize && count < end; i++) { + let docRef = db.collection(collectionName).doc(); + batch.set(docRef, generateRandomDocument()); + count++; + } + + await batch.commit(); + + const batchEndTime = performance.now(); + const batchDuration = (batchEndTime - batchStartTime) / 1000; // Convert to seconds + parentPort.postMessage( + `Batch of ${batchSize} documents written in ${batchDuration.toFixed( + 2 + )} seconds.` + ); + } +}; + +// Start writing in batches +writeBatch(workerData.start, workerData.end, workerData.batchSize) + .then(() => { + parentPort.postMessage("Completed writing documents."); + }) + .catch((error) => { + parentPort.postMessage(`Error writing documents: ${error}`); + }); From 0bb63c7dadb28f8f019e944950c006e20b289835 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 2 Oct 2024 07:32:29 +0100 Subject: [PATCH 09/13] chore: bump ext version --- firestore-bigquery-export/CHANGELOG.md | 4 ++++ firestore-bigquery-export/README.md | 2 +- firestore-bigquery-export/extension.yaml | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/firestore-bigquery-export/CHANGELOG.md b/firestore-bigquery-export/CHANGELOG.md index 06946ab46..2a420d974 100644 --- a/firestore-bigquery-export/CHANGELOG.md +++ b/firestore-bigquery-export/CHANGELOG.md @@ -1,3 +1,7 @@ +## Version 0.1.56 + +feat - improve sync strategy + ## Version 0.1.55 feat - log failed queued tasks diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index 6e5c6612d..59da6f844 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -126,7 +126,7 @@ To install an extension, your project must be on the [Blaze (pay as you go) plan * Collection path: What is the path of the collection that you would like to export? You may use `{wildcard}` notation to match a subcollection of all documents in a collection (for example: `chatrooms/{chatid}/posts`). Parent Firestore Document IDs from `{wildcards}` can be returned in `path_params` as a JSON formatted string. -* Enable logging failed exports: If enabled, the extension will export what failed to enqueue to the Firebase console, to mitigate data loss. +* Enable logging failed exports: If enabled, the extension will log event exports that failed to enqueue to Cloud Logging, to mitigate data loss. * Enable Wildcard Column field with Parent Firestore Document IDs: If enabled, creates a column containing a JSON object of all wildcard ids from a documents path. diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 0fbaf7561..86fc912d3 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -13,7 +13,7 @@ # limitations under the License. name: firestore-bigquery-export -version: 0.1.55 +version: 0.1.56 specVersion: v1beta displayName: Stream Firestore to BigQuery From 5846c1cbe1d9b9893aaa9378cba9d0f9b7dcf840 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Thu, 17 Oct 2024 16:06:18 +0100 Subject: [PATCH 10/13] fix(firstore-bigquery-export): comment out unused role for now and use logging --- firestore-bigquery-export/extension.yaml | 6 +- .../functions/src/index.ts | 2 +- .../functions/stress_test/main.js | 114 +++++++++++++++--- .../functions/stress_test/worker.js | 46 +++++-- 4 files changed, 139 insertions(+), 29 deletions(-) diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index 86fc912d3..a8a604d3c 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -48,8 +48,8 @@ roles: - role: datastore.user reason: Allows the extension to write updates to the database. - - role: storage.objectAdmin - reason: Allows the extension to create objects in the storage bucket. + # - role: storage.objectAdmin + # reason: Allows the extension to create objects in the storage bucket. resources: - name: fsexportbigquery @@ -207,7 +207,7 @@ params: value: yes - label: No value: no - required: true + default: yes - param: WILDCARD_IDS label: Enable Wildcard Column field with Parent Firestore Document IDs diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index 70e82c2ce..48ba3e690 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -310,7 +310,7 @@ async function attemptToEnqueue( await events.recordErrorEvent(enqueueErr as Error); // Log the error if it has not been logged already. - if (!enqueueErr.logged) { + if (!enqueueErr.logged && config.logFailedExportData) { logs.error( true, "Failed to enqueue task to syncBigQuery", diff --git a/firestore-bigquery-export/functions/stress_test/main.js b/firestore-bigquery-export/functions/stress_test/main.js index dc1289ac8..cdecdff8e 100644 --- a/firestore-bigquery-export/functions/stress_test/main.js +++ b/firestore-bigquery-export/functions/stress_test/main.js @@ -1,29 +1,57 @@ const { Worker } = require("worker_threads"); const { performance } = require("perf_hooks"); const path = require("path"); +const admin = require("firebase-admin"); -const totalDocs = 10000000; // Total number of documents to write +// Initialize Firebase Admin SDK +admin.initializeApp({ + projectId: "vertex-testing-1efc3", +}); + +// Get a reference to the Firestore service +const db = admin.firestore(); + +const totalDocs = 1000000; // Total number of documents to write const maxThreads = 20; // Maximum number of worker threads const batchSize = 500; // Documents per batch -const rampUpDelay = 2000; // 5 seconds delay between ramp-ups -const rampUps = 20; // Number of ramp-ups (planned) - +const targetRate = 500; // Target docs per second +const rampUpDelay = 1000; // Delay between ramp-ups +const rampUps = 5; // Number of ramp-ups const docsPerRampUp = Math.ceil(totalDocs / rampUps); // Documents per ramp-up +// Calculate the delay needed to meet the target rate (in milliseconds) +const delayBetweenBatches = Math.max(1000 / (targetRate / batchSize), 0); // Delay between batches in ms + +// Hardcoded collection paths with the form: A/{aid}/B/{bid}/C/{cid}/D/{did}/E/{eid}/F/{fid}/G +const collectionPaths = [ + "A/aid1/B/bid1/C/cid1/D/did1/E/eid1/F/fid1/G", + "A/aid2/B/bid2/C/cid2/D/did2/E/eid2/F/fid2/G", + "A/aid3/B/bid3/C/cid3/D/did3/E/eid3/F/fid3/G", + "A/aid4/B/bid4/C/cid4/D/did4/E/eid4/F/fid4/G", + "A/aid5/B/bid5/C/cid5/D/did5/E/eid5/F/fid5/G", +]; + // Start measuring total execution time const totalStartTime = performance.now(); const workerJsPath = path.resolve(__dirname, "worker.js"); // Function to spawn worker threads for a specific ramp-up -const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => { - console.log(`Spawning ${activeThreads} worker(s)...`); +const spawnWorkers = async ( + activeThreads, + startDoc, + docsPerRampUp, + collectionPath +) => { + console.log( + `Spawning ${activeThreads} worker(s) for collection ${collectionPath}...` + ); let promises = []; const docsPerThread = Math.ceil(docsPerRampUp / activeThreads); for (let i = 0; i < activeThreads; i++) { const docsForThisThread = Math.min(docsPerThread, docsPerRampUp); - const start = startDoc + i * docsPerThread; + const start = startDoc + i * docsForThisThread; const end = Math.min(start + docsForThisThread, startDoc + docsPerRampUp); promises.push( @@ -33,6 +61,8 @@ const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => { start, end, batchSize, + collectionPath, // Pass the collection path to the worker + delayBetweenBatches, // Pass the delay to the worker }, }); @@ -64,13 +94,44 @@ const spawnWorkers = async (activeThreads, startDoc, docsPerRampUp) => { } }; +// Function to query Firestore for the total document count using count() aggregation +const getCollectionCounts = async () => { + let counts = {}; + + for (const collectionPath of collectionPaths) { + const collectionRef = db.collection(collectionPath); + const snapshot = await collectionRef.count().get(); // Use the count aggregation query + const count = snapshot.data().count; + counts[collectionPath] = count; + console.log(`Collection ${collectionPath} has ${count} documents.`); + } + + return counts; +}; + +// Function to calculate the difference between two count objects +const calculateCountDifference = (beforeCounts, afterCounts) => { + let totalDifference = 0; + + for (const collectionPath in beforeCounts) { + const beforeCount = beforeCounts[collectionPath] || 0; + const afterCount = afterCounts[collectionPath] || 0; + const difference = afterCount - beforeCount; + console.log(`Collection ${collectionPath} difference: ${difference}`); + totalDifference += difference; + } + + return totalDifference; +}; + // Function to execute ramp-ups const executeRampUps = async () => { let activeThreads = 1; let startDoc = 0; for (let i = 0; i < rampUps; i++) { - await spawnWorkers(activeThreads, startDoc, docsPerRampUp); + const collectionPath = collectionPaths[i % collectionPaths.length]; // Rotate through collections + await spawnWorkers(activeThreads, startDoc, docsPerRampUp, collectionPath); startDoc += docsPerRampUp; if (activeThreads < maxThreads) { @@ -88,17 +149,38 @@ const executeRampUps = async () => { } }; -// Run the ramp-ups -executeRampUps() - .then(() => { +// Main execution flow +const main = async () => { + try { + // Count documents before writing + console.log("Counting documents before the operation..."); + const beforeCounts = await getCollectionCounts(); + + // Perform the writing operation + await executeRampUps(); + + // Count documents after writing + console.log("Counting documents after the operation..."); + const afterCounts = await getCollectionCounts(); + + // Calculate and log the difference + const totalDocsWritten = calculateCountDifference( + beforeCounts, + afterCounts + ); + console.log(`Total documents written: ${totalDocsWritten}`); + const totalEndTime = performance.now(); const totalDuration = (totalEndTime - totalStartTime) / 1000; // Convert to seconds console.log( - `Successfully written ${totalDocs} documents to the collection in ${totalDuration.toFixed( + `Successfully written ${totalDocsWritten} documents in ${totalDuration.toFixed( 2 )} seconds.` ); - }) - .catch((error) => { - console.error("Error in worker threads: ", error); - }); + } catch (error) { + console.error("Error during execution: ", error); + } +}; + +// Run the main function +main(); diff --git a/firestore-bigquery-export/functions/stress_test/worker.js b/firestore-bigquery-export/functions/stress_test/worker.js index baea4c3e1..3f53e985b 100644 --- a/firestore-bigquery-export/functions/stress_test/worker.js +++ b/firestore-bigquery-export/functions/stress_test/worker.js @@ -10,10 +10,15 @@ admin.initializeApp({ // Get a reference to the Firestore service const db = admin.firestore(); -const collectionName = "posts_2"; -// Generate a random document +// Generate a large random document closer to 1MB const generateRandomDocument = () => { + // const largeString = "x".repeat(300000); // A string of 300,000 characters (~300 KB) + // const largeArray = new Array(5000).fill().map((_, i) => ({ + // index: i, + // value: `Value_${Math.random().toString(36).substring(7)}`, + // })); + return { id: uuidv4(), name: `Name_${Math.random().toString(36).substring(7)}`, @@ -21,18 +26,32 @@ const generateRandomDocument = () => { email: `user_${Math.random().toString(36).substring(7)}@example.com`, isActive: Math.random() > 0.5, // Random boolean value createdAt: admin.firestore.Timestamp.now(), + // largeString, // Large string field + // largeArray, // Large array field }; }; -// Write a batch of documents to Firestore -const writeBatch = async (start, end, batchSize) => { +// Delay function for rate control +const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + +// Write a batch of documents to a specific collection in Firestore +const writeBatch = async ( + start, + end, + batchSize, + collectionPath, + delayBetweenBatches +) => { let count = start; while (count < end) { const batchStartTime = performance.now(); let batch = db.batch(); - for (let i = 0; i < batchSize && count < end; i++) { - let docRef = db.collection(collectionName).doc(); + const remainingDocs = end - count; + const adjustedBatchSize = Math.min(batchSize, remainingDocs); // Adjust batch size if remaining docs < batchSize + + for (let i = 0; i < adjustedBatchSize && count < end; i++) { + let docRef = db.collection(collectionPath).doc(); batch.set(docRef, generateRandomDocument()); count++; } @@ -42,15 +61,24 @@ const writeBatch = async (start, end, batchSize) => { const batchEndTime = performance.now(); const batchDuration = (batchEndTime - batchStartTime) / 1000; // Convert to seconds parentPort.postMessage( - `Batch of ${batchSize} documents written in ${batchDuration.toFixed( + `Batch of ${adjustedBatchSize} documents written in ${batchDuration.toFixed( 2 - )} seconds.` + )} seconds to ${collectionPath}.` ); + + // Introduce delay between batches to meet target rate + await delay(delayBetweenBatches); } }; // Start writing in batches -writeBatch(workerData.start, workerData.end, workerData.batchSize) +writeBatch( + workerData.start, + workerData.end, + workerData.batchSize, + workerData.collectionPath, + workerData.delayBetweenBatches // Pass the delay for rate control +) .then(() => { parentPort.postMessage("Completed writing documents."); }) From b19b46370c910fbc369fab12f5956ae859542ae9 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 6 Nov 2024 10:00:28 +0000 Subject: [PATCH 11/13] fix(firestore-bigquery-export): implemented RC changes including logging keys --- _emulator/.firebaserc | 8 ++ _emulator/firebase.json | 7 +- firestore-bigquery-export/extension.yaml | 13 -- .../__snapshots__/config.test.ts.snap | 1 - .../functions/__tests__/e2e.test.ts | 6 +- .../functions/__tests__/functions.test.ts | 1 + ...restore-bigquery-change-tracker-1.1.37.tgz | Bin 14193 -> 0 bytes .../functions/package-lock.json | 2 +- .../functions/package.json | 2 +- .../functions/src/config.ts | 1 - .../functions/src/index.ts | 111 +++++++++--------- .../functions/src/logs.ts | 29 +++++ 12 files changed, 99 insertions(+), 82 deletions(-) delete mode 100644 firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz diff --git a/_emulator/.firebaserc b/_emulator/.firebaserc index 5d2802a3c..8019a3d99 100644 --- a/_emulator/.firebaserc +++ b/_emulator/.firebaserc @@ -1,5 +1,13 @@ { "projects": { "default": "demo-test" + }, + "targets": {}, + "etags": { + "dev-extensions-testing": { + "extensionInstances": { + "firestore-bigquery-export": "02acbd8b443b9635716d52d65758a78db1e51140191caecaaf60d932d314a62a" + } + } } } \ No newline at end of file diff --git a/_emulator/firebase.json b/_emulator/firebase.json index 70e56c96d..c085b9180 100644 --- a/_emulator/firebase.json +++ b/_emulator/firebase.json @@ -1,11 +1,6 @@ { "extensions": { - "firestore-send-email": "../firestore-send-email", - "delete-user-data": "../delete-user-data", - "storage-resize-images": "../storage-resize-images", - "firestore-counter": "../firestore-counter", - "firestore-bigquery-export": "../firestore-bigquery-export", - "firestore-send-email-sendgrid": "../firestore-send-email" + "firestore-bigquery-export": "../firestore-bigquery-export" }, "storage": { "rules": "storage.rules" diff --git a/firestore-bigquery-export/extension.yaml b/firestore-bigquery-export/extension.yaml index a8a604d3c..e41c68043 100644 --- a/firestore-bigquery-export/extension.yaml +++ b/firestore-bigquery-export/extension.yaml @@ -196,19 +196,6 @@ params: default: posts required: true - - param: LOG_FAILED_EXPORTS - label: Enable logging failed exports - description: >- - If enabled, the extension will log event exports that failed to enqueue to - Cloud Logging, to mitigate data loss. - type: select - options: - - label: Yes - value: yes - - label: No - value: no - default: yes - - param: WILDCARD_IDS label: Enable Wildcard Column field with Parent Firestore Document IDs description: >- diff --git a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap index b9cb5f541..8d6803f5b 100644 --- a/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap +++ b/firestore-bigquery-export/functions/__tests__/__snapshots__/config.test.ts.snap @@ -21,7 +21,6 @@ Object { "instanceId": undefined, "kmsKeyName": "test", "location": "us-central1", - "logFailedExportData": false, "maxDispatchesPerSecond": 10, "maxEnqueueAttempts": 3, "tableId": "my_table", diff --git a/firestore-bigquery-export/functions/__tests__/e2e.test.ts b/firestore-bigquery-export/functions/__tests__/e2e.test.ts index 48851c722..4773a3e25 100644 --- a/firestore-bigquery-export/functions/__tests__/e2e.test.ts +++ b/firestore-bigquery-export/functions/__tests__/e2e.test.ts @@ -3,8 +3,8 @@ import { BigQuery } from "@google-cloud/bigquery"; /** Set defaults */ const bqProjectId = process.env.BQ_PROJECT_ID || "dev-extensions-testing"; -const datasetId = process.env.DATASET_ID || "firestore_export_e2e"; -const tableId = process.env.TABLE_ID || "posts_raw_changelog"; +const datasetId = process.env.DATASET_ID || "firestore_export"; +const tableId = process.env.TABLE_ID || "bq_e2e_test_raw_changelog"; /** Init resources */ admin.initializeApp({ projectId: bqProjectId }); @@ -34,7 +34,7 @@ describe("e2e", () => { /** Get the latest record from this table */ const [changeLogQuery] = await bq.createQueryJob({ - query: `SELECT * FROM \`${bqProjectId}.${datasetId}.${tableId}\` ORDER BY timestamp DESC \ LIMIT 1`, + query: `SELECT * FROM \`${bqProjectId}.${datasetId}.${tableId}\` ORDER BY timestamp DESC LIMIT 1`, }); const [rows] = await changeLogQuery.getQueryResults(); diff --git a/firestore-bigquery-export/functions/__tests__/functions.test.ts b/firestore-bigquery-export/functions/__tests__/functions.test.ts index 801ef71dc..d9aefb52e 100644 --- a/firestore-bigquery-export/functions/__tests__/functions.test.ts +++ b/firestore-bigquery-export/functions/__tests__/functions.test.ts @@ -37,6 +37,7 @@ jest.mock("firebase-admin/functions", () => ({ })); jest.mock("../src/logs", () => ({ + ...jest.requireActual("../src/logs"), start: jest.fn(() => logger.log("Started execution of extension with configuration", config) ), diff --git a/firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz b/firestore-bigquery-export/functions/firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz deleted file mode 100644 index 44c71a95366d6d134b9781af0fed5332dedf7aee..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14193 zcmV-%H;%|3iwFP!00002|LuMKR~tF9=={uIq4?%x+F8@)BOh;I*(?KQhP?@7fy}%) z*<5JcGVV#c+iA5ST;l)zohp5+)omNd%;qheITL6}rBX>M)t5@+sQ)p#pxrb%@17?Y zpC+`p?)HbY|MA^8j!MeH-`RI}{8?F9dGYKS@qJ%eS$Veha)o@i{`|$t>gtOZE6>Px zE3405tgVvoR=)cKekQDpitkod?qQHCD=S3(`;GjxCX5nR7D>NsZ7v<2|4#d57}G(L z(YHlDrbT%j(9h$%C|QTJPERR&lgE>kwmRg7Tt?}HZjiE=P%>?AE{TfaWkRox=%-0i zP{x-L*&?#AjLuV9DZR{-n5=9r_4ACCWE>S`QYLwpWEZEaWQ!EMe$WcLW?8GfxfHSM ztWO4$tPdZF2YhhDzjjDJ&j!gwhs1e*GNM_zAMfO8?{mUnbmOQTzKx1##Eg%58t+DB zWPF*n$;}cWgmj;hV_Hte1h)$#PrLY;kKl%kXc@&(8IdX3;{9)mW>3iuv>^R-!b(~s z*@fMfW4Xgi$rky?@!>%TbCg{qgX@47hq`rp(`bx6)wY#L8LeN1gM_9rBVWFd_a6kT zWIzHNjYnHs7VI|GgP($ylOn@a;EwPrN#lN0#ELio_t7IU0$I8rGX(FMfVp~+vq_Se z^z(Ew%B=1@lRX#MJdICr0tHNpO)b#w>X-|zxq$77w@S81Mz5Tm5WY434CHFYX|WbT z!hVU;Bv!S_7KyH+#O@(OKP`QIZdE<-o5Ylhh^=7OWrY9uH}&8f<*~}uZQxuZ>dSd!{k-SKY#f`TCGi|#V}&8=|$AP{-V#wVl+BbxY z#Ty)EMd&8eO$&sC9$HZJA9igRv%oXfBM$ssg}>)X7PLC8c6-yueBT!NC}A`#D9h7J zCAJ|q-n2h-i21glC!dWvmDSC#Q?O18v)HRXh1#Py03F69qsL9;#$%q@)zOU#;EZF9*_ zR|5;)4__tq(^(NiZ)|Sx{sULzH6b=_3ChsM=UL+@_*7(3h%zAn18z(9T&w-a zi7Ax#a1@P$;4!ZL$6*$YsEx)3dH=!fEip-9@v3l;q_8Rkz(*P{Emc1@nGtG;COENH z+s;-r=h9#jcGCs99h^SqvRzt$^ixChw7ANS!MFcod|f0L!;-A6to}fL%JYkqlGm?ymI#)=PWm)sG$xZQrUh#L z+vBJ|q_U(#exU^e`^;LnLISAJ5=E$hKPA`sgp8tV)La1rV60@2q?FLleL4o_MEdz? zoF-A$r{pRrhuDvR5(13h1dRN=jFOCiL1TPP@_|{AM5RC)4x6$ZvyE=|>gp8Os1DG@!C1q6Nr^>jX zqwala6#Ze8T~K~PoLrAhe}Y-`CXcJm1W$!PooF3${s}%xWdeT3Ll10OJz$8z8MY*k zZ&afxT3F&_fP#oPx5+4vX-IxdF1Q^E{@fssZ#ewvCOPI}ko>Ouo1WE-RAB3V@(oN7 z#U2=Zlmmm`y#w2*j!Xa+<0`Mm=D6TbJ}v;M$0fh(?rO&+kaa&jSatqQ>_5XOi&MH6 zB`J+hiYQ|dTH9|c8q7HVSDvl9=l{#qXAkH9w`c#6T1-LDBSuX#kNmbA#iJx^S#?>N z7u5Fh+SR+N7Bob4OI8S@@z_;1R9TWGWfG;y|I+R8SSpLEQ~?dzA)_Un+AO#s$wihI z^j*ej5DOwClo{jHN<&Ns(IhRk9iqsunD#qS!yLf}9G?T1Ug?-44=-rBC;w~*mM8B# zD*MCjqKLq;@jgBe@k7uS+FVLY0tn(EypvBdZq`}RrC5JIi|J?m9gut#4)UTG^@joA z?3hh1qk-jwDf1Hd_8Q+iFVVw5?Mwn!b7UjNa=F#ZC*NA)o zdItp^bR6|C48tP7>R8`UDLIX486_#(AT%6N#-a_`y|g^=QNKgbf2dU5+ZNX<;Jh8-SiPt=!51 z1L}dAV~~6`0w!wQpoL{&KOc>fGH9E|k$TJ3^x;VUPdxt!p*UIvko(^c)AKLKY`!aslb-B{y zhPv^uMlIo>uhbLYb|i$1?4_n zQ|+M8`(H6{x*tzN4(o(d?I^kumJHYe+yS--`2Q*-HDBjE?hXwxq7xwR26+wRD=`pe zQP8|8fZ-tb2PDd_37ecV`e_2>MB4L-AOn!Hk*HTmnv$q2>1bTS%oJ3sQ{1%2{q=M4 z`Z`a3#dY88WIWX<#AHiG4_Q^tKZ(Fe$?ijmm+37ORl{+S!_L`{RgdBMr?=|kmoH>o z3BW7jb~w zfSI5xXt0mTct1-@w|~RAt%(_U{bvr>{uz&oVve{}1CNS}6I4$)*>MEiYl*+t24$#@ zegR>FY*i2f?fI=$NkH86K)W$QK|7#t>flUt@M25z!8ca~=x9ssyNk`ZNPG#X1N>O;p{A1j?EvpaFGvMAWOqUxc`N>vBi? zyuAuYo`?|be1~uuKVG;5&Ba?=%?b`OV~n3XsqMM4oS!@)HyDt7I>rwb;_A9>GWF1M zYJW1e#5#5#RO(_fdsVgl03Ny~U+qQz5+_c(1(Q=r_8>BVPS=p*i%XuDO zSLX6Y>>uR0zR4WYD5eG5AUCA7lV>H(%4OJ{tqsx=eojEB3;e28nR+jSl|=XlGj1Ko zk0AhZBci%pa*`7^8KVHwkJ1z+C@F!mj*$IOga=TQly7w(Q#QMQ74pijur_)1$|}W= z4PhfdG;@J@w!RxVgzM{iO!EOzKTg0E+G^LGX*%G{oqcw3W>YV(6Emon(^Ou#;8oP6ks(Ewe1t+aIQr$`#}%5gxW7w!$AXJLc1GGIvkA$Nd>tqw_rjJ}~PHHHL)4s5D*^uUg21!Tcv3l@x4lyjgYRm~JQh;F0_FjP}HgRdqcD z)S6QhxLHusiJ~FYk$xrQE>>$*(j;hM#Z+l>6&PuaK4{@$4%lA7&FpgOVO^*ZYKV3H z+S{gv%G26|zjACE@M^drnm8EVM#={zn0KO&6xAMJAl#ONx0rxAM;B2^6))T-!zdk) zh%nmEvzWv*jjn;8aSk}9g^&`ih7d}Ib|Y~tN)Y`#E0b&zr70MPmc`ZGNmeE))-U84 z5)+}GBQBhmozs3kq9nSE5)@KG56e0QiDxe__N8`>(;ieKj3syxRV?il!a@~Pr0Kr2 zxEU*x*au=GM~n|1rz2 zxH9IiS43CBSn7JO9gI8PVEW?P%T(uI56)$8@Y3^2JnlVcS{ZH6tqo``j@Rr0X)hxx zHhs1uewhR7%ZcK{ZH7(b8qkKIi+=2Y`XpCA?NUK&5)6E-PA-}n2Irs(u#;MR-K&p| zmdZ(T<692?6~^6XVqrV|OkGu}_qk6gFuQCrI;RC}y8^Lg9m3$6Ws*txQpasni22B- zrZ=p^Lq)CD*87*VlFJEUbO7-%GlqAht;`@oLKJYsI46%Oh?P9tR4n|2K{RDDNaVHB z1+2wt+Rp`_I?;=XOA`T=LeQoIyvVNz_1kGpv0u0)Y-T7O5;~??3JO9DL`mN@@_UTx zQ++I=M6SIgNp_ikjJPQwDUtOtl347XM2t;F;@&o+23k=8{$McvgSJw`sI}qPG~ibl zn=-hVQ&F=$Cq>q@HmbmDt=Tmk(}pKm4HplqkU4)1KeKpIioE>V)6TncUe{cd8XbM9hDB0F;GJ z5OFob&`aPlYf0jqvh4ekL@77u@N^!{Y+G^2J1ZscMFXBE>Wk)^v)8VV9XBoC=GE&tyDIDWcFOYY_JCgrvoNUc z8?5B+8m?MZ_sp%bLs^2inIftt;Q<Ht3#B|goW}i(WZ&jq z)GJ%LXo$x5I$j&a@d3TsL4+F3A|2v|%N}`*Sy1U+z9tM5}$`+_F#M~$uSfd z(FY5UQ@b|4#!wx{Nwc}(=~MDfq{!ktT%~MG2KvsglOaSz;tumk8mkOd+Av}Vq55*# zwSy|fcs?7SRxn5|tPC{OO1|87l}f%`cT1ia89Vn~qrV>(i%ACGN&phWQR+*1SY@tO za>6K%&ExDO=Ru}{pJ98}67((7ojJ3MmKT0Lv#=f=ez-8>aH-+&c0!*dyHE|9SFhhA zGuNcgJ@Io-{_F+dTLjbjWbdjBpk(YiImy^-GU8vSWO^Tlq1yY^xM;B`KDarW+yI~% ze9X?UvLHK~`VLX;oFIcDp;??>A7CV&MFq*%W}*La#QvKi03Y_Ev-d5U^^5BYos8%;^bUxc>kZ{DzXXxzAU?m<;_*5qwe#InH;Y$}Mt?j-NApaH4_UfljrfMO)i z(rZ~!h0ZVuEgz65O?B{z+@B1+h_za1v!-cU_wH;e!QlNmSDo2YRau0luC55*YE?xi zKLx6|OTX@dd>(Se7%ha2Sg2`H?Q2ra^NF&HUz)gc+B zF%#iz=`|T7pWW^2sDZ&kdSLtC>k#4UmCO=mrvJjbp5K%0%3M07SrZ&zWEs-0l~tT==Gz zi~jDLe(?=|>&;%b(T$6K8`TSe)%y3r6sGOXX1U6CJykZ5Jq*cqGw6hy1TH zIk&nwe&@VF3-WrIW(cyu$V1jm8Dpjvnl<2gx?MOki$-j&VcjNg%q?@q7?}O(`DXWI1&@g7pChrnv6ul&Fi2-tXe(pSR5V zjLeBz4;!0iC4AHDAgEu+4?ffDxZ2a^dv_G;u0djHy~US)pPQ?Ct;djT+kxCii#{a<4M{16B*x|PiTCPml{yCfouaPn`FG_4qnC_C)vdr z&#v7x>dH4G*R7w1fxb90-$>H@A~=)n)ioyL;-OA@=^$K-gTJ5&b-qCEfV*`E>$b>q zENV(>&{ymKcF4OpJ9oGi8G^#Z``tx<#%>Sh-3ZjYNLWaC`r&FBV|&JLetXcLfyq48 z+`-M{eF1ly-DaYEacVrGWaD4i;~V^~Zl-49HD5({xW3~c$mVBe_5k|=hVWkF_#WQ? zxaq&Lnt}N+s4a8R_a1qHa`vi_vV+@>h{t}T0*NTZt4yC@(O9^8R6`FVA0k|LRJ+znKYw%YB`0em-!WQf)lQEcMfPl@oD>S^{ zy||m&#qE&S5bjKU6B&O&(-`iuf(#J%H44uv(k1RVnl(q24fF22Xu@c*ERu6-WBubB zvxmCTH6uAkt$9bySL2-qZ5|q5W6ahXt*_*c*b!_%Ai4AKWO@q;tZpcNk)}bIaSSI4 z+YVV+7eEN>>Ei%D$~QcrBH?;)hLQ3u{jb)@u?TR!iEvnNnVvQ z-&hT4;5JLFUKm(ptyLuf<7+L}&RV-CF<@c5t81%H1aQoox5B(0&$*6&2L<0}AJ*2{ ze;c%$j{Zd`?mPFb?+w`BVZAgS`nSNpx@M{ezF}w&7QnT&BiiA+yo7$H{R!TBw2?BP z%U@;3j^Q8D+2Nafix5n;e|#gGPa$1qz}j}gOi4wDW)2SKpH(!q@8VXt%I4_D1bA~J zO#;ko2(vlisSU-Y9oDWLk4CogtvWKXWN$wM0N~D3z_GCYHc_=*PUb_*1Np#lM^#+N z(!A%hF~e(>!mo%>Ln2fCst29%4d#w|D@ocD)zi$*DD43_U*t%ovkkIoEo3DWiL91Q{xo8r7>SX;r?gt4woJ9>9b$S?vM#qm(RcI z%1Opb8pW=lQirbhyK+|8{!p%{iRE+~>EwH%kWIfgtzwZ{S<~;uzIB}wHXi1m2C6mA z-LHnVy65fqU>ZhIgJMld<(a^GA9rBAp+l^_FB(TH9$_SqS1@5E(1X@bWZp;BzCd zxsiw+vw}@nnU7R@TEIkJyM3gF137|buU$nFGU5zx16SDT+wG$h3}f0q_~~?SzxR6g z%<2InfA6`6)05xc_Ka@a88Gwx-&?;&z2lR^qnQpb*`9aQ4(m_7SV%XQ7Ey)A4 z7)^PyVR5Rpyc@7J*KVm<;`3Mb8Yi9bq`-VwVj(_h2powL;H1F8vp>iQF#DMCLdEQj zcXaqGFwkv$j%mZZpgyxLe?|h{W7m!Nw}#gW$%>Kz>&9+5Y_LQaU)2+m7Kqd>^UY+E4f5d303bjLhaSCx^in0BX7ZM;I0=|5;`)%O ze3O{_0__$ylvj`!Pzf4SYi;AczAN99dfXg1$Z=k1+w34jL%gcK=_8O2b-It~sr;5y zLKSA9*i^i^q^!?1F_eLEjki7b7^RvcuKSXG(9jJ?Q-1mi+~914%rL_u&mdhfj&Esp zaA!?Pbx?NTXfXdc4syZ=!skUjUV?9;s8>qJu?RY{awB<1Q3+x7em6$Y93#8HP%6vL z5cZ)HJBJ*|qR?kZ_VNP7RcUhQU<|UV%paN=!pjJ8l%&@kGRm`Z2*2b7xuy|hjJYVH zY?4MrQeMwRteJtq%@wK(z{n#d+2g!)dzVRxEwa*A3Goul9Eu~7ILELw+|Iy-tmg-i z;NDeXv7*X1rIP#Ntfl%VFE}rLfcK*bTt(4{7dsx@24DUh zhIo7kZm%P{xsgyRti!t&5r2Voz}jK)X6%g0N50U3B^7T4)H$v?xyZeQ(#`|AuFVC`%pY_orw~5^`y8To!7<-QKYzc6M`Er@? zXLh-r50i}Z!>Cn#6B_bYfyt2D3(>~G0{8XMkvfX%6WI9j&nt4**AI+@I-7e2wi50;A0#`=!fnqe;dKD_8bz9%GHs75A zI0;j=UMC+VmR^F`jjPy$9Mvt85i98^8h23CjzP5I4^#8ejEg*;aBYA%v%-h;F2|DM=kdO*QREB%Ib{mLBni$v!c#Y zfH!APP9?dM@l>562kxOAw8S@&qpNrT0s-+gDs_+}Jygx)%5VyeTNZ~OP<_J=gdoCWmr(Rq@I``c(DL9E6_ewoBH zHab?6JWSLBS2VVOKK-=Lb~fYK)=>aW&8 zspN1cFuB4KZ#AEQyqe0&?(phUnkhK8eLKO{g7)&l zo<>`WyR8^U?pu!YQCB{`EO*CPZ|~|BG5W@MPxBpB4|i~tF{5xEbj@6+Go)s7j{13a z$sh2h7=p*$Fu%A8>K)EGO~;6#!ei)O&YU$Lc*4pQ+7U@m9Lf{S!T)2pd zG2pPw_m2;S0jAv!FL_L&kAXNt`U=BYa45CI2-)PEn}{HD_3FyptK`j3Y!lb=9m`mbk}A*l`b_$ z5eE&8rJb$HGwTeI4e-s^VFY~DcF&wFQGAl0__$OxU%I4~_XP!uesU4n$7mBe+J3p< zYhpm0feXe*68}v5F&%jDn(?Tanp7}3$5FwJs{RSR-J(O`e^wlFAa>wlA?Th-cQ&1n zj8J&uy>T^6J)`vggDbO&vkXrYErWA|g>g<)WW*YYs!5ts#OEg+^vHX#R5s6!r&ebtX0p( z9$C_{tLxta2N_Q1M4`=%LXCia2Tb%?%~#I+)Tr9I?gb4HL2ieHGz#7e8eD5^%{by_ zcwg!;sXk9-W9E5Dsuoijfu3h-Wl)9PBm}-K(+~xKtzT2Mu(cq4nMZ zm6HZ7eCA4PG+x|+2^ppqsRPvDQiaNsVr!tPp~-TiMrK<%l;rUuI#J;9WHIJsE>7;q zvI3?{E){fQ2M6vZoGg*^5Gn)+$`@@O-{^3BpDE9)tnznDAr#&Mi#RX&p5(@-o8;R$ z7qmQ}SGH8qHXW+wD>#**;YGdeIHnE)`p)Fc)izbPG06U|YGK8ayKA87QCGha?T<5` zZPs+pgMZ9RyKcy~x98eJdV#K+&2>(J#DWEgkSun}Rt$Ob$Oet*%nH_Ud8yddo}dm; z8Lr39`_xq@sh?x=7iICeXY`80omrWf6&N($1W)J{b2(avC#p}FFfk2iR89)Wr5@o` z9K1z|>@b{r%-%D%Y2XzxDh$KA`c;`raORm9&5*FnSF;=h?Jz&-1h8xwn1DnC0?EMi z#bdaZY8_oN9H1H|$Zd~SNCGc?w6(><=8TfoDWm;7i&+cqRLW>99U^Ezf3NF);5}uN z(K#(zHoPp#@nmX@8Z!@2X6E8u# zK&uZWdkV!X+eS&6XBSC1iD{)ED!t!J%Qa%+HKs-68!j6Bt38_@fJ&J$K>I$9yu~j| zF?o0g1rfL`8%u8izeE_wTZHh1%ZN~C1OE?Bz1G3I*RQvKeBBc^I)t_)MVrnN%xbqw zPKK0GL+@aifd<#ldBhSX_wiW~U7d-n4Y28!d;_wE5VW8O0{V_mj`k0Jg5MNQ#**?; z%LOE=oWdY%h)2DDzS}?Q?V1psX>ovI>1h(rhEth$2a+nE4XU!X98fw)w?pixNE4cO zzM_Hx>y8!k(6I$S(fn z`?1i4^h&3g%7@(%UAWFG!!Qa8M+VLCnumO6o@WdamJ3gEnIvX5_6|?A@Xpitl<(0y z(|7Ek_TwVlvLHs+@!IpGospl{}p!qvWW z)Psxzsv#3o$Hk{#sf-xm$LTeMxQGx-h>(FKxP5UOy@Tc#49UmOJcohID+kcPIy$68 zQU|HUkJg#zo5`v-$a|&D*%kybDF!F7iv6==s;e6o=dg;a6V}g1qr^;@hZB`B5s?GC zV+XER>O~fkeLk1gJng@Edw6sr>@i0AJx1s_?+<}&G-Jv(6J^&%doE%nBcLCRjYTpU z(}K~MGVGA|5gKEfcAAN(dHmuuv{rgO0rz+1+8*VsT!!S1z-a9!S41cm4!tZAu+I`1~?74;E3yN0XjGb=T{>JZhszn#%aQbucx$wBcPun z$nDyZ`CeGJpQn%qE6KC2WWTH%h;7&I%I2&KsnA&$q5~EfzHvZ*m7yPIIAG3(I^^A3 zp0#iB^=fe8uQojm+_^s~3TyQumtKg{_EGX@4C2;DjDMIlq6)9@!8&8TTF-qleI)}v zF#TII{bm|2%=EuKMt?5^_%<2+orm7Y=rM!UZtr#Pr1uT+e1}~|gHo7{?jn$LDmVi9 zf5I8ExSDdiBd6OXKhtzf3;i4n^kClXQAd%$N0^stEO#Q&iQ%5Q?S*j+!(}znQJesFV)YxnpewpU%8p4lCqxjw|7M6Q!9~H4IzD>7jUQf{XgRE!F%!m zHT!>7U#`EX{&ycCkeeo|tf0NU|q;UGy%S_Jw*Tx)*9B#Y_i!+|W` z+9J!V#%&~%sOqClTyav1Myl-iQ>vog&2>CunwyG^tg+2=RUx z2q;p0@!+HuChU-%n%}Cui&8teU6EfAeyd5n$pY8$`D!)ZEqG)H1_J{NAdy>NgU<)f z@Z767{U%f@=2E5j1}lWSbPx71yc8 z(RG?fv1?W2x|E|~X(*Zta~zx9b$bwv(6dWAZKTMy;SP{o+nw*>a^NWzISx^h6Gqq^-PGTcVdrEesZBN+3Zwr2jrAz zd;3SnC#Sz`zkb&X9^dHFsmWsxf9V|s>WvHX<8L-LcLbjw-*{NSl26zy91i>XC!P6C1^! zjt<|w^#EL8JrhCvhP5o&C1;OWp8W(h4VlS|m5|XK%1raYYz#nag)OUQ6vxM?XDbtg z`y`b-7c@+LC6w3o-qk-VL;3X7S>-aVJvbMh1@09&+dlYBZ8#3G4ZKK!x9V?@Gg)9& z+FUXsw&jI=5Q8)#b)%mPX%GBQf6Uo-kEh!m&bP06!rj*ycaNtWLDf@E+V-(Hg5)7Y zM$zZM&v0<@@^+`Hykqkg$)oR?GuE*(ENr`HsJ=aKoT0Zo!#$@s*H&<^*<2_XwA=O> zf3K8)$tn@(KMwZ~)}j@V~#TthCx~I3Oh?(N%R}%ms)~9zVadP5hB_{(jAaOe&RqoY6-va}D~|q{_oi z@K#5udSz(`8=RAxPjJkzd^8o$V+JrlY^SH-2q#A1U0eBP;Mf`C+rE zkvM3e{^|!q^J{Fx1xjqRhK~0E`9cCO0dJ;otG~+s zA+G!0x&JKwzw%;j&E@~A&(~KT`2V;6{)>_7(b~1u3fAte7Ry~+EqUKk+p1;kr@D$f z!LzK4f7Lh85pLK*|B2hrJIV$UnA_3Y*4UPR?)}e6nWXo~2%7i*^JlQ2~zSfczqRd7<93)!5)of^kzDd8b%_j!WYEFtTAPC}MAp`uw?#fm82u~egZbN(Y=bcZzHl=a zy#AMhZBL2mA~)55B;v72hQW#j#h^@45a*O--^`llAfMz@kiT+nV&7tT%KOU7T#ABv%ukW$^>ubjre{KelHW^0mD z{kJW15!DT4!Qt>B!72-2335Mp%i-K$vx`YSDMLsB?rb?oszw&wbz!SrgV1yaMeV=% zgKP4kbk9Pbj}m;%jr2jrxr|Psf^_lX`-)hPtb6Nhkk)n`e<^6(sv^Xb!g}9@4fNx( zGbd73ZP(?vop#9cl@;?@_@!Bgxc9pexKa@e`0R9~ahsS?82nv6thTA*P5jii(!O=c zvCPNIYva@=*~OBRcT!GfX!XJgK*d`JGKCO_RF^`CmwHnO3D}tJhk9`RSs=#?8PE2< z{dOM@pB#-i;%!}VsimN}qaa1sCFR2x8P(as1GF&Ni_-Kw66vt6hEyG>jAX)`6lGS2 zBok+}T)X@k%L$mucnuFmXHCof@r_&wugDpRCwL=SeVl?tzSWwBXXZ%MLxf_C(?&V8 z^6fu6_mEfr!z^)--HrR7?y3pSu>Y^ER`UNmUtfRrp#Ocd`k%9YD%#x*5hZLVO~4sF zt$$`$J}tgHin8c}2jq625}v}3(WvMTC9rLtlsg~cizbXH{bd?*w+2Wf=Ttq5PLd37 z!GSalDLO&1;s$tn3Es7W_VXf!jGc@YC71R20ICBNQgmWQRKNiyUJH;hxMt8qpO1Y@ z%J&9A0xLj9US@-PE?69T#x0v*^aKjTfz=umKnchVgcRSyvGO;=L6=mr66(SD-juSq zv8Za@VDT&q`^!TbxIc~mi+{u4S)To|_W$+Om6uif|FZ}G?;r958@z?VavP-ecbUpj zT@Pkmm8WJ|iU-SFOJ5e@0(e=iRnT$Hk}@x@TN{FFv@qxW25DU+rMS`7niQ${GMt=; z{e0AwU3PV!q0t~t3fdQ7TOM3EZO(#nC#k%0-NJHcP$Z)w8UU)QNYsSu{;V&*LcqudKH>}ozf-0tJ}&4uDyZ2z zM@N)j&{GhM zpO^b93A?Q5cTZ2DCObXt4wBEHQLrxily+t3GS=l-;eaN`kbO$^OhY^Y5S1rEXaU`y zN}7wmmB@<)OL*OtkXAG)hj{@5`bU(JU$5E!UT5rMLjHOse-%OsEaAWLNfcoO(1=Gv zKpcTwTVtBVH0vkSm}Fbdk(faH|9Kg{5N#{&8cFTg&?kHjyXjs`l9;mYPrZZQ(e_F2^mymz-ka^yU-o;y9*3j2O`afK-kG2@Y9r@jBXe|)lk@YCt;;m*4^y@M0PE?ipLFBt)$l#D0EIOlPQ7{e$X z3gn4+X$1$Oq+0IUdBPJGN-;sCu8", "license": "Apache-2.0", "dependencies": { - "@firebaseextensions/firestore-bigquery-change-tracker": "file:firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz", + "@firebaseextensions/firestore-bigquery-change-tracker": "^1.1.38", "@google-cloud/bigquery": "^7.6.0", "@types/chai": "^4.1.6", "@types/express-serve-static-core": "4.17.30", diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index 15134fc69..9e4b61219 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -34,7 +34,6 @@ export function clustering(clusters: string | undefined) { } export default { - logFailedExportData: process.env.LOG_FAILED_EXPORTS === "yes", bqProjectId: process.env.BIGQUERY_PROJECT_ID, databaseId: "(default)", collectionPath: process.env.COLLECTION_PATH, diff --git a/firestore-bigquery-export/functions/src/index.ts b/firestore-bigquery-export/functions/src/index.ts index 48ba3e690..13e1b577a 100644 --- a/firestore-bigquery-export/functions/src/index.ts +++ b/firestore-bigquery-export/functions/src/index.ts @@ -22,13 +22,12 @@ import { getFunctions } from "firebase-admin/functions"; import { ChangeType, FirestoreBigQueryEventHistoryTracker, - FirestoreEventHistoryTracker, + FirestoreDocumentChangeEvent, } from "@firebaseextensions/firestore-bigquery-change-tracker"; import * as logs from "./logs"; import * as events from "./events"; -import { getChangeType, getDocumentId, resolveWildcardIds } from "./util"; -import { backupToGCS } from "./cloud_storage_backups"; +import { getChangeType, getDocumentId } from "./util"; // Configuration for the Firestore Event History Tracker. const eventTrackerConfig = { @@ -53,7 +52,7 @@ const eventTrackerConfig = { }; // Initialize the Firestore Event History Tracker with the given configuration. -const eventTracker: FirestoreEventHistoryTracker = +const eventTracker: FirestoreBigQueryEventHistoryTracker = new FirestoreBigQueryEventHistoryTracker(eventTrackerConfig); // Initialize logging. @@ -74,6 +73,17 @@ export const syncBigQuery = functions.tasks .taskQueue() .onDispatch( async ({ context, changeType, documentId, data, oldData }, ctx) => { + const documentName = context.resource.name; + const eventId = context.eventId; + const operation = changeType; + + logs.logEventAction( + "Firestore event received by onDispatch trigger", + documentName, + eventId, + operation + ); + try { // Use the shared function to write the event to BigQuery await recordEventToBigQuery( @@ -103,24 +113,13 @@ export const syncBigQuery = functions.tasks logs.complete(); } catch (err) { // Log error and throw it to handle in the calling function. - logs.error(true, "Failed to process syncBigQuery task", err, { - context, - changeType, - documentId, - data, - oldData, - }); - - // if (config.backupToGCS) { - // // Backup to Google Cloud Storage as a last resort. - // await backupToGCS(config.backupBucketName, config.backupDir, { - // changeType, - // documentId, - // serializedData: data, - // serializedOldData: oldData, - // context, - // }); - // } + logs.logFailedEventAction( + "Failed to write event to BigQuery from onDispatch handler", + documentName, + eventId, + operation, + err as Error + ); throw err; } @@ -150,6 +149,17 @@ export const fsexportbigquery = functions.firestore const oldData = isCreated || config.excludeOldData ? undefined : change.before?.data(); + const documentName = context.resource.name; + const eventId = context.eventId; + const operation = changeType; + + logs.logEventAction( + "Firestore event received by onWrite trigger", + documentName, + eventId, + operation + ); + let serializedData: any; let serializedOldData: any; @@ -159,7 +169,13 @@ export const fsexportbigquery = functions.firestore serializedOldData = eventTracker.serializeData(oldData); } catch (err) { // Log serialization error and throw it. - logs.error(true, "Failed to serialize data", err, { data, oldData }); + logs.logFailedEventAction( + "Failed to serialize data", + documentName, + eventId, + operation, + err as Error + ); throw err; } @@ -217,18 +233,20 @@ export const fsexportbigquery = functions.firestore * @param context - The event context from Firestore. */ async function recordEventToBigQuery( - changeType: string, + changeType: ChangeType, documentId: string, serializedData: any, serializedOldData: any, context: functions.EventContext ) { - const event = { + const event: FirestoreDocumentChangeEvent = { timestamp: context.timestamp, // Cloud Firestore commit timestamp operation: changeType, // The type of operation performed documentName: context.resource.name, // The document name documentId, // The document ID - pathParams: config.wildcardIds ? context.params : null, // Path parameters, if any + pathParams: (config.wildcardIds ? context.params : null) as + | FirestoreDocumentChangeEvent["pathParams"] + | null, // Path parameters, if any eventId: context.eventId, // The event ID from Firestore data: serializedData, // Serialized new data oldData: serializedOldData, // Serialized old data @@ -251,7 +269,7 @@ async function recordEventToBigQuery( async function attemptToEnqueue( err: Error, context: functions.EventContext, - changeType: string, + changeType: ChangeType, documentId: string, serializedData: any, serializedOldData: any @@ -295,40 +313,21 @@ async function attemptToEnqueue( } } catch (enqueueErr) { // Prepare the event object for error logging. - const event = { - timestamp: context.timestamp, - operation: changeType, - documentName: context.resource.name, - documentId, - pathParams: config.wildcardIds ? context.params : null, - eventId: context.eventId, - data: serializedData, - oldData: serializedOldData, - }; // Record the error event. await events.recordErrorEvent(enqueueErr as Error); - // Log the error if it has not been logged already. - if (!enqueueErr.logged && config.logFailedExportData) { - logs.error( - true, - "Failed to enqueue task to syncBigQuery", - enqueueErr, - event - ); - } + const documentName = context.resource.name; + const eventId = context.eventId; + const operation = changeType; - // if (config.backupToGCS) { - // // Backup to Google Cloud Storage as a last resort. - // await backupToGCS(config.backupBucketName, config.backupDir, { - // changeType, - // documentId, - // serializedData, - // serializedOldData, - // context, - // }); - // } + logs.logFailedEventAction( + "Failed to enqueue event to Cloud Tasks from onWrite handler", + documentName, + eventId, + operation, + enqueueErr as Error + ); } } diff --git a/firestore-bigquery-export/functions/src/logs.ts b/firestore-bigquery-export/functions/src/logs.ts index d81ee7fb0..5eb112f3e 100644 --- a/firestore-bigquery-export/functions/src/logs.ts +++ b/firestore-bigquery-export/functions/src/logs.ts @@ -15,6 +15,7 @@ */ import { logger } from "firebase-functions"; import config from "./config"; +import { ChangeType } from "@firebaseextensions/firestore-bigquery-change-tracker"; export const arrayFieldInvalid = (fieldName: string) => { logger.warn(`Array field '${fieldName}' does not contain an array, skipping`); @@ -182,3 +183,31 @@ export const timestampMissingValue = (fieldName: string) => { `Missing value for timestamp field: ${fieldName}, using default timestamp instead.` ); }; + +export const logEventAction = ( + action: string, + document_name: string, + event_id: string, + operation: ChangeType +) => { + logger.info(action, { + document_name, + event_id, + operation, + }); +}; + +export const logFailedEventAction = ( + action: string, + document_name: string, + event_id: string, + operation: ChangeType, + error: Error +) => { + logger.error(action, { + document_name, + event_id, + operation, + error, + }); +}; From 4e8c0b0a59749240d9e60f89fc58e1d19840f0f0 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 6 Nov 2024 10:10:46 +0000 Subject: [PATCH 12/13] chore(firestore-bigquery-export): update README and CHANGELOG --- _emulator/firebase.json | 7 ++++++- firestore-bigquery-export/CHANGELOG.md | 2 +- firestore-bigquery-export/README.md | 4 ---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/_emulator/firebase.json b/_emulator/firebase.json index c085b9180..70e56c96d 100644 --- a/_emulator/firebase.json +++ b/_emulator/firebase.json @@ -1,6 +1,11 @@ { "extensions": { - "firestore-bigquery-export": "../firestore-bigquery-export" + "firestore-send-email": "../firestore-send-email", + "delete-user-data": "../delete-user-data", + "storage-resize-images": "../storage-resize-images", + "firestore-counter": "../firestore-counter", + "firestore-bigquery-export": "../firestore-bigquery-export", + "firestore-send-email-sendgrid": "../firestore-send-email" }, "storage": { "rules": "storage.rules" diff --git a/firestore-bigquery-export/CHANGELOG.md b/firestore-bigquery-export/CHANGELOG.md index 2a420d974..08adeb448 100644 --- a/firestore-bigquery-export/CHANGELOG.md +++ b/firestore-bigquery-export/CHANGELOG.md @@ -1,6 +1,6 @@ ## Version 0.1.56 -feat - improve sync strategy +feat - improve sync strategy by immediately writing to BQ ## Version 0.1.55 diff --git a/firestore-bigquery-export/README.md b/firestore-bigquery-export/README.md index 59da6f844..2807b8c1a 100644 --- a/firestore-bigquery-export/README.md +++ b/firestore-bigquery-export/README.md @@ -126,8 +126,6 @@ To install an extension, your project must be on the [Blaze (pay as you go) plan * Collection path: What is the path of the collection that you would like to export? You may use `{wildcard}` notation to match a subcollection of all documents in a collection (for example: `chatrooms/{chatid}/posts`). Parent Firestore Document IDs from `{wildcards}` can be returned in `path_params` as a JSON formatted string. -* Enable logging failed exports: If enabled, the extension will log event exports that failed to enqueue to Cloud Logging, to mitigate data loss. - * Enable Wildcard Column field with Parent Firestore Document IDs: If enabled, creates a column containing a JSON object of all wildcard ids from a documents path. * Dataset ID: What ID would you like to use for your BigQuery dataset? This extension will create the dataset, if it doesn't already exist. @@ -191,5 +189,3 @@ This extension will operate with the following project IAM roles: * bigquery.dataEditor (Reason: Allows the extension to configure and export data into BigQuery.) * datastore.user (Reason: Allows the extension to write updates to the database.) - -* storage.objectAdmin (Reason: Allows the extension to create objects in the storage bucket.) From 64e3e56d515b61ac2b342ba2a106c25bc2e19ca8 Mon Sep 17 00:00:00 2001 From: Jacob Cable Date: Wed, 6 Nov 2024 11:10:22 +0000 Subject: [PATCH 13/13] chore(firestore-bigquery-export): update CHANGELOG --- firestore-bigquery-export/CHANGELOG.md | 8 +- .../functions/package-lock.json | 9 +- .../functions/src/cloud_storage_backups.ts | 96 ------------------- .../functions/src/config.ts | 2 - 4 files changed, 11 insertions(+), 104 deletions(-) delete mode 100644 firestore-bigquery-export/functions/src/cloud_storage_backups.ts diff --git a/firestore-bigquery-export/CHANGELOG.md b/firestore-bigquery-export/CHANGELOG.md index 08adeb448..6f684225b 100644 --- a/firestore-bigquery-export/CHANGELOG.md +++ b/firestore-bigquery-export/CHANGELOG.md @@ -1,6 +1,12 @@ ## Version 0.1.56 -feat - improve sync strategy by immediately writing to BQ +feat - improve sync strategy by immediately writing to BQ, and using cloud tasks only as a last resort + +refactor - improve observability/logging of events + +chore - remove legacy backfill code + +fix - improved usage of the types from change tracker package ## Version 0.1.55 diff --git a/firestore-bigquery-export/functions/package-lock.json b/firestore-bigquery-export/functions/package-lock.json index c4a9ef145..69656d49f 100644 --- a/firestore-bigquery-export/functions/package-lock.json +++ b/firestore-bigquery-export/functions/package-lock.json @@ -7,7 +7,7 @@ "name": "firestore-bigquery-export", "license": "Apache-2.0", "dependencies": { - "@firebaseextensions/firestore-bigquery-change-tracker": "file:firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz", + "@firebaseextensions/firestore-bigquery-change-tracker": "^1.1.38", "@google-cloud/bigquery": "^7.6.0", "@types/chai": "^4.1.6", "@types/express-serve-static-core": "4.17.30", @@ -572,10 +572,9 @@ } }, "node_modules/@firebaseextensions/firestore-bigquery-change-tracker": { - "version": "1.1.37", - "resolved": "file:firebaseextensions-firestore-bigquery-change-tracker-1.1.37.tgz", - "integrity": "sha512-CojXoQch6TPZgWOt2Fikb4aVHTETUloVhCx9/S+1c2+0aHBhltvzwFbCxPvkWf4Cr7a/6CA8e771WvR2lVLXEQ==", - "license": "Apache-2.0", + "version": "1.1.38", + "resolved": "https://registry.npmjs.org/@firebaseextensions/firestore-bigquery-change-tracker/-/firestore-bigquery-change-tracker-1.1.38.tgz", + "integrity": "sha512-GPebB/JB3QyTph6/0mo3V9oUsD4C6wM1oTlhlKGOR/Km2fIPv022rvOVorfA4IgWRCRD8uQP1SpkDiKHJ4r5TQ==", "dependencies": { "@google-cloud/bigquery": "^7.6.0", "@google-cloud/resource-manager": "^5.1.0", diff --git a/firestore-bigquery-export/functions/src/cloud_storage_backups.ts b/firestore-bigquery-export/functions/src/cloud_storage_backups.ts deleted file mode 100644 index bb1a11e57..000000000 --- a/firestore-bigquery-export/functions/src/cloud_storage_backups.ts +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2019 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import * as functions from "firebase-functions"; -import { Storage } from "@google-cloud/storage"; -import * as logs from "./logs"; -import * as path from "path"; -import * as fs from "fs"; -import { promisify } from "util"; - -// TODO: we dont need to promisify in node 18+ -const writeFile = promisify(fs.writeFile); - -// Initialize Google Cloud Storage client -const storage = new Storage(); - -/** - * Backs up the event data to Google Cloud Storage as a CSV file. - * - * @param bucketName - The name of the GCS bucket. - * @param dirName - The directory path inside the bucket where the file will be stored. - * @param event - The event data containing changeType, documentId, serializedData, serializedOldData, context. - */ -export async function backupToGCS( - bucketName: string, - dirName: string, - { - changeType, - documentId, - serializedData, - serializedOldData, - context, - }: { - changeType: string; - documentId: string; - serializedData: any; - serializedOldData: any; - context: functions.EventContext; - } -) { - // Define the filename using documentId and timestamp to ensure uniqueness - const fileName = `${dirName}/${documentId}_${context.eventId}.csv`; - - // Create a CSV string from the event data - const csvData = ` -timestamp,event_id,document_name,operation,data,old_data,document_id -"${context.timestamp}","${context.eventId}","${context.resource.name}", -"${changeType}","${JSON.stringify(serializedData)}","${JSON.stringify( - serializedOldData - )}","${documentId}" -`.trim(); - - try { - // Write the CSV data to a temporary local file - const tempFilePath = path.join( - "/tmp", - `${documentId}_${context.eventId}.csv` - ); - await writeFile(tempFilePath, csvData, "utf8"); - - // Upload the file to Google Cloud Storage - await storage.bucket(bucketName).upload(tempFilePath, { - destination: fileName, - contentType: "text/csv", - }); - - // Log the successful backup - functions.logger.info( - `Successfully backed up event for document ${documentId} to ${fileName}` - ); - - // Remove the temporary file after successful upload - fs.unlinkSync(tempFilePath); - } catch (err) { - // Log any errors that occur during the backup process - logs.error( - false, - `Failed to back up event for document ${documentId}`, - err - ); - throw err; // Rethrow the error to be handled by the calling function - } -} diff --git a/firestore-bigquery-export/functions/src/config.ts b/firestore-bigquery-export/functions/src/config.ts index 9e4b61219..3ef09ebfa 100644 --- a/firestore-bigquery-export/functions/src/config.ts +++ b/firestore-bigquery-export/functions/src/config.ts @@ -14,8 +14,6 @@ * limitations under the License. */ -import { backupToGCS } from "./cloud_storage_backups"; - function timePartitioning(type) { if ( type === "HOUR" ||