diff --git a/ENV_VARS.md b/ENV_VARS.md
index 97465552f2..9685efe6c5 100644
--- a/ENV_VARS.md
+++ b/ENV_VARS.md
@@ -92,6 +92,10 @@ applications.
   on which the Express daemon of the Geoprocessing service will listen. If
   running the API on the same host as the Geoprocessing application, you need
   to modify at least one.
+* `CLEANUP_CRON_INTERVAL` (string, optional, default is `0 0-23/6 * * *`):
+  Cron expression that sets how often the cleanup of dangling geo data is
+  triggered. The default value runs the cleanup every 6 hours.
+
 
 ### PostgreSQL service - Geoprocessing database
diff --git a/api/apps/api/test/utils/event-bus.test.utils.spec.ts b/api/apps/api/test/utils/event-bus.test.utils.spec.ts
index 71daf06367..5cc06431a5 100644
--- a/api/apps/api/test/utils/event-bus.test.utils.spec.ts
+++ b/api/apps/api/test/utils/event-bus.test.utils.spec.ts
@@ -1,7 +1,6 @@
 import { FixtureType } from '@marxan/utils/tests/fixture-type';
 import { CqrsModule, EventBus, IEvent } from '@nestjs/cqrs';
 import { Test } from '@nestjs/testing';
-import { delay } from '../../../geoprocessing/test/utils';
 import { EventBusTestUtils } from './event-bus.test.utils';
 
 class FirstEventType implements IEvent {
@@ -78,7 +77,7 @@ const getFixtures = async () => {
       new Promise(async (resolve) => {
         eventBus.publish(new SecondEventType('Wrong event class'));
 
-        await delay(500);
+        await new Promise((resolve) => setTimeout(resolve, 500));
 
         eventBus.publish(new FirstEventType('Expected event class'));
 
@@ -97,13 +96,13 @@ const getFixtures = async () => {
       new Promise(async (resolve) => {
         eventBus.publish(new SecondEventType('Wrong event class'));
 
-        await delay(500);
+        await new Promise((resolve) => setTimeout(resolve, 500));
 
         eventBus.publish(
           new FirstEventType('Expected event class but wrong id'),
         );
 
-        await delay(500);
+        await new Promise((resolve) => setTimeout(resolve, 500));
 
         eventBus.publish(new FirstEventType('Foo'));
 
diff --git a/api/apps/geoprocessing/config/custom-environment-variables.json b/api/apps/geoprocessing/config/custom-environment-variables.json
index 2b200d9007..0ba11f0c6e 100644
--- a/api/apps/geoprocessing/config/custom-environment-variables.json
+++ b/api/apps/geoprocessing/config/custom-environment-variables.json
@@ -56,5 +56,8 @@
         "maxAgeForOrphanUploads": "UPLOADS_PLANNING_AREAS_MAX_AGE"
       }
     }
+  },
+  "cleanupCronJobSettings": {
+    "interval": "CLEANUP_CRON_INTERVAL"
   }
 }
diff --git a/api/apps/geoprocessing/config/default.json b/api/apps/geoprocessing/config/default.json
index 07cc362e70..6b2f6dbf4c 100644
--- a/api/apps/geoprocessing/config/default.json
+++ b/api/apps/geoprocessing/config/default.json
@@ -52,5 +52,8 @@
   },
   "marxan": {
     "bin": "/opt/marxan-geoprocessing/apps/geoprocessing/src/marxan/marxan-linux-4-0-6"
+  },
+  "cleanupCronJobSettings": {
+    "interval": "0 0-23/6 * * *"
   }
 }
diff --git a/api/apps/geoprocessing/src/app.module.ts b/api/apps/geoprocessing/src/app.module.ts
index 3857efd4cc..22a5af7811 100644
--- a/api/apps/geoprocessing/src/app.module.ts
+++ b/api/apps/geoprocessing/src/app.module.ts
@@ -1,5 +1,6 @@
 import { Module } from '@nestjs/common';
 import { TypeOrmModule } from '@nestjs/typeorm';
+import { ScheduleModule } from '@nestjs/schedule';
 import { geoprocessingConnections } from './ormconfig';
 import { AppController } from './app.controller';
 import { AppService } from './app.service';
@@ -21,6 +22,7 @@ import { ImportModule } from '@marxan-geoprocessing/import/import.module';
 import { PingController } from '@marxan-geoprocessing/modules/ping/ping.controller';
 import { LegacyProjectImportModule } from './legacy-project-import/legacy-project-import.module';
 import { UnusedResourcesCleanUpModule } from './modules/unused-resources-cleanup/unused-resources-cleanup.module';
+import { CleanupTasksModule } from './modules/cleanup-tasks/cleanup-tasks.module';
 
 @Module({
   imports: [
@@ -50,6 +52,8 @@ import { UnusedResourcesCleanUpModule } from './modules/unused-resources-cleanup
     ImportModule,
     LegacyProjectImportModule,
     UnusedResourcesCleanUpModule,
+    ScheduleModule.forRoot(),
+    CleanupTasksModule,
   ],
   controllers: [AppController, PingController],
   providers: [AppService],
diff --git a/api/apps/geoprocessing/src/migrations/geoprocessing/1657120155233-CreateProjectGeodataCleanupPreparation.ts b/api/apps/geoprocessing/src/migrations/geoprocessing/1657120155233-CreateProjectGeodataCleanupPreparation.ts
new file mode 100644
index 0000000000..e0dc9fca6a
--- /dev/null
+++ b/api/apps/geoprocessing/src/migrations/geoprocessing/1657120155233-CreateProjectGeodataCleanupPreparation.ts
@@ -0,0 +1,17 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class CreateProjectGeodataCleanupPreparation1657182655233
+  implements MigrationInterface {
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      CREATE TABLE project_geodata_cleanup_preparation (
+        project_id uuid PRIMARY KEY
+      );`);
+  }
+
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      DROP TABLE project_geodata_cleanup_preparation;
+    `);
+  }
+}
diff --git a/api/apps/geoprocessing/src/migrations/geoprocessing/1657120162302-CreateScenarioGeodataCleanupPreparation.ts b/api/apps/geoprocessing/src/migrations/geoprocessing/1657120162302-CreateScenarioGeodataCleanupPreparation.ts
new file mode 100644
index 0000000000..7343e6fe02
--- /dev/null
+++ b/api/apps/geoprocessing/src/migrations/geoprocessing/1657120162302-CreateScenarioGeodataCleanupPreparation.ts
@@ -0,0 +1,17 @@
+import { MigrationInterface, QueryRunner } from 'typeorm';
+
+export class CreateScenarioGeodataCleanupPreparation1657182662302
+  implements MigrationInterface {
+  public async up(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      CREATE TABLE scenario_geodata_cleanup_preparation (
+        scenario_id uuid PRIMARY KEY
+      );`);
+  }
+
+  public async down(queryRunner: QueryRunner): Promise<void> {
+    await queryRunner.query(`
+      DROP TABLE scenario_geodata_cleanup_preparation;
+    `);
+  }
+}
diff --git a/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.module.ts b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.module.ts
new file mode 100644
index 0000000000..1d1c9b35f7
--- /dev/null
+++ b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.module.ts
@@ -0,0 +1,46 @@
+import { BlmPartialResultEntity } from '@marxan-geoprocessing/marxan-sandboxed-runner/adapters-blm/blm-partial-results.geo.entity';
+import { geoprocessingConnections } from '@marxan-geoprocessing/ormconfig';
+import { ProjectsPuEntity } from '@marxan-jobs/planning-unit-geometry';
+import { BlmFinalResultEntity } from '@marxan/blm-calibration';
+import { ScenarioFeaturesData } from '@marxan/features';
+import { GeoFeatureGeometry } from '@marxan/geofeatures';
+import {
+  MarxanExecutionMetadataGeoEntity,
+  OutputScenariosPuDataGeoEntity,
+} from '@marxan/marxan-output';
+import { PlanningArea } from '@marxan/planning-area-repository/planning-area.geo.entity';
+import { ProtectedArea } from '@marxan/protected-areas';
+import {
+  ScenariosPlanningUnitGeoEntity,
+  ScenariosPuPaDataGeo,
+} from '@marxan/scenarios-planning-unit';
+import { Module } from '@nestjs/common';
+import { TypeOrmModule } from '@nestjs/typeorm';
+import { UnusedResourcesCleanUpModule } from '../unused-resources-cleanup/unused-resources-cleanup.module';
+import { CleanupTasksService } from './cleanup-tasks.service';
+
+@Module({
+  imports: [
+    TypeOrmModule.forFeature([], geoprocessingConnections.apiDB),
+    TypeOrmModule.forFeature(
+      [
+        BlmFinalResultEntity,
+        BlmPartialResultEntity,
+        ScenariosPuPaDataGeo,
+        ScenariosPlanningUnitGeoEntity,
+        OutputScenariosPuDataGeoEntity,
+        PlanningArea,
+        ProtectedArea,
+        GeoFeatureGeometry,
+        ProjectsPuEntity,
+        ScenarioFeaturesData,
+        MarxanExecutionMetadataGeoEntity,
+      ],
+      geoprocessingConnections.default,
+    ),
+    UnusedResourcesCleanUpModule,
+  ],
+  providers: [CleanupTasksService],
+  exports: [CleanupTasksService],
+})
+export class CleanupTasksModule {}
diff --git a/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.service.ts b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.service.ts
new file mode 100644
index 0000000000..40e7a2ae57
--- /dev/null
+++ b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.service.ts
@@ -0,0 +1,172 @@
+import { geoprocessingConnections } from '@marxan-geoprocessing/ormconfig';
+import { Injectable, Logger } from '@nestjs/common';
+import { Cron } from '@nestjs/schedule';
+import { InjectEntityManager } from '@nestjs/typeorm';
+import { EntityManager } from 'typeorm';
+import { CleanupTasks } from './cleanup-tasks';
+import { chunk } from 'lodash';
+import { AppConfig } from '@marxan-geoprocessing/utils/config.utils';
+
+const CHUNK_SIZE_FOR_BATCH_DB_OPERATIONS = 1000;
+
+const cronJobInterval: string = AppConfig.get(
+  'cleanupCronJobSettings.interval',
+);
+
+@Injectable()
+export class CleanupTasksService implements CleanupTasks {
+  private readonly logger = new Logger(CleanupTasksService.name);
+
+  constructor(
+    @InjectEntityManager(geoprocessingConnections.apiDB)
+    private readonly apiEntityManager: EntityManager,
+    @InjectEntityManager(geoprocessingConnections.default)
+    private readonly geoEntityManager: EntityManager,
+  ) {}
+
+  async cleanupProjectsDanglingData() {
+    // Find all existing projects in the API DB and return an array of their ids
+    const projectIds = await this.apiEntityManager
+      .createQueryBuilder()
+      .from('projects', 'p')
+      .select(['id'])
+      .getRawMany()
+      .then((result) => result.map((i) => i.id))
+      .catch((error) => {
+        throw new Error(error);
+      });
+
+    // Run the cleanup inside a transaction
+    await this.geoEntityManager.transaction(async (entityManager) => {
+      // Truncate the intermediate table so that no ids from a previous run
+      // are left over before the operation starts
+      await entityManager.query(
+        `TRUNCATE TABLE project_geodata_cleanup_preparation;`,
+      );
+
+      // Insert the ids into the intermediate table in batches for processing
+      for (const projectIdsChunk of chunk(
+        projectIds,
+        CHUNK_SIZE_FOR_BATCH_DB_OPERATIONS,
+      )) {
+        await entityManager.insert(
+          'project_geodata_cleanup_preparation',
+          projectIdsChunk.map((projectId: string) => ({
+            project_id: projectId,
+          })),
+        );
+      }
+
+      // For every related entity, delete the rows whose project_id does not
+      // match any id stored in the intermediate table
+      await entityManager.query(
+        `DELETE FROM planning_areas pa
+        WHERE pa.project_id IS NOT NULL
+        AND pa.project_id NOT IN (
+          SELECT pgcp.project_id FROM project_geodata_cleanup_preparation pgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM wdpa
+        WHERE wdpa.project_id IS NOT NULL
+        AND wdpa.project_id NOT IN (
+          SELECT pgcp.project_id FROM project_geodata_cleanup_preparation pgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM projects_pu ppu
+        WHERE ppu.project_id IS NOT NULL
+        AND ppu.project_id NOT IN (
+          SELECT pgcp.project_id FROM project_geodata_cleanup_preparation pgcp
+        );`,
+      );
+      await this.apiEntityManager.query(
+        `DELETE FROM features_data fd
+        WHERE fd.project_id IS NOT NULL
+        AND fd.project_id NOT IN (
+          SELECT p.id FROM projects p
+        );`,
+      );
+
+      await entityManager.query(
+        `TRUNCATE TABLE project_geodata_cleanup_preparation;`,
+      );
+    });
+  }
+
+  async cleanupScenariosDanglingData() {
+    // Find all existing scenarios in the API DB and return an array of their ids
+    const scenarioIds = await this.apiEntityManager
+      .createQueryBuilder()
+      .from('scenarios', 's')
+      .select(['id'])
+      .getRawMany()
+      .then((result) => result.map((i) => i.id))
+      .catch((error) => {
+        throw new Error(error);
+      });
+
+    await this.geoEntityManager.transaction(async (entityManager) => {
+      await entityManager.query(
+        `TRUNCATE TABLE scenario_geodata_cleanup_preparation;`,
+      );
+
+      for (const scenarioIdsChunk of chunk(
+        scenarioIds,
+        CHUNK_SIZE_FOR_BATCH_DB_OPERATIONS,
+      )) {
+        await entityManager.insert(
+          'scenario_geodata_cleanup_preparation',
+          scenarioIdsChunk.map((scenarioId: string) => ({
+            scenario_id: scenarioId,
+          })),
+        );
+      }
+
+      await entityManager.query(
+        `DELETE FROM scenario_features_data sfd
+        WHERE sfd.scenario_id IS NOT NULL
+        AND sfd.scenario_id NOT IN (
+          SELECT sgcp.scenario_id FROM scenario_geodata_cleanup_preparation sgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM blm_final_results bfr
+        WHERE bfr.scenario_id IS NOT NULL
+        AND bfr.scenario_id NOT IN (
+          SELECT sgcp.scenario_id FROM scenario_geodata_cleanup_preparation sgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM blm_partial_results bpr
+        WHERE bpr.scenario_id IS NOT NULL
+        AND bpr.scenario_id NOT IN (
+          SELECT sgcp.scenario_id FROM scenario_geodata_cleanup_preparation sgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM marxan_execution_metadata mem
+        WHERE mem.scenarioId IS NOT NULL
+        AND mem.scenarioId NOT IN (
+          SELECT sgcp.scenario_id FROM scenario_geodata_cleanup_preparation sgcp
+        );`,
+      );
+      await entityManager.query(
+        `DELETE FROM scenarios_pu_data spd
+        WHERE spd.scenario_id IS NOT NULL
+        AND spd.scenario_id NOT IN (
+          SELECT sgcp.scenario_id FROM scenario_geodata_cleanup_preparation sgcp
+        );`,
+      );
+
+      await entityManager.query(
+        `TRUNCATE TABLE scenario_geodata_cleanup_preparation;`,
+      );
+    });
+  }
+
+  @Cron(cronJobInterval)
+  async handleCron() {
+    this.logger.debug(
+      'Preparing to clean dangling geo data for projects/scenarios',
+    );
+
+    await this.cleanupProjectsDanglingData();
+    await this.cleanupScenariosDanglingData();
+  }
+}
diff --git a/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.ts b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.ts
new file mode 100644
index 0000000000..c388ccfbed
--- /dev/null
+++ b/api/apps/geoprocessing/src/modules/cleanup-tasks/cleanup-tasks.ts
@@ -0,0 +1,3 @@
+export abstract class CleanupTasks {
+  abstract handleCron(): Promise<void>;
+}
diff --git a/api/apps/geoprocessing/src/modules/unused-resources-cleanup/delete-unused-resources/project-unused-resources.ts b/api/apps/geoprocessing/src/modules/unused-resources-cleanup/delete-unused-resources/project-unused-resources.ts
index f4c64cc117..21a78fa0c4 100644
--- a/api/apps/geoprocessing/src/modules/unused-resources-cleanup/delete-unused-resources/project-unused-resources.ts
+++ b/api/apps/geoprocessing/src/modules/unused-resources-cleanup/delete-unused-resources/project-unused-resources.ts
@@ -9,7 +9,7 @@ import { Injectable } from '@nestjs/common';
 import { InjectEntityManager, InjectRepository } from '@nestjs/typeorm';
 import { EntityManager, In, Repository } from 'typeorm';
 
-type ProjectUnusedResourcesData = { projectCustomFeaturesIds: string[] };
+export type ProjectUnusedResourcesData = { projectCustomFeaturesIds: string[] };
 
 @Injectable()
 export class ProjectUnusedResources
diff --git a/api/apps/geoprocessing/src/modules/unused-resources-cleanup/unused-resources-cleanup.module.ts b/api/apps/geoprocessing/src/modules/unused-resources-cleanup/unused-resources-cleanup.module.ts
index ed748f9e03..9e85e2ae17 100644
--- a/api/apps/geoprocessing/src/modules/unused-resources-cleanup/unused-resources-cleanup.module.ts
+++ b/api/apps/geoprocessing/src/modules/unused-resources-cleanup/unused-resources-cleanup.module.ts
@@ -7,5 +7,6 @@ import { UnusedResourcesCleanupWorker } from './unused-resources-cleanup.worker'
 @Module({
   imports: [WorkerModule, UnusedResourcesModule],
   providers: [UnusedResourcesCleanupWorker, UnusedResourcesCleanupProcessor],
+  exports: [UnusedResourcesModule],
 })
 export class UnusedResourcesCleanUpModule {}
diff --git a/api/package.json b/api/package.json
index 6d489dcf3b..891825f8f7 100644
--- a/api/package.json
+++ b/api/package.json
@@ -51,6 +51,7 @@
     "@nestjs/jwt": "^7.2.0",
     "@nestjs/passport": "^7.1.5",
     "@nestjs/platform-express": "7.6.18",
+    "@nestjs/schedule": "^2.0.1",
     "@nestjs/swagger": "^4.8.0",
     "@nestjs/typeorm": "^7.1.5",
     "@pgtyped/cli": "0.11.0",
@@ -114,6 +115,7 @@
     "@types/config": "^0.0.38",
     "@types/cookie-parser": "^1.4.2",
     "@types/cors": "^2.8.9",
+    "@types/cron": "^2.0.0",
     "@types/express": "^4.17.8",
     "@types/faker": "^5.1.5",
     "@types/fs-extra": "9.0.11",
diff --git a/api/yarn.lock b/api/yarn.lock
index 8bbeefd51d..9e90e96c08 100644
--- a/api/yarn.lock
+++ b/api/yarn.lock
@@ -715,6 +715,14 @@
     multer "1.4.2"
     tslib "2.2.0"
 
+"@nestjs/schedule@^2.0.1":
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/@nestjs/schedule/-/schedule-2.0.1.tgz#0f047e605c5c93fbc12be429b4d73dc322e58628"
+  integrity sha512-NqiCk3P7HDMw55kpefNIzAAQEsP+6dDIXUt4/KQANtAZ+opdLzo8rkzI0j8vDqgYeTh+PKq+V6zwSRjR61xPAQ==
+  dependencies:
+    cron "2.0.0"
+    uuid "8.3.2"
+
 "@nestjs/schematics@^7.1.0", "@nestjs/schematics@^7.1.3":
   version "7.2.6"
   resolved "https://registry.npmjs.org/@nestjs/schematics/-/schematics-7.2.6.tgz"
@@ -2153,6 +2161,14 @@
   resolved "https://registry.yarnpkg.com/@types/cors/-/cors-2.8.9.tgz#4bd1fcac72eca8d5bec93e76c7fdcbdc1bc2cd4a"
   integrity sha512-zurD1ibz21BRlAOIKP8yhrxlqKx6L9VCwkB5kMiP6nZAhoF5MvC7qS1qPA7nRcr1GJolfkQC7/EAL4hdYejLtg==
 
+"@types/cron@^2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/@types/cron/-/cron-2.0.0.tgz#4fe75f2720a3b69a1f7b80e656749f4c2c96d727"
+  integrity sha512-xZM08fqvwIXgghtPVkSPKNgC+JoMQ2OHazEvyTKnNf7aWu1aB6/4lBbQFrb03Td2cUGG7ITzMv3mFYnMu6xRaQ==
+  dependencies:
+    "@types/luxon" "*"
+    "@types/node" "*"
+
 "@types/debug@^4.1.4":
   version "4.1.7"
   resolved "https://registry.yarnpkg.com/@types/debug/-/debug-4.1.7.tgz#7cc0ea761509124709b8b2d1090d8f6c17aadb82"
@@ -2322,6 +2338,11 @@
   resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.14.167.tgz#ce7d78553e3c886d4ea643c37ec7edc20f16765e"
  integrity sha512-w7tQPjARrvdeBkX/Rwg95S592JwxqOjmms3zWQ0XZgSyxSLdzWaYH3vErBhdVS/lRBX7F8aBYcYJYTr5TMGOzw==
 
+"@types/luxon@*":
+  version "2.3.2"
+  resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-2.3.2.tgz#8a3f2cdd4858ce698b56cd8597d9243b8e9d3c65"
+  integrity sha512-WOehptuhKIXukSUUkRgGbj2c997Uv/iUgYgII8U7XLJqq9W2oF0kQ6frEznRQbdurioz+L/cdaIm4GutTQfgmA==
+
 "@types/mime@*":
   version "2.0.3"
   resolved "https://registry.yarnpkg.com/@types/mime/-/mime-2.0.3.tgz#c893b73721db73699943bfc3653b1deb7faa4a3a"
@@ -4153,6 +4174,13 @@ cron-parser@^2.7.3:
     is-nan "^1.3.0"
     moment-timezone "^0.5.31"
 
+cron@2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/cron/-/cron-2.0.0.tgz#15c6bf37c1cebf6da1d7a688b9ba1c68338bfe6b"
+  integrity sha512-RPeRunBCFr/WEo7WLp8Jnm45F/ziGJiHVvVQEBSDTSGu6uHW49b2FOP2O14DcXlGJRLhwE7TIoDzHHK4KmlL6g==
+  dependencies:
+    luxon "^1.23.x"
+
 cross-spawn@^6.0.0:
   version "6.0.5"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
@@ -7227,6 +7255,11 @@ lru-cache@^6.0.0:
   dependencies:
     yallist "^4.0.0"
 
+luxon@^1.23.x:
+  version "1.28.0"
+  resolved "https://registry.yarnpkg.com/luxon/-/luxon-1.28.0.tgz#e7f96daad3938c06a62de0fb027115d251251fbf"
+  integrity sha512-TfTiyvZhwBYM/7QdAVDh+7dBTBA29v4ik0Ce9zda3Mnf8on1S5KJI8P2jKFZ8+5C0jhmr0KwJEO/Wdpm0VeWJQ==
+
 macos-release@^2.2.0:
   version "2.4.1"
   resolved "https://registry.yarnpkg.com/macos-release/-/macos-release-2.4.1.tgz#64033d0ec6a5e6375155a74b1a1eba8e509820ac"