From 4f00f487bafe700726aff9bcb4e1bc044ea911b5 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 09:10:21 +0100 Subject: [PATCH 01/19] explicitly stop scheduled ari ingest on int and prod --- api/serverless.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/serverless.yml b/api/serverless.yml index 779e4f9cc..46c751cb0 100644 --- a/api/serverless.yml +++ b/api/serverless.yml @@ -105,7 +105,8 @@ custom: automatic: true number: 3 scheduledAriIngestEnabled: - int: true + int: false + prod: false functions: - ${file(./serverless-config-default.yml):functions} - ${file(./serverless-config-deploy.yml):functions} \ No newline at end of file From 8f10bfe98b1b02ef6f1016a3c76691747a0a34eb Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 09:36:07 +0100 Subject: [PATCH 02/19] rename directory to integration to fit convention --- api/README.md | 2 +- api/scripts/fullAriImport.ts | 2 +- api/src/components/{integrations => integration}/service.ts | 6 +++--- api/src/lib/{integrations => integration}/README.md | 0 .../lib/{integrations => integration}/__tests__/ari.test.ts | 2 +- api/src/lib/{integrations => integration}/ariUtils.ts | 0 6 files changed, 6 insertions(+), 6 deletions(-) rename api/src/components/{integrations => integration}/service.ts (90%) rename api/src/lib/{integrations => integration}/README.md (100%) rename api/src/lib/{integrations => integration}/__tests__/ari.test.ts (99%) rename api/src/lib/{integrations => integration}/ariUtils.ts (100%) diff --git a/api/README.md b/api/README.md index 58387a470..fe25144b3 100644 --- a/api/README.md +++ b/api/README.md @@ -139,7 +139,7 @@ A similar process happens when the database is seeded. After publications are in ## Integrations -Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/lib/integrations/README.md). +Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/lib/integration/README.md). --- diff --git a/api/scripts/fullAriImport.ts b/api/scripts/fullAriImport.ts index 1a71d9fe6..46c67c80b 100644 --- a/api/scripts/fullAriImport.ts +++ b/api/scripts/fullAriImport.ts @@ -5,7 +5,7 @@ import { expand } from 'dotenv-expand'; // Important to do this so that environment variables are treated the same as in deployed code. expand(dotenv.config()); -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'lib/integration/ariUtils'; import * as I from 'interface'; const fullAriImport = async (): Promise => { diff --git a/api/src/components/integrations/service.ts b/api/src/components/integration/service.ts similarity index 90% rename from api/src/components/integrations/service.ts rename to api/src/components/integration/service.ts index f43ada5cb..c5c550cad 100644 --- a/api/src/components/integrations/service.ts +++ b/api/src/components/integration/service.ts @@ -1,12 +1,12 @@ import axios from 'axios'; -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'lib/integration/ariUtils'; /** * Incremental ARI ingest. * Paginates through ARI questions from the ARI DB API and handles incoming ARIs. * If it encounters MAX_UNCHANGED_STREAK ARIs in a row not requiring changes, it stops. */ -export const incrementalAriIngest = async (): Promise => { +export const incrementalAriIngest = async (): Promise => { // Count sequential unchanged ARIs so that we can stop when the streak hits MAX_UNCHANGED_STREAK. const MAX_UNCHANGED_STREAK = 5; let unchangedStreak = 0; @@ -53,5 +53,5 @@ export const incrementalAriIngest = async (): Promise => { pageUrl = paginationInfo.links.next; } while (pageUrl && unchangedStreak < MAX_UNCHANGED_STREAK); - console.log(`Update complete. Updated ${writeCount} publication${writeCount !== 1 ? 's' : ''}.`); + return `Update complete. Updated ${writeCount} publication${writeCount !== 1 ? 's' : ''}.`; }; diff --git a/api/src/lib/integrations/README.md b/api/src/lib/integration/README.md similarity index 100% rename from api/src/lib/integrations/README.md rename to api/src/lib/integration/README.md diff --git a/api/src/lib/integrations/__tests__/ari.test.ts b/api/src/lib/integration/__tests__/ari.test.ts similarity index 99% rename from api/src/lib/integrations/__tests__/ari.test.ts rename to api/src/lib/integration/__tests__/ari.test.ts index ccb9366d8..23d390bee 100644 --- a/api/src/lib/integrations/__tests__/ari.test.ts +++ b/api/src/lib/integration/__tests__/ari.test.ts @@ -1,4 +1,4 @@ -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'lib/integration/ariUtils'; import * as I from 'interface'; import * as testUtils from 'lib/testUtils'; diff --git a/api/src/lib/integrations/ariUtils.ts b/api/src/lib/integration/ariUtils.ts similarity index 100% rename from api/src/lib/integrations/ariUtils.ts rename to api/src/lib/integration/ariUtils.ts From e685b799d1a1883a3c23a33a196bc3dd0c0f232e Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 09:37:04 +0100 Subject: [PATCH 03/19] allow incremental ari ingest to be triggered over HTTP or scheduled --- api/.env.example | 5 ++++- api/serverless-config-default.yml | 9 ++++++++- api/src/components/integration/controller.ts | 21 ++++++++++++++++++++ api/src/components/integration/routes.ts | 3 +++ api/tsconfig.json | 3 +++ 5 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 api/src/components/integration/controller.ts create mode 100644 api/src/components/integration/routes.ts diff --git a/api/.env.example b/api/.env.example index 557877723..db3c1e6f4 100644 --- a/api/.env.example +++ b/api/.env.example @@ -33,4 +33,7 @@ MAIL_SERVER=localhost LOCALSTACK_SERVER=http://localhost:4566 QUEUE_URL=http://localhost:4566/000000000000/your-queue-name -SQS_ENDPOINT=http://localhost:4566 \ No newline at end of file +SQS_ENDPOINT=http://localhost:4566 + +LIST_USERS_API_KEY=12345 +TRIGGER_ARI_INGEST_API_KEY=12345 \ No newline at end of file diff --git a/api/serverless-config-default.yml b/api/serverless-config-default.yml index 831328aca..820d30a91 100644 --- a/api/serverless-config-default.yml +++ b/api/serverless-config-default.yml @@ -494,9 +494,16 @@ functions: cors: true # Integrations incrementalAriIngest: - handler: src/components/integrations/service.incrementalAriIngest + handler: src/components/integration/service.incrementalAriIngest timeout: 900 events: - schedule: rate: cron(0 5 ? * TUE *) # Every Tuesday at 5 a.m. enabled: ${self:custom.scheduledAriIngestEnabled.${opt:stage}, false} + triggerIncrementalAriIngest: + handler: src/components/integration/routes.incrementalAriIngest + events: + - http: + path: ${self:custom.versions.v1}/integrations/ari/trigger-incremental + method: POST + cors: true diff --git a/api/src/components/integration/controller.ts b/api/src/components/integration/controller.ts new file mode 100644 index 000000000..7cc5102e0 --- /dev/null +++ b/api/src/components/integration/controller.ts @@ -0,0 +1,21 @@ +import * as I from 'interface'; +import * as integrationService from 'integration/service'; +import * as response from 'lib/response'; + +export const incrementalAriIngest = async (event: I.APIRequest): Promise => { + const apiKey = event.queryStringParameters?.apiKey; + + if (apiKey !== process.env.TRIGGER_ARI_INGEST_API_KEY) { + return response.json(401, { message: "Please provide a valid 'apiKey'." }); + } + + try { + const userList = await integrationService.incrementalAriIngest(); + + return response.json(200, userList); + } catch (error) { + console.log(error); + + return response.json(500, { message: 'Unknown server error.' }); + } +}; diff --git a/api/src/components/integration/routes.ts b/api/src/components/integration/routes.ts new file mode 100644 index 000000000..9eb3000a4 --- /dev/null +++ b/api/src/components/integration/routes.ts @@ -0,0 +1,3 @@ +import * as integrationController from 'integration/controller'; + +export const incrementalAriIngest = integrationController.incrementalAriIngest; diff --git a/api/tsconfig.json b/api/tsconfig.json index 925d585c3..14629cb01 100644 --- a/api/tsconfig.json +++ b/api/tsconfig.json @@ -72,6 +72,9 @@ "image/*": [ "src/components/image/*" ], + "integration/*": [ + "src/components/integration/*" + ], "link/*": [ "src/components/link/*" ], From c862c9a7fc2124575ff945b0827e4ed5a308aa57 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 14:37:05 +0100 Subject: [PATCH 04/19] send email with details at end of incremental ari ingest --- api/src/components/integration/service.ts | 44 +++++++++++-- api/src/lib/email.ts | 57 ++++++++++++++++ api/src/lib/integration/__tests__/ari.test.ts | 39 ++++++++++- api/src/lib/integration/ariUtils.ts | 66 +++++++++++++++---- api/src/lib/interface.ts | 2 + 5 files changed, 189 insertions(+), 19 deletions(-) diff --git a/api/src/components/integration/service.ts b/api/src/components/integration/service.ts index c5c550cad..ee4cfb174 100644 --- a/api/src/components/integration/service.ts +++ b/api/src/components/integration/service.ts @@ -1,5 +1,6 @@ import axios from 'axios'; import * as ariUtils from 'lib/integration/ariUtils'; +import * as email from 'lib/email'; /** * Incremental ARI ingest. @@ -7,6 +8,7 @@ import * as ariUtils from 'lib/integration/ariUtils'; * If it encounters MAX_UNCHANGED_STREAK ARIs in a row not requiring changes, it stops. */ export const incrementalAriIngest = async (): Promise => { + const start = new Date(); // Count sequential unchanged ARIs so that we can stop when the streak hits MAX_UNCHANGED_STREAK. const MAX_UNCHANGED_STREAK = 5; let unchangedStreak = 0; @@ -14,7 +16,12 @@ export const incrementalAriIngest = async (): Promise => { // Pagination loop. let pageUrl = ariUtils.ariEndpoint; let paginationInfo; - let writeCount = 0; + // Keep count of things to report on at the end. + let checkedCount = 0; + let createdCount = 0; + let updatedCount = 0; + const unrecognisedDepartments = new Set(); + const unrecognisedTopics = new Set(); do { // Get page. @@ -25,6 +32,15 @@ export const incrementalAriIngest = async (): Promise => { if (!pageAri.isArchived) { // Create, update, or skip this ARI as appropriate. const handle = await ariUtils.handleIncomingARI(pageAri); + checkedCount++; + + if (handle.unrecognisedDepartment) { + unrecognisedDepartments.add(handle.unrecognisedDepartment); + } + + if (handle.unrecognisedTopics) { + handle.unrecognisedTopics.forEach((topic) => unrecognisedTopics.add(topic)); + } if (!handle.success) { console.log(`Error when handling ARI with question ID ${pageAri.questionId}: ${handle.message}`); @@ -40,9 +56,15 @@ export const incrementalAriIngest = async (): Promise => { unchangedStreak = 0; // Log action taken. console.log(`ARI ${pageAri.questionId} handled successfully with action: ${handle.actionTaken}`); - writeCount++; - // Artificial delay to avoid hitting datacite rate limits with publication creates/updates. - await new Promise((resolve) => setTimeout(resolve, 1000)); + + if (handle.actionTaken === 'create') { + createdCount++; + // Artificial delay to avoid hitting datacite rate limits with publication creates/updates. + await new Promise((resolve) => setTimeout(resolve, 1000)); + } else if (handle.actionTaken === 'update') { + updatedCount++; + await new Promise((resolve) => setTimeout(resolve, 500)); + } } } } @@ -53,5 +75,19 @@ export const incrementalAriIngest = async (): Promise => { pageUrl = paginationInfo.links.next; } while (pageUrl && unchangedStreak < MAX_UNCHANGED_STREAK); + const end = new Date(); + // Get duration in seconds to the nearest 1st decimal place. + const durationSeconds = Math.round((end.getTime() - start.getTime()) / 100) / 10; + await email.incrementalAriIngestReport({ + checkedCount, + durationSeconds, + createdCount, + updatedCount, + unrecognisedDepartments: Array.from(unrecognisedDepartments).sort(), + unrecognisedTopics: Array.from(unrecognisedTopics).sort() + }); + + const writeCount = createdCount + updatedCount; + return `Update complete. Updated ${writeCount} publication${writeCount !== 1 ? 's' : ''}.`; }; diff --git a/api/src/lib/email.ts b/api/src/lib/email.ts index bb4c560c8..1dca0022b 100644 --- a/api/src/lib/email.ts +++ b/api/src/lib/email.ts @@ -920,3 +920,60 @@ export const newAriChildPublication = async (options: { subject }); }; + +export const incrementalAriIngestReport = async (options: { + checkedCount: number; + durationSeconds: number; + createdCount: number; + updatedCount: number; + unrecognisedDepartments: string[]; + unrecognisedTopics: string[]; +}): Promise => { + const cleanDepartments = options.unrecognisedDepartments.map((department) => Helpers.getSafeHTML(department)); + const cleanTopics = options.unrecognisedTopics.map((topic) => Helpers.getSafeHTML(topic)); + const html = ` + + +

Incremental ARI import run completed in ${options.durationSeconds} seconds.

+
    +
  • ARIs checked: ${options.checkedCount}
  • Publications created: ${ + options.createdCount + }
  • Publications updated: ${options.updatedCount}
  • + ${ + cleanDepartments.length + ? '
  • Unrecognised departments:
    • ' + + cleanDepartments.join('
    • ') + + '
  • ' + : '' + } + ${ + cleanTopics.length + ? '
  • Unrecognised topics:
    • ' + cleanTopics.join('
    • ') + '
  • ' + : '' + } +
+ + + `; + const text = ` + Incremental ARI ingest run completed in ${options.durationSeconds} seconds. + ARIs checked: ${options.checkedCount}. + Publications created: ${options.createdCount}. + Publications updated: ${options.updatedCount}. + ${ + options.unrecognisedDepartments.length + ? 'Unrecognised departments: "' + options.unrecognisedDepartments.join('", "') + '".' + : '' + } + ${ + options.unrecognisedTopics.length + ? 'Unrecognised topics: "' + options.unrecognisedTopics.join('", "') + '".' + : '' + }`; + await send({ + html, + text, + to: process.env.SLACK_CHANNEL_EMAIL || '', + subject: 'Incremental ARI ingest report' + }); +}; diff --git a/api/src/lib/integration/__tests__/ari.test.ts b/api/src/lib/integration/__tests__/ari.test.ts index 23d390bee..bf6b01620 100644 --- a/api/src/lib/integration/__tests__/ari.test.ts +++ b/api/src/lib/integration/__tests__/ari.test.ts @@ -120,6 +120,18 @@ describe('ARI Mapping', () => { }); }); + test('Unrecognised topics are reported', async () => { + const mappingAttempt = await ariUtils.mapAriQuestionToPublicationVersion({ + ...sampleARIQuestion, + topics: ['unrecognised topic'] + }); + expect(mappingAttempt).toMatchObject({ + success: true, + message: 'Found unrecognised topic(s).', + unrecognisedTopics: ['unrecognised topic'] + }); + }); + test('Department is matched to existing user', async () => { const mappingAttempt = await ariUtils.mapAriQuestionToPublicationVersion(sampleARIQuestion); expect(mappingAttempt).toMatchObject({ @@ -135,7 +147,8 @@ describe('ARI Mapping', () => { expect(mappingAttempt).toMatchObject({ success: false, mappedData: null, - message: 'User not found for department: unrecognised department.' + message: 'User not found for department: unrecognised department.', + unrecognisedDepartment: 'unrecognised department' }); }); @@ -221,7 +234,7 @@ describe('ARI handling', () => { }); }); - test('ARI with unrecognised department is skipped', async () => { + test('ARI with unrecognised department is skipped and dept name is reported in a field', async () => { const handleARI = await ariUtils.handleIncomingARI({ ...sampleARIQuestion, department: 'Unrecognised Department name' @@ -231,7 +244,8 @@ describe('ARI handling', () => { actionTaken: 'none', success: false, message: - 'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.' + 'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.', + unrecognisedDepartment: 'Unrecognised Department name' }); }); @@ -281,6 +295,25 @@ describe('ARI handling', () => { }); }); + test('Unrecognised topics are reported', async () => { + const handleARI = await ariUtils.handleIncomingARI({ + ...sampleARIQuestion, + topics: [...sampleARIQuestion.topics, 'unrecognised topic'] + }); + expect(handleARI).toMatchObject({ + actionTaken: 'none', + success: true, + publicationVersion: { + topics: [ + { + id: 'test-topic-1a' + } + ] + }, + unrecognisedTopics: ['unrecognised topic'] + }); + }); + test('Keywords update when fieldsOfResearch/tags change', async () => { const handleARI = await ariUtils.handleIncomingARI({ ...sampleARIQuestion, diff --git a/api/src/lib/integration/ariUtils.ts b/api/src/lib/integration/ariUtils.ts index d2278bfd2..a8a971b79 100644 --- a/api/src/lib/integration/ariUtils.ts +++ b/api/src/lib/integration/ariUtils.ts @@ -8,6 +8,8 @@ import * as topicMappingService from 'topicMapping/service'; import * as userMappingService from 'userMapping/service'; import * as userService from 'user/service'; +import { Prisma } from '@prisma/client'; + const parseAriTextField = (value: string): string => { // Sometimes ARI text fields are enclosed in quotes and we don't want to show those in a publication body. const noQuotes = Helpers.stripEnclosingQuotes(value); @@ -22,9 +24,16 @@ export const mapAriQuestionToPublicationVersion = async ( | { success: true; mappedData: I.MappedARIQuestion; - message: null; + message: string | null; + unrecognisedTopics?: string[]; + } + | { + success: false; + mappedData: null; + message: string; + unrecognisedDepartment?: string; + unrecognisedTopics?: string[]; } - | { success: false; mappedData: null; message: string } > => { if (questionData.isArchived) { return { @@ -69,6 +78,26 @@ export const mapAriQuestionToPublicationVersion = async ( // Ensure uniqueness. const keywords = [...new Set(fieldsOfResearch.concat(tags))]; + // Map ARI topics to octopus topics. + const unrecognisedTopics: string[] = []; + type TopicMappingResult = Prisma.PromiseReturnType; + const topicMappings: TopicMappingResult[] = []; + + for await (const ariTopic of ariTopics) { + const mapping = await topicMappingService.get(ariTopic, 'ARI'); + + if (mapping) { + topicMappings.push(mapping); + } else { + unrecognisedTopics.push(ariTopic); + } + } + + // We intentionally don't map some ARI topics, so filter those out. + const octopusTopicIds = topicMappings.flatMap((topicMapping) => + topicMapping && topicMapping.isMapped && topicMapping.topic ? [topicMapping.topic.id] : [] + ); + // Find user by department title. const userMapping = await userMappingService.get(department, 'ARI'); @@ -76,18 +105,13 @@ export const mapAriQuestionToPublicationVersion = async ( return { success: false, mappedData: null, - message: 'User not found for department: ' + department + '.' + message: 'User not found for department: ' + department + '.', + unrecognisedDepartment: department }; } const user = userMapping.user; - // Map ARI topics to octopus topics. - const topicMappings = await Promise.all(ariTopics.map((ariTopic) => topicMappingService.get(ariTopic, 'ARI'))); - // We intentionally don't map some ARI topics, so filter those out. - const octopusTopicIds = topicMappings.flatMap((topicMapping) => - topicMapping && topicMapping.isMapped && topicMapping.topic ? [topicMapping.topic.id] : [] - ); // If no topics listed in ARI, fall back to default topic for the department (user). // Otherwise use the mapped topics, in a Set to ensure uniqueness. const finalTopicIds = octopusTopicIds.length @@ -96,6 +120,8 @@ export const mapAriQuestionToPublicationVersion = async ( ? [user.defaultTopicId] : []; + const unrecognisedTopicsFound = unrecognisedTopics.length; + return { success: true, mappedData: { @@ -107,7 +133,8 @@ export const mapAriQuestionToPublicationVersion = async ( externalId: questionId.toString(), userId: user.id }, - message: null + message: unrecognisedTopicsFound ? 'Found unrecognised topic(s).' : null, + ...(unrecognisedTopicsFound ? { unrecognisedTopics } : {}) }; }; @@ -173,18 +200,27 @@ export const handleIncomingARI = async (question: I.ARIQuestion): Promise Date: Wed, 18 Sep 2024 14:38:52 +0100 Subject: [PATCH 05/19] remove mostly unused helper --- api/src/lib/helpers.ts | 10 ---------- api/src/lib/sqs.ts | 27 ++++++++++++--------------- 2 files changed, 12 insertions(+), 25 deletions(-) diff --git a/api/src/lib/helpers.ts b/api/src/lib/helpers.ts index 6ed494aca..faa648b5e 100644 --- a/api/src/lib/helpers.ts +++ b/api/src/lib/helpers.ts @@ -42,16 +42,6 @@ export const formatAffiliationName = (affiliation: I.MappedOrcidAffiliation): st export const isEmptyContent = (content: string): boolean => (content ? /^(

\s*<\/p>)+$/.test(content) : true); -export const checkEnvVariable = (variableName: keyof NodeJS.ProcessEnv): string => { - const value = process.env[variableName]; - - if (value === undefined) { - throw new Error(`Environment Variable ${variableName} is undefined`); - } - - return value; -}; - const generateOTPCharacter = (OTP: string, characterSet: string): string => { const randomNumberArray = webcrypto.getRandomValues(new Uint32Array(1)); const randomIndex = Math.floor(randomNumberArray[0] * Math.pow(2, -32) * characterSet.length); diff --git a/api/src/lib/sqs.ts b/api/src/lib/sqs.ts index abd688746..b8632bff9 100644 --- a/api/src/lib/sqs.ts +++ b/api/src/lib/sqs.ts @@ -1,24 +1,21 @@ import AWS_SQS, { SQS } from '@aws-sdk/client-sqs'; -import * as Helpers from './helpers'; -const queueUrl = Helpers.checkEnvVariable('QUEUE_URL'); -const endpoint = Helpers.checkEnvVariable('SQS_ENDPOINT'); +const queueUrl = process.env.QUEUE_URL; +const endpoint = process.env.SQS_ENDPOINT; const config = { - region: 'eu-west-1' + region: 'eu-west-1', + ...(process.env.STAGE === 'local' + ? { + credentials: { + accessKeyId: 'dummy', + secretAccessKey: 'dummy' + }, + endpoint + } + : {}) }; -if (process.env.STAGE === 'local') { - // @ts-ignore - config.credentials = { - accessKeyId: 'dummy', - secretAccessKey: 'dummy' - }; - - // @ts-ignore - config.endpoint = endpoint; -} - const sqs = new SQS(config); export const createQueue = async (): Promise => { From 9694200a43819097e046bbb5e775d284ed70993b Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 15:37:56 +0100 Subject: [PATCH 06/19] add new environment variable for ingest report recipients --- api/.env.example | 5 ++++- api/serverless.yml | 2 ++ api/src/lib/email.ts | 2 +- api/src/lib/interface.ts | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/api/.env.example b/api/.env.example index db3c1e6f4..1c8c1d6d1 100644 --- a/api/.env.example +++ b/api/.env.example @@ -36,4 +36,7 @@ QUEUE_URL=http://localhost:4566/000000000000/your-queue-name SQS_ENDPOINT=http://localhost:4566 LIST_USERS_API_KEY=12345 -TRIGGER_ARI_INGEST_API_KEY=12345 \ No newline at end of file +TRIGGER_ARI_INGEST_API_KEY=12345 + +SLACK_CHANNEL_EMAIL=example@mailinator.com +INGEST_REPORT_RECIPIENTS=example.one@mailinator.com,example.two@mailinator.com \ No newline at end of file diff --git a/api/serverless.yml b/api/serverless.yml index 46c751cb0..4bd53376b 100644 --- a/api/serverless.yml +++ b/api/serverless.yml @@ -50,6 +50,8 @@ provider: SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus} MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus} SLACK_CHANNEL_EMAIL: ${ssm:/slack_channel_email_${self:provider.stage}_octopus} + TRIGGER_ARI_INGEST_API_KEY: ${ssm:/trigger_ari_ingest_api_key_${self:provider.stage}_octopus} + INGEST_REPORT_RECIPIENTS: ${ssm:/ingest_report_recipients_${self:provider.stage}_octopus} deploymentBucket: tags: Project: Octopus diff --git a/api/src/lib/email.ts b/api/src/lib/email.ts index 1dca0022b..60aab7915 100644 --- a/api/src/lib/email.ts +++ b/api/src/lib/email.ts @@ -973,7 +973,7 @@ export const incrementalAriIngestReport = async (options: { await send({ html, text, - to: process.env.SLACK_CHANNEL_EMAIL || '', + to: process.env.INGEST_REPORT_RECIPIENTS ? process.env.INGEST_REPORT_RECIPIENTS.split(',') : '', subject: 'Incremental ARI ingest report' }); }; diff --git a/api/src/lib/interface.ts b/api/src/lib/interface.ts index fb9fdf5d5..31214dfd9 100644 --- a/api/src/lib/interface.ts +++ b/api/src/lib/interface.ts @@ -657,7 +657,7 @@ export interface DestroyImagePathParams { } export interface EmailSendOptions { - to: string; + to: string | string[]; subject: string; html: string; text: string; From e250c06bc119abb3c1ac3805b0c896ff10a4581f Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 18 Sep 2024 15:38:40 +0100 Subject: [PATCH 07/19] serverless offline doesnt need ssm parameters defined, it uses env file --- api/serverless-offline.yml | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/api/serverless-offline.yml b/api/serverless-offline.yml index a34cb1e4f..fb2de0002 100644 --- a/api/serverless-offline.yml +++ b/api/serverless-offline.yml @@ -17,31 +17,6 @@ provider: runtime: nodejs18.x region: eu-west-1 stage: 'local' - environment: - STAGE: ${self:provider.stage} - ELASTICSEARCH_USER: ${ssm:/elasticsearch_user_${self:provider.stage}_octopus} - ELASTICSEARCH_PASSWORD: ${ssm:/elasticsearch_password_${self:provider.stage}_octopus} - ELASTICSEARCH_ENDPOINT: ${ssm:/elasticsearch_endpoint_${self:provider.stage}_octopus} - ELASTICSEARCH_PROTOCOL: ${ssm:/elastic_search_protocol_${self:provider.stage}_octopus} - DATABASE_URL: ${ssm:/db_connection_string_${self:provider.stage}_octopus} - JWT_SECRET: ${ssm:/jwt_secret_${self:provider.stage}_octopus} - VALIDATION_CODE_EXPIRY: 10 - VALIDATION_CODE_ATTEMPTS: 3 - ORCID_ID: ${ssm:/orcid_app_id_${self:provider.stage}_octopus} - ORCID_SECRET: ${ssm:/orcid_secret_key_${self:provider.stage}_octopus} - ORCID_AUTH_URL: ${ssm:/orcid_auth_url_${self:provider.stage}_octopus} - ORCID_MEMBER_API_URL: ${ssm:/orcid_member_api_url_${self:provider.stage}_octopus} - DOI_PREFIX: ${ssm:/doi_prefix_${self:provider.stage}_octopus} - DATACITE_ENDPOINT: ${ssm:/datacite_endpoint_${self:provider.stage}_octopus} - DATACITE_USER: ${ssm:/datacite_user_${self:provider.stage}_octopus} - DATACITE_PASSWORD: ${ssm:/datacite_password_${self:provider.stage}_octopus} - EMAIL_SENDER_ADDRESS: ${ssm:/email_sender_address_${self:provider.stage}_octopus} - BASE_URL: ${ssm:/base_url_${self:provider.stage}_octopus} - AUTHORISATION_CALLBACK_URL: ${ssm:/authorization_callback_url_${self:provider.stage}_octopus} - LIST_USERS_API_KEY: ${ssm:/list_users_api_key_${self:provider.stage}_octopus} - QUEUE_URL: ${ssm:/queue_url_${self:provider.stage}_octopus} - SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus} - MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus} deploymentBucket: tags: Project: Octopus From 49e1f49ad8da88f94142571a76a8304b4dfbc46a Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 11:12:00 +0100 Subject: [PATCH 08/19] move ari utils and tests into integration component --- api/scripts/fullAriImport.ts | 2 +- api/src/{lib => components}/integration/README.md | 2 +- api/src/{lib => components}/integration/__tests__/ari.test.ts | 2 +- api/src/{lib => components}/integration/ariUtils.ts | 0 api/src/components/integration/service.ts | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename api/src/{lib => components}/integration/README.md (97%) rename api/src/{lib => components}/integration/__tests__/ari.test.ts (99%) rename api/src/{lib => components}/integration/ariUtils.ts (100%) diff --git a/api/scripts/fullAriImport.ts b/api/scripts/fullAriImport.ts index 46c67c80b..e358b549b 100644 --- a/api/scripts/fullAriImport.ts +++ b/api/scripts/fullAriImport.ts @@ -5,7 +5,7 @@ import { expand } from 'dotenv-expand'; // Important to do this so that environment variables are treated the same as in deployed code. expand(dotenv.config()); -import * as ariUtils from 'lib/integration/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; import * as I from 'interface'; const fullAriImport = async (): Promise => { diff --git a/api/src/lib/integration/README.md b/api/src/components/integration/README.md similarity index 97% rename from api/src/lib/integration/README.md rename to api/src/components/integration/README.md index 1eb42e219..bb9f2dda9 100644 --- a/api/src/lib/integration/README.md +++ b/api/src/components/integration/README.md @@ -32,7 +32,7 @@ On import, ARIs go through a handling flow: #### How ARI data is mapped to octopus data -Various ARI fields are mapped to octpous ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts). +Various ARI fields are mapped to octopus ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts). Of particular importance is how ARIs are matched to an owning organisational user account. The mapping process expects a UserMapping to exist associating the `department` field value from the ARI (where the title matches, case insensitive, and the mapping source is 'ARI') with the user ID of an organisational account. diff --git a/api/src/lib/integration/__tests__/ari.test.ts b/api/src/components/integration/__tests__/ari.test.ts similarity index 99% rename from api/src/lib/integration/__tests__/ari.test.ts rename to api/src/components/integration/__tests__/ari.test.ts index bf6b01620..9b6a97e41 100644 --- a/api/src/lib/integration/__tests__/ari.test.ts +++ b/api/src/components/integration/__tests__/ari.test.ts @@ -1,4 +1,4 @@ -import * as ariUtils from 'lib/integration/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; import * as I from 'interface'; import * as testUtils from 'lib/testUtils'; diff --git a/api/src/lib/integration/ariUtils.ts b/api/src/components/integration/ariUtils.ts similarity index 100% rename from api/src/lib/integration/ariUtils.ts rename to api/src/components/integration/ariUtils.ts diff --git a/api/src/components/integration/service.ts b/api/src/components/integration/service.ts index bdb4feaa8..5c1a98fec 100644 --- a/api/src/components/integration/service.ts +++ b/api/src/components/integration/service.ts @@ -1,5 +1,5 @@ import axios from 'axios'; -import * as ariUtils from 'lib/integration/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; import * as email from 'lib/email'; import * as ingestLogService from 'ingestLog/service'; From 8285f73e7e8e700dcc26e8147ad8655c286388a6 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 13:18:28 +0100 Subject: [PATCH 09/19] use schedule and http trigger in one lambda function --- api/serverless-config-default.yml | 7 ++----- .../integration/__tests__/ari.test.ts | 16 ++++++++++++++++ api/src/components/integration/controller.ts | 18 ++++++++++++------ api/src/lib/interface.ts | 7 ++++++- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/api/serverless-config-default.yml b/api/serverless-config-default.yml index 820d30a91..56a3db039 100644 --- a/api/serverless-config-default.yml +++ b/api/serverless-config-default.yml @@ -494,16 +494,13 @@ functions: cors: true # Integrations incrementalAriIngest: - handler: src/components/integration/service.incrementalAriIngest + handler: src/components/integration/routes.incrementalAriIngest timeout: 900 events: - schedule: rate: cron(0 5 ? * TUE *) # Every Tuesday at 5 a.m. enabled: ${self:custom.scheduledAriIngestEnabled.${opt:stage}, false} - triggerIncrementalAriIngest: - handler: src/components/integration/routes.incrementalAriIngest - events: - http: - path: ${self:custom.versions.v1}/integrations/ari/trigger-incremental + path: ${self:custom.versions.v1}/integrations/ari/incremental method: POST cors: true diff --git a/api/src/components/integration/__tests__/ari.test.ts b/api/src/components/integration/__tests__/ari.test.ts index 9b6a97e41..2390b9dca 100644 --- a/api/src/components/integration/__tests__/ari.test.ts +++ b/api/src/components/integration/__tests__/ari.test.ts @@ -385,3 +385,19 @@ describe('ARI handling', () => { }); }); }); + +describe('ARI import processes', () => { + beforeEach(async () => { + await testUtils.clearDB(); + await testUtils.testSeed(); + }); + + test('Incremental import endpoint requires API key', async () => { + const triggerImport = await testUtils.agent.post('/integrations/ari/incremental'); + + expect(triggerImport.status).toEqual(401); + expect(triggerImport.body).toMatchObject({ + message: "Please provide a valid 'apiKey'." + }); + }); +}); diff --git a/api/src/components/integration/controller.ts b/api/src/components/integration/controller.ts index 7cc5102e0..9adcbc8a1 100644 --- a/api/src/components/integration/controller.ts +++ b/api/src/components/integration/controller.ts @@ -2,17 +2,23 @@ import * as I from 'interface'; import * as integrationService from 'integration/service'; import * as response from 'lib/response'; -export const incrementalAriIngest = async (event: I.APIRequest): Promise => { - const apiKey = event.queryStringParameters?.apiKey; +export const incrementalAriIngest = async ( + event: I.APIRequest | I.EventBridgeEvent<'Scheduled Event', string> +): Promise => { + // This can also be triggered on a schedule, in which case we don't need to check for an API key, + // so only check for the API key if the event is an API request. + if (event && 'headers' in event) { + const apiKey = event.queryStringParameters?.apiKey; - if (apiKey !== process.env.TRIGGER_ARI_INGEST_API_KEY) { - return response.json(401, { message: "Please provide a valid 'apiKey'." }); + if (apiKey !== process.env.TRIGGER_ARI_INGEST_API_KEY) { + return response.json(401, { message: "Please provide a valid 'apiKey'." }); + } } try { - const userList = await integrationService.incrementalAriIngest(); + const ingestResult = await integrationService.incrementalAriIngest(); - return response.json(200, userList); + return response.json(200, ingestResult); } catch (error) { console.log(error); diff --git a/api/src/lib/interface.ts b/api/src/lib/interface.ts index 31214dfd9..d3f770ced 100644 --- a/api/src/lib/interface.ts +++ b/api/src/lib/interface.ts @@ -32,7 +32,12 @@ export { Topic } from '@prisma/client'; export { JSONSchemaType, Schema } from 'ajv'; -export { APIGatewayProxyEventV2, APIGatewayProxyHandlerV2, APIGatewayProxyResultV2 } from 'aws-lambda'; +export { + APIGatewayProxyEventV2, + APIGatewayProxyHandlerV2, + APIGatewayProxyResultV2, + EventBridgeEvent +} from 'aws-lambda'; export type RequestType = 'body' | 'queryStringParameters' | 'pathParameters'; From 1750b750f7d9a3f0604849438becf1616664d1dd Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 13:48:02 +0100 Subject: [PATCH 10/19] reject ari ingest trigger if already running --- api/src/components/ingestLog/service.ts | 20 ++++--------------- .../integration/__tests__/ari.test.ts | 14 +++++++++++++ api/src/components/integration/controller.ts | 10 ++++++++++ api/src/components/integration/service.ts | 6 ++++-- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/api/src/components/ingestLog/service.ts b/api/src/components/ingestLog/service.ts index 84616b259..78e78af5e 100644 --- a/api/src/components/ingestLog/service.ts +++ b/api/src/components/ingestLog/service.ts @@ -13,26 +13,14 @@ export const setEndTime = (id: string, end: Date) => } }); -export const getMostRecentStartTime = async (source: I.PublicationImportSource): Promise => { - const mostRecentStartQuery = await client.prisma.ingestLog.findFirst({ +export const getMostRecentLog = (source: I.PublicationImportSource, includeOpenLogs?: boolean) => + client.prisma.ingestLog.findFirst({ where: { source, - // Successful runs only. - end: { - not: null - } + // By default, get successful (having an end time) logs only. + ...(includeOpenLogs ? {} : { end: { not: null } }) }, orderBy: { start: 'desc' - }, - select: { - start: true } }); - - if (mostRecentStartQuery) { - return mostRecentStartQuery.start; - } else { - return null; - } -}; diff --git a/api/src/components/integration/__tests__/ari.test.ts b/api/src/components/integration/__tests__/ari.test.ts index 2390b9dca..a4e8615a0 100644 --- a/api/src/components/integration/__tests__/ari.test.ts +++ b/api/src/components/integration/__tests__/ari.test.ts @@ -1,5 +1,6 @@ import * as ariUtils from 'integration/ariUtils'; import * as I from 'interface'; +import * as ingestLogService from 'ingestLog/service'; import * as testUtils from 'lib/testUtils'; // This ARI will match a publication in the seed data via the questionId. @@ -400,4 +401,17 @@ describe('ARI import processes', () => { message: "Please provide a valid 'apiKey'." }); }); + + test('Incremental ingest cancels if already in progress', async () => { + // Create an open ended log first. + await ingestLogService.create('ARI'); + const triggerImport = await testUtils.agent + .post('/integrations/ari/incremental') + .query({ apiKey: process.env.TRIGGER_ARI_INGEST_API_KEY }); + + expect(triggerImport.status).toEqual(202); + expect(triggerImport.body).toMatchObject({ + message: 'Cancelling ingest. Either an import is already in progress or the last import failed.' + }); + }); }); diff --git a/api/src/components/integration/controller.ts b/api/src/components/integration/controller.ts index 9adcbc8a1..bf8569a66 100644 --- a/api/src/components/integration/controller.ts +++ b/api/src/components/integration/controller.ts @@ -1,10 +1,20 @@ import * as I from 'interface'; +import * as ingestLogService from 'ingestLog/service'; import * as integrationService from 'integration/service'; import * as response from 'lib/response'; export const incrementalAriIngest = async ( event: I.APIRequest | I.EventBridgeEvent<'Scheduled Event', string> ): Promise => { + // Check if a process is currently running. + const lastLog = await ingestLogService.getMostRecentLog('ARI', true); + + if (lastLog && !lastLog.end) { + return response.json(202, { + message: 'Cancelling ingest. Either an import is already in progress or the last import failed.' + }); + } + // This can also be triggered on a schedule, in which case we don't need to check for an API key, // so only check for the API key if the event is an API request. if (event && 'headers' in event) { diff --git a/api/src/components/integration/service.ts b/api/src/components/integration/service.ts index 5c1a98fec..6a14343b3 100644 --- a/api/src/components/integration/service.ts +++ b/api/src/components/integration/service.ts @@ -15,14 +15,16 @@ export const incrementalAriIngest = async (): Promise => { const start = new Date(); const MAX_UNCHANGED_STREAK = 5; // Get most start time of last successful run to help us know when to stop. - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); + const mostRecentLog = await ingestLogService.getMostRecentLog('ARI'); - if (!mostRecentStart) { + if (!mostRecentLog) { console.log( `Unable to get most recent start time. This job will stop when it encounters ${MAX_UNCHANGED_STREAK} unchanged ARIs, regardless of their dateUpdated value.` ); } + const mostRecentStart = mostRecentLog?.start; + // Log start time. const log = await ingestLogService.create('ARI'); From ee7480435450977621b22d824e23ae00c3192d2f Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 13:56:31 +0100 Subject: [PATCH 11/19] fix integrations readme link --- api/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/README.md b/api/README.md index 4e1163ae9..bfb83519f 100644 --- a/api/README.md +++ b/api/README.md @@ -139,7 +139,7 @@ A similar process happens when the database is seeded. After publications are in ## Integrations -Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/lib/integration/README.md). +Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/components/integration/README.md). --- From 86e96562b8a9a85e7c9383e7a64fe32da33d65cd Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 13:59:54 +0100 Subject: [PATCH 12/19] fix ingest log tests --- api/src/components/ingestLog/__tests__/ingestLog.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/src/components/ingestLog/__tests__/ingestLog.test.ts b/api/src/components/ingestLog/__tests__/ingestLog.test.ts index 399c22cc1..89763a354 100644 --- a/api/src/components/ingestLog/__tests__/ingestLog.test.ts +++ b/api/src/components/ingestLog/__tests__/ingestLog.test.ts @@ -26,14 +26,14 @@ describe('Ingest log functions', () => { }); }); - test('Get most recent start time', async () => { - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); - expect(mostRecentStart).toEqual(new Date('2024-09-11T12:53:00.000Z')); + test('Get most recent log', async () => { + const mostRecentLog = await ingestLogService.getMostRecentLog('ARI'); + expect(mostRecentLog?.start).toEqual(new Date('2024-09-11T12:53:00.000Z')); }); test('Most recent start is null if no run that ended successfully is present', async () => { await client.prisma.ingestLog.update({ where: { id: 'ingest-log-1' }, data: { end: null } }); - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); + const mostRecentStart = await ingestLogService.getMostRecentLog('ARI'); expect(mostRecentStart).toBeNull(); }); }); From 4e54f55b90e8e84244bcdfd3e5c2c9a51df44f80 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 14:24:33 +0100 Subject: [PATCH 13/19] reintroduce local env vars, but don't use unnecessary plugin --- api/package-lock.json | 10 ---------- api/package.json | 1 - api/serverless-offline.yml | 30 +++++++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/api/package-lock.json b/api/package-lock.json index a174dd5e6..bbf1a0e08 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -58,7 +58,6 @@ "prisma": "^5.11.0", "puppeteer": "^22.12.0", "serverless-offline": "^12.0.4", - "serverless-offline-ssm": "^6.2.0", "serverless-plugin-split-stacks": "^1.13.0", "serverless-prune-plugin": "^2.0.2", "serverless-webpack": "^5.13.0", @@ -16130,15 +16129,6 @@ "serverless": "^3.2.0" } }, - "node_modules/serverless-offline-ssm": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/serverless-offline-ssm/-/serverless-offline-ssm-6.2.0.tgz", - "integrity": "sha512-Af7JeLbU4OHAx7ZgAs2OLP2DGKyp/g45rX7SWQ8KiBzn47jme+MqN+GOV6qd5oZ8V9khd3p62+RueHR3ezEZZQ==", - "dev": true, - "engines": { - "node": ">=6.0" - } - }, "node_modules/serverless-offline/node_modules/ansi-regex": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", diff --git a/api/package.json b/api/package.json index 748754862..47a5888d3 100644 --- a/api/package.json +++ b/api/package.json @@ -82,7 +82,6 @@ "prisma": "^5.11.0", "puppeteer": "^22.12.0", "serverless-offline": "^12.0.4", - "serverless-offline-ssm": "^6.2.0", "serverless-plugin-split-stacks": "^1.13.0", "serverless-prune-plugin": "^2.0.2", "serverless-webpack": "^5.13.0", diff --git a/api/serverless-offline.yml b/api/serverless-offline.yml index fb2de0002..7e97ce6bf 100644 --- a/api/serverless-offline.yml +++ b/api/serverless-offline.yml @@ -5,7 +5,6 @@ frameworkVersion: ${file(./serverless-config-default.yml):frameworkVersion} useDotenv: ${file(./serverless-config-default.yml):useDotenv} plugins: - - serverless-offline-ssm - serverless-offline - serverless-webpack - serverless-webpack-prisma @@ -17,6 +16,35 @@ provider: runtime: nodejs18.x region: eu-west-1 stage: 'local' + environment: + STAGE: ${self:provider.stage} + DATABASE_URL: ${env:DATABASE_URL} + ORCID_SECRET: ${env:ORCID_SECRET} + ORCID_ID: ${env:ORCID_ID} + ORCID_AUTH_URL: ${env:ORCID_AUTH_URL} + ORCID_MEMBER_API_URL: ${env:ORCID_MEMBER_API_URL} + JWT_SECRET: ${env:JWT_SECRET} + EMAIL_SENDER_ADDRESS: ${env:EMAIL_SENDER_ADDRESS} + BASE_URL: ${env:BASE_URL} + AUTHORISATION_CALLBACK_URL: ${env:AUTHORISATION_CALLBACK_URL} + ELASTICSEARCH_PROTOCOL: ${env:ELASTICSEARCH_PROTOCOL} + ELASTICSEARCH_USER: ${env:ELASTICSEARCH_USER} + ELASTICSEARCH_PASSWORD: ${env:ELASTICSEARCH_PASSWORD} + ELASTICSEARCH_ENDPOINT: ${env:ELASTICSEARCH_ENDPOINT} + VALIDATION_CODE_EXPIRY: 10 + VALIDATION_CODE_ATTEMPTS: 3 + DOI_PREFIX: ${env:DOI_PREFIX} + DATACITE_ENDPOINT: ${env:DATACITE_ENDPOINT} + DATACITE_USER: ${env:DATACITE_USER} + DATACITE_PASSWORD: ${env:DATACITE_PASSWORD} + MAIL_SERVER: ${env:MAIL_SERVER} + LOCALSTACK_SERVER: ${env:LOCALSTACK_SERVER} + QUEUE_URL: ${env:QUEUE_URL} + SQS_ENDPOINT: ${env:SQS_ENDPOINT} + LIST_USERS_API_KEY: ${env:LIST_USERS_API_KEY} + TRIGGER_ARI_INGEST_API_KEY: ${env:TRIGGER_ARI_INGEST_API_KEY} + SLACK_CHANNEL_EMAIL: ${env:SLACK_CHANNEL_EMAIL} + INGEST_REPORT_RECIPIENTS: ${env:INGEST_REPORT_RECIPIENTS} deploymentBucket: tags: Project: Octopus From 66562740f80feb57a82c1397e7c6b17bf3853c5d Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Tue, 24 Sep 2024 16:36:55 +0100 Subject: [PATCH 14/19] vscode playwright seems to handle paths differently now --- e2e/playwright.config.ts | 8 +++----- e2e/tests/LoggedIn/publish.e2e.spec.ts | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/e2e/playwright.config.ts b/e2e/playwright.config.ts index 96d681ccf..d8785cb45 100644 --- a/e2e/playwright.config.ts +++ b/e2e/playwright.config.ts @@ -1,12 +1,10 @@ import type { PlaywrightTestConfig } from '@playwright/test'; import { devices } from '@playwright/test'; import * as dotenv from 'dotenv'; +import path from 'path'; -/** - * Read environment variables from .env file. - * https://github.com/motdotla/dotenv - */ -dotenv.config(); +// Read from ".env" file. +dotenv.config({ path: path.resolve(__dirname, '.env') }); /** * See https://playwright.dev/docs/test-configuration. diff --git a/e2e/tests/LoggedIn/publish.e2e.spec.ts b/e2e/tests/LoggedIn/publish.e2e.spec.ts index 31d021d3c..1e354d2e7 100644 --- a/e2e/tests/LoggedIn/publish.e2e.spec.ts +++ b/e2e/tests/LoggedIn/publish.e2e.spec.ts @@ -2,6 +2,7 @@ import * as Helpers from '../helpers'; import { expect, test, Page, Browser } from '@playwright/test'; import { PageModel } from '../PageModel'; import cuid2 from '@paralleldrive/cuid2'; +import path from 'path'; const createPublication = async (page: Page, publicationTitle: string, pubType: string) => { await page.goto(`/create`); @@ -1892,6 +1893,7 @@ test.describe('Publication flow + co-authors', () => { test.describe('Publication Flow + File import', () => { let page: Page; + const assetsDirName = path.join(__dirname, '../../assets/'); test.beforeAll(async ({ browser }) => { page = await Helpers.getPageAsUser(browser); @@ -1912,7 +1914,7 @@ test.describe('Publication Flow + File import', () => { ); // import initial playwright file - await Helpers.openFileImportModal(page, 'assets/Playwright.docx'); + await Helpers.openFileImportModal(page, assetsDirName + 'Playwright.docx'); await page.locator(PageModel.publish.insertButton).click(); // Ensure modal has closed and file import @@ -1920,7 +1922,7 @@ test.describe('Publication Flow + File import', () => { await expect(page.locator(PageModel.publish.text.editor)).toContainText('File Import – Playwright'); // replace playwright file - await Helpers.openFileImportModal(page, 'assets/Playwright - Replace.docx'); + await Helpers.openFileImportModal(page, assetsDirName + 'Playwright - Replace.docx'); await page.locator(PageModel.publish.replaceButton).click(); // Ensure modal has closed and file import @@ -1966,12 +1968,12 @@ test.describe('Publication Flow + File import', () => { ]); const validImageFiles = [ - 'assets/apng-image-test.png', - 'assets/avif-image-test.avif', - 'assets/gif-image-test.gif', - 'assets/jpeg-image-test.jpeg', - 'assets/jpg-image-test.jpg', - 'assets/webp-image-test.webp' + assetsDirName + 'apng-image-test.png', + assetsDirName + 'avif-image-test.avif', + assetsDirName + 'gif-image-test.gif', + assetsDirName + 'jpeg-image-test.jpeg', + assetsDirName + 'jpg-image-test.jpg', + assetsDirName + 'webp-image-test.webp' ]; // import correct file formats @@ -1993,7 +1995,7 @@ test.describe('Publication Flow + File import', () => { await expect(page.locator('button[title="Upload image"]')).not.toBeVisible(); for (const image of validImageFiles) { await expect( - page.locator(`div[contenteditable="true"] img[title="${image.split('assets/').pop()}"]`) + page.locator(`div[contenteditable="true"] img[title="${image.split(assetsDirName).pop()}"]`) ).toBeVisible(); } @@ -2003,7 +2005,7 @@ test.describe('Publication Flow + File import', () => { page.waitForEvent('filechooser'), page.click('label[for="file-upload"]') ]); - await fileChooser2.setFiles(['assets/Playwright.docx']); + await fileChooser2.setFiles([assetsDirName + 'Playwright.docx']); await page.click('button[title="Upload image"]'); await expect(page.getByText('Failed to upload "Playwright.docx". The format is not supported.')).toBeVisible(); }); From 30c1a753a2c23063314a4ad1026bbdf14d09e078 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 25 Sep 2024 11:02:12 +0100 Subject: [PATCH 15/19] remove remaining serverless-offline-ssm config --- api/serverless-offline.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/api/serverless-offline.yml b/api/serverless-offline.yml index 7e97ce6bf..15bb3c0e9 100644 --- a/api/serverless-offline.yml +++ b/api/serverless-offline.yml @@ -84,34 +84,5 @@ custom: v1: v1 serverless-offline: useChildProcesses: true - serverless-offline-ssm: - stages: - - local - ssm: - /local_octopus_sls_sg: '1' - /local_octopus_private_subnet_az1: '1' - /local_octopus_private_subnet_az2: '2' - /local_octopus_private_subnet_az3: '3' - /elasticsearch_user_local_octopus: ${env:ELASTICSEARCH_USER} - /elasticsearch_password_local_octopus: ${env:ELASTICSEARCH_PASSWORD} - /elasticsearch_endpoint_local_octopus: ${env:ELASTICSEARCH_ENDPOINT} - /elastic_search_protocol_local_octopus: ${env:ELASTICSEARCH_PROTOCOL} - /db_connection_string_local_octopus: ${env:DATABASE_URL} - /jwt_secret_local_octopus: ${env:JWT_SECRET} - /orcid_secret_key_local_octopus: ${env:ORCID_SECRET} - /orcid_app_id_local_octopus: ${env:ORCID_ID} - /orcid_auth_url_local_octopus: ${env:ORCID_AUTH_URL} - /orcid_member_api_url_local_octopus: ${env:ORCID_MEMBER_API_URL} - /doi_prefix_local_octopus: ${env:DOI_PREFIX} - /datacite_endpoint_local_octopus: ${env:DATACITE_ENDPOINT} - /datacite_user_local_octopus: ${env:DATACITE_USER} - /datacite_password_local_octopus: ${env:DATACITE_PASSWORD} - /email_sender_address_local_octopus: ${env:EMAIL_SENDER_ADDRESS} - /base_url_local_octopus: ${env:BASE_URL} - /authorization_callback_url_local_octopus: ${env:AUTHORISATION_CALLBACK_URL} - /list_users_api_key_local_octopus: ${env:LIST_USERS_API_KEY} - /queue_url_local_octopus: ${env:QUEUE_URL} - /sqs_endpoint_local_octopus: ${env:SQS_ENDPOINT} - /mail_server_local_octopus: ${env:MAIL_SERVER} functions: - ${file(./serverless-config-default.yml):functions} \ No newline at end of file From 522f178abaabe671ead5092c20cc86e9d405b446 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 25 Sep 2024 11:02:23 +0100 Subject: [PATCH 16/19] add new env var to dockerfile --- api/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/api/docker-compose.yml b/api/docker-compose.yml index ebe1b6a92..3f8605228 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -106,6 +106,7 @@ services: - QUEUE_URL=http://localhost:4566/000000000000/science-octopus-pdf-queue-local - SQS_ENDPOINT=http://localstack:4566 - LIST_USERS_API_KEY=123456789 + - TRIGGER_ARI_INGEST_API_KEY=123456789 volumes: opensearch-data1: From bfba3b8909b25c89ee8fc256665377265c87ffd9 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 25 Sep 2024 11:08:03 +0100 Subject: [PATCH 17/19] add ingest recipients email to local env vars --- api/docker-compose.yml | 1 + api/serverless-offline.yml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/api/docker-compose.yml b/api/docker-compose.yml index 3f8605228..12c90d493 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -107,6 +107,7 @@ services: - SQS_ENDPOINT=http://localstack:4566 - LIST_USERS_API_KEY=123456789 - TRIGGER_ARI_INGEST_API_KEY=123456789 + - INGEST_REPORT_RECIPIENTS=example.jisc@mailinator.com volumes: opensearch-data1: diff --git a/api/serverless-offline.yml b/api/serverless-offline.yml index 15bb3c0e9..27766e230 100644 --- a/api/serverless-offline.yml +++ b/api/serverless-offline.yml @@ -43,7 +43,6 @@ provider: SQS_ENDPOINT: ${env:SQS_ENDPOINT} LIST_USERS_API_KEY: ${env:LIST_USERS_API_KEY} TRIGGER_ARI_INGEST_API_KEY: ${env:TRIGGER_ARI_INGEST_API_KEY} - SLACK_CHANNEL_EMAIL: ${env:SLACK_CHANNEL_EMAIL} INGEST_REPORT_RECIPIENTS: ${env:INGEST_REPORT_RECIPIENTS} deploymentBucket: tags: From 4d61426f1f96698595b5ee45ee0f98e1fb6ca0ff Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 25 Sep 2024 13:13:47 +0100 Subject: [PATCH 18/19] Revert "remove mostly unused helper" This reverts commit 8b8b62018737ed74f0db8c5255df823450121a26. --- api/src/lib/helpers.ts | 10 ++++++++++ api/src/lib/sqs.ts | 27 +++++++++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/api/src/lib/helpers.ts b/api/src/lib/helpers.ts index faa648b5e..6ed494aca 100644 --- a/api/src/lib/helpers.ts +++ b/api/src/lib/helpers.ts @@ -42,6 +42,16 @@ export const formatAffiliationName = (affiliation: I.MappedOrcidAffiliation): st export const isEmptyContent = (content: string): boolean => (content ? /^(

\s*<\/p>)+$/.test(content) : true); +export const checkEnvVariable = (variableName: keyof NodeJS.ProcessEnv): string => { + const value = process.env[variableName]; + + if (value === undefined) { + throw new Error(`Environment Variable ${variableName} is undefined`); + } + + return value; +}; + const generateOTPCharacter = (OTP: string, characterSet: string): string => { const randomNumberArray = webcrypto.getRandomValues(new Uint32Array(1)); const randomIndex = Math.floor(randomNumberArray[0] * Math.pow(2, -32) * characterSet.length); diff --git a/api/src/lib/sqs.ts b/api/src/lib/sqs.ts index b8632bff9..abd688746 100644 --- a/api/src/lib/sqs.ts +++ b/api/src/lib/sqs.ts @@ -1,21 +1,24 @@ import AWS_SQS, { SQS } from '@aws-sdk/client-sqs'; +import * as Helpers from './helpers'; -const queueUrl = process.env.QUEUE_URL; -const endpoint = process.env.SQS_ENDPOINT; +const queueUrl = Helpers.checkEnvVariable('QUEUE_URL'); +const endpoint = Helpers.checkEnvVariable('SQS_ENDPOINT'); const config = { - region: 'eu-west-1', - ...(process.env.STAGE === 'local' - ? { - credentials: { - accessKeyId: 'dummy', - secretAccessKey: 'dummy' - }, - endpoint - } - : {}) + region: 'eu-west-1' }; +if (process.env.STAGE === 'local') { + // @ts-ignore + config.credentials = { + accessKeyId: 'dummy', + secretAccessKey: 'dummy' + }; + + // @ts-ignore + config.endpoint = endpoint; +} + const sqs = new SQS(config); export const createQueue = async (): Promise => { From d41b6f7bc47ff23764ef5ea5fe0d23d5193abfe4 Mon Sep 17 00:00:00 2001 From: Finlay Birnie Date: Wed, 25 Sep 2024 13:16:42 +0100 Subject: [PATCH 19/19] rework code not to use ts-ignore --- api/src/lib/sqs.ts | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/api/src/lib/sqs.ts b/api/src/lib/sqs.ts index abd688746..f78b08b97 100644 --- a/api/src/lib/sqs.ts +++ b/api/src/lib/sqs.ts @@ -5,20 +5,18 @@ const queueUrl = Helpers.checkEnvVariable('QUEUE_URL'); const endpoint = Helpers.checkEnvVariable('SQS_ENDPOINT'); const config = { - region: 'eu-west-1' + region: 'eu-west-1', + ...(process.env.STAGE === 'local' + ? { + credentials: { + accessKeyId: 'dummy', + secretAccessKey: 'dummy' + }, + endpoint + } + : {}) }; -if (process.env.STAGE === 'local') { - // @ts-ignore - config.credentials = { - accessKeyId: 'dummy', - secretAccessKey: 'dummy' - }; - - // @ts-ignore - config.endpoint = endpoint; -} - const sqs = new SQS(config); export const createQueue = async (): Promise => {