diff --git a/api/.env.example b/api/.env.example index 557877723..1c8c1d6d1 100644 --- a/api/.env.example +++ b/api/.env.example @@ -33,4 +33,10 @@ MAIL_SERVER=localhost LOCALSTACK_SERVER=http://localhost:4566 QUEUE_URL=http://localhost:4566/000000000000/your-queue-name -SQS_ENDPOINT=http://localhost:4566 \ No newline at end of file +SQS_ENDPOINT=http://localhost:4566 + +LIST_USERS_API_KEY=12345 +TRIGGER_ARI_INGEST_API_KEY=12345 + +SLACK_CHANNEL_EMAIL=example@mailinator.com +INGEST_REPORT_RECIPIENTS=example.one@mailinator.com,example.two@mailinator.com \ No newline at end of file diff --git a/api/README.md b/api/README.md index eca369617..bfb83519f 100644 --- a/api/README.md +++ b/api/README.md @@ -139,7 +139,7 @@ A similar process happens when the database is seeded. After publications are in ## Integrations -Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/lib/integrations/README.md). +Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/components/integration/README.md). --- diff --git a/api/docker-compose.yml b/api/docker-compose.yml index ebe1b6a92..12c90d493 100644 --- a/api/docker-compose.yml +++ b/api/docker-compose.yml @@ -106,6 +106,8 @@ services: - QUEUE_URL=http://localhost:4566/000000000000/science-octopus-pdf-queue-local - SQS_ENDPOINT=http://localstack:4566 - LIST_USERS_API_KEY=123456789 + - TRIGGER_ARI_INGEST_API_KEY=123456789 + - INGEST_REPORT_RECIPIENTS=example.jisc@mailinator.com volumes: opensearch-data1: diff --git a/api/package-lock.json b/api/package-lock.json index a174dd5e6..bbf1a0e08 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -58,7 +58,6 @@ "prisma": "^5.11.0", "puppeteer": "^22.12.0", "serverless-offline": "^12.0.4", - "serverless-offline-ssm": "^6.2.0", "serverless-plugin-split-stacks": "^1.13.0", "serverless-prune-plugin": "^2.0.2", "serverless-webpack": "^5.13.0", @@ -16130,15 +16129,6 @@ "serverless": "^3.2.0" } }, - "node_modules/serverless-offline-ssm": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/serverless-offline-ssm/-/serverless-offline-ssm-6.2.0.tgz", - "integrity": "sha512-Af7JeLbU4OHAx7ZgAs2OLP2DGKyp/g45rX7SWQ8KiBzn47jme+MqN+GOV6qd5oZ8V9khd3p62+RueHR3ezEZZQ==", - "dev": true, - "engines": { - "node": ">=6.0" - } - }, "node_modules/serverless-offline/node_modules/ansi-regex": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", diff --git a/api/package.json b/api/package.json index 748754862..47a5888d3 100644 --- a/api/package.json +++ b/api/package.json @@ -82,7 +82,6 @@ "prisma": "^5.11.0", "puppeteer": "^22.12.0", "serverless-offline": "^12.0.4", - "serverless-offline-ssm": "^6.2.0", "serverless-plugin-split-stacks": "^1.13.0", "serverless-prune-plugin": "^2.0.2", "serverless-webpack": "^5.13.0", diff --git a/api/scripts/fullAriImport.ts b/api/scripts/fullAriImport.ts index 1a71d9fe6..e358b549b 100644 --- a/api/scripts/fullAriImport.ts +++ b/api/scripts/fullAriImport.ts @@ -5,7 +5,7 @@ import { expand } from 'dotenv-expand'; // Important to do this so that environment variables are treated the same as in deployed code. expand(dotenv.config()); -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; import * as I from 'interface'; const fullAriImport = async (): Promise => { diff --git a/api/serverless-config-default.yml b/api/serverless-config-default.yml index 831328aca..56a3db039 100644 --- a/api/serverless-config-default.yml +++ b/api/serverless-config-default.yml @@ -494,9 +494,13 @@ functions: cors: true # Integrations incrementalAriIngest: - handler: src/components/integrations/service.incrementalAriIngest + handler: src/components/integration/routes.incrementalAriIngest timeout: 900 events: - schedule: rate: cron(0 5 ? * TUE *) # Every Tuesday at 5 a.m. enabled: ${self:custom.scheduledAriIngestEnabled.${opt:stage}, false} + - http: + path: ${self:custom.versions.v1}/integrations/ari/incremental + method: POST + cors: true diff --git a/api/serverless-offline.yml b/api/serverless-offline.yml index a34cb1e4f..27766e230 100644 --- a/api/serverless-offline.yml +++ b/api/serverless-offline.yml @@ -5,7 +5,6 @@ frameworkVersion: ${file(./serverless-config-default.yml):frameworkVersion} useDotenv: ${file(./serverless-config-default.yml):useDotenv} plugins: - - serverless-offline-ssm - serverless-offline - serverless-webpack - serverless-webpack-prisma @@ -19,29 +18,32 @@ provider: stage: 'local' environment: STAGE: ${self:provider.stage} - ELASTICSEARCH_USER: ${ssm:/elasticsearch_user_${self:provider.stage}_octopus} - ELASTICSEARCH_PASSWORD: ${ssm:/elasticsearch_password_${self:provider.stage}_octopus} - ELASTICSEARCH_ENDPOINT: ${ssm:/elasticsearch_endpoint_${self:provider.stage}_octopus} - ELASTICSEARCH_PROTOCOL: ${ssm:/elastic_search_protocol_${self:provider.stage}_octopus} - DATABASE_URL: ${ssm:/db_connection_string_${self:provider.stage}_octopus} - JWT_SECRET: ${ssm:/jwt_secret_${self:provider.stage}_octopus} + DATABASE_URL: ${env:DATABASE_URL} + ORCID_SECRET: ${env:ORCID_SECRET} + ORCID_ID: ${env:ORCID_ID} + ORCID_AUTH_URL: ${env:ORCID_AUTH_URL} + ORCID_MEMBER_API_URL: ${env:ORCID_MEMBER_API_URL} + JWT_SECRET: ${env:JWT_SECRET} + EMAIL_SENDER_ADDRESS: ${env:EMAIL_SENDER_ADDRESS} + BASE_URL: ${env:BASE_URL} + AUTHORISATION_CALLBACK_URL: ${env:AUTHORISATION_CALLBACK_URL} + ELASTICSEARCH_PROTOCOL: ${env:ELASTICSEARCH_PROTOCOL} + ELASTICSEARCH_USER: ${env:ELASTICSEARCH_USER} + ELASTICSEARCH_PASSWORD: ${env:ELASTICSEARCH_PASSWORD} + ELASTICSEARCH_ENDPOINT: ${env:ELASTICSEARCH_ENDPOINT} VALIDATION_CODE_EXPIRY: 10 VALIDATION_CODE_ATTEMPTS: 3 - ORCID_ID: ${ssm:/orcid_app_id_${self:provider.stage}_octopus} - ORCID_SECRET: ${ssm:/orcid_secret_key_${self:provider.stage}_octopus} - ORCID_AUTH_URL: ${ssm:/orcid_auth_url_${self:provider.stage}_octopus} - ORCID_MEMBER_API_URL: ${ssm:/orcid_member_api_url_${self:provider.stage}_octopus} - DOI_PREFIX: ${ssm:/doi_prefix_${self:provider.stage}_octopus} - DATACITE_ENDPOINT: ${ssm:/datacite_endpoint_${self:provider.stage}_octopus} - DATACITE_USER: ${ssm:/datacite_user_${self:provider.stage}_octopus} - DATACITE_PASSWORD: ${ssm:/datacite_password_${self:provider.stage}_octopus} - EMAIL_SENDER_ADDRESS: ${ssm:/email_sender_address_${self:provider.stage}_octopus} - BASE_URL: ${ssm:/base_url_${self:provider.stage}_octopus} - AUTHORISATION_CALLBACK_URL: ${ssm:/authorization_callback_url_${self:provider.stage}_octopus} - LIST_USERS_API_KEY: ${ssm:/list_users_api_key_${self:provider.stage}_octopus} - QUEUE_URL: ${ssm:/queue_url_${self:provider.stage}_octopus} - SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus} - MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus} + DOI_PREFIX: ${env:DOI_PREFIX} + DATACITE_ENDPOINT: ${env:DATACITE_ENDPOINT} + DATACITE_USER: ${env:DATACITE_USER} + DATACITE_PASSWORD: ${env:DATACITE_PASSWORD} + MAIL_SERVER: ${env:MAIL_SERVER} + LOCALSTACK_SERVER: ${env:LOCALSTACK_SERVER} + QUEUE_URL: ${env:QUEUE_URL} + SQS_ENDPOINT: ${env:SQS_ENDPOINT} + LIST_USERS_API_KEY: ${env:LIST_USERS_API_KEY} + TRIGGER_ARI_INGEST_API_KEY: ${env:TRIGGER_ARI_INGEST_API_KEY} + INGEST_REPORT_RECIPIENTS: ${env:INGEST_REPORT_RECIPIENTS} deploymentBucket: tags: Project: Octopus @@ -81,34 +83,5 @@ custom: v1: v1 serverless-offline: useChildProcesses: true - serverless-offline-ssm: - stages: - - local - ssm: - /local_octopus_sls_sg: '1' - /local_octopus_private_subnet_az1: '1' - /local_octopus_private_subnet_az2: '2' - /local_octopus_private_subnet_az3: '3' - /elasticsearch_user_local_octopus: ${env:ELASTICSEARCH_USER} - /elasticsearch_password_local_octopus: ${env:ELASTICSEARCH_PASSWORD} - /elasticsearch_endpoint_local_octopus: ${env:ELASTICSEARCH_ENDPOINT} - /elastic_search_protocol_local_octopus: ${env:ELASTICSEARCH_PROTOCOL} - /db_connection_string_local_octopus: ${env:DATABASE_URL} - /jwt_secret_local_octopus: ${env:JWT_SECRET} - /orcid_secret_key_local_octopus: ${env:ORCID_SECRET} - /orcid_app_id_local_octopus: ${env:ORCID_ID} - /orcid_auth_url_local_octopus: ${env:ORCID_AUTH_URL} - /orcid_member_api_url_local_octopus: ${env:ORCID_MEMBER_API_URL} - /doi_prefix_local_octopus: ${env:DOI_PREFIX} - /datacite_endpoint_local_octopus: ${env:DATACITE_ENDPOINT} - /datacite_user_local_octopus: ${env:DATACITE_USER} - /datacite_password_local_octopus: ${env:DATACITE_PASSWORD} - /email_sender_address_local_octopus: ${env:EMAIL_SENDER_ADDRESS} - /base_url_local_octopus: ${env:BASE_URL} - /authorization_callback_url_local_octopus: ${env:AUTHORISATION_CALLBACK_URL} - /list_users_api_key_local_octopus: ${env:LIST_USERS_API_KEY} - /queue_url_local_octopus: ${env:QUEUE_URL} - /sqs_endpoint_local_octopus: ${env:SQS_ENDPOINT} - /mail_server_local_octopus: ${env:MAIL_SERVER} functions: - ${file(./serverless-config-default.yml):functions} \ No newline at end of file diff --git a/api/serverless.yml b/api/serverless.yml index 779e4f9cc..4bd53376b 100644 --- a/api/serverless.yml +++ b/api/serverless.yml @@ -50,6 +50,8 @@ provider: SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus} MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus} SLACK_CHANNEL_EMAIL: ${ssm:/slack_channel_email_${self:provider.stage}_octopus} + TRIGGER_ARI_INGEST_API_KEY: ${ssm:/trigger_ari_ingest_api_key_${self:provider.stage}_octopus} + INGEST_REPORT_RECIPIENTS: ${ssm:/ingest_report_recipients_${self:provider.stage}_octopus} deploymentBucket: tags: Project: Octopus @@ -105,7 +107,8 @@ custom: automatic: true number: 3 scheduledAriIngestEnabled: - int: true + int: false + prod: false functions: - ${file(./serverless-config-default.yml):functions} - ${file(./serverless-config-deploy.yml):functions} \ No newline at end of file diff --git a/api/src/components/ingestLog/__tests__/ingestLog.test.ts b/api/src/components/ingestLog/__tests__/ingestLog.test.ts index 399c22cc1..89763a354 100644 --- a/api/src/components/ingestLog/__tests__/ingestLog.test.ts +++ b/api/src/components/ingestLog/__tests__/ingestLog.test.ts @@ -26,14 +26,14 @@ describe('Ingest log functions', () => { }); }); - test('Get most recent start time', async () => { - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); - expect(mostRecentStart).toEqual(new Date('2024-09-11T12:53:00.000Z')); + test('Get most recent log', async () => { + const mostRecentLog = await ingestLogService.getMostRecentLog('ARI'); + expect(mostRecentLog?.start).toEqual(new Date('2024-09-11T12:53:00.000Z')); }); test('Most recent start is null if no run that ended successfully is present', async () => { await client.prisma.ingestLog.update({ where: { id: 'ingest-log-1' }, data: { end: null } }); - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); + const mostRecentStart = await ingestLogService.getMostRecentLog('ARI'); expect(mostRecentStart).toBeNull(); }); }); diff --git a/api/src/components/ingestLog/service.ts b/api/src/components/ingestLog/service.ts index 84616b259..78e78af5e 100644 --- a/api/src/components/ingestLog/service.ts +++ b/api/src/components/ingestLog/service.ts @@ -13,26 +13,14 @@ export const setEndTime = (id: string, end: Date) => } }); -export const getMostRecentStartTime = async (source: I.PublicationImportSource): Promise => { - const mostRecentStartQuery = await client.prisma.ingestLog.findFirst({ +export const getMostRecentLog = (source: I.PublicationImportSource, includeOpenLogs?: boolean) => + client.prisma.ingestLog.findFirst({ where: { source, - // Successful runs only. - end: { - not: null - } + // By default, get successful (having an end time) logs only. + ...(includeOpenLogs ? {} : { end: { not: null } }) }, orderBy: { start: 'desc' - }, - select: { - start: true } }); - - if (mostRecentStartQuery) { - return mostRecentStartQuery.start; - } else { - return null; - } -}; diff --git a/api/src/lib/integrations/README.md b/api/src/components/integration/README.md similarity index 97% rename from api/src/lib/integrations/README.md rename to api/src/components/integration/README.md index 1eb42e219..bb9f2dda9 100644 --- a/api/src/lib/integrations/README.md +++ b/api/src/components/integration/README.md @@ -32,7 +32,7 @@ On import, ARIs go through a handling flow: #### How ARI data is mapped to octopus data -Various ARI fields are mapped to octpous ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts). +Various ARI fields are mapped to octopus ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts). Of particular importance is how ARIs are matched to an owning organisational user account. The mapping process expects a UserMapping to exist associating the `department` field value from the ARI (where the title matches, case insensitive, and the mapping source is 'ARI') with the user ID of an organisational account. diff --git a/api/src/lib/integrations/__tests__/ari.test.ts b/api/src/components/integration/__tests__/ari.test.ts similarity index 85% rename from api/src/lib/integrations/__tests__/ari.test.ts rename to api/src/components/integration/__tests__/ari.test.ts index ccb9366d8..a4e8615a0 100644 --- a/api/src/lib/integrations/__tests__/ari.test.ts +++ b/api/src/components/integration/__tests__/ari.test.ts @@ -1,5 +1,6 @@ -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; import * as I from 'interface'; +import * as ingestLogService from 'ingestLog/service'; import * as testUtils from 'lib/testUtils'; // This ARI will match a publication in the seed data via the questionId. @@ -120,6 +121,18 @@ describe('ARI Mapping', () => { }); }); + test('Unrecognised topics are reported', async () => { + const mappingAttempt = await ariUtils.mapAriQuestionToPublicationVersion({ + ...sampleARIQuestion, + topics: ['unrecognised topic'] + }); + expect(mappingAttempt).toMatchObject({ + success: true, + message: 'Found unrecognised topic(s).', + unrecognisedTopics: ['unrecognised topic'] + }); + }); + test('Department is matched to existing user', async () => { const mappingAttempt = await ariUtils.mapAriQuestionToPublicationVersion(sampleARIQuestion); expect(mappingAttempt).toMatchObject({ @@ -135,7 +148,8 @@ describe('ARI Mapping', () => { expect(mappingAttempt).toMatchObject({ success: false, mappedData: null, - message: 'User not found for department: unrecognised department.' + message: 'User not found for department: unrecognised department.', + unrecognisedDepartment: 'unrecognised department' }); }); @@ -221,7 +235,7 @@ describe('ARI handling', () => { }); }); - test('ARI with unrecognised department is skipped', async () => { + test('ARI with unrecognised department is skipped and dept name is reported in a field', async () => { const handleARI = await ariUtils.handleIncomingARI({ ...sampleARIQuestion, department: 'Unrecognised Department name' @@ -231,7 +245,8 @@ describe('ARI handling', () => { actionTaken: 'none', success: false, message: - 'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.' + 'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.', + unrecognisedDepartment: 'Unrecognised Department name' }); }); @@ -281,6 +296,25 @@ describe('ARI handling', () => { }); }); + test('Unrecognised topics are reported', async () => { + const handleARI = await ariUtils.handleIncomingARI({ + ...sampleARIQuestion, + topics: [...sampleARIQuestion.topics, 'unrecognised topic'] + }); + expect(handleARI).toMatchObject({ + actionTaken: 'none', + success: true, + publicationVersion: { + topics: [ + { + id: 'test-topic-1a' + } + ] + }, + unrecognisedTopics: ['unrecognised topic'] + }); + }); + test('Keywords update when fieldsOfResearch/tags change', async () => { const handleARI = await ariUtils.handleIncomingARI({ ...sampleARIQuestion, @@ -352,3 +386,32 @@ describe('ARI handling', () => { }); }); }); + +describe('ARI import processes', () => { + beforeEach(async () => { + await testUtils.clearDB(); + await testUtils.testSeed(); + }); + + test('Incremental import endpoint requires API key', async () => { + const triggerImport = await testUtils.agent.post('/integrations/ari/incremental'); + + expect(triggerImport.status).toEqual(401); + expect(triggerImport.body).toMatchObject({ + message: "Please provide a valid 'apiKey'." + }); + }); + + test('Incremental ingest cancels if already in progress', async () => { + // Create an open ended log first. + await ingestLogService.create('ARI'); + const triggerImport = await testUtils.agent + .post('/integrations/ari/incremental') + .query({ apiKey: process.env.TRIGGER_ARI_INGEST_API_KEY }); + + expect(triggerImport.status).toEqual(202); + expect(triggerImport.body).toMatchObject({ + message: 'Cancelling ingest. Either an import is already in progress or the last import failed.' + }); + }); +}); diff --git a/api/src/lib/integrations/ariUtils.ts b/api/src/components/integration/ariUtils.ts similarity index 86% rename from api/src/lib/integrations/ariUtils.ts rename to api/src/components/integration/ariUtils.ts index 3f23c0464..93a278a91 100644 --- a/api/src/lib/integrations/ariUtils.ts +++ b/api/src/components/integration/ariUtils.ts @@ -8,6 +8,8 @@ import * as topicMappingService from 'topicMapping/service'; import * as userMappingService from 'userMapping/service'; import * as userService from 'user/service'; +import { Prisma } from '@prisma/client'; + const parseAriTextField = (value: string): string => { // Sometimes ARI text fields are enclosed in quotes and we don't want to show those in a publication body. const noQuotes = Helpers.stripEnclosingQuotes(value); @@ -22,9 +24,16 @@ export const mapAriQuestionToPublicationVersion = async ( | { success: true; mappedData: I.MappedARIQuestion; - message: null; + message: string | null; + unrecognisedTopics?: string[]; + } + | { + success: false; + mappedData: null; + message: string; + unrecognisedDepartment?: string; + unrecognisedTopics?: string[]; } - | { success: false; mappedData: null; message: string } > => { if (questionData.isArchived) { return { @@ -69,6 +78,26 @@ export const mapAriQuestionToPublicationVersion = async ( // Ensure uniqueness. const keywords = [...new Set(fieldsOfResearch.concat(tags))]; + // Map ARI topics to octopus topics. + const unrecognisedTopics: string[] = []; + type TopicMappingResult = Prisma.PromiseReturnType; + const topicMappings: TopicMappingResult[] = []; + + for await (const ariTopic of ariTopics) { + const mapping = await topicMappingService.get(ariTopic, 'ARI'); + + if (mapping) { + topicMappings.push(mapping); + } else { + unrecognisedTopics.push(ariTopic); + } + } + + // We intentionally don't map some ARI topics, so filter those out. + const octopusTopicIds = topicMappings.flatMap((topicMapping) => + topicMapping && topicMapping.isMapped && topicMapping.topic ? [topicMapping.topic.id] : [] + ); + // Find user by department title. const userMapping = await userMappingService.get(department, 'ARI'); @@ -76,18 +105,13 @@ export const mapAriQuestionToPublicationVersion = async ( return { success: false, mappedData: null, - message: 'User not found for department: ' + department + '.' + message: 'User not found for department: ' + department + '.', + unrecognisedDepartment: department }; } const user = userMapping.user; - // Map ARI topics to octopus topics. - const topicMappings = await Promise.all(ariTopics.map((ariTopic) => topicMappingService.get(ariTopic, 'ARI'))); - // We intentionally don't map some ARI topics, so filter those out. - const octopusTopicIds = topicMappings.flatMap((topicMapping) => - topicMapping && topicMapping.isMapped && topicMapping.topic ? [topicMapping.topic.id] : [] - ); // If no topics listed in ARI, fall back to default topic for the department (user). // Otherwise use the mapped topics, in a Set to ensure uniqueness. const finalTopicIds = octopusTopicIds.length @@ -96,6 +120,8 @@ export const mapAriQuestionToPublicationVersion = async ( ? [user.defaultTopicId] : []; + const unrecognisedTopicsFound = unrecognisedTopics.length; + return { success: true, mappedData: { @@ -107,7 +133,8 @@ export const mapAriQuestionToPublicationVersion = async ( externalId: questionId.toString(), userId: user.id }, - message: null + message: unrecognisedTopicsFound ? 'Found unrecognised topic(s).' : null, + ...(unrecognisedTopicsFound ? { unrecognisedTopics } : {}) }; }; @@ -173,18 +200,27 @@ export const handleIncomingARI = async (question: I.ARIQuestion): Promise +): Promise => { + // Check if a process is currently running. + const lastLog = await ingestLogService.getMostRecentLog('ARI', true); + + if (lastLog && !lastLog.end) { + return response.json(202, { + message: 'Cancelling ingest. Either an import is already in progress or the last import failed.' + }); + } + + // This can also be triggered on a schedule, in which case we don't need to check for an API key, + // so only check for the API key if the event is an API request. + if (event && 'headers' in event) { + const apiKey = event.queryStringParameters?.apiKey; + + if (apiKey !== process.env.TRIGGER_ARI_INGEST_API_KEY) { + return response.json(401, { message: "Please provide a valid 'apiKey'." }); + } + } + + try { + const ingestResult = await integrationService.incrementalAriIngest(); + + return response.json(200, ingestResult); + } catch (error) { + console.log(error); + + return response.json(500, { message: 'Unknown server error.' }); + } +}; diff --git a/api/src/components/integration/routes.ts b/api/src/components/integration/routes.ts new file mode 100644 index 000000000..9eb3000a4 --- /dev/null +++ b/api/src/components/integration/routes.ts @@ -0,0 +1,3 @@ +import * as integrationController from 'integration/controller'; + +export const incrementalAriIngest = integrationController.incrementalAriIngest; diff --git a/api/src/components/integrations/service.ts b/api/src/components/integration/service.ts similarity index 70% rename from api/src/components/integrations/service.ts rename to api/src/components/integration/service.ts index c584e66cb..6a14343b3 100644 --- a/api/src/components/integrations/service.ts +++ b/api/src/components/integration/service.ts @@ -1,5 +1,6 @@ import axios from 'axios'; -import * as ariUtils from 'lib/integrations/ariUtils'; +import * as ariUtils from 'integration/ariUtils'; +import * as email from 'lib/email'; import * as ingestLogService from 'ingestLog/service'; /** @@ -10,17 +11,20 @@ import * as ingestLogService from 'ingestLog/service'; * - It encounters an ARI with dateUpdated before the start time of the most * recent successful ingest (if this start time is available). */ -export const incrementalAriIngest = async (): Promise => { +export const incrementalAriIngest = async (): Promise => { + const start = new Date(); const MAX_UNCHANGED_STREAK = 5; // Get most start time of last successful run to help us know when to stop. - const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI'); + const mostRecentLog = await ingestLogService.getMostRecentLog('ARI'); - if (!mostRecentStart) { + if (!mostRecentLog) { console.log( `Unable to get most recent start time. This job will stop when it encounters ${MAX_UNCHANGED_STREAK} unchanged ARIs, regardless of their dateUpdated value.` ); } + const mostRecentStart = mostRecentLog?.start; + // Log start time. const log = await ingestLogService.create('ARI'); @@ -33,7 +37,12 @@ export const incrementalAriIngest = async (): Promise => { // Pagination loop. let pageUrl = ariUtils.ariEndpoint; let paginationInfo; - let writeCount = 0; + // Keep count of things to report on at the end. + let checkedCount = 0; + let createdCount = 0; + let updatedCount = 0; + const unrecognisedDepartments = new Set(); + const unrecognisedTopics = new Set(); do { // Get page. @@ -49,6 +58,15 @@ export const incrementalAriIngest = async (): Promise => { if (!pageAri.isArchived) { // Create, update, or skip this ARI as appropriate. const handle = await ariUtils.handleIncomingARI(pageAri); + checkedCount++; + + if (handle.unrecognisedDepartment) { + unrecognisedDepartments.add(handle.unrecognisedDepartment); + } + + if (handle.unrecognisedTopics) { + handle.unrecognisedTopics.forEach((topic) => unrecognisedTopics.add(topic)); + } if (!handle.success) { console.log(`Error when handling ARI with question ID ${pageAri.questionId}: ${handle.message}`); @@ -65,16 +83,17 @@ export const incrementalAriIngest = async (): Promise => { unchangedStreak = 0; // Log action taken. console.log(`ARI ${pageAri.questionId} handled successfully with action: ${handle.actionTaken}`); - writeCount++; // Artificial delay to avoid hitting datacite rate limits with publication creates/updates. // https://support.datacite.org/docs/is-there-a-rate-limit-for-making-requests-against-the-datacite-apis if (handle.actionTaken === 'create') { + createdCount++; // Datacite is hit twice, to initialise DOI and get publication ID, then update DOI with data. await new Promise((resolve) => setTimeout(resolve, 1000)); } if (handle.actionTaken === 'update') { + updatedCount++; // Datacite is hit once, to update the DOI with changes. await new Promise((resolve) => setTimeout(resolve, 500)); } @@ -88,7 +107,20 @@ export const incrementalAriIngest = async (): Promise => { pageUrl = paginationInfo.links.next; } while (pageUrl && unchangedStreak < MAX_UNCHANGED_STREAK && !timeOverlap); - // Log end time. - await ingestLogService.setEndTime(log.id, new Date()); - console.log(`Update complete. Updated ${writeCount} publication${writeCount !== 1 ? 's' : ''}.`); + const end = new Date(); + // Get duration in seconds to the nearest 1st decimal place. + const durationSeconds = Math.round((end.getTime() - start.getTime()) / 100) / 10; + await ingestLogService.setEndTime(log.id, end); + await email.incrementalAriIngestReport({ + checkedCount, + durationSeconds, + createdCount, + updatedCount, + unrecognisedDepartments: Array.from(unrecognisedDepartments).sort(), + unrecognisedTopics: Array.from(unrecognisedTopics).sort() + }); + + const writeCount = createdCount + updatedCount; + + return `Update complete. Updated ${writeCount} publication${writeCount !== 1 ? 's' : ''}.`; }; diff --git a/api/src/lib/email.ts b/api/src/lib/email.ts index bb4c560c8..60aab7915 100644 --- a/api/src/lib/email.ts +++ b/api/src/lib/email.ts @@ -920,3 +920,60 @@ export const newAriChildPublication = async (options: { subject }); }; + +export const incrementalAriIngestReport = async (options: { + checkedCount: number; + durationSeconds: number; + createdCount: number; + updatedCount: number; + unrecognisedDepartments: string[]; + unrecognisedTopics: string[]; +}): Promise => { + const cleanDepartments = options.unrecognisedDepartments.map((department) => Helpers.getSafeHTML(department)); + const cleanTopics = options.unrecognisedTopics.map((topic) => Helpers.getSafeHTML(topic)); + const html = ` + + +

Incremental ARI import run completed in ${options.durationSeconds} seconds.

+
    +
  • ARIs checked: ${options.checkedCount}
  • Publications created: ${ + options.createdCount + }
  • Publications updated: ${options.updatedCount}
  • + ${ + cleanDepartments.length + ? '
  • Unrecognised departments:
    • ' + + cleanDepartments.join('
    • ') + + '
  • ' + : '' + } + ${ + cleanTopics.length + ? '
  • Unrecognised topics:
    • ' + cleanTopics.join('
    • ') + '
  • ' + : '' + } +
+ + + `; + const text = ` + Incremental ARI ingest run completed in ${options.durationSeconds} seconds. + ARIs checked: ${options.checkedCount}. + Publications created: ${options.createdCount}. + Publications updated: ${options.updatedCount}. + ${ + options.unrecognisedDepartments.length + ? 'Unrecognised departments: "' + options.unrecognisedDepartments.join('", "') + '".' + : '' + } + ${ + options.unrecognisedTopics.length + ? 'Unrecognised topics: "' + options.unrecognisedTopics.join('", "') + '".' + : '' + }`; + await send({ + html, + text, + to: process.env.INGEST_REPORT_RECIPIENTS ? process.env.INGEST_REPORT_RECIPIENTS.split(',') : '', + subject: 'Incremental ARI ingest report' + }); +}; diff --git a/api/src/lib/interface.ts b/api/src/lib/interface.ts index 5b26519f5..d3f770ced 100644 --- a/api/src/lib/interface.ts +++ b/api/src/lib/interface.ts @@ -32,7 +32,12 @@ export { Topic } from '@prisma/client'; export { JSONSchemaType, Schema } from 'ajv'; -export { APIGatewayProxyEventV2, APIGatewayProxyHandlerV2, APIGatewayProxyResultV2 } from 'aws-lambda'; +export { + APIGatewayProxyEventV2, + APIGatewayProxyHandlerV2, + APIGatewayProxyResultV2, + EventBridgeEvent +} from 'aws-lambda'; export type RequestType = 'body' | 'queryStringParameters' | 'pathParameters'; @@ -657,7 +662,7 @@ export interface DestroyImagePathParams { } export interface EmailSendOptions { - to: string; + to: string | string[]; subject: string; html: string; text: string; @@ -1056,4 +1061,6 @@ export interface HandledARI { success: boolean; message?: string; publicationVersion?: PublicationVersion; + unrecognisedDepartment?: string; + unrecognisedTopics?: string[]; } diff --git a/api/src/lib/sqs.ts b/api/src/lib/sqs.ts index abd688746..f78b08b97 100644 --- a/api/src/lib/sqs.ts +++ b/api/src/lib/sqs.ts @@ -5,20 +5,18 @@ const queueUrl = Helpers.checkEnvVariable('QUEUE_URL'); const endpoint = Helpers.checkEnvVariable('SQS_ENDPOINT'); const config = { - region: 'eu-west-1' + region: 'eu-west-1', + ...(process.env.STAGE === 'local' + ? { + credentials: { + accessKeyId: 'dummy', + secretAccessKey: 'dummy' + }, + endpoint + } + : {}) }; -if (process.env.STAGE === 'local') { - // @ts-ignore - config.credentials = { - accessKeyId: 'dummy', - secretAccessKey: 'dummy' - }; - - // @ts-ignore - config.endpoint = endpoint; -} - const sqs = new SQS(config); export const createQueue = async (): Promise => { diff --git a/api/tsconfig.json b/api/tsconfig.json index 44eeedd78..f2c304cee 100644 --- a/api/tsconfig.json +++ b/api/tsconfig.json @@ -75,6 +75,9 @@ "ingestLog/*": [ "src/components/ingestLog/*" ], + "integration/*": [ + "src/components/integration/*" + ], "link/*": [ "src/components/link/*" ], diff --git a/e2e/playwright.config.ts b/e2e/playwright.config.ts index 96d681ccf..d8785cb45 100644 --- a/e2e/playwright.config.ts +++ b/e2e/playwright.config.ts @@ -1,12 +1,10 @@ import type { PlaywrightTestConfig } from '@playwright/test'; import { devices } from '@playwright/test'; import * as dotenv from 'dotenv'; +import path from 'path'; -/** - * Read environment variables from .env file. - * https://github.com/motdotla/dotenv - */ -dotenv.config(); +// Read from ".env" file. +dotenv.config({ path: path.resolve(__dirname, '.env') }); /** * See https://playwright.dev/docs/test-configuration. diff --git a/e2e/tests/LoggedIn/publish.e2e.spec.ts b/e2e/tests/LoggedIn/publish.e2e.spec.ts index 31d021d3c..1e354d2e7 100644 --- a/e2e/tests/LoggedIn/publish.e2e.spec.ts +++ b/e2e/tests/LoggedIn/publish.e2e.spec.ts @@ -2,6 +2,7 @@ import * as Helpers from '../helpers'; import { expect, test, Page, Browser } from '@playwright/test'; import { PageModel } from '../PageModel'; import cuid2 from '@paralleldrive/cuid2'; +import path from 'path'; const createPublication = async (page: Page, publicationTitle: string, pubType: string) => { await page.goto(`/create`); @@ -1892,6 +1893,7 @@ test.describe('Publication flow + co-authors', () => { test.describe('Publication Flow + File import', () => { let page: Page; + const assetsDirName = path.join(__dirname, '../../assets/'); test.beforeAll(async ({ browser }) => { page = await Helpers.getPageAsUser(browser); @@ -1912,7 +1914,7 @@ test.describe('Publication Flow + File import', () => { ); // import initial playwright file - await Helpers.openFileImportModal(page, 'assets/Playwright.docx'); + await Helpers.openFileImportModal(page, assetsDirName + 'Playwright.docx'); await page.locator(PageModel.publish.insertButton).click(); // Ensure modal has closed and file import @@ -1920,7 +1922,7 @@ test.describe('Publication Flow + File import', () => { await expect(page.locator(PageModel.publish.text.editor)).toContainText('File Import – Playwright'); // replace playwright file - await Helpers.openFileImportModal(page, 'assets/Playwright - Replace.docx'); + await Helpers.openFileImportModal(page, assetsDirName + 'Playwright - Replace.docx'); await page.locator(PageModel.publish.replaceButton).click(); // Ensure modal has closed and file import @@ -1966,12 +1968,12 @@ test.describe('Publication Flow + File import', () => { ]); const validImageFiles = [ - 'assets/apng-image-test.png', - 'assets/avif-image-test.avif', - 'assets/gif-image-test.gif', - 'assets/jpeg-image-test.jpeg', - 'assets/jpg-image-test.jpg', - 'assets/webp-image-test.webp' + assetsDirName + 'apng-image-test.png', + assetsDirName + 'avif-image-test.avif', + assetsDirName + 'gif-image-test.gif', + assetsDirName + 'jpeg-image-test.jpeg', + assetsDirName + 'jpg-image-test.jpg', + assetsDirName + 'webp-image-test.webp' ]; // import correct file formats @@ -1993,7 +1995,7 @@ test.describe('Publication Flow + File import', () => { await expect(page.locator('button[title="Upload image"]')).not.toBeVisible(); for (const image of validImageFiles) { await expect( - page.locator(`div[contenteditable="true"] img[title="${image.split('assets/').pop()}"]`) + page.locator(`div[contenteditable="true"] img[title="${image.split(assetsDirName).pop()}"]`) ).toBeVisible(); } @@ -2003,7 +2005,7 @@ test.describe('Publication Flow + File import', () => { page.waitForEvent('filechooser'), page.click('label[for="file-upload"]') ]); - await fileChooser2.setFiles(['assets/Playwright.docx']); + await fileChooser2.setFiles([assetsDirName + 'Playwright.docx']); await page.click('button[title="Upload image"]'); await expect(page.getByText('Failed to upload "Playwright.docx". The format is not supported.')).toBeVisible(); });