Skip to content

Commit

Permalink
Merge pull request #692 from JiscSD/OC-888
Browse files Browse the repository at this point in the history
OC-888: Deploying ARI ingests to prod
  • Loading branch information
finlay-jisc authored Sep 27, 2024
2 parents 7123ec6 + d41b6f7 commit 10c488b
Show file tree
Hide file tree
Showing 21 changed files with 334 additions and 127 deletions.
8 changes: 7 additions & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,10 @@ MAIL_SERVER=localhost
LOCALSTACK_SERVER=http://localhost:4566

QUEUE_URL=http://localhost:4566/000000000000/your-queue-name
SQS_ENDPOINT=http://localhost:4566
SQS_ENDPOINT=http://localhost:4566

LIST_USERS_API_KEY=12345
TRIGGER_ARI_INGEST_API_KEY=12345

SLACK_CHANNEL_EMAIL=example@mailinator.com
INGEST_REPORT_RECIPIENTS=example.one@mailinator.com,example.two@mailinator.com
2 changes: 1 addition & 1 deletion api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ A similar process happens when the database is seeded. After publications are in

## Integrations

Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/lib/integrations/README.md).
Octopus is built to integrate with some external systems in order to import publications. For more information please read the dedicated [integrations readme](./src/components/integration/README.md).

---

Expand Down
2 changes: 2 additions & 0 deletions api/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ services:
- QUEUE_URL=http://localhost:4566/000000000000/science-octopus-pdf-queue-local
- SQS_ENDPOINT=http://localstack:4566
- LIST_USERS_API_KEY=123456789
- TRIGGER_ARI_INGEST_API_KEY=123456789
- INGEST_REPORT_RECIPIENTS=example.jisc@mailinator.com

volumes:
opensearch-data1:
10 changes: 0 additions & 10 deletions api/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@
"prisma": "^5.11.0",
"puppeteer": "^22.12.0",
"serverless-offline": "^12.0.4",
"serverless-offline-ssm": "^6.2.0",
"serverless-plugin-split-stacks": "^1.13.0",
"serverless-prune-plugin": "^2.0.2",
"serverless-webpack": "^5.13.0",
Expand Down
2 changes: 1 addition & 1 deletion api/scripts/fullAriImport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { expand } from 'dotenv-expand';
// Important to do this so that environment variables are treated the same as in deployed code.
expand(dotenv.config());

import * as ariUtils from 'lib/integrations/ariUtils';
import * as ariUtils from 'integration/ariUtils';
import * as I from 'interface';

const fullAriImport = async (): Promise<string> => {
Expand Down
6 changes: 5 additions & 1 deletion api/serverless-config-default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -494,9 +494,13 @@ functions:
cors: true
# Integrations
incrementalAriIngest:
handler: src/components/integrations/service.incrementalAriIngest
handler: src/components/integration/routes.incrementalAriIngest
timeout: 900
events:
- schedule:
rate: cron(0 5 ? * TUE *) # Every Tuesday at 05:00 UTC (AWS schedule cron expressions are evaluated in UTC).
enabled: ${self:custom.scheduledAriIngestEnabled.${opt:stage}, false}
- http:
path: ${self:custom.versions.v1}/integrations/ari/incremental
method: POST
cors: true
75 changes: 24 additions & 51 deletions api/serverless-offline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ frameworkVersion: ${file(./serverless-config-default.yml):frameworkVersion}
useDotenv: ${file(./serverless-config-default.yml):useDotenv}

plugins:
- serverless-offline-ssm
- serverless-offline
- serverless-webpack
- serverless-webpack-prisma
Expand All @@ -19,29 +18,32 @@ provider:
stage: 'local'
environment:
STAGE: ${self:provider.stage}
ELASTICSEARCH_USER: ${ssm:/elasticsearch_user_${self:provider.stage}_octopus}
ELASTICSEARCH_PASSWORD: ${ssm:/elasticsearch_password_${self:provider.stage}_octopus}
ELASTICSEARCH_ENDPOINT: ${ssm:/elasticsearch_endpoint_${self:provider.stage}_octopus}
ELASTICSEARCH_PROTOCOL: ${ssm:/elastic_search_protocol_${self:provider.stage}_octopus}
DATABASE_URL: ${ssm:/db_connection_string_${self:provider.stage}_octopus}
JWT_SECRET: ${ssm:/jwt_secret_${self:provider.stage}_octopus}
DATABASE_URL: ${env:DATABASE_URL}
ORCID_SECRET: ${env:ORCID_SECRET}
ORCID_ID: ${env:ORCID_ID}
ORCID_AUTH_URL: ${env:ORCID_AUTH_URL}
ORCID_MEMBER_API_URL: ${env:ORCID_MEMBER_API_URL}
JWT_SECRET: ${env:JWT_SECRET}
EMAIL_SENDER_ADDRESS: ${env:EMAIL_SENDER_ADDRESS}
BASE_URL: ${env:BASE_URL}
AUTHORISATION_CALLBACK_URL: ${env:AUTHORISATION_CALLBACK_URL}
ELASTICSEARCH_PROTOCOL: ${env:ELASTICSEARCH_PROTOCOL}
ELASTICSEARCH_USER: ${env:ELASTICSEARCH_USER}
ELASTICSEARCH_PASSWORD: ${env:ELASTICSEARCH_PASSWORD}
ELASTICSEARCH_ENDPOINT: ${env:ELASTICSEARCH_ENDPOINT}
VALIDATION_CODE_EXPIRY: 10
VALIDATION_CODE_ATTEMPTS: 3
ORCID_ID: ${ssm:/orcid_app_id_${self:provider.stage}_octopus}
ORCID_SECRET: ${ssm:/orcid_secret_key_${self:provider.stage}_octopus}
ORCID_AUTH_URL: ${ssm:/orcid_auth_url_${self:provider.stage}_octopus}
ORCID_MEMBER_API_URL: ${ssm:/orcid_member_api_url_${self:provider.stage}_octopus}
DOI_PREFIX: ${ssm:/doi_prefix_${self:provider.stage}_octopus}
DATACITE_ENDPOINT: ${ssm:/datacite_endpoint_${self:provider.stage}_octopus}
DATACITE_USER: ${ssm:/datacite_user_${self:provider.stage}_octopus}
DATACITE_PASSWORD: ${ssm:/datacite_password_${self:provider.stage}_octopus}
EMAIL_SENDER_ADDRESS: ${ssm:/email_sender_address_${self:provider.stage}_octopus}
BASE_URL: ${ssm:/base_url_${self:provider.stage}_octopus}
AUTHORISATION_CALLBACK_URL: ${ssm:/authorization_callback_url_${self:provider.stage}_octopus}
LIST_USERS_API_KEY: ${ssm:/list_users_api_key_${self:provider.stage}_octopus}
QUEUE_URL: ${ssm:/queue_url_${self:provider.stage}_octopus}
SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus}
MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus}
DOI_PREFIX: ${env:DOI_PREFIX}
DATACITE_ENDPOINT: ${env:DATACITE_ENDPOINT}
DATACITE_USER: ${env:DATACITE_USER}
DATACITE_PASSWORD: ${env:DATACITE_PASSWORD}
MAIL_SERVER: ${env:MAIL_SERVER}
LOCALSTACK_SERVER: ${env:LOCALSTACK_SERVER}
QUEUE_URL: ${env:QUEUE_URL}
SQS_ENDPOINT: ${env:SQS_ENDPOINT}
LIST_USERS_API_KEY: ${env:LIST_USERS_API_KEY}
TRIGGER_ARI_INGEST_API_KEY: ${env:TRIGGER_ARI_INGEST_API_KEY}
INGEST_REPORT_RECIPIENTS: ${env:INGEST_REPORT_RECIPIENTS}
deploymentBucket:
tags:
Project: Octopus
Expand Down Expand Up @@ -81,34 +83,5 @@ custom:
v1: v1
serverless-offline:
useChildProcesses: true
serverless-offline-ssm:
stages:
- local
ssm:
/local_octopus_sls_sg: '1'
/local_octopus_private_subnet_az1: '1'
/local_octopus_private_subnet_az2: '2'
/local_octopus_private_subnet_az3: '3'
/elasticsearch_user_local_octopus: ${env:ELASTICSEARCH_USER}
/elasticsearch_password_local_octopus: ${env:ELASTICSEARCH_PASSWORD}
/elasticsearch_endpoint_local_octopus: ${env:ELASTICSEARCH_ENDPOINT}
/elastic_search_protocol_local_octopus: ${env:ELASTICSEARCH_PROTOCOL}
/db_connection_string_local_octopus: ${env:DATABASE_URL}
/jwt_secret_local_octopus: ${env:JWT_SECRET}
/orcid_secret_key_local_octopus: ${env:ORCID_SECRET}
/orcid_app_id_local_octopus: ${env:ORCID_ID}
/orcid_auth_url_local_octopus: ${env:ORCID_AUTH_URL}
/orcid_member_api_url_local_octopus: ${env:ORCID_MEMBER_API_URL}
/doi_prefix_local_octopus: ${env:DOI_PREFIX}
/datacite_endpoint_local_octopus: ${env:DATACITE_ENDPOINT}
/datacite_user_local_octopus: ${env:DATACITE_USER}
/datacite_password_local_octopus: ${env:DATACITE_PASSWORD}
/email_sender_address_local_octopus: ${env:EMAIL_SENDER_ADDRESS}
/base_url_local_octopus: ${env:BASE_URL}
/authorization_callback_url_local_octopus: ${env:AUTHORISATION_CALLBACK_URL}
/list_users_api_key_local_octopus: ${env:LIST_USERS_API_KEY}
/queue_url_local_octopus: ${env:QUEUE_URL}
/sqs_endpoint_local_octopus: ${env:SQS_ENDPOINT}
/mail_server_local_octopus: ${env:MAIL_SERVER}
functions:
- ${file(./serverless-config-default.yml):functions}
5 changes: 4 additions & 1 deletion api/serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ provider:
SQS_ENDPOINT: ${ssm:/sqs_endpoint_${self:provider.stage}_octopus}
MAIL_SERVER: ${ssm:/mail_server_${self:provider.stage}_octopus}
SLACK_CHANNEL_EMAIL: ${ssm:/slack_channel_email_${self:provider.stage}_octopus}
TRIGGER_ARI_INGEST_API_KEY: ${ssm:/trigger_ari_ingest_api_key_${self:provider.stage}_octopus}
INGEST_REPORT_RECIPIENTS: ${ssm:/ingest_report_recipients_${self:provider.stage}_octopus}
deploymentBucket:
tags:
Project: Octopus
Expand Down Expand Up @@ -105,7 +107,8 @@ custom:
automatic: true
number: 3
scheduledAriIngestEnabled:
int: true
int: false
prod: false
functions:
- ${file(./serverless-config-default.yml):functions}
- ${file(./serverless-config-deploy.yml):functions}
8 changes: 4 additions & 4 deletions api/src/components/ingestLog/__tests__/ingestLog.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ describe('Ingest log functions', () => {
});
});

test('Get most recent start time', async () => {
const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI');
expect(mostRecentStart).toEqual(new Date('2024-09-11T12:53:00.000Z'));
test('Get most recent log', async () => {
const mostRecentLog = await ingestLogService.getMostRecentLog('ARI');
expect(mostRecentLog?.start).toEqual(new Date('2024-09-11T12:53:00.000Z'));
});

test('Most recent start is null if no run that ended successfully is present', async () => {
await client.prisma.ingestLog.update({ where: { id: 'ingest-log-1' }, data: { end: null } });
const mostRecentStart = await ingestLogService.getMostRecentStartTime('ARI');
const mostRecentStart = await ingestLogService.getMostRecentLog('ARI');
expect(mostRecentStart).toBeNull();
});
});
20 changes: 4 additions & 16 deletions api/src/components/ingestLog/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,14 @@ export const setEndTime = (id: string, end: Date) =>
}
});

export const getMostRecentStartTime = async (source: I.PublicationImportSource): Promise<Date | null> => {
const mostRecentStartQuery = await client.prisma.ingestLog.findFirst({
export const getMostRecentLog = (source: I.PublicationImportSource, includeOpenLogs?: boolean) =>
client.prisma.ingestLog.findFirst({
where: {
source,
// Successful runs only.
end: {
not: null
}
// By default, get successful (having an end time) logs only.
...(includeOpenLogs ? {} : { end: { not: null } })
},
orderBy: {
start: 'desc'
},
select: {
start: true
}
});

if (mostRecentStartQuery) {
return mostRecentStartQuery.start;
} else {
return null;
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ On import, ARIs go through a handling flow:

#### How ARI data is mapped to octopus data

Various ARI fields are mapped to octpous ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts).
Various ARI fields are mapped to octopus ones in the `mapAriQuestionToPublicationVersion` function in [ariUtils.ts](./ariUtils.ts).

Of particular importance is how ARIs are matched to an owning organisational user account. The mapping process expects a UserMapping to exist associating the `department` field value from the ARI (where the title matches, case insensitive, and the mapping source is 'ARI') with the user ID of an organisational account.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as ariUtils from 'lib/integrations/ariUtils';
import * as ariUtils from 'integration/ariUtils';
import * as I from 'interface';
import * as ingestLogService from 'ingestLog/service';
import * as testUtils from 'lib/testUtils';

// This ARI will match a publication in the seed data via the questionId.
Expand Down Expand Up @@ -120,6 +121,18 @@ describe('ARI Mapping', () => {
});
});

// Mapping an ARI whose only topic is unknown is expected to succeed,
// with the unknown topic title listed on the mapping result.
test('Unrecognised topics are reported', async () => {
    const unknownTopic = 'unrecognised topic';
    const questionWithUnknownTopic = { ...sampleARIQuestion, topics: [unknownTopic] };

    const mappingResult = await ariUtils.mapAriQuestionToPublicationVersion(questionWithUnknownTopic);

    expect(mappingResult).toMatchObject({
        success: true,
        message: 'Found unrecognised topic(s).',
        unrecognisedTopics: [unknownTopic]
    });
});

test('Department is matched to existing user', async () => {
const mappingAttempt = await ariUtils.mapAriQuestionToPublicationVersion(sampleARIQuestion);
expect(mappingAttempt).toMatchObject({
Expand All @@ -135,7 +148,8 @@ describe('ARI Mapping', () => {
expect(mappingAttempt).toMatchObject({
success: false,
mappedData: null,
message: 'User not found for department: unrecognised department.'
message: 'User not found for department: unrecognised department.',
unrecognisedDepartment: 'unrecognised department'
});
});

Expand Down Expand Up @@ -221,7 +235,7 @@ describe('ARI handling', () => {
});
});

test('ARI with unrecognised department is skipped', async () => {
test('ARI with unrecognised department is skipped and dept name is reported in a field', async () => {
const handleARI = await ariUtils.handleIncomingARI({
...sampleARIQuestion,
department: 'Unrecognised Department name'
Expand All @@ -231,7 +245,8 @@ describe('ARI handling', () => {
actionTaken: 'none',
success: false,
message:
'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.'
'Failed to map ARI data to octopus data. User not found for department: Unrecognised Department name.',
unrecognisedDepartment: 'Unrecognised Department name'
});
});

Expand Down Expand Up @@ -281,6 +296,25 @@ describe('ARI handling', () => {
});
});

// Handling an ARI that carries one extra, unknown topic: the expectation is that
// no action is taken, the call still succeeds, the returned version's topics match
// only 'test-topic-1a', and the unknown title comes back in `unrecognisedTopics`.
test('Unrecognised topics are reported', async () => {
    const unknownTopic = 'unrecognised topic';
    const incomingTopics = [...sampleARIQuestion.topics, unknownTopic];

    const handlingResult = await ariUtils.handleIncomingARI({
        ...sampleARIQuestion,
        topics: incomingTopics
    });

    const expectedOutcome = {
        actionTaken: 'none',
        success: true,
        publicationVersion: {
            topics: [{ id: 'test-topic-1a' }]
        },
        unrecognisedTopics: [unknownTopic]
    };
    expect(handlingResult).toMatchObject(expectedOutcome);
});

test('Keywords update when fieldsOfResearch/tags change', async () => {
const handleARI = await ariUtils.handleIncomingARI({
...sampleARIQuestion,
Expand Down Expand Up @@ -352,3 +386,32 @@ describe('ARI handling', () => {
});
});
});

// Endpoint-level checks for triggering the incremental ARI ingest over HTTP.
describe('ARI import processes', () => {
    const incrementalIngestPath = '/integrations/ari/incremental';

    // Every test starts from a cleared and freshly seeded database.
    beforeEach(async () => {
        await testUtils.clearDB();
        await testUtils.testSeed();
    });

    test('Incremental import endpoint requires API key', async () => {
        // No apiKey query parameter is sent with this request.
        const response = await testUtils.agent.post(incrementalIngestPath);

        expect(response.status).toEqual(401);
        expect(response.body).toMatchObject({
            message: "Please provide a valid 'apiKey'."
        });
    });

    test('Incremental ingest cancels if already in progress', async () => {
        // Create an ingest log before triggering; the test relies on it being left open-ended (no end time).
        await ingestLogService.create('ARI');

        const apiKey = process.env.TRIGGER_ARI_INGEST_API_KEY;
        const response = await testUtils.agent.post(incrementalIngestPath).query({ apiKey });

        expect(response.status).toEqual(202);
        expect(response.body).toMatchObject({
            message: 'Cancelling ingest. Either an import is already in progress or the last import failed.'
        });
    });
});
Loading

0 comments on commit 10c488b

Please sign in to comment.