diff --git a/.gitignore b/.gitignore index 2b26729ed..d60e44222 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,7 @@ coverage # VSC settings .vscode .env -*.code-workspace \ No newline at end of file +*.code-workspace + +# Docker +volume \ No newline at end of file diff --git a/api/scripts/ariImport.ts b/api/scripts/ariImport.ts index 23df91ffa..b565e4f04 100644 --- a/api/scripts/ariImport.ts +++ b/api/scripts/ariImport.ts @@ -27,29 +27,43 @@ const checkBooleanArgValue = (arg: string): void => { * - full: If "true", the script will import all ARIs from the ARI DB, instead of stopping when it * thinks it has found all the new ones (the incremental way). * - Default: false + * - reportFormat: Controls how the output of the job is reported. Can be "email" or "file". Emails + * are sent to the addresses listed in the INGEST_REPORT_RECIPIENTS environment variable. Files are + * written to "ari-import-report.txt". + * - Default: "file" * * e.g.: * npm run ariImport -- allDepartments=true full=true */ -const parseArguments = (): { importAllDepartments: boolean; dryRun: boolean; full: boolean } => { +const parseArguments = (): { + importAllDepartments: boolean; + dryRun: boolean; + full: boolean; + reportFormat: I.IngestReportFormat; +} => { const args = Helpers.parseNpmScriptArgs(); for (const arg of Object.keys(args)) { - if (!['allDepartments', 'dryRun', 'full'].includes(arg)) { + if (!['allDepartments', 'dryRun', 'full', 'reportFormat'].includes(arg)) { throw new Error(`Unexpected argument: ${arg}`); } } - const { allDepartments: allDepartmentsArg, dryRun: dryRunArg, full: fullArg } = args; + const { allDepartments: allDepartmentsArg, dryRun: dryRunArg, full: fullArg, reportFormat: reportFormatArg } = args; for (const arg of [allDepartmentsArg, dryRunArg, fullArg]) { checkBooleanArgValue(arg); } + if (reportFormatArg && !(reportFormatArg === 'email' || reportFormatArg === 'file')) { + throw new Error(`"reportFormat" must be "email" or "file"`); + } + return { - 
importAllDepartments: !!allDepartmentsArg, - dryRun: !!dryRunArg, - full: !!fullArg + importAllDepartments: allDepartmentsArg === 'true', + dryRun: dryRunArg === 'true', + full: fullArg === 'true', + reportFormat: reportFormatArg ? (reportFormatArg as I.IngestReportFormat) : 'file' }; }; @@ -58,7 +72,11 @@ const parseArguments = (): { importAllDepartments: boolean; dryRun: boolean; ful * Differs from incremental ingest by fetching all ARIs before processing them. * It will not stop until all ARIs have been processed. */ -export const fullAriIngest = async (allDepartments: boolean, dryRun: boolean): Promise => { +export const fullAriIngest = async ( + allDepartments: boolean, + dryRun: boolean, + reportFormat: I.IngestReportFormat +): Promise => { const startTime = performance.now(); // Collect all ARIs in a variable. @@ -176,7 +194,7 @@ export const fullAriIngest = async (allDepartments: boolean, dryRun: boolean): P const durationSeconds = Math.round((endTime - startTime) / 100) / 10; // Write report file. 
- await ariUtils.ingestReport('file', { + await ariUtils.ingestReport(reportFormat, { checkedCount: aris.length, durationSeconds, createdCount, @@ -192,16 +210,25 @@ export const fullAriIngest = async (allDepartments: boolean, dryRun: boolean): P } ARIs in ${durationSeconds} seconds.`; }; -const ariImport = async (allDepartments: boolean, dryRun: boolean, full: boolean): Promise => { +const ariImport = async ( + allDepartments: boolean, + dryRun: boolean, + full: boolean, + reportFormat: I.IngestReportFormat +): Promise => { if (!full) { - return await integrationService.incrementalAriIngest(dryRun, 'file'); + return await integrationService.incrementalAriIngest(dryRun, reportFormat); } else { - return await fullAriIngest(allDepartments, dryRun); + return await fullAriIngest(allDepartments, dryRun, reportFormat); } }; -const { importAllDepartments, dryRun, full } = parseArguments(); +const { importAllDepartments, dryRun, full, reportFormat } = parseArguments(); -ariImport(importAllDepartments, dryRun, full) +ariImport(importAllDepartments, dryRun, full, reportFormat) .then((message) => console.log(message)) - .catch((err) => console.log(err)); + .catch((err) => { + // Log the failure to stderr and set a non-zero exit code so the caller can detect that the import failed. + console.error(err); + process.exitCode = 1; + }); diff --git a/api/src/components/integration/README.md b/api/src/components/integration/README.md index 9dcaace93..221ef7a87 100644 --- a/api/src/components/integration/README.md +++ b/api/src/components/integration/README.md @@ -15,6 +15,14 @@ Publications imported via an integration with another system should have the fol They should also always be owned by an organisational user account. That is, a user with the value `ORGANISATION` for the `role` field. +### Where/how does this run? + +On deployed environments, integrations are run in containers on AWS Elastic Container Service. 
These containers are defined in the infrastructure code (see [Dockerfile](../../../../infra/docker/ariImportRunner/Dockerfile)), so they can be built and tested locally from the `infra/docker/ariImportRunner` directory with `docker compose up` (see [compose.yml](../../../../infra/docker/ariImportRunner/compose.yml)). + +They can also be run ad hoc on the local environment via npm scripts, for example (from the `api` directory): + +`npm run ariImport -- dryRun=true allDepartments=true full=false` + ## Specific integrations ### ARI DB diff --git a/api/src/components/integration/ariUtils.ts b/api/src/components/integration/ariUtils.ts index 7c332bbdd..c5276e191 100644 --- a/api/src/components/integration/ariUtils.ts +++ b/api/src/components/integration/ariUtils.ts @@ -412,7 +412,7 @@ export const getParticipatingDepartmentNames = async (): Promise => { }; export const ingestReport = async ( - format: 'email' | 'file', + format: I.IngestReportFormat, ingestDetails: { checkedCount: number; durationSeconds: number; diff --git a/api/src/components/integration/service.ts b/api/src/components/integration/service.ts index b7b897787..d635ba709 100644 --- a/api/src/components/integration/service.ts +++ b/api/src/components/integration/service.ts @@ -3,6 +3,7 @@ import * as ariUtils from 'integration/ariUtils'; import * as ecs from 'lib/ecs'; import * as ingestLogService from 'ingestLog/service'; import * as Helpers from 'lib/helpers'; +import * as I from 'interface'; /** * Incremental ARI ingest. @@ -12,7 +13,7 @@ import * as Helpers from 'lib/helpers'; * - It encounters an ARI with dateUpdated before the start time of the most * recent successful ingest (if this start time is available). 
*/ -export const incrementalAriIngest = async (dryRun: boolean, reportFormat: 'email' | 'file'): Promise => { +export const incrementalAriIngest = async (dryRun: boolean, reportFormat: I.IngestReportFormat): Promise => { const start = new Date(); const MAX_UNCHANGED_STREAK = 5; // Get most start time of last successful run to help us know when to stop. diff --git a/api/src/lib/interface.ts b/api/src/lib/interface.ts index 62b89aeab..02520a499 100644 --- a/api/src/lib/interface.ts +++ b/api/src/lib/interface.ts @@ -1068,3 +1068,5 @@ export interface HandledARI { unrecognisedDepartment?: string; unrecognisedTopics?: string[]; } + +export type IngestReportFormat = 'email' | 'file'; diff --git a/infra/docker/ariImportRunner/Dockerfile b/infra/docker/ariImportRunner/Dockerfile new file mode 100644 index 000000000..ad4294e05 --- /dev/null +++ b/infra/docker/ariImportRunner/Dockerfile @@ -0,0 +1,21 @@ +# syntax=docker/dockerfile:1.7-labs +FROM public.ecr.aws/docker/library/node:20-alpine + +# --no-cache keeps the apk package index out of the image layer. +RUN apk add --no-cache \ + ca-certificates \ + curl \ + gnupg \ + git \ + openssl \ + openssl-dev \ + libc6-compat + +WORKDIR /app + +COPY package.json package-lock.json ./ +# Install exactly the dependency versions pinned in package-lock.json; fails if the lockfile is out of sync. +RUN npm ci +COPY --exclude=**/node_modules . . +RUN npx prisma generate +CMD ["npm", "run", "ariImport", "--", "dryRun=false", "reportFormat=email"] \ No newline at end of file diff --git a/infra/docker/ariImportRunner/compose.yml b/infra/docker/ariImportRunner/compose.yml new file mode 100644 index 000000000..b5efb182d --- /dev/null +++ b/infra/docker/ariImportRunner/compose.yml @@ -0,0 +1,10 @@ +# For testing the ARI import runner docker image locally. +services: + ari-import-runner: + build: + context: ../../../api + dockerfile: ../infra/docker/ariImportRunner/Dockerfile + command: /bin/sh -cx "npm run ariImport -- dryRun=true reportFormat=file && cat ari-import-report.txt" + env_file: ../../../api/.env + # So it can access the DB and opensearch that are exposed on host ports. 
+ network_mode: host diff --git a/infra/docker/poc/Dockerfile b/infra/docker/poc/Dockerfile deleted file mode 100644 index 25c7ddc94..000000000 --- a/infra/docker/poc/Dockerfile +++ /dev/null @@ -1,2 +0,0 @@ -FROM public.ecr.aws/docker/library/node:18-alpine -CMD ["echo", "Hello again, World!"] \ No newline at end of file diff --git a/infra/modules/codepipeline/buildspec/deploy-docker-image.yml b/infra/modules/codepipeline/buildspec/deploy-docker-image.yml index 5d5c4d6cd..c1dbb9b40 100644 --- a/infra/modules/codepipeline/buildspec/deploy-docker-image.yml +++ b/infra/modules/codepipeline/buildspec/deploy-docker-image.yml @@ -8,7 +8,7 @@ phases: build: commands: - echo Building docker image... - - docker build --platform linux/arm64 --tag $IMAGE_NAME ./infra/docker/poc + - docker build --platform linux/arm64 --tag $IMAGE_NAME -f ./infra/docker/ariImportRunner/Dockerfile ./api - docker tag $IMAGE_NAME:latest $ACCOUNT_ID.dkr.ecr.$DEFAULT_REGION.amazonaws.com/$PROJECT_NAME-$ENVIRONMENT:$IMAGE_NAME - docker tag $IMAGE_NAME:latest $ACCOUNT_ID.dkr.ecr.$DEFAULT_REGION.amazonaws.com/$PROJECT_NAME-$ENVIRONMENT:$COMMIT_ID post_build: diff --git a/infra/modules/codepipeline/codebuild.tf b/infra/modules/codepipeline/codebuild.tf index b6ecc80d4..ca34e11ff 100644 --- a/infra/modules/codepipeline/codebuild.tf +++ b/infra/modules/codepipeline/codebuild.tf @@ -19,8 +19,8 @@ resource "aws_codebuild_project" "deploy-docker-image" { environment { compute_type = "BUILD_GENERAL1_SMALL" - image = "aws/codebuild/standard:5.0" - type = "LINUX_CONTAINER" + image = "aws/codebuild/amazonlinux-aarch64-standard:3.0" + type = "ARM_CONTAINER" privileged_mode = true environment_variable { diff --git a/infra/modules/ecs/iam.tf b/infra/modules/ecs/iam.tf index 7c9c649f0..e1f57dc03 100644 --- a/infra/modules/ecs/iam.tf +++ b/infra/modules/ecs/iam.tf @@ -25,6 +25,31 @@ data "aws_iam_policy_document" "task-exec-policy" { ] resources = ["*"] } + statement { + effect = "Allow" + actions = [ + 
"ssm:GetParameters" + ] + resources = [ + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/base_url_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_endpoint_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_password_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_user_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/doi_prefix_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/db_connection_string_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elastic_search_protocol_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_user_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_password_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_endpoint_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/email_sender_address_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/ingest_report_recipients_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/mail_server_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/participating_ari_user_ids_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/queue_url_${var.environment}_${var.project_name}", + 
"arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/slack_channel_email_${var.environment}_${var.project_name}", + "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/sqs_endpoint_${var.environment}_${var.project_name}" + ] + } } resource "aws_iam_role_policy" "task-exec-policy" { @@ -33,7 +58,7 @@ resource "aws_iam_role_policy" "task-exec-policy" { policy = data.aws_iam_policy_document.task-exec-policy.json } -data "aws_iam_policy_document" "ecs-task-role-policy" { +data "aws_iam_policy_document" "ecs-task-assume-role-policy" { statement { effect = "Allow" actions = [ @@ -45,10 +70,9 @@ data "aws_iam_policy_document" "ecs-task-role-policy" { } } } - resource "aws_iam_role" "ecs-task-role" { name = "${var.project_name}-ecs-task-role-${var.environment}" - assume_role_policy = data.aws_iam_policy_document.ecs-task-role-policy.json + assume_role_policy = data.aws_iam_policy_document.ecs-task-assume-role-policy.json } data "aws_iam_policy_document" "task-policy" { @@ -62,6 +86,16 @@ data "aws_iam_policy_document" "task-policy" { ] resources = ["*"] } + statement { + effect = "Allow" + actions = [ + "ses:SendEmail", + "ses:SendRawEmail" + ] + resources = [ + "arn:aws:ses:eu-west-1:${local.account_id}:identity/*" + ] + } } resource "aws_iam_role_policy" "task-policy" { diff --git a/infra/modules/ecs/outputs.tf b/infra/modules/ecs/outputs.tf index 4e3688e77..bed26ff2a 100644 --- a/infra/modules/ecs/outputs.tf +++ b/infra/modules/ecs/outputs.tf @@ -1,3 +1,3 @@ output "task_security_group_id" { - value = aws_security_group.hello-world-task-sg.id + value = aws_security_group.ari-import-task-sg.id } diff --git a/infra/modules/ecs/tasks.tf b/infra/modules/ecs/tasks.tf index 8f3968376..00c233d2f 100644 --- a/infra/modules/ecs/tasks.tf +++ b/infra/modules/ecs/tasks.tf @@ -7,8 +7,8 @@ locals { region_name = data.aws_region.current.name } -resource "aws_ecs_task_definition" "hello-world" { - family = "${var.project_name}-hello-world-${var.environment}" 
+resource "aws_ecs_task_definition" "ari-import" { + family = "${var.project_name}-ari-import-${var.environment}" requires_compatibilities = ["FARGATE"] cpu = 256 @@ -26,7 +26,7 @@ resource "aws_ecs_task_definition" "hello-world" { container_definitions = jsonencode([ { - "name" : "hello-world", + "name" : "ari-import", "image" : "${local.account_id}.dkr.ecr.${local.region_name}.amazonaws.com/${var.project_name}-${var.environment}:latest", "entryPoints" : [ "sh", "-c" @@ -36,19 +36,95 @@ resource "aws_ecs_task_definition" "hello-world" { "logDriver" : "awslogs", "options" : { "awslogs-create-group" : "true", - "awslogs-group" : "${var.project_name}-hello-world-ecs-task-${var.environment}", + "awslogs-group" : "${var.project_name}-ari-import-ecs-task-${var.environment}", "awslogs-region" : "${local.region_name}", "awslogs-stream-prefix" : "ecs" }, "secretOptions" : [] - } + }, + "environment" : [ + { + "name" : "STAGE", + "value" : "${var.environment}" + } + ], + "secrets" : [ + { + "name" : "BASE_URL", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/base_url_${var.environment}_${var.project_name}" + }, + { + "name" : "DATABASE_URL", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/db_connection_string_${var.environment}_${var.project_name}" + }, + { + "name" : "DATACITE_ENDPOINT", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_endpoint_${var.environment}_${var.project_name}" + }, + { + "name" : "DATACITE_PASSWORD", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_password_${var.environment}_${var.project_name}" + }, + { + "name" : "DATACITE_USER", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/datacite_user_${var.environment}_${var.project_name}" + }, + { + "name" : "DOI_PREFIX", + "valueFrom" : 
"arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/doi_prefix_${var.environment}_${var.project_name}" + }, + { + "name" : "ELASTICSEARCH_PROTOCOL", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elastic_search_protocol_${var.environment}_${var.project_name}" + }, + { + "name" : "ELASTICSEARCH_USER", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_user_${var.environment}_${var.project_name}" + }, + { + "name" : "ELASTICSEARCH_PASSWORD", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_password_${var.environment}_${var.project_name}" + }, + { + "name" : "ELASTICSEARCH_ENDPOINT", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/elasticsearch_endpoint_${var.environment}_${var.project_name}" + }, + { + "name" : "EMAIL_SENDER_ADDRESS", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/email_sender_address_${var.environment}_${var.project_name}" + }, + { + "name" : "INGEST_REPORT_RECIPIENTS", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/ingest_report_recipients_${var.environment}_${var.project_name}" + }, + { + "name" : "MAIL_SERVER", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/mail_server_${var.environment}_${var.project_name}" + }, + { + "name" : "PARTICIPATING_ARI_USER_IDS", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/participating_ari_user_ids_${var.environment}_${var.project_name}" + }, + { + "name" : "QUEUE_URL", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/queue_url_${var.environment}_${var.project_name}" + }, + { + "name" : "SLACK_CHANNEL_EMAIL", + "valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/slack_channel_email_${var.environment}_${var.project_name}" + }, + { + "name" : "SQS_ENDPOINT", + 
"valueFrom" : "arn:aws:ssm:${local.region_name}:${local.account_id}:parameter/sqs_endpoint_${var.environment}_${var.project_name}" + }, + ] } ]) } -resource "aws_security_group" "hello-world-task-sg" { - name = "${var.project_name}-hello-world-task-sg-${var.environment}" - description = "Security group for hello world ecs task" +resource "aws_security_group" "ari-import-task-sg" { + name = "${var.project_name}-ari-import-task-sg-${var.environment}" + description = "Security group for ari import ecs task" vpc_id = var.vpc_id revoke_rules_on_delete = true @@ -60,18 +136,18 @@ resource "aws_security_group" "hello-world-task-sg" { } tags = { - Name = "${var.project_name}-hello-world-task-sg-${var.environment}" + Name = "${var.project_name}-ari-import-task-sg-${var.environment}" } } resource "aws_ssm_parameter" "ecs-security-group-id" { name = "ecs_task_security_group_id_${var.environment}_${var.project_name}" type = "String" - value = aws_security_group.hello-world-task-sg.id + value = aws_security_group.ari-import-task-sg.id } resource "aws_ssm_parameter" "ecs-task-definition-id" { name = "ecs_task_definition_id_${var.environment}_${var.project_name}" type = "String" - value = "${aws_ecs_task_definition.hello-world.id}:${aws_ecs_task_definition.hello-world.revision}" + value = "${aws_ecs_task_definition.ari-import.id}:${aws_ecs_task_definition.ari-import.revision}" }