Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2535 - Crash Loop Back offs Error #2577

Merged
merged 21 commits into from
Dec 12, 2023
12 changes: 6 additions & 6 deletions devops/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -242,47 +242,47 @@ build-db-migrations:
test -n "$(BUILD_REF)"
test -n "$(DB_MIGRATIONS_BUILD_REF)"
@echo "+\n++ BUILDING DB migrations with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nodejs-18" -p BASE_IMAGE_TAG="1-71.1697652955" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/db-migrations/Dockerfile -p NAME=$(DB_MIGRATIONS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/db-migrations/Dockerfile -p NAME=$(DB_MIGRATIONS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

@oc -n $(BUILD_NAMESPACE) start-build bc/$(DB_MIGRATIONS_BUILD_REF) --wait

build-api:
test -n "$(BUILD_NAMESPACE)"
test -n "$(BUILD_REF)"
test -n "$(API_BUILD_REF)"
@echo "+\n++ BUILDING API with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nodejs-18" -p BASE_IMAGE_TAG="1-71.1697652955" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/api/Dockerfile -p NAME=$(API_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/api/Dockerfile -p NAME=$(API_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) start-build bc/$(API_BUILD_REF) --wait

build-workers:
test -n "$(BUILD_NAMESPACE)"
test -n "$(BUILD_REF)"
test -n "$(WORKERS_BUILD_REF)"
@echo "+\n++ BUILDING WORKERS with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nodejs-18" -p BASE_IMAGE_TAG="1-71.1697652955" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/workers/Dockerfile -p NAME=$(WORKERS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/workers/Dockerfile -p NAME=$(WORKERS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) start-build bc/$(WORKERS_BUILD_REF) --wait

build-queue-consumers:
test -n "$(BUILD_NAMESPACE)"
test -n "$(BUILD_REF)"
test -n "$(QUEUE_CONSUMERS_BUILD_REF)"
@echo "+\n++ BUILDING QUEUE_CONSUMERS with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nodejs-18" -p BASE_IMAGE_TAG="1-71.1697652955" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/queue-consumers/Dockerfile -p NAME=$(QUEUE_CONSUMERS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/queue-consumers/Dockerfile -p NAME=$(QUEUE_CONSUMERS_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) start-build bc/$(QUEUE_CONSUMERS_BUILD_REF) --wait

build-load-test-gateway:
test -n "$(BUILD_NAMESPACE)"
test -n "$(BUILD_REF)"
test -n "$(LOAD_TEST_GATEWAY_BUILD_REF)"
@echo "+\n++ BUILDING LOAD_TEST_GATEWAY with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nodejs-18" -p BASE_IMAGE_TAG="1-71.1697652955" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/load-test-gateway/Dockerfile -p NAME=$(LOAD_TEST_GATEWAY_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)backend -p DOCKER_FILE_PATH=apps/load-test-gateway/Dockerfile -p NAME=$(LOAD_TEST_GATEWAY_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) start-build bc/$(LOAD_TEST_GATEWAY_BUILD_REF) --wait

build-web:
test -n "$(BUILD_NAMESPACE)"
test -n "$(BUILD_REF)"
test -n "$(WEB_BUILD_REF)"
@echo "+\n++ BUILDING WEB with tag: $(BUILD_REF)\n+"
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p BASE_IMAGE_NAME="nginx-122" -p BASE_IMAGE_TAG="1" -p BASE_IMAGE_REPO="artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/" -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)web -p NAME=$(WEB_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) process -f $(BUILD_TEMPLATE_PATH) -p TAG=$(BUILD_REF) -p SOURCE_REPOSITORY_REF=$(BUILD_REF) -p SOURCE_CONTEXT_DIR=$(SOURCE_CONTEXT_DIR)web -p NAME=$(WEB_BUILD_REF) | oc -n $(BUILD_NAMESPACE) apply -f -
@oc -n $(BUILD_NAMESPACE) start-build bc/$(WEB_BUILD_REF) --wait

init-patroni:
Expand Down
27 changes: 0 additions & 27 deletions devops/openshift/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,6 @@ kind: Template
metadata:
name: ${NAME}
objects:
- apiVersion: image.openshift.io/v1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my understanding, what was the effect of having it before and not having it now?

Copy link
Collaborator Author

@guru-aot guru-aot Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To answer your question directly: the configuration was not set up properly, and it was not used at all in our project.
More on the tags:
An ImageStream is usually used to represent the container's base image, and an ImageStreamTag is used to associate a specific tagged version of that image.
These configurations (only the ImageStreamTag) in turn helped us override the base image defined by the "FROM" instruction at the top of the Dockerfile.
In our case, we use the same base image for all of our deployments (API, web, workers, queue-consumers, load-test-gateway and db-migrations), so there is no need to override the ones in the Dockerfiles, as we run all our builds using GitHub Actions. Even if, in the future, we want to change the base image for a particular container, we can change the base image in that container's Dockerfile and run the GitHub Action, and it should work.

kind: ImageStream
metadata:
name: ${BASE_IMAGE_NAME}
spec:
lookupPolicy:
local: false
- apiVersion: v1
kind: ImageStreamTag
lookupPolicy:
local: false
metadata:
name: ${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
tag:
annotations: null
from:
kind: DockerImage
name: ${BASE_IMAGE_REPO}${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
importPolicy:
scheduled: true
referencePolicy:
type: Source
- kind: ImageStream
apiVersion: v1
metadata:
Expand Down Expand Up @@ -88,11 +66,6 @@ parameters:
value: Dockerfile
- name: TAG
value: "latest"
- name: BASE_IMAGE_REPO
- name: BASE_IMAGE_NAME
required: true
- name: BASE_IMAGE_TAG
required: true
- name: CPU_LIMIT
value: "2"
- name: MEMORY_LIMIT
Expand Down
7 changes: 6 additions & 1 deletion sources/packages/backend/apps/api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base Image
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81
Copy link
Collaborator

@andrewsignori-aot andrewsignori-aot Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I support the node upgrade either way and the research looks great, we are still missing the root cause.
Can you please evaluate if the below assumptions make sense?

1 - Even though the OpenShift Docker image defines a non-root user 1001, the container will be executed with a random user, as the error in the ticket also points out.
image
The same is also supported by the below documentation.
https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images

2 - Checking the BC git I found at least one entry applying the recommended solution due to a npm 9 issue.
image
Image source: https://github.com/bcgov/common-hosted-form-service/blob/master/Dockerfile

The above would also explain why the error started when we moved from node 16 to 18 (npm 8 to 10).
Should we consider also applying the same?

Copy link
Collaborator

@dheepak-aot dheepak-aot Dec 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, and I agree with @andrewsignori-aot about the OpenShift container running with an arbitrarily assigned user.

image

I am 100% on the same page that we need to run a fix-permissions command to have the highest level of certainty that the issue is taken care of at its root cause.

When I look at OpenShift, I see that write permissions on the directory are not granted to anyone other than the owner.

image

There is one more thing that I want to share here, @guru-aot @andrewsignori-aot @cditcher. It may or may not be a possible solution, but I would recommend trying it.

Please go through this thread, sclorg/s2i-nodejs-container#396

There is a mention of the same error there (we are using npm ci).

The following screenshots highlight the parts that relate to our issue:

image

image

Docker s2i example (maybe this way we can make the container run as user 1001 instead of the default arbitrarily assigned one):

image

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestions, @andrewsignori-aot and @dheepak-aot. As suggested, I have updated the group permissions so that users in group 0 have write access to the ./.npm folder, and I have tested the build and deployments.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the further verifications @dheepak-aot 😉


LABEL maintainer="BC GOV"

Expand All @@ -23,5 +23,10 @@ RUN npm run build api
# Exposing application port
EXPOSE ${PORT}

# Grant access to group 0 to allow npm v9 to work
# See: https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 ./.npm && \
andrepestana-aot marked this conversation as resolved.
Show resolved Hide resolved
chmod -R g=u ./.npm

# Entry point
CMD [ "npm", "run", "start:prod:api" ]
7 changes: 6 additions & 1 deletion sources/packages/backend/apps/db-migrations/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base Image
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81

LABEL maintainer="BC GOV"

Expand All @@ -15,5 +15,10 @@ COPY tsconfig.json ./
COPY tsconfig.build.json ./
COPY env-setup.js ./

# Grant access to group 0 to allow npm v9 to work
# See: https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 ./.npm && \
chmod -R g=u ./.npm

# Entry point
CMD [ "npm", "run", "start:prod:db-migrations" ]
7 changes: 6 additions & 1 deletion sources/packages/backend/apps/load-test-gateway/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base Image
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81

LABEL maintainer="BC GOV"

Expand All @@ -16,5 +16,10 @@ COPY ./libs ./libs
# Building app
RUN npm run build load-test-gateway

# Grant access to group 0 to allow npm v9 to work
# See: https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 ./.npm && \
chmod -R g=u ./.npm

# Entry point
CMD [ "npm", "run", "start:prod:load-test-gateway" ]
7 changes: 6 additions & 1 deletion sources/packages/backend/apps/queue-consumers/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base Image
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81

LABEL maintainer="BC GOV"

Expand All @@ -16,5 +16,10 @@ COPY ./libs ./libs
# Building app
RUN npm run build queue-consumers

# Grant access to group 0 to allow npm v9 to work
# See: https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 ./.npm && \
chmod -R g=u ./.npm

# Entry point
CMD [ "npm", "run", "start:prod:queue-consumers" ]
7 changes: 6 additions & 1 deletion sources/packages/backend/apps/workers/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Base Image
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81

LABEL maintainer="BC GOV"

Expand All @@ -16,5 +16,10 @@ COPY ./libs ./libs
# Building app
RUN npm run build workers

# Grant access to group 0 to allow npm v9 to work
# See: https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#use-uid_create-images
RUN chgrp -R 0 ./.npm && \
chmod -R g=u ./.npm

# Entry point
CMD [ "npm", "run", "start:prod:workers" ]
2 changes: 1 addition & 1 deletion sources/packages/web/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-71.1697652955 AS builder
FROM artifacts.developer.gov.bc.ca/redhat-access-docker-remote/ubi8/nodejs-18:1-81 AS builder

# Application Port.
ENV PORT 3030
Expand Down