Skip to content

Commit

Permalink
Remove sock files on nginx startup (#2131)
Browse files Browse the repository at this point in the history
Problem: NGF Pod cannot recover if NGINX master process fails without cleaning up

Solution: Update the nginx Dockerfile CMD to clean up sock files on start-up
  • Loading branch information
ciarams87 authored Jun 24, 2024
1 parent 747bf0a commit 0021cec
Show file tree
Hide file tree
Showing 13 changed files with 31 additions and 51 deletions.
2 changes: 2 additions & 0 deletions build/Dockerfile.nginx
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ RUN chown -R 101:1001 /etc/nginx /var/cache/nginx /var/lib/nginx
LABEL org.nginx.ngf.image.build.agent="${BUILD_AGENT}"

USER 101:1001

CMD ["sh", "-c", "rm -rf /var/run/nginx/*.sock && /docker-entrypoint.sh nginx -g 'daemon off;'"]
2 changes: 1 addition & 1 deletion build/Dockerfile.nginxplus
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ USER 101:1001

LABEL org.nginx.ngf.image.build.agent="${BUILD_AGENT}"

CMD ["nginx", "-g", "daemon off;"]
CMD ["sh", "-c", "rm -rf /var/run/nginx/*.sock && nginx -g 'daemon off;'"]
4 changes: 0 additions & 4 deletions charts/nginx-gateway-fabric/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
{{- with .Values.nginx.extraVolumeMounts -}}
Expand Down Expand Up @@ -197,8 +195,6 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
{{- with .Values.extraVolumes -}}
Expand Down
4 changes: 0 additions & 4 deletions config/tests/static-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
terminationGracePeriodSeconds: 30
Expand All @@ -127,7 +125,5 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
4 changes: 0 additions & 4 deletions deploy/manifests/nginx-gateway-experimental.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
terminationGracePeriodSeconds: 30
Expand All @@ -283,8 +281,6 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
---
Expand Down
4 changes: 0 additions & 4 deletions deploy/manifests/nginx-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
terminationGracePeriodSeconds: 30
Expand All @@ -279,8 +277,6 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
---
Expand Down
4 changes: 0 additions & 4 deletions deploy/manifests/nginx-plus-gateway-experimental.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
terminationGracePeriodSeconds: 30
Expand All @@ -290,8 +288,6 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
---
Expand Down
4 changes: 0 additions & 4 deletions deploy/manifests/nginx-plus-gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,6 @@ spec:
mountPath: /var/run/nginx
- name: nginx-cache
mountPath: /var/cache/nginx
- name: nginx-lib
mountPath: /var/lib/nginx
- name: nginx-includes
mountPath: /etc/nginx/includes
terminationGracePeriodSeconds: 30
Expand All @@ -286,8 +284,6 @@ spec:
emptyDir: {}
- name: nginx-cache
emptyDir: {}
- name: nginx-lib
emptyDir: {}
- name: nginx-includes
emptyDir: {}
---
Expand Down
4 changes: 2 additions & 2 deletions internal/mode/static/nginx/config/servers_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ server {
{{- end }}
{{ end }}
server {
listen unix:/var/lib/nginx/nginx-502-server.sock;
listen unix:/var/run/nginx/nginx-502-server.sock;
access_log off;
return 502;
}
server {
listen unix:/var/lib/nginx/nginx-500-server.sock;
listen unix:/var/run/nginx/nginx-500-server.sock;
access_log off;
return 500;
Expand Down
4 changes: 2 additions & 2 deletions internal/mode/static/nginx/config/upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ var upstreamsTemplate = gotemplate.Must(gotemplate.New("upstreams").Parse(upstre

const (
// nginx502Server is used as a backend for services that cannot be resolved (have no IP address).
nginx502Server = "unix:/var/lib/nginx/nginx-502-server.sock"
nginx502Server = "unix:/var/run/nginx/nginx-502-server.sock"
// nginx500Server is used as a server for the invalid backend ref upstream.
nginx500Server = "unix:/var/lib/nginx/nginx-500-server.sock"
nginx500Server = "unix:/var/run/nginx/nginx-500-server.sock"
// invalidBackendRef is used as an upstream name for invalid backend references.
invalidBackendRef = "invalid-backend-ref"
// ossZoneSize is the upstream zone size for nginx open source.
Expand Down
2 changes: 1 addition & 1 deletion internal/mode/static/nginx/config/upstreams_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func TestExecuteUpstreams(t *testing.T) {
"upstream invalid-backend-ref",
"server 10.0.0.0:80;",
"server 11.0.0.0:80;",
"server unix:/var/lib/nginx/nginx-502-server.sock;",
"server unix:/var/run/nginx/nginx-502-server.sock;",
}

upstreamResults := gen.executeUpstreams(dataplane.Configuration{Upstreams: stateUpstreams})
Expand Down
2 changes: 1 addition & 1 deletion site/content/overview/gateway-architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ The following list describes the connections, preceeded by their types in parent
- Write: The _NGINX master_ writes its PID to the `nginx.pid` file stored in the `nginx-run` volume.
- Read: The _NGINX master_ reads _configuration files_ and the _TLS cert and keys_ referenced in the configuration when it starts or during a reload. These files, certificates, and keys are stored in the `nginx-conf` and `nginx-secrets` volumes that are mounted to both the `nginx-gateway` and `nginx` containers.
1. (File I/O)
- Write: The _NGINX master_ writes to the auxiliary Unix sockets folder, which is located in the `/var/lib/nginx`
- Write: The _NGINX master_ writes to the auxiliary Unix sockets folder, which is located in the `/var/run/nginx`
directory.
- Read: The _NGINX master_ reads the `nginx.conf` file from the `/etc/nginx` directory. This [file](https://github.com/nginxinc/nginx-gateway-fabric/blob/v1.3.0/internal/mode/static/nginx/conf/nginx.conf) contains the global and http configuration settings for NGINX. In addition, _NGINX master_ reads the NJS modules referenced in the configuration when it starts or during a reload. NJS modules are stored in the `/usr/lib/nginx/modules` directory.
1. (File I/O) The _NGINX master_ sends logs to its _stdout_ and _stderr_, which are collected by the container runtime.
Expand Down
42 changes: 22 additions & 20 deletions tests/suite/graceful_recovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const (
)

// Since checkContainerLogsForErrors may experience interference from previous tests (as explained in the function
// documentation), this test is recommended to be run separate from other nfr tests.
// documentation), this test is recommended to be run separate from other tests.
var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "graceful-recovery"), func() {
files := []string{
"graceful-recovery/cafe.yaml",
Expand Down Expand Up @@ -81,7 +81,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
func() error {
return checkForWorkingTraffic(teaURL, coffeeURL)
}).
WithTimeout(timeoutConfig.RequestTimeout).
WithTimeout(timeoutConfig.RequestTimeout * 2).
WithPolling(500 * time.Millisecond).
Should(Succeed())
})
Expand All @@ -96,8 +96,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
})

It("recovers when nginx container is restarted", func() {
// FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed.
Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108")
runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, &ns)
})
})
Expand Down Expand Up @@ -154,11 +152,11 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files
func() error {
return checkForWorkingTraffic(teaURL, coffeeURL)
}).
WithTimeout(timeoutConfig.RequestTimeout).
WithTimeout(timeoutConfig.RequestTimeout * 2).
WithPolling(500 * time.Millisecond).
Should(Succeed())

checkContainerLogsForErrors(ngfPodName)
checkContainerLogsForErrors(ngfPodName, containerName == nginxContainerName)
}

func restartContainer(ngfPodName, containerName string) {
Expand Down Expand Up @@ -260,15 +258,17 @@ func expectRequestToFail(appURL, address string) error {
// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests,
// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests
// may cause an interference with this test and cause this test to fail.
func checkContainerLogsForErrors(ngfPodName string) {
logs, err := resourceManager.GetPodLogs(
// Additionally, when the NGINX process is killed, some errors are expected in the NGF logs while we wait for the
// NGINX container to be restarted.
func checkContainerLogsForErrors(ngfPodName string, checkNginxLogsOnly bool) {
nginxLogs, err := resourceManager.GetPodLogs(
ngfNamespace,
ngfPodName,
&core.PodLogOptions{Container: nginxContainerName},
)
Expect(err).ToNot(HaveOccurred())

for _, line := range strings.Split(logs, "\n") {
for _, line := range strings.Split(nginxLogs, "\n") {
Expect(line).ToNot(ContainSubstring("[crit]"), line)
Expect(line).ToNot(ContainSubstring("[alert]"), line)
Expect(line).ToNot(ContainSubstring("[emerg]"), line)
Expand All @@ -281,18 +281,20 @@ func checkContainerLogsForErrors(ngfPodName string) {
}
}

logs, err = resourceManager.GetPodLogs(
ngfNamespace,
ngfPodName,
&core.PodLogOptions{Container: ngfContainerName},
)
Expect(err).ToNot(HaveOccurred())
if !checkNginxLogsOnly {
ngfLogs, err := resourceManager.GetPodLogs(
ngfNamespace,
ngfPodName,
&core.PodLogOptions{Container: ngfContainerName},
)
Expect(err).ToNot(HaveOccurred())

for _, line := range strings.Split(logs, "\n") {
if *plusEnabled && strings.Contains(line, "\"level\":\"error\"") {
Expect(line).To(ContainSubstring("Usage reporting must be enabled when using NGINX Plus"), line)
} else {
Expect(line).ToNot(ContainSubstring("\"level\":\"error\""), line)
for _, line := range strings.Split(ngfLogs, "\n") {
if *plusEnabled && strings.Contains(line, "\"level\":\"error\"") {
Expect(line).To(ContainSubstring("Usage reporting must be enabled when using NGINX Plus"), line)
} else {
Expect(line).ToNot(ContainSubstring("\"level\":\"error\""), line)
}
}
}
}
Expand Down

0 comments on commit 0021cec

Please sign in to comment.