docker-compose.yaml
version: "3.6"
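# Shared backend configuration: the `backend` and `worker` services below merge this
# block in via the YAML merge key (`<<: *x-backend`), so changes here apply to both.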
x-backend:
&x-backend
image: ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend}
build:
context: backend
cache_from:
- ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend}
command: /bin/run
# Make sure we get colorized terminal output
tty: true
depends_on:
- db
environment:
- SPARROW_ENV
- SPARROW_SECRET_KEY
- SPARROW_BASE_URL
- SPARROW_LAB_NAME
- SPARROW_ECHO_SQL
- SPARROW_DATABASE=postgresql+psycopg2://postgres@db/sparrow
# Inside the container, the data directory is mapped to a standardized folder
- SPARROW_DATA_DIR=/data
# A cache for files that need to persist but can be rebuilt.
- SPARROW_CACHE_DIR=/cache
# We have an experimental caching mechanism for SQLAlchemy models, but this
# may cause bugs for importers and is not enabled by default. (added in v2.2.0)
- SPARROW_CACHE_DATABASE_MODELS
- SPARROW_INIT_SQL=/init-sql
# For cloud data support
# NOTE: it may be useful to split this into a
# separate container, but it's centralized
# for now...
- SPARROW_S3_ENDPOINT
- SPARROW_S3_BUCKET
- SPARROW_S3_KEY
- SPARROW_S3_SECRET
# Whether we should enable a worker pool for running tasks. If we don't,
# tasks must be run via the command line.
- SPARROW_TASK_WORKER
- SPARROW_TASK_BROKER=redis://broker:6379/0
volumes:
    # Volume for helper scripts that support operations like migrations.
    # TODO: come up with a better way to organize this
- ./_cli/bin:/sparrow-bin
# Read-only volume for source code
# How this mount is set up has severe implications for dev-mode performance,
# as server reloaders tend to use non-performant file watchers...
- type: bind
source: ./backend/
target: /app
read_only: true
consistency: cached
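      # (`consistency: cached` only affects bind mounts on Docker Desktop for macOS;
      # it is ignored on other platforms)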
# Nested volume to keep built files
# separate from those on our local system
#- /app/sparrow.egg-info
# Link the data directory so we can find files if we are running
# the importer in the backend
# NOTE: we should think about moving this to another container
# but this is precluded by our current plugin architecture.
- ${SPARROW_DATA_DIR:-placeholder}:/data
# Share some configuration between backend
# and frontend
- ${SPARROW_INIT_SQL:-placeholder}:/init-sql/:ro
# The docker-compose volume for sparrow commands needs to be set
# even if a `SPARROW_COMMANDS` directory is not provided, so we can
# use a placeholder.
- ${SPARROW_COMMANDS_VOLUME:-placeholder}:/sparrow-commands/:ro
# Link `SPARROW_PLUGIN_DIR` directly into site-packages as a folder
# ...we could probably do this in a more elegant way
- ${SPARROW_PLUGIN_DIR:-placeholder}:/usr/local/lib/python3.9/site-packages/sparrow_plugins:ro
    # Save IPython configuration to a named volume so that we keep
    # command history between app runs
- ipython_config:/root/.ipython/profile_default
# Save CLI help info for rapid access
- runtime_data:/run:ro
# A working location for caching files that need to persist over time
    # (for example, git repositories for the PyChron importer). This could conceivably
    # be delegated to a separate importer container, but that seems overly complex for now.
- backend_cache:/cache
- model_cache:/root/.sqlalchemy-cache
services:
gateway:
ports:
- ${SPARROW_HTTP_PORT:-5002}:80
volumes:
- ./nginx-config/locations/core.conf:/etc/nginx/locations/core.conf:ro
# By default, in production mode, we serve Sparrow's compiled frontend application
# at the root route. In development mode, this is replaced by a dynamic webpack server
# that recompiles on application changes.
- ./nginx-config/locations/frontend-production.conf:/etc/nginx/locations/frontend.conf:ro
- frontend_build:/frontend
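      # (the `frontend` service writes its compiled assets into this shared volume)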
# Application error pages
- ./nginx-config/error-pages:/usr/share/nginx/error-pages/error:ro
# SPARROW_DATA_DIR is made accessible from the server container but not exposed.
- ${SPARROW_DATA_DIR:-placeholder}:/data:ro
backend:
<<: *x-backend
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000/api/v2/"]
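      # `curl -f` exits non-zero on HTTP error responses, so a failing API marks
      # the container unhealthy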
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
depends_on:
- gateway
expose:
- 5000
profiles:
- core
db:
# Right now we only support running Sparrow with a Docker-managed PostgreSQL cluster.
# In the future, we may loosen this restriction to allow externally-managed databases
# to ease the integration of Sparrow with other tools. However, managing the database
# in Docker eliminates a substantial amount of configuration complexity.
image: ${SPARROW_DATABASE_IMAGE:-ghcr.io/earthcubegeochron/sparrow/database:2.0}
expose:
- 5432
ports:
# A port range can be specified, but only with recent versions of Docker,
# so we default to a single port.
- ${SPARROW_DB_PORT:-54321}:5432
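      # (e.g. a host-side range such as "54321-54330:5432" would let Docker pick a
      # free port; syntax illustrative)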
environment:
- POSTGRES_DB=sparrow
- PGUSER=postgres
- POSTGRES_HOST_AUTH_METHOD=trust
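      # "trust" disables password authentication, so the published database port
      # should not be reachable from untrusted networks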
volumes:
- db_cluster:/var/lib/postgresql/data
profiles:
- core
healthcheck:
test: ["CMD", "pg_isready", "-U", "postgres"]
interval: 10s
timeout: 5s
retries: 3
pg_api:
image: postgrest/postgrest:v11.2.0
environment:
- PGRST_DB_URI=postgresql://authenticator:@db:5432/sparrow
- PGRST_DB_SCHEMAS=sparrow_api
- PGRST_DB_ANON_ROLE=view_public
- PGRST_JWT_SECRET=${SPARROW_SECRET_KEY}
- PGRST_DB_MAX_ROWS=100
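      # PostgREST exposes tables and views in the `sparrow_api` schema as REST
      # endpoints, e.g. GET /<table>?limit=10 (table name illustrative); responses
      # are capped by PGRST_DB_MAX_ROWS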
profiles:
- core
depends_on:
gateway:
condition: service_started
db:
condition: service_healthy
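        # `service_healthy` waits for the db healthcheck above to pass;
        # `service_started` only waits for the container to be running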
frontend:
image: ${SPARROW_FRONTEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/frontend}
build:
context: frontend
cache_from:
- ${SPARROW_FRONTEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/frontend}
profiles:
- frontend
depends_on:
- gateway
environment:
- CONTAINERIZED=1
- SPARROW_LAB_NAME
- SPARROW_ECHO_SQL
- BASE_URL=${SPARROW_BASE_URL:-/}
- API_BASE_URL=${SPARROW_BASE_URL:-/}
      # We need to forward SPARROW_HTTP_PORT to the frontend so that BrowserSync
      # can be loaded on the correct port in development mode
- SPARROW_HTTP_PORT
- MAPBOX_API_TOKEN
      # The SPARROW_SITE_CONTENT variable takes different values
      # inside and outside of the container
- SPARROW_SITE_CONTENT=/site-content
- SPARROW_FRONTEND_BUILD_DIR=/build
- SPARROW_ENV
volumes:
- frontend_build:/build
      # Site content defaults to the bundled frontend content unless
      # SPARROW_SITE_CONTENT points to a custom directory
- ${SPARROW_SITE_CONTENT:-./frontend/default-content}:/site-content:ro
# API TESTS container is disabled for now
# api-tests:
# build: api-tests
# # A placeholder command
# command: echo
# depends_on:
# - backend
database_backup:
image: ghcr.io/uw-macrostrat/pg-backup-service:main
# This service backs up the database periodically
# to a remote S3 bucket or a local directory on the server.
restart: unless-stopped
# This service will only run under the "production" profile
# https://docs.docker.com/compose/profiles/
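    # (enable it with e.g. `docker compose --profile production up -d`, or by
    # setting COMPOSE_PROFILES=production when invoking Compose directly)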
profiles:
- production
environment:
- S3_ENDPOINT=${SPARROW_S3_ENDPOINT:-""}
- S3_ACCESS_KEY=${SPARROW_S3_KEY:-""}
- S3_SECRET_KEY=${SPARROW_S3_SECRET:-""}
- S3_BACKUP_BUCKET=${SPARROW_BACKUP_BUCKET:-""}
# If we don't specify all S3 parameters,
# we fall back to using the local backup directory
- DB_BACKUP_DIR=${SPARROW_BACKUP_DIR:-""}
# Mounted volume for local backups (mapped to Sparrow backup dir)
- DB_BACKUP_VOLUME=/local-backups
# The unique-ish identifier of the lab (used to prefix backups)
- DB_BACKUP_PREFIX=${SPARROW_LAB_ID:-""}
- POSTGRES_DB=sparrow
# Go-cron syntax for backup schedule
- SCHEDULE=${SPARROW_BACKUP_SCHEDULE:-@daily}
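      # e.g. SPARROW_BACKUP_SCHEDULE="@every 12h" or a cron expression such as "0 2 * * *"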
tty: true
command: backup-service
depends_on:
- db
volumes:
- ${SPARROW_BACKUP_DIR:-placeholder}:/local-backups
  # The broker and worker containers provide the infrastructure for running import
  # and processing tasks separately from the web server. They run only under the
  # "task-worker" profile.
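  # (the backend's SPARROW_TASK_BROKER=redis://broker:6379/0 setting above points at
  # this `broker` service)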
broker:
profiles:
- task-worker
image: redis:latest
worker:
<<: *x-backend
depends_on:
- db
- broker
profiles:
- task-worker
command: /bin/run-worker
volumes:
frontend_build:
  # Named volume to preserve `sparrow shell` history
ipython_config:
db_cluster:
runtime_data:
backend_cache:
  # Placeholder volume mounted when an optional host directory is not provided
placeholder:
model_cache: