docker-compose.yaml
version: "3.6"
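# Shared backend configuration: the `backend` and `worker` services below merge this
# block in via the YAML merge key (`<<: *x-backend`), so changes here apply to both.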
x-backend:
&x-backend
image: ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend}
build:
context: backend
cache_from:
- ${SPARROW_BACKEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/backend}
command: /bin/run
# Make sure we get colorized terminal output
tty: true
depends_on:
- db
environment:
- SPARROW_ENV
- SPARROW_SECRET_KEY
- SPARROW_BASE_URL
- SPARROW_LAB_NAME
- SPARROW_ECHO_SQL
- SPARROW_DATABASE=postgresql+psycopg2://postgres@db/sparrow
# Inside the container, the data directory is mapped to a standardized folder
- SPARROW_DATA_DIR=/data
# A cache for files that need to persist but can be rebuilt.
- SPARROW_CACHE_DIR=/cache
# We have an experimental caching mechanism for SQLAlchemy models, but this
# may cause bugs for importers and is not enabled by default. (added in v2.2.0)
- SPARROW_CACHE_DATABASE_MODELS
- SPARROW_INIT_SQL=/init-sql
# For cloud data support
# NOTE: it may be useful to split this into a
# separate container, but it's centralized
# for now...
- SPARROW_S3_ENDPOINT
- SPARROW_S3_BUCKET
- SPARROW_S3_KEY
- SPARROW_S3_SECRET
# Whether we should enable a worker pool for running tasks. If we don't,
# tasks must be run via the command line.
- SPARROW_TASK_WORKER
- SPARROW_TASK_BROKER=redis://broker:6379/0
volumes:
    # Volume for helper scripts that support operations like migrations.
    # TODO: come up with a better way to organize this
- ./_cli/bin:/sparrow-bin
# Read-only volume for source code
# How this mount is set up has severe implications for dev-mode performance,
# as server reloaders tend to use non-performant file watchers...
- type: bind
source: ./backend/
target: /app
read_only: true
consistency: cached
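      # (`consistency: cached` only affects bind mounts on Docker Desktop for macOS;
      # it is ignored on other platforms)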
# Nested volume to keep built files
# separate from those on our local system
#- /app/sparrow.egg-info
# Link the data directory so we can find files if we are running
# the importer in the backend
# NOTE: we should think about moving this to another container
# but this is precluded by our current plugin architecture.
- ${SPARROW_DATA_DIR:-placeholder}:/data
# Share some configuration between backend
# and frontend
- ${SPARROW_INIT_SQL:-placeholder}:/init-sql/:ro
# The docker-compose volume for sparrow commands needs to be set
# even if a `SPARROW_COMMANDS` directory is not provided, so we can
# use a placeholder.
- ${SPARROW_COMMANDS_VOLUME:-placeholder}:/sparrow-commands/:ro
# Link `SPARROW_PLUGIN_DIR` directly into site-packages as a folder
# ...we could probably do this in a more elegant way
- ${SPARROW_PLUGIN_DIR:-placeholder}:/usr/local/lib/python3.9/site-packages/sparrow_plugins:ro
    # Save IPython configuration to a named volume so that we keep
    # command history between app runs
- ipython_config:/root/.ipython/profile_default
# Save CLI help info for rapid access
- runtime_data:/run:ro
# A working location for caching files that need to persist over time
    # (for example, git repositories for the PyChron importer). This could conceivably
    # be delegated to a separate importer container, but that seems overly complex for now.
- backend_cache:/cache
- model_cache:/root/.sqlalchemy-cache
services:
gateway:
ports:
- ${SPARROW_HTTP_PORT:-5002}:80
volumes:
- ./nginx-config/locations/core.conf:/etc/nginx/locations/core.conf:ro
# By default, in production mode, we serve Sparrow's compiled frontend application
# at the root route. In development mode, this is replaced by a dynamic webpack server
# that recompiles on application changes.
- ./nginx-config/locations/frontend-production.conf:/etc/nginx/locations/frontend.conf:ro
- frontend_build:/frontend
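      # (the `frontend` service writes its compiled assets into this shared volume)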
# Application error pages
- ./nginx-config/error-pages:/usr/share/nginx/error-pages/error:ro
# SPARROW_DATA_DIR is made accessible from the server container but not exposed.
- ${SPARROW_DATA_DIR:-placeholder}:/data:ro
backend:
<<: *x-backend
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5000/api/v2/"]
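      # `curl -f` exits non-zero on HTTP error responses, so a failing API marks
      # the container unhealthy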
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
depends_on:
- gateway
expose:
- 5000
profiles:
- core
db:
# Right now we only support running Sparrow with a Docker-managed PostgreSQL cluster.
# In the future, we may loosen this restriction to allow externally-managed databases
# to ease the integration of Sparrow with other tools. However, managing the database
# in Docker eliminates a substantial amount of configuration complexity.
image: ${SPARROW_DATABASE_IMAGE:-ghcr.io/earthcubegeochron/sparrow/database:2.0}
expose:
- 5432
ports:
# A port range can be specified, but only with recent versions of Docker,
# so we default to a single port.
- ${SPARROW_DB_PORT:-54321}:5432
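      # (e.g. a host-side range such as "54321-54330:5432" would let Docker pick a
      # free port; syntax illustrative)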
environment:
- POSTGRES_DB=sparrow
- PGUSER=postgres
- POSTGRES_HOST_AUTH_METHOD=trust
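      # "trust" disables password authentication, so the published database port
      # should not be reachable from untrusted networks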
volumes:
- db_cluster:/var/lib/postgresql/data
profiles:
- core
healthcheck:
test: ["CMD", "pg_isready", "-U", "postgres"]
interval: 10s
timeout: 5s
retries: 3
pg_api:
image: postgrest/postgrest:v11.2.0
environment:
- PGRST_DB_URI=postgresql://authenticator:@db:5432/sparrow
- PGRST_DB_SCHEMAS=sparrow_api
- PGRST_DB_ANON_ROLE=view_public
- PGRST_JWT_SECRET=${SPARROW_SECRET_KEY}
- PGRST_DB_MAX_ROWS=100
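      # PostgREST exposes tables and views in the `sparrow_api` schema as REST
      # endpoints, e.g. GET /<table>?limit=10 (table name illustrative); responses
      # are capped by PGRST_DB_MAX_ROWS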
profiles:
- core
depends_on:
gateway:
condition: service_started
db:
condition: service_healthy
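        # `service_healthy` waits for the db healthcheck above to pass;
        # `service_started` only waits for the container to be running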
frontend:
image: ${SPARROW_FRONTEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/frontend}
build:
context: frontend
cache_from:
- ${SPARROW_FRONTEND_IMAGE:-ghcr.io/earthcubegeochron/sparrow/frontend}
profiles:
- frontend
depends_on:
- gateway
environment:
- CONTAINERIZED=1
- SPARROW_LAB_NAME
- SPARROW_ECHO_SQL
- BASE_URL=${SPARROW_BASE_URL:-/}
- API_BASE_URL=${SPARROW_BASE_URL:-/}
      # We need to forward SPARROW_HTTP_PORT to the frontend so that BrowserSync
      # can be loaded on the correct port in development mode
- SPARROW_HTTP_PORT
- MAPBOX_API_TOKEN
      # The SPARROW_SITE_CONTENT variable takes different values
      # inside and outside of the container
- SPARROW_SITE_CONTENT=/site-content
- SPARROW_FRONTEND_BUILD_DIR=/build
- SPARROW_ENV
volumes:
- frontend_build:/build
      # Site content defaults to the bundled frontend content unless
      # SPARROW_SITE_CONTENT points to a custom directory
- ${SPARROW_SITE_CONTENT:-./frontend/default-content}:/site-content:ro
# API TESTS container is disabled for now
# api-tests:
# build: api-tests
# # A placeholder command
# command: echo
# depends_on:
# - backend
database_backup:
image: ghcr.io/uw-macrostrat/pg-backup-service:main
# This service backs up the database periodically
# to a remote S3 bucket or a local directory on the server.
restart: unless-stopped
# This service will only run under the "production" profile
# https://docs.docker.com/compose/profiles/
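    # (enable it with e.g. `docker compose --profile production up -d`, or by
    # setting COMPOSE_PROFILES=production when invoking Compose directly)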
profiles:
- production
environment:
- S3_ENDPOINT=${SPARROW_S3_ENDPOINT:-""}
- S3_ACCESS_KEY=${SPARROW_S3_KEY:-""}
- S3_SECRET_KEY=${SPARROW_S3_SECRET:-""}
- S3_BACKUP_BUCKET=${SPARROW_BACKUP_BUCKET:-""}
# If we don't specify all S3 parameters,
# we fall back to using the local backup directory
- DB_BACKUP_DIR=${SPARROW_BACKUP_DIR:-""}
# Mounted volume for local backups (mapped to Sparrow backup dir)
- DB_BACKUP_VOLUME=/local-backups
# The unique-ish identifier of the lab (used to prefix backups)
- DB_BACKUP_PREFIX=${SPARROW_LAB_ID:-""}
- POSTGRES_DB=sparrow
# Go-cron syntax for backup schedule
- SCHEDULE=${SPARROW_BACKUP_SCHEDULE:-@daily}
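      # e.g. SPARROW_BACKUP_SCHEDULE="@every 12h" or a cron expression such as "0 2 * * *"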
tty: true
command: backup-service
depends_on:
- db
volumes:
- ${SPARROW_BACKUP_DIR:-placeholder}:/local-backups
  # The broker and worker containers provide the infrastructure for running import
  # and processing tasks separately from the web server. They run only under the
  # "task-worker" profile.
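  # (the backend's SPARROW_TASK_BROKER=redis://broker:6379/0 setting above points at
  # this `broker` service)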
broker:
profiles:
- task-worker
image: redis:latest
worker:
<<: *x-backend
depends_on:
- db
- broker
profiles:
- task-worker
command: /bin/run-worker
volumes:
frontend_build:
  # Named volume to preserve `sparrow shell` history
ipython_config:
db_cluster:
runtime_data:
backend_cache:
  # Placeholder volume mounted when an optional host directory is not provided
placeholder:
model_cache: