diff --git a/.gitignore b/.gitignore index f1592719..2bfb8c34 100644 --- a/.gitignore +++ b/.gitignore @@ -121,6 +121,8 @@ celerybeat.pid # Environments .env +.env.mainnet +.env.testnet .venv env/ venv/ diff --git a/caddy/Caddyfile b/caddy/Caddyfile index 063028d8..735e713d 100644 --- a/caddy/Caddyfile +++ b/caddy/Caddyfile @@ -16,6 +16,20 @@ uri strip_prefix /nuc reverse_proxy nuc-api:8080 } + handle_path /prod/nuc/* { + uri strip_prefix /prod/nuc + reverse_proxy prod-nilai-nuc-api:8080 + } + + handle_path /prod/grafana/* { + uri strip_prefix /prod/grafana + reverse_proxy prod-grafana:3000 + } + + handle_path /prod/* { + uri strip_prefix /prod/base + reverse_proxy prod-nilai-api:8080 + } handle { reverse_proxy api:8080 diff --git a/docker-compose.v2.dev.yml b/docker-compose.v2.dev.yml new file mode 100644 index 00000000..a0533e79 --- /dev/null +++ b/docker-compose.v2.dev.yml @@ -0,0 +1,28 @@ +#! This is used to deploy the nilai-api for the development environment +#! It include the api, nuc-api, postgres dbs, redis, prometheus, grafana and node-exporter +#! It doesn't use caddy (as it is unique for the whole system) +#! It doesn't include any models as it is redundant with the main nilAI instance +services: + api: + platform: linux/amd64 # for macOS to force running on Rosetta 2 + ports: + - "8081:8080" + volumes: + - ./nilai-api/:/app/nilai-api/ + - ./packages/:/app/packages/ + - ./nilai-auth/nuc-helpers/:/app/nilai-auth/nuc-helpers/ + networks: + - nilauth + nuc-api: + platform: linux/amd64 # for macOS to force running on Rosetta 2 + ports: + - "8089:8080" + volumes: + - ./nilai-api/:/app/nilai-api/ + - ./packages/:/app/packages/ + - ./nilai-auth/nuc-helpers/:/app/nilai-auth/nuc-helpers/ + networks: + - nilauth + +networks: + nilauth: diff --git a/docker-compose.v2.yml b/docker-compose.v2.yml new file mode 100644 index 00000000..3f83f1ef --- /dev/null +++ b/docker-compose.v2.yml @@ -0,0 +1,185 @@ +#! This is a docker compose to deploy another endpoint for the nilai-api +#! This is used to deploy the nilai-api for the production environment +#! It include the api, nuc-api, postgres dbs, redis, prometheus, grafana and node-exporter +#! It doesn't use caddy (as it is unique for the whole system) +#! It doesn't include any models as it is redundant with the main nilAI instance +services: + redis: + image: 'redis:latest' + networks: + - frontend_net + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + postgres: + image: postgres:16 + restart: always + env_file: + - .env + networks: + - frontend_net + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "sh", "-c", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB} -h localhost"] + interval: 30s + retries: 5 + start_period: 10s + timeout: 10s + + nuc-postgres: + image: postgres:16 + restart: always + env_file: + - .env + environment: + - POSTGRES_HOST=nuc-postgres + networks: + - frontend_net + volumes: + - nuc_postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD", "sh", "-c", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB} -h localhost"] + interval: 30s + retries: 5 + start_period: 10s + timeout: 10s + prometheus: + image: prom/prometheus:v3.1.0 + restart: unless-stopped + networks: + - frontend_net + user: "$UID:$GID" + volumes: + - ${PWD}/prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml + - ${PWD}/prometheus/data:/prometheus/data + command: "--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.retention.time=30d --web.enable-admin-api" + healthcheck: + test: ["CMD", "wget", "http://localhost:9090/-/healthy", "-O", "/dev/null", "-o", "/dev/null"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + + node_exporter: + image: quay.io/prometheus/node-exporter:v1.8.2 + command: + - '--path.rootfs=/host' + networks: + - frontend_net + pid: host + restart: unless-stopped + volumes: + - '/:/host:ro,rslave' + healthcheck: + test: ["CMD", "wget", "http://localhost:9100/", "-O", "/dev/null", "-o", "/dev/null"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + + grafana: + container_name: prod-grafana + image: 'grafana/grafana:11.5.1' + restart: unless-stopped + networks: + - frontend_net + - proxy_net + user: "$UID:$GID" + depends_on: + - prometheus + volumes: + - ${PWD}/grafana/runtime-data:/var/lib/grafana + - ${PWD}/grafana/datasources/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml + - ${PWD}/grafana/dashboards/filesystem.yml:/etc/grafana/provisioning/dashboards/filesystem.yml + - ${PWD}/grafana/config/grafana.ini:/etc/grafana/grafana.ini + env_file: + - .env + environment: + - GF_USERS_ALLOW_SIGN_UP=false + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + + api: + container_name: prod-nilai-api + image: nillion/nilai-api:latest + privileged: true + volumes: + - ./nilai-api/src/nilai_api/config/:/app/nilai-api/src/nilai_api/config/ + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + networks: + - frontend_net + - backend_net + - proxy_net + env_file: + - .env + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/v1/health"] + interval: 30s + retries: 3 + start_period: 15s + timeout: 10s + nuc-api: + container_name: prod-nilai-nuc-api + image: nillion/nilai-api:latest + privileged: true + volumes: + - ./nilai-api/src/nilai_api/config/:/app/nilai-api/src/nilai_api/config/ + depends_on: + nuc-postgres: + condition: service_healthy + api: + condition: service_healthy + restart: unless-stopped + networks: + - frontend_net + - backend_net + - proxy_net + env_file: + - .env + environment: + - AUTH_STRATEGY=nuc # Overwrite the default strategy + - POSTGRES_HOST=nuc-postgres + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/v1/health"] + interval: 30s + retries: 3 + start_period: 15s + timeout: 10s + +networks: + # * This is the network that is used for the "api" related services + ## * API, user management (postgres), caching (redis) and monitoring (prometheus, grafana) + ## * This network is used to connect the api, nuc-api, postgres, redis, prometheus, grafana and node-exporter + ## * Because they are unique, the network is not external + frontend_net: + # * This is the network that is used for the model related services: + ## * API, models, and etcd + ## * This network is used to connect the api, nuc-api, models, and etcd + ## * This network is created by the main nilAI instance + ## * This network is external + backend_net: + name: backend_net + external: true + # * This network is meant to connect from the outside world to the API + ## * This connects the API and Caddy + ## * This network is created by the main nilAI instance + ## * This requires each API instance to have a unique container name + proxy_net: + name: proxy_net + external: true + +volumes: + postgres_data: + nuc_postgres_data: diff --git a/docker-compose.yml b/docker-compose.yml index 5ca3aed8..bd4dc392 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -207,15 +207,26 @@ services: - ./caddy/caddy_data:/data - ./caddy/caddy_config:/config networks: - # This is the network that is used for the "api" related services - ## API, user management (postgres), caching (redis) and monitoring (prometheus, grafana) + # * This is the network that is used for the "api" related services + ## * API, user management (postgres), caching (redis) and monitoring (prometheus, grafana) + ## * This network is used to connect the api, nuc-api, postgres, redis, prometheus, grafana and node-exporter + ## * Because they are unique to the deployment the name is not specified, the network is not external frontend_net: - # This is the network that is used for the model related services: - ## API, models, and etcd + # * This is the network that is used for the model related services: + ## * API, models, and etcd + ## * This network is used to connect the api, nuc-api, models, and etcd + ## * This network is meant to be external, so the name is specified + ## * Other API instances will use this network backend_net: - # This network is meant to connect from the outside world to the API - ## This connects the API and Caddy + name: backend_net + # * This network is meant to connect from the outside world to the API + ## * This connects the API and Caddy + ## * This network is created by the main nilAI instance + ## * This requires each API instance to have a unique container name + ## * This network is meant to be external, so the name is specified + ## * Other API instances will use this network proxy_net: + name: proxy_net volumes: postgres_data: diff --git a/grafana/runtime-data/dashboards/nuc-query-data.json b/grafana/runtime-data/dashboards/nuc-query-data.json index 299f0557..d66fd428 100644 --- a/grafana/runtime-data/dashboards/nuc-query-data.json +++ b/grafana/runtime-data/dashboards/nuc-query-data.json @@ -556,7 +556,7 @@ }, "datasource": { "type": "PostgreSQL", - "uid": "eehsf95n2at4we" + "uid": "eehsf95n2at4wf" }, "definition": "SELECT DISTINCT \n name AS text, \n name AS value \nFROM users \nORDER BY name", "includeAll": true, @@ -576,7 +576,7 @@ }, "datasource": { "type": "PostgreSQL", - "uid": "eehsf95n2at4we" + "uid": "eehsf95n2at4wf" }, "definition": "SELECT DISTINCT model FROM query_logs ORDER BY model", "includeAll": true, @@ -608,7 +608,7 @@ ] }, "timezone": "browser", - "title": "PostgreSQL Query Analytics Dashboard", + "title": "NUC PostgreSQL Query Analytics Dashboard", "uid": "den2cki7odfrek", "version": 3, "weekStart": ""