From 1801e05911d7c803b0d6f358d3f47f3ef8e95e16 Mon Sep 17 00:00:00 2001
From: Roger Meier
Date: Thu, 29 Feb 2024 15:07:52 +0100
Subject: [PATCH 1/3] feat: add a docker-compose-distributed example with multiple workers

---
Note: the new file defines a base `xinference` service (YAML anchor
`&xinference`, NVIDIA GPU reservation) that the supervisor and the two
worker services merge in via `<<: *xinference`.

 .../docker/docker-compose-distributed.yml | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 xinference/deploy/docker/docker-compose-distributed.yml

diff --git a/xinference/deploy/docker/docker-compose-distributed.yml b/xinference/deploy/docker/docker-compose-distributed.yml
new file mode 100644
index 0000000000..bcbfbc5f7e
--- /dev/null
+++ b/xinference/deploy/docker/docker-compose-distributed.yml
@@ -0,0 +1,48 @@
+version: '3.8'
+
+services:
+  xinference: &xinference
+    image: xprobe/xinference:latest
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: [gpu]
+              driver: nvidia
+              count: all
+#    volumes:
+#      # Replace with your xinference home path on the host machine
+#      - :/root/.xinference
+#      # Replace with your huggingface cache path, default is
+#      # /.cache/huggingface
+#      - :/root/.cache/huggingface
+#      # If models are downloaded from modelscope, replace with
+#      # your modelscope cache path, default is /.cache/modelscope
+#      - :/root/.cache/modelscope
+#    environment:
+#      # add envs here. Here's an example, if you want to download model from modelscope
+#      - XINFERENCE_MODEL_SRC=modelscope
+
+  xinference-supervisor:
+    <<: *xinference
+    ports:
+      - "9997:9997"
+      - "9999:9999"
+    command: xinference-supervisor --host xinference-supervisor --port 9997 --supervisor-port 9999
+    restart: always
+
+  # This examples is just using two workers. You can add more by incrementing
+  # the worker suffix and port number.
+  xinference-worker-1:
+    <<: *xinference
+    ports:
+      - "30001:30001"
+    command: xinference-worker -e http://xinference-supervisor:9997 --host xinference-worker-1 --worker-port 30001
+    restart: always
+
+  xinference-worker-2:
+    <<: *xinference
+    ports:
+      - "30002:30002"
+    command: xinference-worker -e http://xinference-supervisor:9997 --host xinference-worker-2 --worker-port 30002
+    restart: always

From 4ccd3a0f495dd7671c224704cb5757d4859a6393 Mon Sep 17 00:00:00 2001
From: Roger Meier
Date: Fri, 1 Mar 2024 10:53:55 +0100
Subject: [PATCH 2/3] feat: add healthcheck to docker-compose-distributed

---
Note: adds a curl-based healthcheck on the supervisor's /status endpoint
and makes both workers wait for it via `depends_on` with
`condition: service_healthy`.

 .../deploy/docker/docker-compose-distributed.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/xinference/deploy/docker/docker-compose-distributed.yml b/xinference/deploy/docker/docker-compose-distributed.yml
index bcbfbc5f7e..2c15f02f63 100644
--- a/xinference/deploy/docker/docker-compose-distributed.yml
+++ b/xinference/deploy/docker/docker-compose-distributed.yml
@@ -30,6 +30,12 @@ services:
       - "9999:9999"
     command: xinference-supervisor --host xinference-supervisor --port 9997 --supervisor-port 9999
     restart: always
+    healthcheck:
+      test: curl --fail http://xinference-supervisor:9997/status || exit 1
+      interval: 60s
+      retries: 5
+      start_period: 20s
+      timeout: 5s
 
   # This examples is just using two workers. You can add more by incrementing
   # the worker suffix and port number.
@@ -39,6 +45,9 @@ services:
       - "30001:30001"
     command: xinference-worker -e http://xinference-supervisor:9997 --host xinference-worker-1 --worker-port 30001
     restart: always
+    depends_on:
+      xinference-supervisor:
+        condition: service_healthy
 
   xinference-worker-2:
     <<: *xinference
@@ -46,3 +55,6 @@ services:
       - "30002:30002"
     command: xinference-worker -e http://xinference-supervisor:9997 --host xinference-worker-2 --worker-port 30002
     restart: always
+    depends_on:
+      xinference-supervisor:
+        condition: service_healthy

From 20221c09c7e9863d6253804805142f066df12350 Mon Sep 17 00:00:00 2001
From: Roger Meier
Date: Wed, 6 Mar 2024 07:06:26 +0100
Subject: [PATCH 3/3] refactor: set interval/start_period to 5s within docker-compose-distributed

---
Note: lowers the supervisor healthcheck `interval` from 60s to 5s and
`start_period` from 20s to 5s; `retries` and `timeout` are unchanged.

 xinference/deploy/docker/docker-compose-distributed.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xinference/deploy/docker/docker-compose-distributed.yml b/xinference/deploy/docker/docker-compose-distributed.yml
index 2c15f02f63..3d2a4c3d00 100644
--- a/xinference/deploy/docker/docker-compose-distributed.yml
+++ b/xinference/deploy/docker/docker-compose-distributed.yml
@@ -32,9 +32,9 @@ services:
     restart: always
     healthcheck:
       test: curl --fail http://xinference-supervisor:9997/status || exit 1
-      interval: 60s
+      interval: 5s
       retries: 5
-      start_period: 20s
+      start_period: 5s
       timeout: 5s
 
   # This examples is just using two workers. You can add more by incrementing