From 1933b157269dac2a10f86df13747efd45c87f3be Mon Sep 17 00:00:00 2001
From: greg pereira
Date: Wed, 12 Jun 2024 17:10:09 -0700
Subject: [PATCH] adding ansible playbook pieces

Signed-off-by: greg pereira
---
 .env                                          |   2 +-
 .../workflows/ai-lab-remote-rhel-build.yaml   |  61 ++++++--
 build/ci/rhel-ansible/ansible.cfg             |   4 +
 build/ci/rhel-ansible/playbook.yaml           | 148 ++++++++++++++++++
 build/ci/rhel-ansible/requirements.yaml       |   3 +
 build/docker/builder/cpu/rhel9/Dockerfile     |  10 +-
 build/docker/milvus/rhel9/Dockerfile          |   4 +-
 build/docker/milvus/rhel9/install-openblas.sh |   2 +-
 docker-compose.yml                            |   6 +-
 9 files changed, 217 insertions(+), 23 deletions(-)
 create mode 100644 build/ci/rhel-ansible/ansible.cfg
 create mode 100644 build/ci/rhel-ansible/playbook.yaml
 create mode 100644 build/ci/rhel-ansible/requirements.yaml

diff --git a/.env b/.env
index 578cd5de2b326..b7131d681c5dd 100644
--- a/.env
+++ b/.env
@@ -3,7 +3,7 @@
 # If this is uncommented it will pull and not rebuild
 # IMAGE_REPO=quay.io/grpereir
 IMAGE_REPO=quay.io/ai-lab
-IMAGE_ARCH=arm64
+IMAGE_ARCH=amd64
 OS_NAME=rhel9
 
 # for services.builder.image in docker-compose.yml
diff --git a/.github/workflows/ai-lab-remote-rhel-build.yaml b/.github/workflows/ai-lab-remote-rhel-build.yaml
index 8a47a34ea60b4..2a7c1180b7a99 100644
--- a/.github/workflows/ai-lab-remote-rhel-build.yaml
+++ b/.github/workflows/ai-lab-remote-rhel-build.yaml
@@ -20,15 +20,18 @@ env:
   TF_VAR_ami_id: ${{ secrets.AMI_ID }}
 
 jobs:
-  ai-lab-podman-remote:
+  rhel9-milvus:
     runs-on: ubuntu-24.04
+    strategy:
+      fail-fast: false
+      max-parallel: 1
     steps:
       - name: Setup Terraform
         uses: hashicorp/setup-terraform@v3.1.1
         with:
           terraform_version: "1.7.5"
 
-      - name: Checkout
+      - name: Checkout code on runner
         uses: actions/checkout@v4.1.6
 
       - name: sshkeygen for ansible
@@ -39,20 +42,58 @@ jobs:
 
       - name: Terraform Apply
         run: terraform apply -auto-approve
+
+      - name: Set up Python on runner
+        uses: actions/setup-python@v5.1.0
+        with:
+          python-version: '3.11'
 
-      - name: Terraform Output
-        id: terraform-output
+      - name: Install Ansible on runner
         run: |
-          echo "id=$(terraform output id | xargs)" >> $GITHUB_OUTPUT
-          echo "url=$(terraform output host | xargs)" >> $GITHUB_OUTPUT
-          echo "ssh_public_key=$(terraform output ssh_public_key | xargs)" >> $GITHUB_OUTPUT
-          echo "pem_filename=$(terraform output pem_filename | xargs)" >> $GITHUB_OUTPUT
+          python3 -m pip install --upgrade pip
+          pip install ansible
+
+      # currently no reqs
+      # - name: Ansible Collections
+      #   working-directory: build/ci/rhel-ansible
+      #   run: ansible-galaxy install -r requirements.yaml
 
-      - name: Install podman remote
+      - name: Install jq and build inventory on runner
         run: |
-          sudo apt-get install -y podman podman-remote
           sudo apt-get install -y jq
+          PUBLIC_IP=$(terraform output -json | jq -r '.public_ip.value')
+          # PUBLIC_IP=$(terraform output -json | jq -r '.public_ip.value' | cut -d "\"" -f 2)
+          echo "public_ip=$PUBLIC_IP" >> $GITHUB_OUTPUT
+          echo "[test_environments]" > build/ci/rhel-ansible/inventory.ini
+          echo "test_environment_host ansible_host=${PUBLIC_IP}" >> build/ci/rhel-ansible/inventory.ini
+          # cat build/ci/rhel-ansible/inventory.ini
+
+      - name: Setup tmate session
+        # if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.18
+        timeout-minutes: 17
+        with:
+          detached: true
+          limit-access-to-actor: true
+
+      - name: Provision runner to ec2
+        working-directory: build/ci/rhel-ansible
+        run: |
+          ansible-playbook -vv playbook.yaml \
+            -i inventory.ini \
+            --private-key=/home/runner/.ssh/id_rsa \
+            --extra-vars "registry_user=${{ secrets.REGISTRY_USER }}" \
+            --extra-vars "registry_pass=${{ secrets.REGISTRY_PASS }}" \
+            --extra-vars "subman_user=${{ secrets.SUBMAN_USER }}" \
+            --extra-vars "subman_pass=${{ secrets.SUBMAN_PASS }}"
+        env:
+          ANSIBLE_CONFIG: ansible.cfg
 
       - name: Terraform Destroy
         if: always()
         run: terraform destroy -auto-approve
+
+      # For stacked runs of CI with concurrency allow for destroy to work
+      - name: Wait for 30 seconds for destroy to work
+        if: always()
+        run: sleep 30
diff --git a/build/ci/rhel-ansible/ansible.cfg b/build/ci/rhel-ansible/ansible.cfg
new file mode 100644
index 0000000000000..c0f038aa3984b
--- /dev/null
+++ b/build/ci/rhel-ansible/ansible.cfg
@@ -0,0 +1,4 @@
+[ssh_connection]
+ssh_common_args = -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ;
+[defaults]
+host_key_checking = False
diff --git a/build/ci/rhel-ansible/playbook.yaml b/build/ci/rhel-ansible/playbook.yaml
new file mode 100644
index 0000000000000..a443e5f9d06b2
--- /dev/null
+++ b/build/ci/rhel-ansible/playbook.yaml
@@ -0,0 +1,148 @@
+---
+# Provisions the RHEL host listed in the test_environments inventory group and
+# builds the rhel9-milvus images on it: installs Docker, logs in to quay.io,
+# clones redhat-et/milvus, then runs the builder and image build scripts.
+- name: Building rhel9-milvus
+  hosts: test_environments
+  remote_user: ec2-user
+  become: true
+  # Facts are gathered explicitly below, once the host accepts connections.
+  gather_facts: false
+
+# THIS RUNS ON RHEL AMI AS BUILDER FOR SUBMAN
+
+  tasks:
+
+    - name: Wait until the instance is ready
+      ansible.builtin.wait_for_connection:
+        delay: 15
+        timeout: 180
+
+    - name: Gather facts for first time
+      ansible.builtin.setup:
+
+    # - name: DEBUG - sleep
+    #   ignore_unreachable: true
+    #   ansible.builtin.shell: |
+    #     sleep 600
+
+    - name: remove podman for clean docker install
+      ansible.builtin.shell: |
+        sudo dnf -y remove \
+          docker \
+          docker-client \
+          docker-client-latest \
+          docker-common \
+          docker-latest \
+          docker-latest-logrotate \
+          docker-logrotate \
+          docker-engine \
+          podman \
+          runc
+
+    - name: setup docker server and docker compose
+      async: 1000
+      poll: 0
+      register: docker_install_result
+      ansible.builtin.shell: |
+        sudo yum install -y yum-utils
+        sudo yum-config-manager --add-repo https://download.docker.com/linux/rhel/docker-ce.repo
+        sudo yum install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+        sudo systemctl start docker
+
+    - name: Check on downloading docker + docker tools
+      async_status:
+        jid: "{{ docker_install_result.ansible_job_id }}"
+      register: job_result
+      until: job_result.finished
+      retries: 25
+      delay: 10
+
+    - name: Ensure Docker is running
+      ansible.builtin.systemd:
+        name: docker
+        state: started
+        enabled: true
+
+    - name: Install the docker-compose binary
+      ansible.builtin.shell: |
+        set -eu
+        # Map the machine architecture to the docker-compose release asset name.
+        case "$(uname -m)" in
+          arm64|aarch64) COMPOSE_ARCH=aarch64 ;;
+          amd64|x86_64) COMPOSE_ARCH=x86_64 ;;
+          *) echo "unsupported architecture: $(uname -m)" >&2; exit 1 ;;
+        esac
+        cd /tmp
+        # -f makes curl fail on an HTTP error instead of saving the error page.
+        curl -fsLO "https://github.com/docker/compose/releases/download/v2.28.0/docker-compose-linux-${COMPOSE_ARCH}"
+        sudo mv "/tmp/docker-compose-linux-${COMPOSE_ARCH}" /usr/bin/docker-compose
+        chmod +x /usr/bin/docker-compose
+
+    - name: Log in to quay.io
+      community.docker.docker_login:
+        username: "{{ registry_user }}"
+        password: "{{ registry_pass }}"
+        registry: quay.io
+
+    - name: Clone Git repository
+      ansible.builtin.git:
+        repo: https://github.com/redhat-et/milvus.git
+        dest: "/home/ec2-user/milvus"
+        version: "rhel9-milvus"
+        clone: true
+        update: true
+
+    # NOTE(review): debugging pause; it adds 400 s to every run - confirm it is still needed.
+    - name: DEBUG - sleep
+      ansible.builtin.shell: |
+        sleep 400
+
+    - name: Make the builder image
+      async: 1000
+      poll: 0
+      register: builder_result
+      ansible.builtin.shell: |
+        set -x
+        cd /home/ec2-user/milvus/
+        ./build/builder.sh make install
+        ls -al /home/ec2-user/milvus/
+        set +x
+
+    - name: Check on the builder image
+      async_status:
+        jid: "{{ builder_result.ansible_job_id }}"
+      register: job_result
+      until: job_result.finished
+      retries: 100
+      delay: 10
+
+    - name: Make the milvus image
+      async: 1000
+      poll: 0
+      register: milvus_result
+      ansible.builtin.shell: |
+        cd /home/ec2-user/milvus
+        ls -al /home/ec2-user/milvus
+        mv /home/ec2-user/milvus/bin /home/ec2-user/milvus/build/docker/milvus/rhel9/
+        mv /home/ec2-user/milvus/configs /home/ec2-user/milvus/build/docker/milvus/rhel9/
+        mv /home/ec2-user/milvus/lib /home/ec2-user/milvus/build/docker/milvus/rhel9/
+        # The play runs with become, so no `sudo su` prefix is needed (it only spawned a separate shell).
+        /home/ec2-user/milvus/build/build_image.sh make
+
+    - name: Check on the milvus image
+      async_status:
+        jid: "{{ milvus_result.ansible_job_id }}"
+      register: job_result
+      until: job_result.finished
+      retries: 100
+      delay: 10
+
+    - name: log docker images
+      ansible.builtin.shell: |
+        docker images
+
+    # NOTE(review): debugging pause; it adds 400 s to every run - confirm it is still needed.
+    - name: DEBUG - sleep
+      ansible.builtin.shell: |
+        sleep 400
diff --git a/build/ci/rhel-ansible/requirements.yaml b/build/ci/rhel-ansible/requirements.yaml
new file mode 100644
index 0000000000000..d764e6348d354
--- /dev/null
+++ b/build/ci/rhel-ansible/requirements.yaml
@@ -0,0 +1,3 @@
+---
+collections:
+  - name: community.docker
\ No newline at end of file
diff --git a/build/docker/builder/cpu/rhel9/Dockerfile b/build/docker/builder/cpu/rhel9/Dockerfile
index 6913832fa3d94..7779122be21ca 100644
--- a/build/docker/builder/cpu/rhel9/Dockerfile
+++ b/build/docker/builder/cpu/rhel9/Dockerfile
@@ -24,8 +24,8 @@ RUN /opt/vcpkg/bootstrap-vcpkg.sh -disableMetrics && \
 
 FROM registry.access.redhat.com/ubi9/ubi:9.4-947.1717074712
 ARG TARGETARCH
-ARG SUBMAN_USER
-ARG SUBMAN_PASS
+# ARG SUBMAN_USER
+# ARG SUBMAN_PASS
 USER 0
 
 # basic deps
@@ -36,9 +36,9 @@ RUN dnf install -y make cmake automake gcc gcc-c++ \
 
 RUN alias python3='python3.11'
 
-# Assumes you have a valid subman subscription
- # This gets used for the codeready-builder-for-rhel-9- stream for openblas-devel
-
+# Assumes you have a valid subman subscription at the host machine
+# RUN subscription-manager register --username $SUBMAN_USER --password $SUBMAN_PASS --force
+# This gets used for the codeready-builder-for-rhel-9- stream for openblas-devel
 COPY build/docker/builder/cpu/rhel9/install-rpms.sh /root/install-rpms.sh
 RUN chmod +x /root/install-rpms.sh
 RUN /root/install-rpms.sh
diff --git a/build/docker/milvus/rhel9/Dockerfile b/build/docker/milvus/rhel9/Dockerfile
index cd7f2defbfee8..e6b142afdeebe 100644
--- a/build/docker/milvus/rhel9/Dockerfile
+++ b/build/docker/milvus/rhel9/Dockerfile
@@ -19,8 +19,8 @@ RUN dnf install -y wget libgomp libaio libatomic
 USER 0
 
 # Assumes you have a valid subman subscription
- # This gets used for the codeready-builder-for-rhel-9- stream for openblas-devel
-
+RUN subscription-manager register --auto-attach
+# This gets used for the codeready-builder-for-rhel-9- stream for openblas-devel
 COPY build/docker/milvus/rhel9/install-openblas.sh /home/install-openblas.sh
 RUN chmod +x /home/install-openblas.sh
 RUN TARGETARCH=$TARGETARCH /home/install-openblas.sh
diff --git a/build/docker/milvus/rhel9/install-openblas.sh b/build/docker/milvus/rhel9/install-openblas.sh
index 3695b274de479..722b010f5a52f 100644
--- a/build/docker/milvus/rhel9/install-openblas.sh
+++ b/build/docker/milvus/rhel9/install-openblas.sh
@@ -19,4 +19,4 @@ if [[ "$RUN_MODE" == "aarch64" ]] || [[ "$RUN_MODE" == "x86_64" ]]; then
 else
     echo "uncaught runmode based on invalid \$TARGETARCH."
     exit 1
-fi
\ No newline at end of file
+fi
diff --git a/docker-compose.yml b/docker-compose.yml
index b89e9f2c4ad6d..536b7b6a72290 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,3 @@
-version: '3.5'
-
 x-ccache: &ccache
   CCACHE_COMPILERCHECK: content
   CCACHE_COMPRESS: 1
@@ -16,8 +14,8 @@ services:
       dockerfile: build/docker/builder/cpu/${OS_NAME}/Dockerfile
       args:
         TARGETARCH: ${IMAGE_ARCH}
-        SUBMAN_USER: ${SUBMAN_USER}
-        SUBMAN_PASS: ${SUBMAN_PASS}
+        # SUBMAN_USER: ${SUBMAN_USER}
+        # SUBMAN_PASS: ${SUBMAN_PASS}
       cache_from:
         - ${IMAGE_REPO}/milvus-env:${OS_NAME}-${LATEST_DATE_VERSION}
     platform: linux/${IMAGE_ARCH}