Skip to content

Commit

Permalink
Merge pull request #2060 from cfe-lab/CompileSlurm
Browse files Browse the repository at this point in the history
Ansible scripts and other tools and docs for migration to Ubuntu
  • Loading branch information
donkirkby authored Jan 26, 2024
2 parents 7d4bdd6 + 68bb6f9 commit c1c0c1a
Show file tree
Hide file tree
Showing 131 changed files with 3,567 additions and 903 deletions.
11 changes: 11 additions & 0 deletions cluster-setup/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Minimal image for running the cluster-setup Python tooling (needs pyyaml).
ARG PYTHON_VERSION="bookworm"

FROM python:${PYTHON_VERSION} AS base

# apt-get is the stable CLI for scripts (apt warns it has no stable interface);
# removing the package lists keeps the layer small.
RUN apt-get update -y && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*

# --no-cache-dir avoids baking pip's download cache into the image.
RUN pip install --no-cache-dir --upgrade pip

RUN pip install --no-cache-dir pyyaml

# Login shell so /etc/profile is sourced for interactive use.
CMD ["/bin/bash", "-l"]
531 changes: 488 additions & 43 deletions cluster-setup/README.md

Large diffs are not rendered by default.

66 changes: 45 additions & 21 deletions cluster-setup/Vagrantfile
Original file line number Diff line number Diff line change
@@ -1,69 +1,93 @@
# NOTE(review): this span is a rendered diff with removed and added lines
# interleaved (no +/- markers). The first two assignments are the removed
# 192.168.45.x addresses; the last two are the added 192.168.56.x
# replacements. Only one pair belongs in the real Vagrantfile.
HEAD_IP = "192.168.45.10"
WORKER_IP = "192.168.45.11"
HEAD_IP = "192.168.56.10"
WORKER_IP = "192.168.56.11"

# Copy the test SSH keys into `/home/vagrant/.ssh/`. Keys are copied manually
# to allow easy SSH traffic between VMs.
# NOTE(review): rendered diff — each hard-coded /home/vagrant line below is the
# removed version of the `home_dir`-parameterised line that follows it (the
# signature line pair included); only one of each pair belongs in the real file.
def add_keys(vm)
def add_keys(vm, home_dir="/home/vagrant")
vm.provision "file",
source: "./setupfiles/vagrant_testkey",
destination: "/home/vagrant/.ssh/id_ed25519"
destination: "#{home_dir}/.ssh/id_ed25519"
vm.provision "file",
source: "./setupfiles/vagrant_testkey.pub",
destination: "/home/vagrant/.ssh/id_ed25519.pub"
destination: "#{home_dir}/.ssh/id_ed25519.pub"
vm.provision "shell", inline: <<-EOS
chmod 600 /home/vagrant/.ssh/id_ed25519
chmod 644 /home/vagrant/.ssh/id_ed25519.pub
chmod 600 "#{home_dir}/.ssh/id_ed25519"
chmod 644 "#{home_dir}/.ssh/id_ed25519.pub"
EOS
end

# Enable SSH access by copying the test SSH public key into
# `/home/vagrant/.ssh/authorized_keys`
# NOTE(review): rendered diff — the hard-coded /home/vagrant lines are the
# removed versions of the `home_dir`-parameterised lines that follow them;
# only one of each pair belongs in the real file.
def add_key_access(vm)
def add_key_access(vm, home_dir="/home/vagrant")
vm.provision "file",
source: "./setupfiles/vagrant_testkey.pub",
destination: "/tmp/vagrant_testkey.pub"
vm.provision "shell", inline: <<-EOS
cat /tmp/vagrant_testkey.pub >> /home/vagrant/.ssh/authorized_keys
chmod 600 /home/vagrant/.ssh/authorized_keys
cat /tmp/vagrant_testkey.pub >> "#{home_dir}/.ssh/authorized_keys"
chmod 600 "#{home_dir}/.ssh/authorized_keys"
EOS
end

# Add a synced folder to emulate the cluster's network drive.
# NOTE(review): rendered diff — the live synced_folder call is the removed
# line and the commented-out copy below it is the added replacement.
def add_data_dir(vm)
  # FIX: Dir.exists? was deprecated and removed in Ruby 3.2; Dir.exist? is
  # the supported spelling. `unless` replaces `if not` per Ruby style.
  Dir.mkdir("./data") unless Dir.exist?("./data")
  vm.synced_folder "./data", "/data", mount_options: ["dmode=777", "fmode=664"]
  # vm.synced_folder "./data", "/data", mount_options: ["dmode=777", "fmode=664"]
end

# NOTE(review): rendered diff — removed lines appear immediately before their
# added replacements throughout this block (the centos8 box before the ubuntu
# box, cpus = 4 before cpus = 2, static-IP networks before the plain
# private_network lines, and the dnf provisioning before the apt lines).
# This span is not valid as-is; only one line of each pair belongs in the
# real Vagrantfile.
Vagrant.configure("2") do |config|
config.vm.box = "geerlingguy/centos8"
config.vm.box = "bento/ubuntu-22.04"

# Give preference to VMware when it's available.
config.vm.provider "vmware_desktop" do |vmw|
vmw.gui = false
vmw.memory = 8 * 1024
vmw.cpus = 2
end

config.vm.provider "virtualbox" do |vb|
vb.gui = false
vb.memory = 8 * 1024
vb.cpus = 4
vb.cpus = 2
end

config.vm.define :head do |head|
head.vm.hostname = "head"
head.vm.network "private_network", ip: HEAD_IP

# Static IPs are not supported by the VMware provider,
# but we can use them when using Virtualbox.
head.vm.network "private_network"
# head.vm.provider "virtualbox" do |vb, override|
# head.vm.network "private_network", ip: HEAD_IP
# end

add_keys(head.vm)
add_key_access(head.vm)
# add_keys(head.vm, home_dir="/root")
# add_key_access(head.vm, home_dir="/root")
add_data_dir(head.vm)
head.vm.provision "shell", path: "./setupfiles/install-ansible.sh"
end

config.vm.define :worker do |worker|
worker.vm.hostname = "worker"
worker.vm.network "private_network", ip: WORKER_IP

# As for the head node we set up Virtualbox and VMware providers differently.
worker.vm.network "private_network"
# worker.vm.provider "virtualbox" do |vb, override|
# override.vm.network "private_network", ip: WORKER_IP
# end

add_key_access(worker.vm)
# add_key_access(worker.vm, home_dir="/root")
# NOTE(review): the dnf lines below are the removed CentOS provisioning; the
# apt lines are the added Ubuntu replacement.
worker.vm.provision "shell", inline: <<-EOS
dnf install -q -y python3 epel-release
dnf config-manager --set-enabled PowerTools
apt update
apt install -y python3
EOS
add_data_dir(worker.vm)
end

config.vm.provision "shell", inline: <<-EOS
echo "#{HEAD_IP}\thead\n#{WORKER_IP}\tworker" >> /etc/hosts
EOS
# # Note: when using a VMware provider, these IP addresses won't be meaningful
# # and you'll have to manually configure /etc/hosts.
# config.vm.provision "shell", inline: <<-EOS
# echo "#{HEAD_IP}\thead\n#{WORKER_IP}\tworker" >> /etc/hosts
# EOS
end
8 changes: 8 additions & 0 deletions cluster-setup/compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Compose service that runs the user-export image against the installed
# cluster-setup directory. (Indentation restored; the page scrape stripped it.)
# NOTE(review): the top-level `version` key is obsolete and ignored by modern
# Docker Compose — confirm whether the targeted Compose release still needs it.
version: '3.9'

services:
  kive_export_clusters:
    # NOTE(review): service is named kive_export_clusters but the default
    # image is kive_export_users — confirm the mismatch is intentional.
    image: ${KIVE_EXPORT_USERS_IMAGE:-kive_export_users:latest}
    restart: unless-stopped
    volumes:
      # Host cluster_setup share (overridable) mounted into the container at /app.
      - ${KIVE_CLUSTER_SETUP_PATH:-/usr/local/share/Kive/cluster_setup}:/app
6 changes: 6 additions & 0 deletions cluster-setup/configure_hosts_file.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#! /usr/bin/env bash

# Append a "<ip>\t<name>" entry to /etc/hosts.
# Usage: configure_hosts_file.bash <name> <ip>
# Must be run with permission to write /etc/hosts (i.e. as root).

name=$1
ip=$2

# FIX(review): guard against missing arguments — without this, an empty
# name/ip would append a useless (or harmful) line to /etc/hosts.
if [ -z "$name" ] || [ -z "$ip" ]; then
    echo "Usage: $0 <name> <ip>" >&2
    exit 1
fi

echo -e "${ip}\t${name}" >> /etc/hosts
38 changes: 38 additions & 0 deletions cluster-setup/deploy_cluster_setup.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

# Use rsync to copy the cluster setup code to a target server.

# Command-line parameters:
#     prod|test
# e.g.
#     ./deploy_cluster_setup.sh prod

# If you need to override the default login/server or upload path, set
# the environment variables CLUSTER_SETUP_LOGIN and/or CLUSTER_SETUP_PATH.
# Check out the version of the code you want before running, as this script
# does not check out a fresh repository; we want this script to transfer over
# config files that would not be in a stock repo.

# Make sure you have (or the account you log into the server with has) appropriate
# permissions on the deployment path.

prod_or_test=$1

# FIX(review): git_tag was referenced below but never assigned, so the notes
# always read "Deployed tag/commit/branch  on ...". Capture it from git.
git_tag=$(git describe --tags)

echo "Deployed tag/commit/branch $git_tag on $(date)." > deployment_notes.txt
echo 'Output of "git describe":' >> deployment_notes.txt
git describe --tags >> deployment_notes.txt
echo 'Output of "git show --format=oneline --no-patch":' >> deployment_notes.txt
git show --format=oneline --no-patch >> deployment_notes.txt

# Anything other than "prod" (including no argument) deploys to the test server.
if [ "$prod_or_test" == "prod" ]; then
    server="kive-int.cfenet.ubc.ca"
else
    server="testkive-int.cfenet.ubc.ca"
fi
server_login=${CLUSTER_SETUP_LOGIN:-"${USER}@${server}"}

deployment_path=${CLUSTER_SETUP_PATH:-"/usr/local/src/cluster-setup"}

# FIX(review): dropped the redundant trailing "-a" (already implied by -avz)
# and quoted the remote spec in case the login or path contains spaces.
rsync -avz --exclude-from deploy_exclude_list.txt ./ "${server_login}:${deployment_path}"

echo "... done."
4 changes: 4 additions & 0 deletions cluster-setup/deploy_exclude_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Patterns excluded from deployment by deploy_cluster_setup.bash (rsync
# --exclude-from format; lines starting with "#" are ignored by rsync).
# These are host-specific configs and keys that must not be copied between
# environments.
deployment/group_vars/all.yml
deployment/group_vars/all.yaml
deployment/ansible.cfg
initialization/worker/head_node_root_id_ed25519.pub
3 changes: 3 additions & 0 deletions cluster-setup/deployment/ansible_bulbasaur.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ansible configuration for the Bulbasaur cluster. Copy to "ansible.cfg"
# (or point ANSIBLE_CONFIG at it) to activate.
[defaults]
inventory = ./inventory_bulbasaur.ini
interpreter_python = /usr/bin/python3
8 changes: 8 additions & 0 deletions cluster-setup/deployment/ansible_dev.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The main Ansible configuration file. Copy this to "ansible.cfg" and
# fill in the appropriate inventory file to use.
# See the following for available sections and keys:
# https://docs.ansible.com/ansible/latest/reference_appendices/config.html

[defaults]
# Inventory for the local Vagrant-based dev cluster.
inventory = ./inventory_dev.ini
# Pin the remote interpreter so Ansible doesn't have to discover one.
interpreter_python = /usr/bin/python3
3 changes: 3 additions & 0 deletions cluster-setup/deployment/ansible_octomore.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ansible configuration for the Octomore cluster. Copy to "ansible.cfg"
# (or point ANSIBLE_CONFIG at it) to activate.
[defaults]
inventory = ./inventory_octomore.ini
interpreter_python = /usr/bin/python3
8 changes: 8 additions & 0 deletions cluster-setup/deployment/ansible_template.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The main Ansible configuration file. Copy this to "ansible.cfg" and
# fill in the appropriate inventory file to use.
# See the following for available sections and keys:
# https://docs.ansible.com/ansible/latest/reference_appendices/config.html

[defaults]
# NOTE(review): this template ships pre-filled with the dev inventory —
# replace with the inventory for your target environment after copying.
inventory = ./inventory_dev.ini
interpreter_python = /usr/bin/python3
46 changes: 46 additions & 0 deletions cluster-setup/deployment/bulbasaur_preliminary_setup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
---

# Preliminary setup for the Bulbasaur head node: mount the pre-existing /data
# volume, set the old home/kive folders aside, and configure networking.
# (YAML indentation restored; the page scrape stripped it.)
# FIX(review): the play name said "Octomore head node" — copy-paste slip from
# the Octomore playbook; this file targets Bulbasaur.
- name: preliminary setup to run on the Bulbasaur head node
  hosts: head
  become: true
  roles:
    - head_node_internal_interface
  tasks:
    - name: create /opt
      file:
        path: /opt
        owner: root
        group: root
        mode: '0755'
        state: directory

    - name: create the /data mount point
      file:
        path: /data
        owner: root
        group: root
        mode: '0755'
        state: directory

    - name: mount the already-existing filesystem
      mount:
        path: /data
        src: /dev/data-vg/data-lv
        fstype: ext4
        state: mounted

    # creates/removes make these moves idempotent: skipped once the _old
    # directory exists or once the source directory is gone.
    - name: move the existing home folder to the side
      command: mv /data/home /data/home_old
      args:
        creates: /data/home_old
        removes: /data/home

    - name: move the existing Kive folder to the side
      command: mv /data/kive /data/kive_old
      args:
        creates: /data/kive_old
        removes: /data/kive

    - name: set up head node networking
      include_role:
        name: head_node_networking
6 changes: 6 additions & 0 deletions cluster-setup/deployment/copy_users_and_groups.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---

# Apply the copy_users_and_groups role to every worker node — presumably to
# mirror the head node's user/group databases so IDs stay consistent across
# the cluster; see the role for the actual mechanics. (Indentation restored;
# the page scrape stripped it.)
- name: copy users and groups from head to workers
  hosts: workers
  roles:
    - copy_users_and_groups
57 changes: 57 additions & 0 deletions cluster-setup/deployment/create_backup_filesystem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---

# Partition the backup disks, assemble them into an LVM volume, create an
# ext4 filesystem and mount it at /media/backup.
# (YAML indentation restored; the page scrape stripped it.)
- name: create backup filesystem
  hosts: head
  # FIX(review): added `become: true` — partitioning, LVM, mkfs and mount all
  # require root, and the sibling preliminary-setup playbook already uses it.
  # Drop this if ansible connects to the head node as root directly.
  become: true
  # vars:
  #   backup_physical_volumes:
  #     - "ata-ST10000NM001G-2MW103_ZS51H7QX"
  # Replaced the above with the `backup_physical_volumes` variable in group_vars.
  tasks:
    - name: create a single partition on each of the physical volumes
      loop: "{{ backup_physical_volumes }}"
      community.general.parted:
        device: "/dev/disk/by-id/{{ item }}"
        number: 1
        state: present
        label: gpt

    - name: construct a list of the partition names
      block:
        - name: initialize the list as empty
          set_fact:
            partition_names: [ ]
        - name: append names to the list
          loop: "{{ backup_physical_volumes }}"
          set_fact:
            partition_names: "{{ partition_names + ['/dev/disk/by-id/' ~ item ~ '-part1'] }}"

    - name: create a volume group out of the partitions
      lvg:
        vg: backup-vg
        pvs: "{{ partition_names | join(',') }}"

    - name: create a logical volume from the volume group
      community.general.lvol:
        vg: backup-vg
        lv: backup-lv
        size: 100%VG

    - name: create the filesystem
      community.general.filesystem:
        fstype: ext4
        dev: /dev/backup-vg/backup-lv

    - name: create the /media/backup mount point
      file:
        path: /media/backup
        owner: root
        group: root
        mode: '0755'
        state: directory

    - name: mount the filesystem
      mount:
        path: /media/backup
        src: /dev/backup-vg/backup-lv
        fstype: ext4
        state: mounted
Loading

0 comments on commit c1c0c1a

Please sign in to comment.