forked from 2i2c-org/infrastructure
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge Azure TF code from utoronto deploy 2i2c-org#513
We preserve history with some git tricks, thanks to https://bneijt.nl/blog/merge-a-subdirectory-of-another-repository-with-git/ Ref 2i2c-org#512
- Loading branch information
Showing
6 changed files
with
420 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
"nfs_servers": | ||
"hosts": | ||
"jupyterhub-2i2c-nfs-vm": | ||
"ansible_ssh_common_args": "-o ProxyCommand='./proxycommand.py %h %p'" | ||
"ansible_ssh_private_key_file": "../secrets/ssh-key.unsafe" | ||
"ansible_user": "hubadmin" | ||
"vars": | ||
"disk_lun": 0 | ||
"disk_name": "jupyterhub-2i2c-nfs-data-disk-1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,277 @@ | ||
# Azure Resource Manager provider configuration.
provider "azurerm" {
  features {}

  # `version` is optional, but pinning to one exact provider release is
  # recommended so every run uses the same provider.
  version = "=2.20.0"
}
|
||
# Terraform state is stored remotely using the azurerm backend.
terraform {
  backend "azurerm" {
    key                  = "prod.terraform.tfstate"
    container_name       = "terraformstate"
    storage_account_name = "utorontoterraformstate"
    resource_group_name  = "terraform-state"
  }
}
|
||
|
||
# The local provider supplies the local_file resource used in this
# configuration to write the Ansible inventory.
provider "local" {
  version = "1.4.0"
}
|
||
# Resource group that the other resources in this configuration are placed in.
resource "azurerm_resource_group" "jupyterhub" {
  location = var.region
  name     = "${var.prefix}-rg"
}
|
||
# Virtual network spanning the whole 10.0.0.0/8 private range.
resource "azurerm_virtual_network" "jupyterhub" {
  name          = "${var.prefix}-network"
  address_space = ["10.0.0.0/8"]

  resource_group_name = azurerm_resource_group.jupyterhub.name
  location            = azurerm_resource_group.jupyterhub.location
}
|
||
# Subnet shared by both Kubernetes node pools and the NFS VM's network
# interface.
resource "azurerm_subnet" "node_subnet" {
  name             = "${var.prefix}-node-subnet"
  address_prefixes = ["10.1.0.0/16"]

  resource_group_name  = azurerm_resource_group.jupyterhub.name
  virtual_network_name = azurerm_virtual_network.jupyterhub.name
}
|
||
# AKS cluster, including its default ("core") node pool.
resource "azurerm_kubernetes_cluster" "jupyterhub" {
  name                = "${var.prefix}-cluster"
  dns_prefix          = "${var.prefix}-cluster"
  resource_group_name = azurerm_resource_group.jupyterhub.name
  location            = azurerm_resource_group.jupyterhub.location
  kubernetes_version  = "1.18.8"

  tags = {
    ManagedBy   = "2i2c"
    Environment = "Production"
  }

  identity {
    type = "SystemAssigned"
  }

  linux_profile {
    admin_username = "hubadmin"

    ssh_key {
      key_data = file("${path.module}/ssh-key.pub")
    }
  }

  network_profile {
    # kubenet is a deliberate choice: the original author did not trust
    # Azure CNI.
    network_plugin = "kubenet"
    network_policy = "calico"
  }

  auto_scaler_profile {
    # Get rid of unready nodes as soon as possible; Azure nodes become
    # unready often.
    scale_down_unready = "1m"
  }

  # Core node pool
  default_node_pool {
    name = "core"

    # Unfortunately, changing anything about the VM type / size recreates
    # the *whole* cluster.
    vm_size         = var.core_vm_size
    os_disk_size_gb = 100
    vnet_subnet_id  = azurerm_subnet.node_subnet.id

    orchestrator_version = "1.18.8"

    enable_auto_scaling = true
    node_count          = 1
    min_count           = 1
    max_count           = 8

    node_labels = {
      "hub.jupyter.org/pool-name" = "core-pool"
    }
  }
}
|
||
# Additional autoscaling node pool, tainted with
# hub.jupyter.org_dedicated=user:NoSchedule.
resource "azurerm_kubernetes_cluster_node_pool" "user_pool" {
  name                  = "user"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id
  orchestrator_version  = "1.18.8"

  vm_size         = var.user_vm_size
  os_disk_size_gb = 200
  vnet_subnet_id  = azurerm_subnet.node_subnet.id

  enable_auto_scaling = true
  min_count           = 1
  max_count           = 100

  node_taints = ["hub.jupyter.org_dedicated=user:NoSchedule"]
  node_labels = {
    "hub.jupyter.org/pool-name" = "user-alpha-pool"
  }

  tags = {
    ManagedBy   = "2i2c"
    Environment = "Production"
  }
}
|
||
# Azure container registry

resource "azurerm_container_registry" "container_registry" {
  # Registry names may only contain alphanumeric characters, with no
  # separators, unlike the other resource names in this file.
  name = var.global_container_registry_name

  location            = azurerm_resource_group.jupyterhub.location
  resource_group_name = azurerm_resource_group.jupyterhub.name

  # The admin account's username and password are read by
  # local.registry_creds in this file.
  admin_enabled = true
  sku           = "premium"
}
# NFS VM
# Network interface for the NFS VM; it gets a dynamically allocated private
# address in the node subnet.
resource "azurerm_network_interface" "nfs_vm" {
  name                = "${var.prefix}-nfs-vm-inet"
  resource_group_name = azurerm_resource_group.jupyterhub.name
  location            = azurerm_resource_group.jupyterhub.location

  ip_configuration {
    name                          = "internal"
    private_ip_address_allocation = "Dynamic"
    subnet_id                     = azurerm_subnet.node_subnet.id
  }
}
|
||
# Inbound rules for the NFS VM's network interface. Every rule targets the
# interface's private IP address.
resource "azurerm_network_security_group" "nfs_vm" {
  name                = "${var.prefix}-nfs-vm-nsg"
  location            = azurerm_resource_group.jupyterhub.location
  resource_group_name = azurerm_resource_group.jupyterhub.name

  # SSH from the world (any source address).
  security_rule {
    name                       = "ssh"
    priority                   = 100
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    source_address_prefix      = "*"
    destination_port_range     = "22"
    destination_address_prefix = azurerm_network_interface.nfs_vm.private_ip_address
  }

  # NFS from internal network.
  # The source is limited to the VirtualNetwork service tag so the rule
  # actually enforces "internal only" instead of allowing any source ("*").
  security_rule {
    name                       = "nfs"
    priority                   = 101
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    source_address_prefix      = "VirtualNetwork"
    destination_port_range     = "2049"
    destination_address_prefix = azurerm_network_interface.nfs_vm.private_ip_address
  }

  # Prometheus (node exporter port) from internal network; restricted to the
  # VirtualNetwork service tag for the same reason as the NFS rule.
  security_rule {
    name                       = "prometheus"
    priority                   = 102
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    source_address_prefix      = "VirtualNetwork"
    destination_port_range     = "9100"
    destination_address_prefix = azurerm_network_interface.nfs_vm.private_ip_address
  }
}
|
||
# Attach the NFS security group to the NFS VM's network interface.
resource "azurerm_network_interface_security_group_association" "main" {
  network_security_group_id = azurerm_network_security_group.nfs_vm.id
  network_interface_id      = azurerm_network_interface.nfs_vm.id
}
|
||
# Ubuntu 20.04 VM that acts as the NFS server. Login is as "hubadmin" using
# the SSH public key stored next to this module.
resource "azurerm_linux_virtual_machine" "nfs_vm" {
  name                = "${var.prefix}-nfs-vm"
  location            = azurerm_resource_group.jupyterhub.location
  resource_group_name = azurerm_resource_group.jupyterhub.name
  size                = var.nfs_vm_size

  network_interface_ids = [
    azurerm_network_interface.nfs_vm.id,
  ]

  admin_username = "hubadmin"

  admin_ssh_key {
    username   = "hubadmin"
    public_key = file("${path.module}/ssh-key.pub")
  }

  source_image_reference {
    publisher = "Canonical"
    offer     = "0001-com-ubuntu-server-focal"
    sku       = "20_04-lts"
    version   = "latest"
  }

  os_disk {
    storage_account_type = "StandardSSD_LRS"
    disk_size_gb         = 100
    caching              = "None"
  }
}
|
||
# Empty 1024 GB premium data disk for the NFS VM.
resource "azurerm_managed_disk" "nfs_data_disk_1" {
  name                 = "${var.prefix}-nfs-data-disk-1"
  location             = azurerm_resource_group.jupyterhub.location
  resource_group_name  = azurerm_resource_group.jupyterhub.name
  storage_account_type = "Premium_LRS"
  create_option        = "Empty"

  # Written as a number rather than the string "1024": the attribute is
  # numeric, so this no longer relies on implicit string-to-number conversion.
  disk_size_gb = 1024

  lifecycle {
    # Never let Terraform destroy this disk: it holds data.
    prevent_destroy = true
  }

  tags = {
    Environment = "Production"
  }
}
|
||
# Attach the data disk to the NFS VM at LUN 0. The LUN is exported to the
# Ansible inventory as `disk_lun` via local.ansible_hosts.
resource "azurerm_virtual_machine_data_disk_attachment" "nfs_data_disk_1" {
  managed_disk_id    = azurerm_managed_disk.nfs_data_disk_1.id
  virtual_machine_id = azurerm_linux_virtual_machine.nfs_vm.id
  caching            = "None"
  lun                = 0
}
|
||
locals {
  # Ansible inventory structure; rendered to YAML by
  # local_file.ansible_hosts_file.
  ansible_hosts = {
    nfs_servers = {
      hosts = {
        # The host entry is keyed by the VM's name.
        (azurerm_linux_virtual_machine.nfs_vm.name) = {
          ansible_user                 = "hubadmin"
          ansible_ssh_private_key_file = "../secrets/ssh-key.unsafe"
          ansible_ssh_common_args      = "-o ProxyCommand='./proxycommand.py %h %p'"
        }
      }
      vars = {
        disk_lun  = azurerm_virtual_machine_data_disk_attachment.nfs_data_disk_1.lun
        disk_name = azurerm_managed_disk.nfs_data_disk_1.name
      }
    }
  }

  # Container registry admin credentials; JSON-encoded by the
  # registry_creds_config output.
  registry_creds = {
    imagePullSecret = {
      username = azurerm_container_registry.container_registry.admin_username
      password = azurerm_container_registry.container_registry.admin_password
      registry = "https://${azurerm_container_registry.container_registry.login_server}"
    }
  }
}
|
||
# Write the Ansible inventory (local.ansible_hosts) to disk as YAML.
resource "local_file" "ansible_hosts_file" {
  filename = "ansible-hosts.yaml"
  content  = yamlencode(local.ansible_hosts)
}
|
||
# Raw kubeconfig for the AKS cluster.
output "kubeconfig" {
  value = azurerm_kubernetes_cluster.jupyterhub.kube_config_raw

  # The kubeconfig contains cluster credentials; mark the output sensitive so
  # it is redacted from plan/apply summaries. It can still be read by name
  # with `terraform output kubeconfig`.
  sensitive = true
}
|
||
# JSON document holding the container registry credentials
# (local.registry_creds).
output "registry_creds_config" {
  value = jsonencode(local.registry_creds)

  # Includes the registry admin password; mark the output sensitive so it is
  # redacted from plan/apply summaries. It can still be read by name with
  # `terraform output registry_creds_config`.
  sensitive = true
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Configures every host in the `nfs_servers` group as an NFS server.
# Uses the inventory variables `disk_lun` and `disk_name`.
- name: nfs server setup
  hosts: nfs_servers
  connection: ssh
  become: true

  handlers:
    # Notified whenever /etc/exports is rewritten.
    - name: re-export NFS Shares
      command:
        cmd: exportfs -ra

  tasks:
    - name: Install NFS packages
      apt:
        pkg:
          - nfs-kernel-server
          - nfs-common
          - xfsprogs

    # Create (or grow) an XFS filesystem on the attached data disk.
    - name: Setup XFS
      filesystem:
        dev: /dev/disk/azure/scsi1/lun{{ disk_lun }}
        fstype: xfs
        resizefs: true

    - name: Mount disk
      mount:
        src: /dev/disk/azure/scsi1/lun{{ disk_lun }}
        path: /export/{{ disk_name }}
        fstype: xfs
        opts: inode64,prjquota
        state: mounted

    # Owned by uid/gid 1000, the same ids the export below squashes all
    # clients to.
    - name: Create home container directory
      file:
        path: /export/{{disk_name}}/homes
        state: directory
        owner: "1000"
        group: "1000"
        mode: 0700

    - name: setup exports file
      copy:
        dest: /etc/exports
        content: >
          /export/{{disk_name}} 10.0.0.0/8(all_squash,anonuid=1000,anongid=1000,no_subtree_check,rw,sync)
      notify:
        - re-export NFS Shares

    - name: Install prometheus-node-exporter
      apt:
        pkg:
          - prometheus-node-exporter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/usr/bin/env python3 | ||
import sys | ||
import subprocess | ||
import time | ||
|
||
|
||
POD_NAME = "ssh-proxycommand-pod"
POD_IMAGE = "alpine/socat"

# Just 'sleep infinity' doesn't handle signals properly
SCRIPT = "trap 'trap - INT; kill \"$!\"; exit' INT; exec sleep infinity & wait $!"

# Progress markers ("starting" / "ending" / "deleting") are written here,
# relative to the current working directory.
LOG_PATH = 'log'


def delete_pod():
    """Delete the proxy pod and wait for it to be gone.

    A failing ``kubectl delete`` is reported and otherwise ignored, so this
    can be called whether or not the pod currently exists.
    """
    try:
        subprocess.check_output([
            'kubectl', 'delete', 'pod', POD_NAME, '--wait', '--now'
        ])
    except subprocess.CalledProcessError as e:
        # This script's stdout is wired to the SSH client when used as a
        # ProxyCommand, so diagnostics must go to stderr, never stdout.
        captured = e.stdout.decode(errors='replace') if e.stdout else ''
        print(
            f"kubectl delete pod {POD_NAME} exited with {e.returncode}: {captured}",
            file=sys.stderr,
        )


def main(argv=None):
    """Tunnel stdin/stdout to HOST:PORT through a temporary socat pod.

    Intended to be used as an SSH ``ProxyCommand``::

        proxycommand.py HOST PORT

    Args:
        argv: Full argument vector, program name first. Defaults to
            ``sys.argv``.

    Returns:
        0 once the tunnel has closed normally, 2 when HOST or PORT is
        missing.

    Raises:
        subprocess.CalledProcessError: if ``kubectl run`` or ``kubectl exec``
            exits non-zero. The pod is still deleted in that case.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else 'proxycommand.py'
        print(f"usage: {prog} HOST PORT", file=sys.stderr)
        return 2
    host, port = argv[1], argv[2]

    # The context manager guarantees the log file is closed on every path.
    with open(LOG_PATH, 'w') as log:
        # Remove any pod left behind by an earlier run.
        delete_pod()
        try:
            subprocess.check_call(
                [
                    'kubectl', 'run', '--image', POD_IMAGE, '--command', '--wait',
                    POD_NAME, '--', "/bin/sh", "-c", SCRIPT
                ],
                # Keep kubectl's status output out of the SSH byte stream.
                stdout=sys.stderr,
            )

            # Presumably gives the container time to start before exec'ing
            # into it -- TODO confirm whether a readiness wait is needed.
            time.sleep(2)

            print("starting", file=log, flush=True)
            # stdin/stdout are inherited, so socat relays the SSH session.
            subprocess.check_call([
                'kubectl', 'exec', '-i', POD_NAME, '--',
                'socat', '-', f"tcp:{host}:{port}"
            ])
            print("ending", file=log, flush=True)
        finally:
            print("deleting", file=log, flush=True)
            delete_pod()
    return 0


if __name__ == '__main__':
    sys.exit(main())
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Prefix used to build resource names (e.g. "${var.prefix}-cluster").
prefix = "jupyterhub-2i2c"

# NOTE(review): the resources visible in this change read
# "${path.module}/ssh-key.pub" directly; confirm var.ssh_pub_key is consumed
# somewhere.
ssh_pub_key = "ssh-key.pub"
Oops, something went wrong.