func: Add more workloads
Juanadelacuesta committed Feb 20, 2025
1 parent d63c1d0 commit 25cf0ba
Showing 13 changed files with 223 additions and 46 deletions.
54 changes: 25 additions & 29 deletions enos/enos-scenario-upgrade.hcl
@@ -13,7 +13,7 @@ scenario "upgrade" {
//os = ["linux", "windows"]
edition = ["ent"]
os = ["linux"]

exclude {
os = ["windows"]
arch = ["arm64"]
@@ -65,7 +65,8 @@ scenario "upgrade" {
module = module.provision_cluster
variables {
name = local.cluster_name
nomad_local_binary = step.copy_initial_binary.nomad_local_binary
nomad_local_binary = step.copy_initial_binary.binary_path[matrix.os]
nomad_local_binary_server = step.copy_initial_binary.binary_path[local.server_os]
server_count = var.server_count
client_count_linux = local.linux_count
client_count_windows_2016 = local.windows_count
@@ -91,6 +92,14 @@ scenario "upgrade" {
cert_file = step.provision_cluster.cert_file
key_file = step.provision_cluster.key_file
nomad_token = step.provision_cluster.nomad_token
workloads = {
service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3, type = "service" }
service_docker = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3, type = "service" }
system_docker = { job_spec = "jobs/docker-system.nomad.hcl", alloc_count = 0, type = "system" }
batch_docker = { job_spec = "jobs/docker-batch.nomad.hcl", alloc_count = 3, type = "batch" }
batch_raw_exec = { job_spec = "jobs/raw-exec-batch.nomad.hcl", alloc_count = 3, type = "batch" }
system_raw_exec = { job_spec = "jobs/raw-exec-system.nomad.hcl", alloc_count = 0, type = "system" }
}
}

verifies = [
@@ -150,8 +159,8 @@ scenario "upgrade" {
arch = local.arch
edition = matrix.edition
product_version = var.upgrade_version
os = matrix.os
download_binary = false
oss = [local.server_os, matrix.os]
download_binaries = false
}
}

@@ -193,8 +202,8 @@ scenario "upgrade" {
ssh_key_path = step.provision_cluster.ssh_key_file
artifactory_username = var.artifactory_username
artifactory_token = var.artifactory_token
artifact_url = step.fetch_upgrade_binary.artifact_url
artifact_sha = step.fetch_upgrade_binary.artifact_sha
artifact_url = step.fetch_upgrade_binary.artifact_url[local.server_os]
artifact_sha = step.fetch_upgrade_binary.artifact_sha[local.server_os]
}
}

@@ -235,27 +244,6 @@ scenario "upgrade" {
]
}

/* step "run_workloads" {
depends_on = [step.server_upgrade_test_cluster_health]
description = <<-EOF
Verify the health of the cluster by running new workloads
EOF
module = module.run_workloads
variables {
nomad_addr = step.provision_cluster.nomad_addr
ca_file = step.provision_cluster.ca_file
cert_file = step.provision_cluster.cert_file
key_file = step.provision_cluster.key_file
nomad_token = step.provision_cluster.nomad_token
}
verifies = [
quality.nomad_register_job,
]
}
*/
step "upgrade_clients" {
depends_on = [step.server_upgrade_test_cluster_health]

@@ -295,8 +283,8 @@ scenario "upgrade" {
ssh_key_path = step.provision_cluster.ssh_key_file
artifactory_username = var.artifactory_username
artifactory_token = var.artifactory_token
artifact_url = step.fetch_upgrade_binary.artifact_url
artifact_sha = step.fetch_upgrade_binary.artifact_sha
artifact_url = step.fetch_upgrade_binary.artifact_url[matrix.os]
artifact_sha = step.fetch_upgrade_binary.artifact_sha[matrix.os]
}
}

@@ -377,4 +365,12 @@ scenario "upgrade" {
value = step.provision_cluster.nomad_token
sensitive = true
}

output "binary_path" {
value = step.copy_initial_binary.binary_path
}

output "allocs" {
value = step.run_initial_workloads.allocs_count
}
}
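The per-OS index expressions above (step.copy_initial_binary.binary_path[matrix.os], step.fetch_upgrade_binary.artifact_url[local.server_os], and so on) assume those steps now return maps keyed by operating system rather than single values, matching the new oss = [local.server_os, matrix.os] input. A minimal sketch of the assumed shape, with purely hypothetical values:

# Hypothetical output values, keyed by OS, as the scenario's index expressions require.
binary_path = {
  linux   = "/tmp/nomad-linux/nomad"
  windows = "/tmp/nomad-windows/nomad.exe"
}
artifact_url = {
  linux   = "https://artifactory.example.com/nomad/nomad_linux_amd64.zip"
  windows = "https://artifactory.example.com/nomad/nomad_windows_amd64.zip"
}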
30 changes: 30 additions & 0 deletions enos/modules/run_workloads/jobs/docker-batch.nomad.hcl
@@ -0,0 +1,30 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
variable "alloc_count" {
type = number
default = 1
}

job "batch-docker" {
type = "batch"

group "batch-docker" {
count = var.alloc_count

task "batch-docker" {
driver = "docker"

config {
image = "alpine:latest"
command = "sh"
args = ["-c", "while true; do sleep 30000; done"]

}

resources {
cpu = 50
memory = 64
}
}
}
}
4 changes: 2 additions & 2 deletions enos/modules/run_workloads/jobs/docker-service.nomad.hcl
Expand Up @@ -20,8 +20,8 @@ job "service-docker" {
}

resources {
cpu = 100
memory = 128
cpu = 50
memory = 64
}
}
}
29 changes: 29 additions & 0 deletions enos/modules/run_workloads/jobs/docker-system.nomad.hcl
@@ -0,0 +1,29 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1
variable "alloc_count" {
type = number
default = 1
}

job "system-docker" {
type = "system"

group "system-docker" {

task "system-docker" {
driver = "docker"

config {
image = "alpine:latest"
command = "sh"
args = ["-c", "while true; do sleep 30000; done"]

}

resources {
cpu = 50
memory = 64
}
}
}
}
41 changes: 41 additions & 0 deletions enos/modules/run_workloads/jobs/raw-exec-batch.nomad.hcl
@@ -0,0 +1,41 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "alloc_count" {
type = number
default = 1
}

job "batch-raw-exec" {
type = "batch"

group "batch-raw-exec" {
count = var.alloc_count

task "batch-raw-exec" {
driver = "raw_exec"

config {
command = "bash"
args = ["-c", "./local/runme.sh"]
}

template {
data = <<EOH
#!/bin/bash
while true; do
sleep 30000
done
EOH
destination = "local/runme.sh"
perms = "755"
}

resources {
cpu = 50
memory = 64
}
}
}
}
5 changes: 5 additions & 0 deletions enos/modules/run_workloads/jobs/raw-exec-service.nomad.hcl
@@ -35,6 +35,11 @@ EOH
destination = "local/runme.sh"
perms = "755"
}

resources {
cpu = 50
memory = 64
}
}
}
}
40 changes: 40 additions & 0 deletions enos/modules/run_workloads/jobs/raw-exec-system.nomad.hcl
@@ -0,0 +1,40 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

variable "alloc_count" {
type = number
default = 1
}

job "system-raw-exec" {
type = "system"

group "system-raw-exec" {

task "system-raw-exec" {
driver = "raw_exec"

config {
command = "bash"
args = ["-c", "./local/runme.sh"]
}

template {
data = <<EOH
#!/bin/bash
while true; do
sleep 30000
done
EOH
destination = "local/runme.sh"
perms = "755"
}

resources {
cpu = 50
memory = 64
}
}
}
}
27 changes: 25 additions & 2 deletions enos/modules/run_workloads/main.tf
@@ -10,11 +10,16 @@ terraform {
}

locals {
nomad_env = { NOMAD_ADDR = var.nomad_addr
nomad_env = {
NOMAD_ADDR = var.nomad_addr
NOMAD_CACERT = var.ca_file
NOMAD_CLIENT_CERT = var.cert_file
NOMAD_CLIENT_KEY = var.key_file
NOMAD_TOKEN = var.nomad_token }
NOMAD_TOKEN = var.nomad_token
}

system_job_count = length({for k, v in var.workloads : k => v if v.type == "system"})
service_batch_allocs = sum([for wl in var.workloads : wl.alloc_count])
}

resource "enos_local_exec" "wait_for_nomad_api" {
@@ -23,6 +28,24 @@ resource "enos_local_exec" "wait_for_nomad_api" {
scripts = [abspath("${path.module}/scripts/wait_for_nomad_api.sh")]
}

resource "enos_local_exec" "get_nodes" {
environment = local.nomad_env

inline = [ "nomad node status -json | jq '[.[] | select(.Status == \"ready\")] | length'"]
}

resource "enos_local_exec" "get_jobs" {
environment = local.nomad_env

inline = ["nomad job status| awk '$4 == \"running\" {count++} END {print count+0}'"]
}

resource "enos_local_exec" "get_allocs" {
environment = local.nomad_env

inline = ["nomad alloc status -json | jq '[.[] | select(.ClientStatus == \"running\")] | length'"]
}

resource "enos_local_exec" "workloads" {
for_each = var.workloads

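Given the six workloads the upgrade scenario passes in, the two new locals evaluate as sketched below. This is a standalone block whose literal map simply mirrors the scenario's input (it can be checked with terraform console); it is not part of the module itself:

locals {
  workloads = {
    service_raw_exec = { alloc_count = 3, type = "service" }
    service_docker   = { alloc_count = 3, type = "service" }
    system_docker    = { alloc_count = 0, type = "system" }
    batch_docker     = { alloc_count = 3, type = "batch" }
    batch_raw_exec   = { alloc_count = 3, type = "batch" }
    system_raw_exec  = { alloc_count = 0, type = "system" }
  }

  system_job_count     = length({ for k, v in local.workloads : k => v if v.type == "system" }) # 2 system jobs
  service_batch_allocs = sum([for wl in local.workloads : wl.alloc_count])                      # 3+3+0+3+3+0 = 12
}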
21 changes: 16 additions & 5 deletions enos/modules/run_workloads/outputs.tf
@@ -1,16 +1,27 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

/* output "jobs_count" {
value = length(local.job_names)
} */

output "jobs_count" {
description = "The number of jobs thar should be running in the cluster"
value = length(var.workloads) + chomp(enos_local_exec.get_jobs.stdout)
}

output "new_jobs_count" {
description = "The number of jobs that were triggered by the module"
value = length(var.workloads)
}

output "allocs_count" {
description = "The number of allocs that should be running in the cluster"
value = sum([for wl in var.workloads : wl.alloc_count])
value = local.system_job_count * chomp(enos_local_exec.get_nodes.stdout) + local.service_batch_allocs + chomp(enos_local_exec.get_allocs.stdout)
}

output "nodes" {
description = "Number of current clients in the cluster"
value = chomp(enos_local_exec.get_nodes.stdout)
}

output "new_allocs_count" {
description = "The number of allocs that should be running in the cluster"
value = local.system_job_count * chomp(enos_local_exec.get_nodes.stdout) + local.service_batch_allocs
}
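As a worked example of the count outputs (the node count and the pre-existing job/alloc numbers are assumptions for illustration, not values taken from this commit), with 4 ready clients and nothing else running the expressions reduce to:

# Assumed cluster state: 4 ready nodes, 0 pre-existing jobs, 0 pre-existing allocs.
locals {
  ready_nodes          = 4
  system_job_count     = 2  # from the sketch after main.tf above
  service_batch_allocs = 12 # from the sketch after main.tf above

  jobs_count       = 6 + 0                                                                   # new workloads + jobs already running
  new_allocs_count = local.system_job_count * local.ready_nodes + local.service_batch_allocs # 8 + 12 = 20
  allocs_count     = local.new_allocs_count + 0                                              # plus allocs already running
}

Note that the real outputs add numbers to chomp(...stdout) strings and rely on Terraform's implicit string-to-number conversion, so the inline scripts must print a bare integer.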
6 changes: 1 addition & 5 deletions enos/modules/run_workloads/variables.tf
@@ -34,10 +34,6 @@ variable "workloads" {
type = map(object({
job_spec = string
alloc_count = number
type = string
}))

default = {
service_raw_exec = { job_spec = "jobs/raw-exec-service.nomad.hcl", alloc_count = 3 }
service_docker = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3 }
}
}
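With the default removed and type added to the object schema, every caller now has to pass a fully specified workloads map; the scenario does this through its run_initial_workloads step. A hypothetical direct invocation (the source path and all connection values below are assumptions, not from this commit) would look like:

module "run_workloads" {
  source = "./enos/modules/run_workloads"

  nomad_addr  = "https://10.0.0.10:4646"
  ca_file     = "/tmp/nomad-ca.pem"
  cert_file   = "/tmp/nomad-cert.pem"
  key_file    = "/tmp/nomad-key.pem"
  nomad_token = "REDACTED"

  workloads = {
    service_docker = { job_spec = "jobs/docker-service.nomad.hcl", alloc_count = 3, type = "service" }
    system_docker  = { job_spec = "jobs/docker-system.nomad.hcl", alloc_count = 0, type = "system" }
  }
}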
6 changes: 5 additions & 1 deletion enos/modules/test_cluster_health/scripts/allocs.sh
@@ -41,13 +41,17 @@ while true; do
error_exit "Some allocs are not running:\n$(nomad alloc status -json | jq -r '.[] | select(.ClientStatus != "running") | .ID')"
fi

echo "Running allocs: $running_allocs, expected $ALLOC_COUNT. Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..."
echo "Running allocs: $running_allocs, expected "$ALLOC_COUNT". Waiting for $elapsed_time Retrying in $POLL_INTERVAL seconds..."
sleep $POLL_INTERVAL
elapsed_time=$((elapsed_time + POLL_INTERVAL))
done

echo "All ALLOCS are running."

if [ "$allocs_length" -eq 0 ]; then
exit 0
fi

# Quality: nomad_reschedule_alloc: A POST / PUT call to /v1/allocation/:alloc_id/stop results in the stopped allocation being rescheduled

random_index=$((RANDOM % allocs_length))
5 changes: 4 additions & 1 deletion enos/modules/test_cluster_health/scripts/jobs.sh
@@ -18,7 +18,10 @@ if [ -z "$jobs_length" ]; then
fi

if [ "$jobs_length" -ne "$JOB_COUNT" ]; then
error_exit "The number of running jobs ($jobs_length) does not match the expected count ($JOB_COUNT)\n$(nomad job status | awk 'NR > 1 && $4 != "running" {print $4}')"
output_file="nomad_job_status_$(date +'%Y-%m-%d_%H-%M-%S').json"
echo $jobs_length > "len.txt"
nomad job status -json | jq '.' > "$output_file"
error_exit "The number of running jobs ($jobs_length) does not match the expected count ($JOB_COUNT) $(nomad job status | awk 'NR > 1 && $4 != "running" {print $4}') "
fi

echo "All JOBS are running."