fix(infra): dynamically generate nomad server count in install script #981

Merged
23 changes: 9 additions & 14 deletions infra/tf/k8s_infra/nomad.tf
@@ -13,14 +13,9 @@
# complicated + adds another point of failure and (b) it doesn't fix the problem with Nomad server addresses changing.

locals {
# !!! DO NOT CHANGE !!!
#
# This value must be 3, 5, or 7. More = better redundancy, but does not make things faster.
#
# See https://developer.hashicorp.com/nomad/tutorials/enterprise/production-reference-architecture-vm-with-consul
nomad_server_count = var.deploy_method_cluster ? 3 : 1

Review comment (Contributor): Dead variable?

nomad_server_addrs = [for i in range(0, local.nomad_server_count): "127.0.0.1:${6000 + i}"]
nomad_server_addrs = [for i in range(0, var.nomad_server_count): "127.0.0.1:${6000 + i}"]
nomad_server_addrs_escaped = [for addr in local.nomad_server_addrs : "\"${addr}\""]
nomad_server_configmap_data = {
"server.hcl" = <<-EOT
@@ -36,7 +31,7 @@ locals {

server {
enabled = true
bootstrap_expect = ${local.nomad_server_count}
bootstrap_expect = ${var.nomad_server_count}

server_join {
retry_join = [${join(", ", local.nomad_server_addrs_escaped)}]
@@ -128,7 +123,7 @@ resource "kubernetes_service" "nomad_server" {
}

resource "kubernetes_service" "nomad_server_indexed" {
count = var.edge_enabled ? local.nomad_server_count : 0
count = var.edge_enabled ? var.nomad_server_count : 0

metadata {
namespace = kubernetes_namespace.nomad.0.metadata.0.name
@@ -202,7 +197,7 @@ resource "kubernetes_stateful_set" "nomad_server" {
}
}
spec {
replicas = local.nomad_server_count
replicas = var.nomad_server_count

selector {
match_labels = {
@@ -324,7 +319,7 @@ resource "kubernetes_stateful_set" "nomad_server" {

# Entrypoints
flatten([
for i in range(0, local.nomad_server_count):
for i in range(0, var.nomad_server_count):
[
"--entryPoints.nomad-${i}-rpc-tcp.address=:${5000 + i}/tcp",
"--entryPoints.nomad-${i}-serf-tcp.address=:${6000 + i}/tcp",
@@ -334,7 +329,7 @@
])

dynamic "port" {
for_each = [for i in range(0, local.nomad_server_count) : i]
for_each = [for i in range(0, var.nomad_server_count) : i]
content {
name = "n-${port.value}-rpc-tcp"
container_port = 5000 + port.value
@@ -343,7 +338,7 @@
}

dynamic "port" {
for_each = [for i in range(0, local.nomad_server_count) : i]
for_each = [for i in range(0, var.nomad_server_count) : i]
content {
name = "n-${port.value}-serf-tcp"
container_port = 6000 + port.value
@@ -352,7 +347,7 @@
}

dynamic "port" {
for_each = [for i in range(0, local.nomad_server_count) : i]
for_each = [for i in range(0, var.nomad_server_count) : i]
content {
name = "n-${port.value}-serf-udp"
container_port = 6000 + port.value
@@ -421,7 +416,7 @@ resource "kubernetes_config_map" "nomad_server_sidecar_traefik_config" {
}

data = {
for i in range(0, local.nomad_server_count):
for i in range(0, var.nomad_server_count):
"nomad-${i}.yaml" => yamlencode({
tcp = {
routers = {
4 changes: 4 additions & 0 deletions infra/tf/k8s_infra/vars.tf
@@ -60,6 +60,10 @@ variable "authenticate_all_docker_hub_pulls" {
}

# MARK: Nomad
variable "nomad_server_count" {
type = number
}

variable "edge_enabled" {
type = bool
}
12 changes: 12 additions & 0 deletions lib/bolt/core/src/context/project.rs
@@ -900,6 +900,18 @@ impl ProjectContextData {
.and_then(|dns| dns.provider.as_ref())
.is_some()
}

pub fn nomad_server_count(&self) -> usize {
// !!! DO NOT CHANGE !!!
//
// This value must be 1, 3, 5, or 7. More = better redundancy, but does not make things faster.
//
// See https://developer.hashicorp.com/nomad/tutorials/enterprise/production-reference-architecture-vm-with-consul
match self.ns().cluster.kind {
config::ns::ClusterKind::Distributed { .. } => 3,
config::ns::ClusterKind::SingleNode { .. } => 1,
}
}
}

pub struct S3Credentials {
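The new nomad_server_count() helper above encodes the Raft-safe sizes called out in its comment (3 for distributed clusters, 1 for single-node). As an illustrative sketch only (not part of this PR; assert_valid_server_count is a hypothetical helper), a guard like the following would keep any future configurable value inside the allowed set:

// Illustrative sketch, not part of the Rivet codebase: enforces the
// "must be 1, 3, 5, or 7" rule from the comment above.
fn assert_valid_server_count(count: usize) {
    assert!(
        matches!(count, 1 | 3 | 5 | 7),
        "nomad_server_count must be 1, 3, 5, or 7, got {count}"
    );
}

fn main() {
    assert_valid_server_count(3); // ClusterKind::Distributed
    assert_valid_server_count(1); // ClusterKind::SingleNode
}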
7 changes: 6 additions & 1 deletion lib/bolt/core/src/context/service.rs
@@ -1047,6 +1047,12 @@ impl ServiceContextData {
env.insert("RIVET_PROFANITY_FILTER_DISABLE".into(), "1".into());
}

// Nomad
env.insert(
"NOMAD_SERVER_COUNT".into(),
project_ctx.nomad_server_count().to_string(),
);

if let Some(provisioning) = &project_ctx.ns().rivet.provisioning {
if self.depends_on_cluster_config() || matches!(run_context, RunContext::Test { .. }) {
env.insert(
@@ -1302,7 +1308,6 @@

// if self.depends_on_infra() && project_ctx.ns().rivet.provisioning.is_some() {
let tls = terraform::output::read_tls(&project_ctx).await;
let k8s_infra = terraform::output::read_k8s_infra(&project_ctx).await;

env.insert(
"TLS_CERT_LOCALLY_SIGNED_JOB_CERT_PEM".into(),
1 change: 1 addition & 0 deletions lib/bolt/core/src/dep/terraform/gen.rs
@@ -224,6 +224,7 @@ async fn vars(ctx: &ProjectContext) {
}

// Edge nodes
vars.insert("nomad_server_count".into(), json!(ctx.nomad_server_count()));
vars.insert(
"edge_enabled".into(),
json!(config.rivet.provisioning.is_some()),
@@ -4,10 +4,13 @@ pub fn install() -> String {
include_str!("../files/nomad_install.sh").to_string()
}

pub fn configure() -> String {
let servers = &["127.0.0.1:5000", "127.0.0.1:5001", "127.0.0.1:5002"];
pub fn configure() -> GlobalResult<String> {
let nomad_server_count = util::env::var("NOMAD_SERVER_COUNT")?.parse::<usize>()?;
let servers = (0..nomad_server_count)
.map(|idx| format!("127.0.0.1:{}", 5000 + idx))
.collect::<Vec<_>>();

include_str!("../files/nomad_configure.sh")
Ok(include_str!("../files/nomad_configure.sh")
// HACK: Hardcoded to Linode
.replace("__PUBLIC_IFACE__", "eth0")
// HACK: Hardcoded to Linode
@@ -27,5 +30,5 @@
.replace(
"__ATS_VLAN_SUBNET__",
&util::net::ats::vlan_ip_net().to_string(),
)
))
}
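The server-address generation that configure() now performs is easy to check in isolation. A minimal, self-contained sketch (server_addrs is an illustrative stand-in for the inline iterator above, assuming the same RPC base port of 5000):

// Sketch mirroring the address generation added to configure(); illustrative only.
fn server_addrs(count: usize) -> Vec<String> {
    (0..count)
        .map(|idx| format!("127.0.0.1:{}", 5000 + idx))
        .collect()
}

fn main() {
    // With NOMAD_SERVER_COUNT=3 this reproduces the list that was previously hardcoded.
    assert_eq!(
        server_addrs(3),
        vec!["127.0.0.1:5000", "127.0.0.1:5001", "127.0.0.1:5002"]
    );
}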
@@ -92,7 +92,7 @@ pub async fn gen_initialize(
// MARK: Specific pool components
match pool_type {
backend::cluster::PoolType::Job => {
script.push(components::nomad::configure());
script.push(components::nomad::configure()?);

prometheus_targets.insert(
"nomad".into(),