Skip to content

Commit

Permalink
feat: add fdb pool
Browse files Browse the repository at this point in the history
  • Loading branch information
MasterPtato committed Nov 4, 2024
1 parent f2ec038 commit b5d3547
Show file tree
Hide file tree
Showing 31 changed files with 1,248 additions and 130 deletions.
2 changes: 1 addition & 1 deletion packages/api/traefik-provider/src/route/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub async fn config(
let mut config = types::TraefikConfigResponse::default();

// Fetch configs and catch any errors
// build_cdn(&ctx, &mut config).await?;
build_cdn(&ctx, &mut config).await?;

// tracing::info!(
// http_services = ?config.http.services.len(),
Expand Down
1 change: 1 addition & 0 deletions packages/common/config/src/config/rivet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ pub enum PoolType {
Ats,
Pegboard,
PegboardIsolate,
Fdb,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
Expand Down
2 changes: 2 additions & 0 deletions packages/common/convert/src/impls/admin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ impl ApiFrom<models::AdminClustersPoolType> for cluster::types::PoolType {
models::AdminClustersPoolType::PegboardIsolate => {
cluster::types::PoolType::PegboardIsolate
}
models::AdminClustersPoolType::Fdb => cluster::types::PoolType::Fdb,
}
}
}
Expand All @@ -29,6 +30,7 @@ impl ApiFrom<cluster::types::PoolType> for models::AdminClustersPoolType {
cluster::types::PoolType::PegboardIsolate => {
models::AdminClustersPoolType::PegboardIsolate
}
cluster::types::PoolType::Fdb => models::AdminClustersPoolType::Fdb,
}
}
}
Expand Down
20 changes: 20 additions & 0 deletions packages/common/util/core/src/net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,23 @@ pub mod job {
]
}
}

pub mod fdb {
    use std::net::Ipv4Addr;

    use ipnet::{Ipv4AddrRange, Ipv4Net};

    use super::{default_firewall, FirewallRule};

    /// Subnet reserved for FDB servers on the VLAN (10.0.2.0/26).
    pub fn vlan_ip_net() -> Ipv4Net {
        let base = Ipv4Addr::new(10, 0, 2, 0);
        Ipv4Net::new(base, 26).unwrap()
    }

    /// Iterator over the usable host addresses of [`vlan_ip_net`].
    pub fn vlan_addr_range() -> Ipv4AddrRange {
        let net = vlan_ip_net();
        net.hosts()
    }

    /// Firewall rules applied to FDB servers (default rules only).
    pub fn firewall() -> Vec<FirewallRule> {
        let mut rules = Vec::with_capacity(1);
        rules.push(default_firewall());
        rules
    }
}
4 changes: 2 additions & 2 deletions packages/services/cluster/src/ops/datacenter/topology_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::{
};

use chirp_workflow::prelude::*;
use nomad_client::apis::{allocations_api, configuration::Configuration, nodes_api};
use nomad_client::apis::{allocations_api, nodes_api};
use server_spec::types::ServerSpec;

use crate::types::PoolType;
Expand Down Expand Up @@ -177,7 +177,7 @@ pub async fn cluster_datacenter_topology_get(
.filter(|server| {
matches!(
server.pool_type,
PoolType::Gg | PoolType::Ats | PoolType::PegboardIsolate
PoolType::Gg | PoolType::Ats | PoolType::PegboardIsolate | PoolType::Fdb
)
})
.collect::<Vec<_>>();
Expand Down
7 changes: 6 additions & 1 deletion packages/services/cluster/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ pub enum PoolType {
Ats = 2,
Pegboard = 3,
PegboardIsolate = 4,
Fdb = 5,
}

impl std::fmt::Display for PoolType {
Expand All @@ -95,6 +96,7 @@ impl std::fmt::Display for PoolType {
PoolType::Ats => write!(f, "ats"),
PoolType::Pegboard => write!(f, "pegboard"),
PoolType::PegboardIsolate => write!(f, "pegboard-isolate"),
PoolType::Fdb => write!(f, "fdb"),
}
}
}
Expand All @@ -107,6 +109,7 @@ impl From<rivet_config::config::rivet::PoolType> for PoolType {
rivet_config::config::rivet::PoolType::Ats => PoolType::Ats,
rivet_config::config::rivet::PoolType::Pegboard => PoolType::Pegboard,
rivet_config::config::rivet::PoolType::PegboardIsolate => PoolType::PegboardIsolate,
rivet_config::config::rivet::PoolType::Fdb => PoolType::Fdb,
}
}
}
Expand Down Expand Up @@ -148,7 +151,9 @@ impl From<rivet_config::config::rivet::BuildDeliveryMethod> for BuildDeliveryMet
rivet_config::config::rivet::BuildDeliveryMethod::TrafficServer => {
BuildDeliveryMethod::TrafficServer
}
rivet_config::config::rivet::BuildDeliveryMethod::S3Direct => BuildDeliveryMethod::S3Direct,
rivet_config::config::rivet::BuildDeliveryMethod::S3Direct => {
BuildDeliveryMethod::S3Direct
}
}
}
}
Expand Down
121 changes: 44 additions & 77 deletions packages/services/cluster/src/workflows/datacenter/scale.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,22 +311,20 @@ async fn scale_servers(
.await?;
}
}
PoolType::Gg => {
PoolType::Gg | PoolType::Ats | PoolType::Fdb => {
let installed_servers = active_servers.filter(|server| server.is_installed);
let installed_count = installed_servers.clone().count();

if pctx.desired_count < installed_count {
scale_down_gg_servers(ctx, tx, actions, pctx, installed_servers, installed_count)
.await?;
}
}
PoolType::Ats => {
let installed_servers = active_servers.filter(|server| server.is_installed);
let installed_count = installed_servers.clone().count();

if pctx.desired_count < installed_count {
scale_down_ats_servers(ctx, tx, actions, pctx, installed_servers, installed_count)
.await?;
scale_down_servers_basic(
ctx,
tx,
actions,
pctx,
installed_servers,
installed_count,
)
.await?;
}
}
PoolType::Pegboard | PoolType::PegboardIsolate => {
Expand All @@ -350,6 +348,40 @@ async fn scale_servers(
Ok(())
}

/// Generic scale-down for pools whose servers need no special handling beyond
/// draining (GG, ATS, FDB): drains enough installed servers for the pool to
/// converge on `pctx.desired_count`.
///
/// `installed_count` must equal the number of items yielded by
/// `installed_servers`; callers pass it in because they have already counted
/// the iterator.
//
// NOTE: the original bounds `DoubleEndedIterator + Clone` were unused — only
// `.take().map()` is called — so they are relaxed to plain `Iterator`, which
// is strictly backward compatible for all callers.
async fn scale_down_servers_basic<'a, I: Iterator<Item = &'a Server>>(
    ctx: &ActivityCtx,
    tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
    actions: &mut Vec<Action>,
    pctx: &PoolCtx,
    installed_servers: I,
    installed_count: usize,
) -> GlobalResult<()> {
    tracing::info!(
        datacenter_id=?pctx.datacenter_id,
        desired=%pctx.desired_count,
        installed=%installed_count,
        "scaling down {}", pctx.pool_type
    );

    // Number of servers above the desired count (0 if already at/below it)
    let drain_count = installed_count.saturating_sub(pctx.desired_count);

    // Drain the excess servers
    if drain_count != 0 {
        tracing::info!(count=%drain_count, "draining {} servers", pctx.pool_type);

        let drain_candidates = installed_servers
            .take(drain_count)
            .map(|server| server.server_id);

        drain_servers(ctx, tx, actions, drain_candidates).await?;
    }

    Ok(())
}

async fn scale_down_job_servers(
ctx: &ActivityCtx,
tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
Expand Down Expand Up @@ -402,71 +434,6 @@ async fn scale_down_job_servers(
Ok(())
}

/// Scales the GG pool down to `pctx.desired_count` by draining the first
/// excess installed servers.
async fn scale_down_gg_servers<'a, I: Iterator<Item = &'a Server> + DoubleEndedIterator + Clone>(
    ctx: &ActivityCtx,
    tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
    actions: &mut Vec<Action>,
    pctx: &PoolCtx,
    installed_servers: I,
    installed_count: usize,
) -> GlobalResult<()> {
    tracing::info!(
        datacenter_id=?pctx.datacenter_id,
        desired=%pctx.desired_count,
        installed=%installed_count,
        "scaling down gg"
    );

    // How far the pool is above the desired size
    let drain_count = installed_count.saturating_sub(pctx.desired_count);

    // Nothing to drain — already at or below the desired count
    if drain_count == 0 {
        return Ok(());
    }

    tracing::info!(count=%drain_count, "draining gg servers");

    // Drain the first `drain_count` installed servers
    let drain_candidates = installed_servers
        .take(drain_count)
        .map(|server| server.server_id);
    drain_servers(ctx, tx, actions, drain_candidates).await?;

    Ok(())
}

/// Scales the ATS pool down to `pctx.desired_count` by draining the first
/// excess installed servers.
async fn scale_down_ats_servers<
    'a,
    I: Iterator<Item = &'a Server> + DoubleEndedIterator + Clone,
>(
    ctx: &ActivityCtx,
    tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
    actions: &mut Vec<Action>,
    pctx: &PoolCtx,
    installed_servers: I,
    installed_count: usize,
) -> GlobalResult<()> {
    tracing::info!(
        datacenter_id=?pctx.datacenter_id,
        desired=%pctx.desired_count,
        installed=%installed_count,
        "scaling down ats"
    );

    // How far the pool is above the desired size
    let drain_count = installed_count.saturating_sub(pctx.desired_count);

    // Nothing to drain — already at or below the desired count
    if drain_count == 0 {
        return Ok(());
    }

    tracing::info!(count=%drain_count, "draining ats servers");

    // Drain the first `drain_count` installed servers
    let drain_candidates = installed_servers
        .take(drain_count)
        .map(|server| server.server_id);
    drain_servers(ctx, tx, actions, drain_candidates).await?;

    Ok(())
}

async fn scale_down_pb_servers(
ctx: &ActivityCtx,
tx: &mut sqlx::Transaction<'_, sqlx::Postgres>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ pub(crate) async fn cluster_datacenter_tls_issue(
"job domain not enabled"
);

ctx.removed::<Activity<Order>>().await?;

let (gg_cert, job_cert) = ctx
.join((
activity(OrderInput {
Expand Down
1 change: 1 addition & 0 deletions packages/services/cluster/src/workflows/prebake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub async fn cluster_prebake(ctx: &mut WorkflowCtx, input: &Input) -> GlobalResu
}
PoolType::Gg => linode::types::FirewallPreset::Gg,
PoolType::Ats => linode::types::FirewallPreset::Ats,
PoolType::Fdb => linode::types::FirewallPreset::Fdb,
},
vlan_ip: None,
tags,
Expand Down
7 changes: 2 additions & 5 deletions packages/services/cluster/src/workflows/server/drain.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use chirp_workflow::prelude::*;
use nomad_client::{
apis::{configuration::Configuration, nodes_api},
models,
};
use nomad_client::{apis::nodes_api, models};
use rivet_operation::prelude::proto::backend::pkg::*;

use crate::types::PoolType;
Expand Down Expand Up @@ -38,7 +35,6 @@ pub(crate) async fn cluster_server_drain(ctx: &mut WorkflowCtx, input: &Input) -
.send()
.await?;
}
PoolType::Ats => {}
PoolType::Pegboard | PoolType::PegboardIsolate => {
let pegboard_client_id = ctx
.activity(DrainPegboardClientInput {
Expand All @@ -54,6 +50,7 @@ pub(crate) async fn cluster_server_drain(ctx: &mut WorkflowCtx, input: &Input) -
.await?;
}
}
PoolType::Ats | PoolType::Fdb => {}
}

Ok(())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use chirp_workflow::prelude::*;

/// Builds the FDB install script, inlining the Prometheus proxy source into
/// the `__PROMETHEUS_PROXY_SCRIPT__` placeholder.
///
/// When `initialize_immediately` is true, the proxy service is started right
/// away instead of waiting for the next boot.
pub fn install(initialize_immediately: bool) -> String {
    // NOTE(review): the proxy file is named `fdp_...` (not `fdb_...`) —
    // presumably a typo carried into the filename; the path must match the
    // file on disk, so it is left as-is. Confirm against the files directory.
    let mut script = include_str!("../files/fdb_install.sh").replace(
        "__PROMETHEUS_PROXY_SCRIPT__",
        include_str!("../files/fdp_prometheus_proxy.py"),
    );

    if initialize_immediately {
        // Run script immediately. Lead with a newline so the command cannot
        // be glued onto the script's final line if the included file lacks a
        // trailing newline (an extra blank line is harmless otherwise).
        script.push_str("\nsystemctl start --no-block fdb_prometheus_proxy.service");
    }

    script
}

/// Returns the FDB post-install configuration script verbatim.
pub fn configure() -> String {
    String::from(include_str!("../files/fdb_configure.sh"))
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use indoc::indoc;

pub mod fdb;
pub mod nomad;
pub mod ok_server;
pub mod pegboard;
Expand Down Expand Up @@ -79,3 +80,9 @@ pub mod cni {
include_str!("../files/cni_plugins.sh").to_string()
}
}

pub mod python {
    /// Returns a script that installs Python 3 and pip via apt.
    pub fn install() -> String {
        // `python3-pip` is the correct Debian/Ubuntu package name; there is
        // no bare `pip` package in the apt repositories, so the previous
        // command would fail at provision time.
        "apt-get install -y python3 python3-pip".to_string()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Configures an already-installed FoundationDB server for this machine.

# Append config (commented out — presumably a template kept for later use;
# confirm before deleting)
# cat << 'EOF' >> /etc/foundationdb/foundationdb.conf
# [fdbserver]
# EOF

# TODO: add -t flag for TLS (https://apple.github.io/foundationdb/tls.html#enable-tls)
# Make fdb accessible on VLAN
# NOTE(review): ___VLAN_IP___ is presumably substituted with the server's VLAN
# address before this script runs — confirm against the script renderer.
python3 /usr/lib/foundationdb/make_public.py -a ___VLAN_IP___
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Installs FoundationDB (clients + server) and a Prometheus metrics proxy.
# Expected to run as root during server provisioning.

# Apply any sysctl settings dropped in place by earlier install steps
sysctl --system

mkdir -p /etc/foundationdb

# NOTE(review): "OFF" in the local .deb filenames looks like a mangled version
# placeholder (the URL pins 7.3.43). It is harmless since download and install
# use the same name, but worth confirming against the original template.
curl -Lf -o /tmp/foundationdb-clients_OFF-1_amd64.deb "https://github.com/apple/foundationdb/releases/download/7.3.43/foundationdb-clients_7.3.43-1_amd64.deb"
dpkg -i /tmp/foundationdb-clients_OFF-1_amd64.deb

# Verify installation
fdbcli --version

curl -Lf -o /tmp/foundationdb-server_OFF-1_amd64.deb "https://github.com/apple/foundationdb/releases/download/7.3.43/foundationdb-server_7.3.43-1_amd64.deb"
dpkg -i /tmp/foundationdb-server_OFF-1_amd64.deb

# Verify installation
fdbserver --version

# https://apple.github.io/foundationdb/administration.html#administration-running-foundationdb
# Configure redundancy and storage engine
fdbcli --exec "configure perpetual_storage_wiggle=1 storage_migration_type=gradual"
fdbcli --exec "configure single ssd"
service foundationdb stop


# Dependencies for the Prometheus proxy script installed below
pip install wheel foundationdb prometheus_client

# __PROMETHEUS_PROXY_SCRIPT__ is substituted with the proxy source before this
# script runs (see the Rust `install()` renderer)
cat << 'EOF' > /usr/local/bin/fdb_prometheus_proxy.py
__PROMETHEUS_PROXY_SCRIPT__
EOF

# Systemd service
cat << 'EOF' > /etc/systemd/system/fdb_prometheus_proxy.service
[Unit]
Description=FDB Prometheus Proxy
After=network-online.target
Requires=network-online.target
[Service]
ExecStart=/usr/bin/python3 /usr/local/bin/fdb_prometheus_proxy.py --fdb-cluster-file /etc/foundationdb/fdb.cluster
Restart=always
RestartSec=2
[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable fdb_prometheus_proxy

# NOTE: we don't have a systemd service for fdbserver because it uses `service`:
# https://apple.github.io/foundationdb/administration.html#administration-running-foundationdb
Loading

0 comments on commit b5d3547

Please sign in to comment.