Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Pw/devnet fixes #8385

Merged
merged 7 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/devnet-deploys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ jobs:
run: |
env
terraform init -input=false -backend-config="key=${{ env.DEPLOY_TAG }}/mainnet-fork"
terraform apply -input=false -auto-approve -replace="aws_efs_file_system.aztec_mainnet_fork_data_store"
terraform apply -input=false -auto-approve

- name: Wait for mainnet fork deployment
run: |
Expand Down Expand Up @@ -567,13 +567,13 @@ jobs:
working-directory: ./yarn-project/aztec/terraform/node
run: |
terraform init -input=false -backend-config="key=${{ env.DEPLOY_TAG }}/aztec-node"
terraform apply -input=false -auto-approve -replace="aws_efs_file_system.node_data_store" -var="NODE_P2P_TCP_PORT=${{ needs.set-network.outputs.node_tcp_range_start }}" -var="NODE_P2P_UDP_PORT=${{ needs.set-network.outputs.node_udp_range_start }}"
terraform apply -input=false -auto-approve -var="NODE_P2P_TCP_PORT=${{ needs.set-network.outputs.node_tcp_range_start }}" -var="NODE_P2P_UDP_PORT=${{ needs.set-network.outputs.node_udp_range_start }}"

- name: Deploy Aztec Prover Nodes
working-directory: ./yarn-project/aztec/terraform/prover-node
run: |
terraform init -input=false -backend-config="key=${{ env.DEPLOY_TAG }}/aztec-prover-node"
terraform apply -input=false -auto-approve -replace="aws_efs_file_system.prover_node_data_store" -var="NODE_P2P_TCP_PORT=${{ needs.set-network.outputs.prover_node_tcp_range_start }}" -var="NODE_P2P_UDP_PORT=${{ needs.set-network.outputs.prover_node_udp_range_start }}"
terraform apply -input=false -auto-approve -var="NODE_P2P_TCP_PORT=${{ needs.set-network.outputs.prover_node_tcp_range_start }}" -var="NODE_P2P_UDP_PORT=${{ needs.set-network.outputs.prover_node_udp_range_start }}"

- name: Deploy Provers
working-directory: ./yarn-project/aztec/terraform/prover
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/aztec/terraform/bot/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ resource "aws_ecs_task_definition" "aztec-bot" {
command = ["start", "--bot", "--pxe"]
essential = true
cpu = 8192
memoryReservation = 14336
memoryReservation = 15685
portMappings = [
{
containerPort = 80
Expand Down
91 changes: 81 additions & 10 deletions yarn-project/aztec/terraform/node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,78 @@ resource "aws_efs_mount_target" "public_az2" {
security_groups = [data.terraform_remote_state.setup_iac.outputs.security_group_public_id]
}


# Renders one EC2 user-data script per node (count = local.node_count).
# Each script appends two settings to /etc/ecs/ecs.config:
#   - ECS_CLUSTER: the cluster name read from the setup_iac remote state.
#   - ECS_INSTANCE_ATTRIBUTES: a custom "group" attribute of the form
#     "<DEPLOY_TAG>-aztec-node-<index + 1>". The aztec-node ECS service's
#     placement constraint uses the same string, so the two must stay in sync.
data "template_file" "user_data" {
count = local.node_count
template = <<EOF
#!/bin/bash
echo ECS_CLUSTER=${data.terraform_remote_state.setup_iac.outputs.ecs_cluster_name} >> /etc/ecs/ecs.config
echo 'ECS_INSTANCE_ATTRIBUTES={"group": "${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"}' >> /etc/ecs/ecs.config
EOF
}

# Launch template for the EC2 instances that host the Aztec nodes (one template per node).
# c6a.xlarge: 4 vCPUs and 8 GiB memory
resource "aws_launch_template" "aztec-node-launch-template" {
count = local.node_count
name = "${var.DEPLOY_TAG}-aztec-node-launch-template-${count.index + 1}"
# NOTE(review): hard-coded AMI ID; presumably an ECS-optimized image in the deployment region — confirm.
image_id = "ami-0cd4858f2b923aa6b"
instance_type = "c6a.xlarge"
vpc_security_group_ids = [data.terraform_remote_state.setup_iac.outputs.security_group_private_id]

iam_instance_profile {
name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_profile_name
}

key_name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_key_pair_name

# Per-node user-data script (see data.template_file.user_data), base64-encoded for the launch template.
user_data = base64encode(data.template_file.user_data[count.index].rendered)

tag_specifications {
resource_type = "instance"
tags = {
Name = "${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"
# NOTE(review): empty-valued tag; presumably a marker for Prometheus instance discovery — confirm.
prometheus = ""
}
}
}

# One EC2 fleet per node. Each fleet requests a single on-demand instance built
# from that node's launch template, with one override per private subnet.
resource "aws_ec2_fleet" "aztec_node_fleet" {
count = local.node_count
launch_template_config {
launch_template_specification {
launch_template_id = aws_launch_template.aztec-node-launch-template[count.index].id
version = aws_launch_template.aztec-node-launch-template[count.index].latest_version
}

# Candidate placements: the az1 and az2 private subnets.
override {
subnet_id = data.terraform_remote_state.setup_iac.outputs.subnet_az1_private_id
availability_zone = "eu-west-2a"
}

override {
subnet_id = data.terraform_remote_state.setup_iac.outputs.subnet_az2_private_id
availability_zone = "eu-west-2b"
}
}

# Exactly one instance in total, on-demand only (no spot capacity).
target_capacity_specification {
default_target_capacity_type = "on-demand"
total_target_capacity = 1
spot_target_capacity = 0
on_demand_target_capacity = 1
}

# Per the AWS provider docs: terminate the fleet's instances when the fleet is deleted or expires.
terminate_instances = true
terminate_instances_with_expiration = true
}

# Define task definitions for each node.
resource "aws_ecs_task_definition" "aztec-node" {
count = local.node_count
family = "${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"
requires_compatibilities = ["FARGATE"]
requires_compatibilities = ["EC2"]
network_mode = "awsvpc"
cpu = "2048"
memory = "4096"
execution_role_arn = data.terraform_remote_state.setup_iac.outputs.ecs_task_execution_role_arn
task_role_arn = data.terraform_remote_state.aztec2_iac.outputs.cloudwatch_logging_ecs_role_arn

Expand All @@ -151,7 +215,8 @@ resource "aws_ecs_task_definition" "aztec-node" {
image = "${var.DOCKERHUB_ACCOUNT}/aztec:${var.IMAGE_TAG}"
command = ["start", "--node", "--archiver", "--sequencer"]
essential = true
memoryReservation = 3776
cpu = 4096
memoryReservation = 7790
portMappings = [
{
containerPort = 80
Expand Down Expand Up @@ -390,19 +455,20 @@ resource "aws_ecs_service" "aztec-node" {
count = local.node_count
name = "${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"
cluster = data.terraform_remote_state.setup_iac.outputs.ecs_cluster_id
launch_type = "FARGATE"
launch_type = "EC2"
desired_count = 1
deployment_maximum_percent = 100
deployment_minimum_healthy_percent = 0
platform_version = "1.4.0"
force_new_deployment = true
enable_execute_command = true
#platform_version = "1.4.0"
force_new_deployment = true
enable_execute_command = true


network_configuration {
assign_public_ip = true
#assign_public_ip = true
subnets = [
data.terraform_remote_state.setup_iac.outputs.subnet_az1_id
data.terraform_remote_state.setup_iac.outputs.subnet_az1_private_id,
data.terraform_remote_state.setup_iac.outputs.subnet_az2_private_id
]
security_groups = [data.terraform_remote_state.aztec-network_iac.outputs.p2p_security_group_id, data.terraform_remote_state.setup_iac.outputs.security_group_private_id]
}
Expand All @@ -419,6 +485,11 @@ resource "aws_ecs_service" "aztec-node" {
container_port = 80
}

placement_constraints {
type = "memberOf"
expression = "attribute:group == ${var.DEPLOY_TAG}-aztec-node-${count.index + 1}"
}

task_definition = aws_ecs_task_definition.aztec-node[count.index].family
}

Expand Down
92 changes: 81 additions & 11 deletions yarn-project/aztec/terraform/prover-node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ resource "aws_service_discovery_service" "aztec-prover-node" {

# Configure an EFS filesystem.
resource "aws_efs_file_system" "prover_node_data_store" {
creation_token = "${var.DEPLOY_TAG}-prover-node-data"
creation_token = "${var.DEPLOY_TAG}-prover-node-data"

tags = {
Name = "${var.DEPLOY_TAG}-prover-node-data"
Expand All @@ -126,14 +126,77 @@ resource "aws_efs_mount_target" "public_az2" {
security_groups = [data.terraform_remote_state.setup_iac.outputs.security_group_public_id]
}

# Renders one EC2 user-data script per prover node (count = local.node_count).
# Each script appends two settings to /etc/ecs/ecs.config:
#   - ECS_CLUSTER: the cluster name read from the setup_iac remote state.
#   - ECS_INSTANCE_ATTRIBUTES: a custom "group" attribute of the form
#     "<DEPLOY_TAG>-prover-node-<index + 1>". The aztec-prover-node ECS service's
#     placement constraint uses the same string, so the two must stay in sync.
data "template_file" "user_data" {
count = local.node_count
template = <<EOF
#!/bin/bash
echo ECS_CLUSTER=${data.terraform_remote_state.setup_iac.outputs.ecs_cluster_name} >> /etc/ecs/ecs.config
echo 'ECS_INSTANCE_ATTRIBUTES={"group": "${var.DEPLOY_TAG}-prover-node-${count.index + 1}"}' >> /etc/ecs/ecs.config
EOF
}

# Launch template for the EC2 instances that host the prover nodes (one template per node).
# c6a.xlarge: 4 vCPUs and 8 GiB memory
resource "aws_launch_template" "aztec-prover-node-launch-template" {
count = local.node_count
name = "${var.DEPLOY_TAG}-prover-node-launch-template-${count.index + 1}"
# NOTE(review): hard-coded AMI ID; presumably an ECS-optimized image in the deployment region — confirm.
image_id = "ami-0cd4858f2b923aa6b"
instance_type = "c6a.xlarge"
vpc_security_group_ids = [data.terraform_remote_state.setup_iac.outputs.security_group_private_id]

iam_instance_profile {
name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_profile_name
}

key_name = data.terraform_remote_state.setup_iac.outputs.ecs_instance_key_pair_name

# Per-node user-data script (see data.template_file.user_data), base64-encoded for the launch template.
user_data = base64encode(data.template_file.user_data[count.index].rendered)

tag_specifications {
resource_type = "instance"
tags = {
Name = "${var.DEPLOY_TAG}-prover-node-${count.index + 1}"
# NOTE(review): empty-valued tag; presumably a marker for Prometheus instance discovery — confirm.
prometheus = ""
}
}
}

# One EC2 fleet per prover node. Each fleet requests a single on-demand instance
# built from that node's launch template, with one override per private subnet.
resource "aws_ec2_fleet" "aztec_prover_node_fleet" {
count = local.node_count
launch_template_config {
launch_template_specification {
launch_template_id = aws_launch_template.aztec-prover-node-launch-template[count.index].id
version = aws_launch_template.aztec-prover-node-launch-template[count.index].latest_version
}

# Candidate placements: the az1 and az2 private subnets.
override {
subnet_id = data.terraform_remote_state.setup_iac.outputs.subnet_az1_private_id
availability_zone = "eu-west-2a"
}

override {
subnet_id = data.terraform_remote_state.setup_iac.outputs.subnet_az2_private_id
availability_zone = "eu-west-2b"
}
}

# Exactly one instance in total, on-demand only (no spot capacity).
target_capacity_specification {
default_target_capacity_type = "on-demand"
total_target_capacity = 1
spot_target_capacity = 0
on_demand_target_capacity = 1
}

# Per the AWS provider docs: terminate the fleet's instances when the fleet is deleted or expires.
terminate_instances = true
terminate_instances_with_expiration = true
}

# Define task definitions for each node.
resource "aws_ecs_task_definition" "aztec-prover-node" {
count = local.node_count
family = "${var.DEPLOY_TAG}-aztec-prover-node-${count.index + 1}"
requires_compatibilities = ["FARGATE"]
requires_compatibilities = ["EC2"]
network_mode = "awsvpc"
cpu = "2048"
memory = "4096"
execution_role_arn = data.terraform_remote_state.setup_iac.outputs.ecs_task_execution_role_arn
task_role_arn = data.terraform_remote_state.aztec2_iac.outputs.cloudwatch_logging_ecs_role_arn

Expand All @@ -151,7 +214,8 @@ resource "aws_ecs_task_definition" "aztec-prover-node" {
image = "${var.DOCKERHUB_ACCOUNT}/aztec:${var.IMAGE_TAG}"
command = ["start", "--prover-node", "--archiver"]
essential = true
memoryReservation = 3776
cpu = 4096
memoryReservation = 7790
portMappings = [
{
containerPort = 80
Expand Down Expand Up @@ -250,18 +314,19 @@ resource "aws_ecs_service" "aztec-prover-node" {
count = local.node_count
name = "${var.DEPLOY_TAG}-aztec-prover-node-${count.index + 1}"
cluster = data.terraform_remote_state.setup_iac.outputs.ecs_cluster_id
launch_type = "FARGATE"
launch_type = "EC2"
desired_count = 1
deployment_maximum_percent = 100
deployment_minimum_healthy_percent = 0
platform_version = "1.4.0"
force_new_deployment = true
enable_execute_command = true
#platform_version = "1.4.0"
force_new_deployment = true
enable_execute_command = true

network_configuration {
assign_public_ip = true
#assign_public_ip = true
subnets = [
data.terraform_remote_state.setup_iac.outputs.subnet_az1_id
data.terraform_remote_state.setup_iac.outputs.subnet_az1_private_id,
data.terraform_remote_state.setup_iac.outputs.subnet_az2_private_id
]
security_groups = [data.terraform_remote_state.aztec-network_iac.outputs.p2p_security_group_id, data.terraform_remote_state.setup_iac.outputs.security_group_private_id]
}
Expand All @@ -278,6 +343,11 @@ resource "aws_ecs_service" "aztec-prover-node" {
container_port = 80
}

placement_constraints {
type = "memberOf"
expression = "attribute:group == ${var.DEPLOY_TAG}-prover-node-${count.index + 1}"
}

task_definition = aws_ecs_task_definition.aztec-prover-node[count.index].family
}

Expand Down
8 changes: 4 additions & 4 deletions yarn-project/aztec/terraform/prover/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ resource "aws_ecs_task_definition" "aztec-proving-agent" {
"command": ["start", "--prover"],
"essential": true,
"cpu": 16384,
"memoryReservation": 122880,
"memoryReservation": 127800,
"portMappings": [
{
"containerPort": 80
Expand Down Expand Up @@ -285,9 +285,9 @@ resource "aws_ecs_task_definition" "aztec-proving-agent" {
"name": "NETWORK_NAME",
"value": "${var.DEPLOY_TAG}"
},
{
"name": "LOG_JSON",
"value": "1"
{
"name": "LOG_JSON",
"value": "1"
}
],
"logConfiguration": {
Expand Down
Loading
Loading