From 37c00779434e4a37a10140ff053f075b492e3580 Mon Sep 17 00:00:00 2001
From: Oleg Avdeev
Date: Wed, 7 Oct 2020 11:54:41 +0300
Subject: [PATCH] terraform config for aws

Signed-off-by: Oleg Avdeev
---
 infra/terraform/aws/README.md       |  28 +++++++
 infra/terraform/aws/eks.tf          | 117 ++++++++++++++++++++++++++++
 infra/terraform/aws/emr.tf          |  70 +++++++++++++++++
 infra/terraform/aws/helm.tf         |  94 ++++++++++++++++++++++
 infra/terraform/aws/iam.tf          |  27 +++++++
 infra/terraform/aws/online_store.tf |   8 ++
 infra/terraform/aws/outputs.tf      |   0
 infra/terraform/aws/rds.tf          |  33 ++++++++
 infra/terraform/aws/s3.tf           |  20 +++++
 infra/terraform/aws/variables.tf    |  38 +++++++++
 10 files changed, 435 insertions(+)
 create mode 100644 infra/terraform/aws/README.md
 create mode 100644 infra/terraform/aws/eks.tf
 create mode 100644 infra/terraform/aws/emr.tf
 create mode 100644 infra/terraform/aws/helm.tf
 create mode 100644 infra/terraform/aws/iam.tf
 create mode 100644 infra/terraform/aws/online_store.tf
 create mode 100644 infra/terraform/aws/outputs.tf
 create mode 100644 infra/terraform/aws/rds.tf
 create mode 100644 infra/terraform/aws/s3.tf
 create mode 100644 infra/terraform/aws/variables.tf

diff --git a/infra/terraform/aws/README.md b/infra/terraform/aws/README.md
new file mode 100644
index 0000000000..47fba8e04f
--- /dev/null
+++ b/infra/terraform/aws/README.md
@@ -0,0 +1,28 @@
+# Terraform config for Feast on AWS
+
+Requires Terraform 0.12.
+
+1. Run `aws emr create-default-roles` once.
+
+2. Create a tfvars file, e.g. `my.tfvars`, and set `name_prefix` and `region`:
+
+```
+name_prefix = "my-feast"
+region      = "us-east-1"
+```
+
+3. Configure a Terraform state backend, e.g.:
+```
+terraform {
+  backend "s3" {
+    bucket         = "my-terraform-state-bucket"
+    key            = "clusters/my-feast-test"
+    region         = "us-west-2"
+    dynamodb_table = "terraform-state-lock"
+    encrypt        = true
+  }
+}
+```
+
+4. Run `terraform apply -var-file="my.tfvars"` to deploy.
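+
+5. Once the apply finishes, point `kubectl` at the new cluster with
+   `aws eks update-kubeconfig --name <cluster-name> --region <region>`. The
+   cluster name is `name_prefix` plus a random suffix generated by Terraform,
+   so check `aws eks list-clusters --region <region>` first.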
+
diff --git a/infra/terraform/aws/eks.tf b/infra/terraform/aws/eks.tf
new file mode 100644
index 0000000000..fdbc0a579f
--- /dev/null
+++ b/infra/terraform/aws/eks.tf
@@ -0,0 +1,117 @@
+terraform {
+  required_version = ">= 0.12.0"
+}
+
+provider "aws" {
+  version = ">= 2.28.1"
+  region  = var.region
+}
+
+provider "random" {
+  version = "~> 2.1"
+}
+
+provider "local" {
+  version = "~> 1.2"
+}
+
+provider "null" {
+  version = "~> 2.1"
+}
+
+provider "template" {
+  version = "~> 2.1"
+}
+
+data "aws_eks_cluster" "cluster" {
+  name = module.eks.cluster_id
+}
+
+data "aws_eks_cluster_auth" "cluster" {
+  name = module.eks.cluster_id
+}
+
+provider "kubernetes" {
+  host                   = data.aws_eks_cluster.cluster.endpoint
+  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
+  token                  = data.aws_eks_cluster_auth.cluster.token
+  load_config_file       = false
+  version                = "~> 1.11"
+}
+
+data "aws_availability_zones" "available" {
+}
+
+locals {
+  cluster_name = "${var.name_prefix}-${random_string.suffix.result}"
+}
+
+resource "random_string" "suffix" {
+  length  = 8
+  special = false
+}
+
+resource "aws_security_group" "all_worker_mgmt" {
+  name_prefix = "${var.name_prefix}-worker"
+  vpc_id      = module.vpc.vpc_id
+}
+
+module "vpc" {
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "2.47.0"
+
+  name                 = "${var.name_prefix}-vpc"
+  cidr                 = "10.0.0.0/16"
+  azs                  = data.aws_availability_zones.available.names
+  private_subnets      = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
+  public_subnets       = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+  enable_nat_gateway   = true
+  single_nat_gateway   = true
+  enable_dns_hostnames = true
+
+  public_subnet_tags = {
+    "kubernetes.io/cluster/${local.cluster_name}" = "shared"
+    "kubernetes.io/role/elb"                      = "1"
+  }
+
+  private_subnet_tags = {
+    "kubernetes.io/cluster/${local.cluster_name}" = "shared"
+    "kubernetes.io/role/internal-elb"             = "1"
+  }
+}
+
+module "eks" {
+  source  = "terraform-aws-modules/eks/aws"
+  version = "12.2.0"
+
+  cluster_name    = local.cluster_name
+  cluster_version = "1.17"
+  subnets         = module.vpc.private_subnets
+
+  tags = {
+    Environment = "test"
+    GithubRepo  = "terraform-aws-eks"
+    GithubOrg   = "terraform-aws-modules"
+  }
+
+  vpc_id = module.vpc.vpc_id
+
+  worker_groups = [
+    {
+      name                 = "worker-group-1"
+      instance_type        = "r3.large"
+      asg_desired_capacity = 2
+    },
+    {
+      name                 = "worker-group-2"
+      instance_type        = "r3.large"
+      asg_desired_capacity = 1
+    },
+  ]
+
+  worker_additional_security_group_ids = [aws_security_group.all_worker_mgmt.id]
+  map_roles                            = var.map_roles
+  map_accounts                         = var.map_accounts
+
+  workers_additional_policies = [aws_iam_policy.worker_policy.id]
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/emr.tf b/infra/terraform/aws/emr.tf
new file mode 100644
index 0000000000..9fde6fac95
--- /dev/null
+++ b/infra/terraform/aws/emr.tf
@@ -0,0 +1,70 @@
+data "aws_iam_instance_profile" "emr_default_role" {
+  name = "EMR_EC2_DefaultRole"
+}
+
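+# The persistent EMR cluster is optional: it is only created when
+# var.use_persistent_emr_cluster is set, since count evaluates to 0 otherwise.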
+resource "aws_emr_cluster" "persistent_cluster" {
+  count = var.use_persistent_emr_cluster ? 1 : 0
+
+  name                              = "${var.name_prefix}-persistent-emr"
+  keep_job_flow_alive_when_no_steps = true
+  release_label                     = "emr-6.0.0"
+
+  ec2_attributes {
+    subnet_id                         = module.vpc.private_subnets[0]
+    additional_master_security_groups = aws_security_group.all_worker_mgmt.id
+    additional_slave_security_groups  = aws_security_group.all_worker_mgmt.id
+    instance_profile                  = data.aws_iam_instance_profile.emr_default_role.arn
+  }
+
+  applications = ["Hadoop", "Hive", "Spark", "Livy"]
+  service_role = "EMR_DefaultRole"
+
+  bootstrap_action {
+    path = "s3://aws-bigdata-blog/artifacts/resize_storage/resize_storage.sh"
+    name = "runif"
+    args = ["--scaling-factor", "1.5"]
+  }
+
+  master_instance_fleet {
+    instance_type_configs {
+      instance_type = "m4.xlarge"
+      ebs_config {
+        size                 = "100"
+        type                 = "gp2"
+        volumes_per_instance = 1
+      }
+    }
+    launch_specifications {
+      spot_specification {
+        timeout_action           = "SWITCH_TO_ON_DEMAND"
+        timeout_duration_minutes = 10
+        allocation_strategy      = "capacity-optimized"
+      }
+    }
+    target_spot_capacity = 1
+  }
+  core_instance_fleet {
+    instance_type_configs {
+      bid_price_as_percentage_of_on_demand_price = 100
+      ebs_config {
+        size                 = "100"
+        type                 = "gp2"
+        volumes_per_instance = 1
+      }
+      instance_type     = "m4.xlarge"
+      weighted_capacity = 1
+    }
+    launch_specifications {
+      spot_specification {
+        timeout_action           = "SWITCH_TO_ON_DEMAND"
+        timeout_duration_minutes = 10
+        allocation_strategy      = "capacity-optimized"
+      }
+    }
+    target_spot_capacity = 2
+  }
+
+  step_concurrency_level = 256
+
+  log_uri = "s3://${aws_s3_bucket.feast_bucket.id}/logs/${var.name_prefix}-persistent-emr/"
+}
diff --git a/infra/terraform/aws/helm.tf b/infra/terraform/aws/helm.tf
new file mode 100644
index 0000000000..e8d089e283
--- /dev/null
+++ b/infra/terraform/aws/helm.tf
@@ -0,0 +1,94 @@
+provider "helm" {
+  kubernetes {
+    host                   = data.aws_eks_cluster.cluster.endpoint
+    cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
+    token                  = data.aws_eks_cluster_auth.cluster.token
+    load_config_file       = false
+  }
+}
+
+# Construct the Feast configs that need to point at RDS and Redis.
+#
+# The RDS password is stored in a configmap, which is not ideal, but the RDS
+# instance is not routable from outside the VPC anyway, so that will do for now.
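+#
+# The values below are rendered with yamlencode() and passed to the Feast Helm
+# chart, disabling the chart's bundled Redis and PostgreSQL in favor of the
+# ElastiCache and RDS instances defined in this Terraform config.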
+locals {
+  feast_core_config = {
+    redis = {
+      enabled = false
+    }
+    postgresql = {
+      enabled = false
+    }
+    kafka = {
+      enabled = false
+    }
+
+    "feast-core" = {
+      "application-generated.yaml" = {
+        enabled = false
+      }
+
+      "application-override.yaml" = {
+        spring = {
+          datasource = {
+            url      = "jdbc:postgresql://${module.rds_cluster.endpoint}:5432/${module.rds_cluster.database_name}"
+            username = module.rds_cluster.master_username
+            password = random_password.db_password.result
+          }
+        }
+        feast = {
+          stream = {
+            type = "kafka"
+            options = {
+              bootstrapServers = "myrelease-kafka:9092"
+              topic            = "feast"
+            }
+          }
+        }
+        server = {
+          port = "8080"
+        }
+      }
+    }
+
+    "feast-online-serving" = {
+      "application-override.yaml" = {
+        enabled = true
+        feast = {
+          stores = [
+            {
+              name = "online"
+              type = "REDIS"
+              config = {
+                host = module.redis.endpoint
+                port = 6379
+              }
+              subscriptions = [
+                {
+                  name    = "*"
+                  project = "*"
+                  version = "*"
+                }
+              ]
+            }
+          ]
+          job_store = {
+            redis_host = module.redis.endpoint
+            redis_port = 6379
+          }
+        }
+      }
+    }
+  }
+}
+
+resource "helm_release" "feast" {
+  name  = "feast"
+  chart = "../../charts/feast"
+
+  wait = false
+
+  values = [
+    yamlencode(local.feast_core_config)
+  ]
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/iam.tf b/infra/terraform/aws/iam.tf
new file mode 100644
index 0000000000..e6cc3205a1
--- /dev/null
+++ b/infra/terraform/aws/iam.tf
@@ -0,0 +1,27 @@
+data "aws_iam_policy_document" "worker_policy_document" {
+  statement {
+    sid = "1"
+
+    actions = [
+      "s3:*",
+      "elasticmapreduce:*",
+      "glue:*",
+      "cloudwatch:*",
+      "ecr:*",
+      "iam:PassRole",
+    ]
+
+    resources = [
+      "*",
+    ]
+  }
+
+}
+
+resource "aws_iam_policy" "worker_policy" {
+  name        = "${var.name_prefix}_feast_worker_policy"
+  path        = "/"
+  description = "Worker IAM policy"
+
+  policy = data.aws_iam_policy_document.worker_policy_document.json
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/online_store.tf b/infra/terraform/aws/online_store.tf
new file mode 100644
index 0000000000..f8ea605735
--- /dev/null
+++ b/infra/terraform/aws/online_store.tf
@@ -0,0 +1,8 @@
+module "redis" {
+  source                  = "git::https://github.com/cloudposse/terraform-aws-elasticache-redis.git?ref=tags/0.25.0"
+  subnets                 = module.vpc.private_subnets
+  name                    = "${var.name_prefix}-online"
+  vpc_id                  = module.vpc.vpc_id
+  allowed_security_groups = [aws_security_group.all_worker_mgmt.id]
+  availability_zones      = module.vpc.azs
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/outputs.tf b/infra/terraform/aws/outputs.tf
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/infra/terraform/aws/rds.tf b/infra/terraform/aws/rds.tf
new file mode 100644
index 0000000000..fdee2107d3
--- /dev/null
+++ b/infra/terraform/aws/rds.tf
@@ -0,0 +1,33 @@
+resource "random_password" "db_password" {
+  length           = 16
+  special          = true
+  override_special = "!#()-[]<>"
+}
+
+module "rds_cluster" {
+  source          = "git::https://github.com/cloudposse/terraform-aws-rds-cluster.git?ref=tags/0.35.0"
+  name            = "${var.name_prefix}-db"
+  engine          = "aurora-postgresql"
+  engine_mode     = "serverless"
+  engine_version  = "10.7"
+  cluster_family  = "aurora-postgresql10"
+  cluster_size    = 0
+  admin_user      = var.postgres_db_user
+  admin_password  = random_password.db_password.result
+  db_name         = var.postgres_db_name
+  db_port         = 5432
+  instance_type   = "db.t2.small"
+  vpc_id          = module.vpc.vpc_id
+  security_groups = [aws_security_group.all_worker_mgmt.id]
+  subnets         = module.vpc.private_subnets
+
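+  # Aurora Serverless scaling: min_capacity and max_capacity are in Aurora
+  # capacity units (ACUs), and the cluster pauses after
+  # seconds_until_auto_pause seconds without activity.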
+  scaling_configuration = [
+    {
+      auto_pause               = true
+      max_capacity             = 16
+      min_capacity             = 2
+      seconds_until_auto_pause = 300
+      timeout_action           = "ForceApplyCapacityChange"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/s3.tf b/infra/terraform/aws/s3.tf
new file mode 100644
index 0000000000..a3af2fe873
--- /dev/null
+++ b/infra/terraform/aws/s3.tf
@@ -0,0 +1,20 @@
+resource "random_string" "s3_suffix" {
+  length  = 8
+  lower   = true
+  upper   = false
+  special = false
+}
+
+resource "aws_s3_bucket" "feast_bucket" {
+  # Since bucket names are globally unique, we add a random suffix here.
+  bucket = "${var.name_prefix}-feast-${random_string.s3_suffix.result}"
+  acl    = "private"
+
+  server_side_encryption_configuration {
+    rule {
+      apply_server_side_encryption_by_default {
+        sse_algorithm = "AES256"
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/infra/terraform/aws/variables.tf b/infra/terraform/aws/variables.tf
new file mode 100644
index 0000000000..c8a75c10e8
--- /dev/null
+++ b/infra/terraform/aws/variables.tf
@@ -0,0 +1,38 @@
+variable "region" {
+}
+
+variable "name_prefix" {
+}
+
+variable "postgres_db_name" {
+  default = "feast"
+}
+
+variable "postgres_db_user" {
+  default = "feast"
+}
+
+variable "map_accounts" {
+  description = "Additional AWS account numbers to add to the aws-auth configmap."
+  type        = list(string)
+
+  default = [
+  ]
+}
+
+variable "map_roles" {
+  description = "Additional IAM roles to add to the aws-auth configmap."
+  type = list(object({
+    rolearn  = string
+    username = string
+    groups   = list(string)
+  }))
+
+  default = [
+  ]
+}
+
+variable "use_persistent_emr_cluster" {
+  default = false
+}
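
For reference, a minimal `my.tfvars` sketch for `map_roles`, granting one extra IAM role admin access to the EKS cluster through the aws-auth configmap; the account ID, role name, and username below are placeholders:

```
map_roles = [
  {
    # Placeholder ARN and username; substitute a real role from your account.
    rolearn  = "arn:aws:iam::111122223333:role/eks-admin"
    username = "eks-admin"
    groups   = ["system:masters"]
  }
]
```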