diff --git a/qdrant.tf b/qdrant.tf
new file mode 100644
index 0000000..873fb99
--- /dev/null
+++ b/qdrant.tf
@@ -0,0 +1,105 @@
+locals {
+  qdrant_name       = "qdrant"
+  qdrant_repository = "https://qdrant.github.io/qdrant-helm"
+  qdrant_version    = "0.7.6"
+
+  qdrant_namespace  = try(var.qdrant_helm_config["namespace"], local.qdrant_name)
+  qdrant_set_values = []
+
+  qdrant_default_values = <<-EOT
+replicaCount: 3
+
+env:
+  - name: QDRANT__TELEMETRY_DISABLED
+    value: "true"
+
+service:
+  type: LoadBalancer
+
+resources:
+  limits:
+    cpu: 200m
+    memory: 1Gi
+  requests:
+    cpu: 200m
+    memory: 1Gi
+
+persistence:
+  storageClassName: gp2
+
+metrics:
+  serviceMonitor:
+    enabled: true
+
+apiKey: false
+
+EOT
+
+  qdrant_merged_values_yaml = yamlencode(merge(
+    yamldecode(local.qdrant_default_values),
+    try(yamldecode(var.qdrant_helm_config.values[0]), {})
+  ))
+
+}
+
+resource "helm_release" "qdrant" {
+  count = var.enable_qdrant ? 1 : 0
+
+  name                       = try(var.qdrant_helm_config["name"], local.qdrant_name)
+  repository                 = try(var.qdrant_helm_config["repository"], local.qdrant_repository)
+  chart                      = try(var.qdrant_helm_config["chart"], local.qdrant_name)
+  version                    = try(var.qdrant_helm_config["version"], local.qdrant_version)
+  timeout                    = try(var.qdrant_helm_config["timeout"], 300)
+  values                     = [local.qdrant_merged_values_yaml]
+  create_namespace           = try(var.qdrant_helm_config["create_namespace"], true)
+  namespace                  = local.qdrant_namespace
+  lint                       = try(var.qdrant_helm_config["lint"], false)
+  description                = try(var.qdrant_helm_config["description"], "")
+  repository_key_file        = try(var.qdrant_helm_config["repository_key_file"], "")
+  repository_cert_file       = try(var.qdrant_helm_config["repository_cert_file"], "")
+  repository_username        = try(var.qdrant_helm_config["repository_username"], "")
+  repository_password        = try(var.qdrant_helm_config["repository_password"], "")
+  verify                     = try(var.qdrant_helm_config["verify"], false)
+  keyring                    = try(var.qdrant_helm_config["keyring"], "")
+  disable_webhooks           = try(var.qdrant_helm_config["disable_webhooks"], false)
+  reuse_values               = try(var.qdrant_helm_config["reuse_values"], false)
+  reset_values               = try(var.qdrant_helm_config["reset_values"], false)
+  force_update               = try(var.qdrant_helm_config["force_update"], false)
+  recreate_pods              = try(var.qdrant_helm_config["recreate_pods"], false)
+  cleanup_on_fail            = try(var.qdrant_helm_config["cleanup_on_fail"], false)
+  max_history                = try(var.qdrant_helm_config["max_history"], 0)
+  atomic                     = try(var.qdrant_helm_config["atomic"], false)
+  skip_crds                  = try(var.qdrant_helm_config["skip_crds"], false)
+  render_subchart_notes      = try(var.qdrant_helm_config["render_subchart_notes"], true)
+  disable_openapi_validation = try(var.qdrant_helm_config["disable_openapi_validation"], false)
+  wait                       = try(var.qdrant_helm_config["wait"], true)
+  wait_for_jobs              = try(var.qdrant_helm_config["wait_for_jobs"], false)
+  dependency_update          = try(var.qdrant_helm_config["dependency_update"], false)
+  replace                    = try(var.qdrant_helm_config["replace"], false)
+
+  postrender {
+    binary_path = try(var.qdrant_helm_config["postrender"], "")
+  }
+
+  dynamic "set" {
+    iterator = each_item
+    for_each = distinct(concat(try(var.qdrant_helm_config.set, []), local.qdrant_set_values))
+
+    content {
+      name  = each_item.value.name
+      value = each_item.value.value
+      type  = try(each_item.value.type, null)
+    }
+  }
+
+  dynamic "set_sensitive" {
+    iterator = each_item
+    for_each = try(var.qdrant_helm_config["set_sensitive"], [])
+
+    content {
+      name  = each_item.value.name
+      value = each_item.value.value
+      type  = try(each_item.value.type, null)
+    }
+  }
+}
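Note on `qdrant_merged_values_yaml` above: Terraform's `merge()` is shallow, so a user-supplied values document replaces entire top-level keys from `qdrant_default_values` rather than deep-merging into them. A minimal sketch of that behavior, using hypothetical locals that mirror the defaults:

locals {
  # Excerpt of the chart defaults and a hypothetical user override.
  example_defaults = { resources = { limits = { cpu = "200m", memory = "1Gi" } } }
  example_override = { resources = { limits = { cpu = "500m" } } }

  # merge() swaps out the whole top-level "resources" key, so the result is
  # { resources = { limits = { cpu = "500m" } } }; the memory limit is gone.
  example_merged = merge(local.example_defaults, local.example_override)
}

Overrides passed through `qdrant_helm_config.values` should therefore restate every key under any top-level section they touch.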
diff --git a/test/main.tf b/test/main.tf
index 61db548..3951047 100644
--- a/test/main.tf
+++ b/test/main.tf
@@ -2,6 +2,13 @@ provider "aws" {
   region = local.region
 }
 
+# ECR Public always authenticates with the `us-east-1` region
+# Docs -> https://docs.aws.amazon.com/AmazonECR/latest/public/public-registries.html
+provider "aws" {
+  alias  = "ecr"
+  region = "us-east-1"
+}
+
 provider "kubernetes" {
   host                   = module.eks.cluster_endpoint
   cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
@@ -14,17 +21,22 @@ provider "kubernetes" {
   }
 }
 
+data "aws_eks_cluster_auth" "this" {
+  name = module.eks.cluster_name
+}
+
 provider "helm" {
   kubernetes {
     host                   = module.eks.cluster_endpoint
     cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
 
-    exec {
-      api_version = "client.authentication.k8s.io/v1beta1"
-      command     = "aws"
-      # This requires the awscli to be installed locally where Terraform is executed
-      args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
-    }
+    token = data.aws_eks_cluster_auth.this.token
+    #exec {
+    #  api_version = "client.authentication.k8s.io/v1beta1"
+    #  command     = "aws"
+    #  # This requires the awscli to be installed locally where Terraform is executed
+    #  args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
+    #}
   }
 }
 
@@ -35,6 +47,10 @@ data "aws_ecr_authorization_token" "token" {
   registry_id = "895885662937"
 }
 
+data "aws_ecrpublic_authorization_token" "token" {
+  provider = aws.ecr
+}
+
 data "aws_availability_zones" "available" {}
 
 locals {
@@ -58,48 +74,167 @@ module "doeks_data_addons" {
   enable_aws_efa_k8s_device_plugin = true
   enable_aws_neuron_device_plugin  = true
   enable_emr_spark_operator        = true
-  enable_flink_operator            = true
-  enable_jupyterhub                = true
-  enable_kubecost                  = true
-  enable_nvidia_gpu_operator       = true
-  enable_kuberay_operator          = true
-  enable_spark_history_server      = true
   emr_spark_operator_helm_config = {
     repository_username = data.aws_ecr_authorization_token.token.user_name
     repository_password = data.aws_ecr_authorization_token.token.password
   }
-  enable_spark_operator = true
+  enable_flink_operator = true
+  flink_operator_helm_config = {
+    version = "1.8.0"
+  }
+  enable_jupyterhub = true
+  enable_kubecost   = true
+  kubecost_helm_config = {
+    repository_username = data.aws_ecrpublic_authorization_token.token.user_name
+    repository_password = data.aws_ecrpublic_authorization_token.token.password
+    values = [
+      <<-EOT
+        global:
+          prometheus:
+            fqdn: http://kube-prometheus-stack-prometheus.kube-prometheus-stack.svc:9090
+            enabled: false
+      EOT
+    ]
+  }
+
+  enable_nvidia_gpu_operator = true
+  enable_kuberay_operator    = true
+  kuberay_operator_helm_config = {
+    version = "1.1.0"
+  }
+
+  enable_spark_history_server = true
+  enable_spark_operator       = true
   # With custom values
   spark_operator_helm_config = {
     values = [templatefile("${path.module}/helm-values/spark-operator-values.yaml", {})]
   }
+
   enable_strimzi_kafka_operator = true
   enable_yunikorn               = true
+  yunikorn_helm_config = {
+    version = "1.5.0"
+  }
+
+  enable_qdrant = true
+
 }
+
+module "eks_blueprints_addons" {
+  source  = "aws-ia/eks-blueprints-addons/aws"
+  version = "~> 1.0" # ensure this is updated to the latest/desired version
+
+  cluster_name      = module.eks.cluster_name
+  cluster_endpoint  = module.eks.cluster_endpoint
+  cluster_version   = module.eks.cluster_version
+  oidc_provider_arn = module.eks.oidc_provider_arn
+
+  enable_aws_load_balancer_controller = true
+  enable_kube_prometheus_stack        = true
+  kube_prometheus_stack = {
+    values = [
+      <<-EOT
+        prometheus:
+          prometheusSpec:
+            serviceMonitorSelectorNilUsesHelmValues: false
+      EOT
+    ]
+  }
+
+  enable_metrics_server = true
+  enable_cert_manager   = true
+
+}
+
+module "ebs_csi_driver_irsa" {
+  source                = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
+  version               = "~> 5.20"
+  role_name_prefix      = format("%s-%s-", local.name, "ebs-csi-driver")
+  attach_ebs_csi_policy = true
+  oidc_providers = {
+    main = {
+      provider_arn               = module.eks.oidc_provider_arn
+      namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"]
+    }
+  }
+  tags = local.tags
+}
 
 # checkov:skip=CKV_TF_1
 #tfsec:ignore:aws-eks-enable-control-plane-logging
 module "eks" {
-  source  = "terraform-aws-modules/eks/aws"
-  version = "~> 19.13"
+  source     = "terraform-aws-modules/eks/aws"
+  version    = "~> 20.8"
+  depends_on = [module.vpc]
 
-  cluster_name                   = local.name
-  cluster_version                = "1.26"
-  cluster_endpoint_public_access = true
+  cluster_name                    = local.name
+  cluster_version                 = "1.29"
+  cluster_endpoint_public_access  = true
+  cluster_endpoint_private_access = true
 
   vpc_id     = module.vpc.vpc_id
   subnet_ids = module.vpc.private_subnets
 
-  manage_aws_auth_configmap = true
+  cluster_addons = {
+    coredns = {
+      most_recent = true
+    }
+    kube-proxy = {
+      most_recent = true
+    }
+    vpc-cni = {
+      most_recent = true
+    }
+    aws-ebs-csi-driver = {
+      most_recent              = true
+      service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn
+    }
+  }
+
+  eks_managed_node_group_defaults = {
+    iam_role_additional_policies = {
+      # Not required, but used in the example to access the nodes to inspect mounted volumes
+      AmazonSSMManagedInstanceCore = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+    }
+  }
+
+  enable_cluster_creator_admin_permissions = true
 
   eks_managed_node_groups = {
-    initial = {
-      instance_types = ["m5.xlarge"]
+    # We recommend having a managed node group (MNG) for critical workloads and add-ons,
+    # then relying on Karpenter to scale the rest of your workloads.
+    # You can also use nodeSelector and Taints/Tolerations to spread workloads across MNGs or Karpenter provisioners.
+    core_node_group = {
+      name        = "core-node-group"
+      description = "EKS managed node group example launch template"
+      subnet_ids  = module.vpc.private_subnets
+
+      min_size     = 1
+      max_size     = 9
+      desired_size = 7
+
+      force_update_version = true
+      instance_types       = ["m6g.xlarge"]
+      ami_type             = "AL2_ARM_64"
+      ebs_optimized        = true
+      block_device_mappings = {
+        xvda = {
+          device_name = "/dev/xvda"
+          ebs = {
+            volume_size = 100
+            volume_type = "gp3"
+          }
+        }
+      }
+
+      labels = {
+        WorkerType    = "ON_DEMAND"
+        NodeGroupType = "core"
+      }
 
-      min_size     = 4
-      max_size     = 10
-      desired_size = 4
+      tags = {
+        Name = "core-node-grp"
+      }
     }
   }
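One interaction worth noting in the test above: the Qdrant defaults in qdrant.tf set `metrics.serviceMonitor.enabled: true`, and it is the `serviceMonitorSelectorNilUsesHelmValues: false` override in the kube-prometheus-stack values that lets Prometheus discover ServiceMonitors created outside its own Helm release; without it, the chart only selects ServiceMonitors labeled as part of that release. If opening the selector up entirely is undesirable, `prometheusSpec` also accepts an explicit selector. A hedged alternative (the label key and value are illustrative, and the Qdrant ServiceMonitor would need to carry that label):

kube_prometheus_stack = {
  values = [
    <<-EOT
      prometheus:
        prometheusSpec:
          # Select ServiceMonitors by an explicit label instead of selecting all.
          serviceMonitorSelector:
            matchLabels:
              prometheus.io/scrape: "true"
    EOT
  ]
}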
description = "Helm configuration for Qdrant" + type = any + default = {} +} + variable "nvidia_triton_server_helm_config" { description = "Helm configuration for NVIDIA Triton Server" type = any