OPEA ChatQnA Example on Xeon (#34)

wsfowler · web-flow · commit 855794086801 · 2024-06-28T09:49:44.000-05:00
* update readme

* fix typo
diff --git a/examples/gen-ai-xeon-opea-chatqna/README.md b/examples/gen-ai-xeon-opea-chatqna/README.md
@@ -0,0 +1,133 @@
+<p align="center">
+  <img src="https://github.com/intel/terraform-intel-aws-vm/blob/main/images/logo-classicblue-800px.png?raw=true" alt="Intel Logo" width="250"/>
+</p>
+
+# Intel® Optimized Cloud Modules for Terraform
+
+© Copyright 2024, Intel Corporation
+
+## AWS M7i EC2 Instance with 4th Generation Intel® Xeon® Scalable Processor (Sapphire Rapids) & Open Platform for Enterprise AI (OPEA) ChatQnA Example
+
+This demo will showcase Retrieval Augmented Generation (RAG) CPU inference using 4th Gen Xeon Scalable Processors on AWS using the OPEA ChatQnA Example. For more information about OPEA, go [here](https://opea.dev/). For more information on this specific example, go [here](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA).
+
+## Usage
+
+### variables.tf
+
+Modify the region to target a specific AWS Region
+
+```hcl
+variable "region" {
+  description = "Target AWS region to deploy EC2 in."
+  type        = string
+  default     = "us-east-1"
+}
+```
+
+Modify the Huggingface Token variable to your specific Huggingface Token. For information on creating a Huggingface token, go [here](https://huggingface.co/docs/hub/en/security-tokens).
+
+```hcl
+variable "huggingface_token" {
+  description = "Huggingface Token"
+  default     = " <YOUR HUGGINGFACE TOKEN> "
+  type        = string
+}
+```
+
+### main.tf
+
+Modify settings in this file to choose your AMI as well as instance size and other details around the instance that will be created
+
+```hcl
+## Get latest Ubuntu 22.04 AMI in AWS for x86
+data "aws_ami" "ubuntu-linux-2204" {
+  most_recent = true
+  owners      = ["099720109477"] # Canonical
+  filter {
+    name   = "name"
+    values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
+  }
+  filter {
+    name   = "virtualization-type"
+    values = ["hvm"]
+  }
+}
+
+module "ec2-vm" {
+  source            = "intel/aws-vm/intel"
+  key_name          = aws_key_pair.TF_key.key_name
+  instance_type     = "m7i.16xlarge"
+  availability_zone = "us-east-1a"
+  ami               = data.aws_ami.ubuntu-linux-2204.id
+  user_data         = data.cloudinit_config.ansible.rendered
+
+  root_block_device = [{
+    volume_size = "100"
+  }]
+
+  tags = {
+    Name     = "my-test-vm-${random_id.rid.dec}"
+    Owner    = "OwnerName-${random_id.rid.dec}",
+    Duration = "2"
+  }
+}
+```
+
+Run the Terraform Commands below to deploy the demos.
+
+```Shell
+terraform init
+terraform plan
+terraform apply
+```
+
+## Running the Demo using AWS CloudShell
+
+Open your AWS account and click the CloudShell prompt.
+At the command prompt, enter the following commands to install Terraform into AWS CloudShell:
+
+```Shell
+git clone https://github.com/tfutils/tfenv.git ~/.tfenv
+mkdir ~/bin
+ln -s ~/.tfenv/bin/* ~/bin/
+tfenv install 1.3.0
+tfenv use 1.3.0
+```
+
+Download and run the [OPEA ChatQnA on Xeon](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) Terraform Module by typing this command
+
+```Shell
+git clone https://github.com/intel/terraform-intel-aws-vm.git
+```
+
+Change into the `examples/gen-ai-xeon-opea-chatqna` example folder
+
+```Shell
+cd terraform-intel-aws-vm/examples/gen-ai-xeon-opea-chatqna
+```
+
+Run the Terraform Commands below to deploy the demos.
+
+```Shell
+terraform init
+terraform plan
+terraform apply
+```
+
+After the Terraform module successfully creates the EC2 instance, **wait ~15 minutes** for the recipe to build and launch the containers before continuing.
+
+## Accessing the Demo
+
+You can access the demos using the following:
+
+- OPEA ChatQnA: `http://yourpublicip:5174`
+
+- Note: This module is created using the m7i.16xlarge instance size. You can change your instance type by modifying **instance_type = "m7i.16xlarge"** in main.tf under the **ec2-vm module** section of the code. For example, if you change it to an 8xlarge and then run **terraform apply**, the module will destroy the old instance and rebuild it with the new (smaller) instance size.
+
+## Deleting the Demo
+
+To delete the demo, run `terraform destroy` to delete all resources created.
+
+## Considerations
+
+- The AWS region where this example is run should have a default VPC
diff --git a/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml b/examples/gen-ai-xeon-opea-chatqna/cloud_init.yml
@@ -0,0 +1,16 @@
+#cloud-config
+# Bootstraps the OPEA ChatQnA demo on first boot: installs git and Ansible,
+# clones the Intel optimized-cloud-recipes repository and runs the ChatQnA
+# recipe playbook.
+# NOTE: main.tf renders this file through Terraform's templatefile(), so the
+# HUGGINGFACEHUB_API_TOKEN placeholder in runcmd is substituted with the
+# huggingface_token variable before the instance boots.
+package_update: true
+package_upgrade: true
+
+# The cloud-init key is "packages" (plural); a "package" key is ignored.
+packages:
+  - git
+
+runcmd:
+  - apt install ansible -y
+  - git clone https://github.com/intel/optimized-cloud-recipes.git /tmp/optimized-cloud-recipes
+  - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon
+  - cp opea.sh /etc/profile.d/opea.sh
+  # Append the token export so the profile script also provides it.
+  - echo 'export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}' | sudo tee -a /etc/profile.d/opea.sh
+  - chmod +x /etc/profile.d/opea.sh
+  # runcmd is executed by sh, which has no "source" builtin on Ubuntu (dash);
+  # use the POSIX "." command so the variables are loaded for the playbook.
+  - . /etc/profile.d/opea.sh
+  - ansible-playbook recipe.yml
diff --git a/examples/gen-ai-xeon-opea-chatqna/environment.txt b/examples/gen-ai-xeon-opea-chatqna/environment.txt
@@ -0,0 +1,17 @@
+  - export http_proxy=
+  - export https_proxy=
+  - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+  - export RERANK_MODEL_ID="BAAI/bge-reranker-large"
+  - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+  - export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
+  - export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+  - export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
+  - export REDIS_URL="redis://${host_ip}:6379"
+  - export INDEX_NAME="rag-redis"
+  - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+  - export MEGA_SERVICE_HOST_IP=${host_ip}
+  - export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+  - export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+  - export RERANK_SERVICE_HOST_IP=${host_ip}
+  - export LLM_SERVICE_HOST_IP=${host_ip}
+  - export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
diff --git a/examples/gen-ai-xeon-opea-chatqna/main.tf b/examples/gen-ai-xeon-opea-chatqna/main.tf
@@ -0,0 +1,99 @@
+# Provisions an EC2 M7i instance (4th Gen Intel Xeon, Sapphire Rapids) running Ubuntu 22.04 in the default VPC.
+# It is configured to create the EC2 instance in the US-East-1 region. The region is provided in variables.tf in this example folder.
+
+# This example also creates an EC2 key pair and associates the public key with the EC2 instance. The private key
+# is created on the local system where terraform apply is run. A new security group is created to open up the
+# SSH port 22 to a specific IP CIDR block
+
+######### PLEASE NOTE TO CHANGE THE IP CIDR BLOCK TO ALLOW SSH FROM YOUR OWN ALLOWED IP ADDRESS FOR SSH #########
+
+# Renders cloud_init.yml into the gzip-compressed, base64-encoded user data
+# that is passed to the EC2 instance(s).
+data "cloudinit_config" "ansible" {
+  gzip          = true
+  base64_encode = true
+
+  part {
+    filename     = "cloud_init"
+    content_type = "text/cloud-config"
+    # path.module anchors the template to this example's directory so the
+    # lookup does not depend on the directory terraform is invoked from.
+    content = templatefile(
+      "${path.module}/cloud_init.yml",
+      {
+        # Substituted into the HUGGINGFACEHUB_API_TOKEN placeholder of the template.
+        HUGGINGFACEHUB_API_TOKEN = var.huggingface_token
+      }
+    )
+  }
+}
+
+# Look up the most recent Canonical-owned Ubuntu 22.04 (jammy) amd64 HVM image.
+data "aws_ami" "ubuntu-linux-2204" {
+  owners      = ["099720109477"] # Canonical
+  most_recent = true
+
+  filter {
+    name   = "virtualization-type"
+    values = ["hvm"]
+  }
+
+  filter {
+    name   = "name"
+    values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
+  }
+}
+
+# Random suffix (5 bytes) used to make the key pair name and instance tags unique.
+resource "random_id" "rid" {
+  byte_length = 5
+}
+
+# Generates the 4096-bit RSA key used for the EC2 key pair
+resource "tls_private_key" "rsa" {
+  rsa_bits  = 4096
+  algorithm = "RSA"
+}
+
+# Registers the generated public key with AWS under a randomized key name.
+resource "aws_key_pair" "TF_key" {
+  public_key = tls_private_key.rsa.public_key_openssh
+  key_name   = "TF_key-${random_id.rid.dec}"
+}
+
+# Writes the generated private key to the local working directory so it can be
+# used to SSH into the instance.
+resource "local_file" "TF_private_key" {
+  content  = tls_private_key.rsa.private_key_pem
+  filename = "tfkey.private"
+  # Restrict the key to its owner: the provider default mode is world-accessible
+  # and ssh refuses private keys with open permissions.
+  file_permission = "0600"
+}
+# Security group that the ingress rules below are attached to.
+resource "aws_security_group" "ssh_security_group" {
+  name_prefix = "ssh_security_group"
+  description = "security group to configure ports for ssh"
+}
+
+# One ingress rule is created per entry of the `ingress_rules` variable.
+# Edit that variable in variables.tf to allow the required ports for your CIDR ranges.
+resource "aws_security_group_rule" "ingress_rules" {
+  count = length(var.ingress_rules)
+
+  security_group_id = aws_security_group.ssh_security_group.id
+  type              = "ingress"
+  protocol          = var.ingress_rules[count.index].protocol
+  from_port         = var.ingress_rules[count.index].from_port
+  to_port           = var.ingress_rules[count.index].to_port
+  cidr_blocks       = [var.ingress_rules[count.index].cidr_blocks]
+}
+
+# Attaches the security group to the primary network interface of every
+# instance created by the ec2-vm module.
+resource "aws_network_interface_sg_attachment" "sg_attachment" {
+  count = length(module.ec2-vm)
+
+  network_interface_id = module.ec2-vm[count.index].primary_network_interface_id
+  security_group_id    = aws_security_group.ssh_security_group.id
+}
+
+# Creates the EC2 instance(s) that run the OPEA ChatQnA demo.
+# Modify the `vm_count` variable in the variables.tf file to create the required number of EC2 instances
+module "ec2-vm" {
+  count         = var.vm_count
+  source        = "intel/aws-vm/intel"
+  key_name      = aws_key_pair.TF_key.key_name
+  instance_type = "m7i.16xlarge"
+  # Derive the zone from the configured region instead of hard-coding
+  # "us-east-1d", so changing `region` in variables.tf does not point the
+  # instance at a zone of another region. The default region still resolves
+  # to us-east-1d.
+  # NOTE(review): not every region has a "d" zone; adjust the suffix if needed.
+  availability_zone = "${var.region}d"
+  ami               = data.aws_ami.ubuntu-linux-2204.id
+  user_data         = data.cloudinit_config.ansible.rendered
+
+  root_block_device = [{
+    volume_size = "1000"
+  }]
+
+  tags = {
+    Name     = "my-test-vm-${count.index}-${random_id.rid.dec}"
+    Owner    = "owner-${random_id.rid.dec}"
+    Duration = "2"
+  }
+}
diff --git a/examples/gen-ai-xeon-opea-chatqna/outputs.tf b/examples/gen-ai-xeon-opea-chatqna/outputs.tf
@@ -0,0 +1,113 @@
+# Instance IDs of all ec2-vm module instances; falls back to "" if the
+# expression cannot be evaluated.
+output "id" {
+  description = "The ID of the instance"
+  value       = try(module.ec2-vm.*.id, "")
+}
+
+# Instance attribute outputs. Each one collects the attribute from every
+# ec2-vm module instance and falls back to an empty value if the expression
+# cannot be evaluated.
+output "arn" {
+  value       = try(module.ec2-vm[*].arn, "")
+  description = "The ARN of the instance"
+}
+
+output "capacity_reservation_specification" {
+  value       = try(module.ec2-vm[*].capacity_reservation_specification, "")
+  description = "Capacity reservation specification of the instance"
+}
+
+output "instance_state" {
+  value       = try(module.ec2-vm[*].instance_state, "")
+  description = "The state of the instance. One of: `pending`, `running`, `shutting-down`, `terminated`, `stopping`, `stopped`"
+}
+
+output "outpost_arn" {
+  value       = try(module.ec2-vm[*].outpost_arn, "")
+  description = "The ARN of the Outpost the instance is assigned to"
+}
+
+output "password_data" {
+  value       = try(module.ec2-vm[*].password_data, "")
+  description = "Base-64 encoded encrypted password data for the instance. Useful for getting the administrator password for instances running Microsoft Windows. This attribute is only exported if `get_password_data` is true"
+}
+
+output "primary_network_interface_id" {
+  value       = try(module.ec2-vm[*].primary_network_interface_id, "")
+  description = "The ID of the instance's primary network interface"
+}
+
+output "private_dns" {
+  value       = try(module.ec2-vm[*].private_dns, "")
+  description = "The private DNS name assigned to the instance. Can only be used inside the Amazon EC2, and only available if you've enabled DNS hostnames for your VPC"
+}
+
+output "public_dns" {
+  value       = try(module.ec2-vm[*].public_dns, "")
+  description = "The public DNS name assigned to the instance. For EC2-VPC, this is only available if you've enabled DNS hostnames for your VPC"
+}
+
+output "public_ip" {
+  value       = try(module.ec2-vm[*].public_ip, "")
+  description = "The public IP address assigned to the instance, if applicable. NOTE: If you are using an aws_eip with your instance, you should refer to the EIP's address directly and not use `public_ip` as this field will change after the EIP is attached"
+}
+
+output "private_ip" {
+  value       = try(module.ec2-vm[*].private_ip, "")
+  description = "The private IP address assigned to the instance."
+}
+
+output "ipv6_addresses" {
+  value       = try(module.ec2-vm[*].ipv6_addresses, [])
+  description = "The IPv6 address assigned to the instance, if applicable."
+}
+
+output "tags_all" {
+  value       = try(module.ec2-vm[*].tags_all, {})
+  description = "A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block"
+}
+
+output "spot_bid_status" {
+  value       = try(module.ec2-vm[*].spot_bid_status, "")
+  description = "The current bid status of the Spot Instance Request"
+}
+
+output "spot_request_state" {
+  value       = try(module.ec2-vm[*].spot_request_state, "")
+  description = "The current request state of the Spot Instance Request"
+}
+
+output "spot_instance_id" {
+  value       = try(module.ec2-vm[*].spot_instance_id, "")
+  description = "The Instance ID (if any) that is currently fulfilling the Spot Instance request"
+}
+
+################################################################################
+# IAM Role / Instance Profile
+# Each output falls back to null when the expression cannot be evaluated.
+################################################################################
+
+output "iam_role_name" {
+  value       = try(module.ec2-vm[*].aws_iam_role.name, null)
+  description = "The name of the IAM role"
+}
+
+output "iam_role_arn" {
+  value       = try(module.ec2-vm[*].aws_iam_role.arn, null)
+  description = "The Amazon Resource Name (ARN) specifying the IAM role"
+}
+
+output "iam_role_unique_id" {
+  value       = try(module.ec2-vm[*].aws_iam_role.unique_id, null)
+  description = "Stable and unique string identifying the IAM role"
+}
+
+output "iam_instance_profile_arn" {
+  value       = try(module.ec2-vm[*].aws_iam_instance_profile.arn, null)
+  description = "ARN assigned by AWS to the instance profile"
+}
+
+output "iam_instance_profile_id" {
+  value       = try(module.ec2-vm[*].aws_iam_instance_profile.id, null)
+  description = "Instance profile's ID"
+}
+
+output "iam_instance_profile_unique" {
+  value       = try(module.ec2-vm[*].aws_iam_instance_profile.unique_id, null)
+  description = "Stable and unique string identifying the IAM instance profile"
+}
diff --git a/examples/gen-ai-xeon-opea-chatqna/providers.tf b/examples/gen-ai-xeon-opea-chatqna/providers.tf
@@ -0,0 +1,4 @@
+# AWS provider. Credentials are taken from environment variables; only the
+# target region is configured here, from the `region` variable.
+provider "aws" {
+  region = var.region
+}
diff --git a/examples/gen-ai-xeon-opea-chatqna/variables.tf b/examples/gen-ai-xeon-opea-chatqna/variables.tf
diff --git a/examples/gen-ai-xeon-opea-chatqna/versions.tf b/examples/gen-ai-xeon-opea-chatqna/versions.tf