Skip to content

Commit 8557940

Browse files
authored
OPEA ChatQnA Example on Xeon (#34)
* update readme * fix typo
1 parent 38e0633 commit 8557940

File tree

8 files changed

+512
-0
lines changed

8 files changed

+512
-0
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
<p align="center">
2+
<img src="https://github.com/intel/terraform-intel-aws-vm/blob/main/images/logo-classicblue-800px.png?raw=true" alt="Intel Logo" width="250"/>
3+
</p>
4+
5+
# Intel® Optimized Cloud Modules for Terraform
6+
7+
© Copyright 2024, Intel Corporation
8+
9+
## AWS M7i EC2 Instance with 4th Generation Intel® Xeon® Scalable Processor (Sapphire Rapids) & Open Platform for Enterprise AI (OPEA) ChatQnA Example
10+
11+
This demo will showcase Retrieval Augmented Generation (RAG) CPU inference using 4th Gen Xeon Scalable Processors on AWS using the OPEA ChatQnA Example. For more information about OPEA, go [here](https://opea.dev/). For more information on this specific example, go [here](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA).
12+
13+
## Usage
14+
15+
### variables.tf
16+
17+
Modify the region to target a specific AWS Region
18+
19+
```hcl
20+
variable "region" {
21+
description = "Target AWS region to deploy EC2 in."
22+
type = string
23+
default = "us-east-1"
24+
}
25+
```
26+
27+
Set the `huggingface_token` variable to your own Hugging Face token. For information on creating a Hugging Face token, go [here](https://huggingface.co/docs/hub/en/security-tokens).
28+
29+
```hcl
30+
variable "huggingface_token" {
31+
description = "Huggingface Token"
32+
default = " <YOUR HUGGINGFACE TOKEN> "
33+
type = string
34+
}
35+
```
36+
37+
### main.tf
38+
39+
Modify settings in this file to choose your AMI as well as instance size and other details around the instance that will be created
40+
41+
```hcl
42+
## Get latest Ubuntu 22.04 AMI in AWS for x86
43+
data "aws_ami" "ubuntu-linux-2204" {
44+
most_recent = true
45+
owners = ["099720109477"] # Canonical
46+
filter {
47+
name = "name"
48+
values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
49+
}
50+
filter {
51+
name = "virtualization-type"
52+
values = ["hvm"]
53+
}
54+
}
55+
56+
module "ec2-vm" {
57+
source = "intel/aws-vm/intel"
58+
key_name = aws_key_pair.TF_key.key_name
59+
instance_type = "m7i.8xlarge"
60+
availability_zone = "us-east-1a"
61+
ami = data.aws_ami.ubuntu-linux-2204.id
62+
user_data = data.cloudinit_config.ansible.rendered
63+
64+
root_block_device = [{
65+
volume_size = "100"
66+
}]
67+
68+
tags = {
69+
Name = "my-test-vm-${random_id.rid.dec}"
70+
Owner = "OwnerName-${random_id.rid.dec}",
71+
Duration = "2"
72+
}
73+
}
74+
```
75+
76+
Run the Terraform Commands below to deploy the demos.
77+
78+
```Shell
79+
terraform init
80+
terraform plan
81+
terraform apply
82+
```
83+
84+
## Running the Demo using AWS CloudShell
85+
86+
Open your AWS account and click the CloudShell prompt.
87+
At the command prompt, enter the following commands to install Terraform into AWS CloudShell:
88+
89+
```Shell
90+
git clone https://github.com/tfutils/tfenv.git ~/.tfenv
91+
mkdir ~/bin
92+
ln -s ~/.tfenv/bin/* ~/bin/
93+
tfenv install 1.3.0
94+
tfenv use 1.3.0
95+
```
96+
97+
Download and run the [OPEA ChatQnA on Xeon](https://github.com/intel/terraform-intel-aws-vm/tree/main/examples/gen-ai-xeon-opea-chatqna) Terraform Module by typing this command
98+
99+
```Shell
100+
git clone https://github.com/intel/terraform-intel-aws-vm.git
101+
```
102+
103+
Change into the `examples/gen-ai-xeon-opea-chatqna` example folder
104+
105+
```Shell
106+
cd terraform-intel-aws-vm/examples/gen-ai-xeon-opea-chatqna
107+
```
108+
109+
Run the Terraform Commands below to deploy the demos.
110+
111+
```Shell
112+
terraform init
113+
terraform plan
114+
terraform apply
115+
```
116+
117+
After the Terraform module successfully creates the EC2 instance, **wait ~15 minutes** for the recipe to build and launch the containers before continuing.
118+
119+
## Accessing the Demo
120+
121+
You can access the demos using the following:
122+
123+
- OPEA ChatQnA: `http://yourpublicip:5174`
124+
125+
- Note: This module is created using the m7i.16xlarge instance size. You can change your instance type by modifying **instance_type = "m7i.16xlarge"** in main.tf under the **ec2-vm module** section of the code. If you change it (for example, to an m7i.8xlarge) and then run **terraform apply**, the module will destroy the old instance and rebuild it with the new instance size.
126+
127+
## Deleting the Demo
128+
129+
To delete the demo, run `terraform destroy` to delete all resources created.
130+
131+
## Considerations
132+
133+
- The AWS region where this example is run should have a default VPC
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#cloud-config
# Bootstraps the instance for the OPEA ChatQnA example: refreshes packages,
# installs git and ansible, clones the optimized-cloud-recipes repository and
# runs the ai-opea-chatqna-xeon recipe.
# NOTE: this file is rendered by Terraform's templatefile(), so the
# HUGGINGFACEHUB_API_TOKEN placeholder below is substituted before cloud-init
# ever sees it.
package_update: true
package_upgrade: true

# The cloud-init key is `packages` (plural). The previous `package:` key is not
# recognised, so the git install request was silently ignored.
packages:
  - git

# The entries run in order in a single shell script, so the `cd` below applies
# to the commands that follow it.
runcmd:
  - apt install ansible -y
  - git clone https://github.com/intel/optimized-cloud-recipes.git /tmp/optimized-cloud-recipes
  - cd /tmp/optimized-cloud-recipes/recipes/ai-opea-chatqna-xeon
  - cp opea.sh /etc/profile.d/opea.sh
  - echo 'export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}' | sudo tee -a /etc/profile.d/opea.sh
  - chmod +x /etc/profile.d/opea.sh
  # runcmd is executed by /bin/sh (dash on Ubuntu), which has no `source`
  # builtin; the POSIX `.` command loads the environment file instead.
  - . /etc/profile.d/opea.sh
  - ansible-playbook recipe.yml
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
- export http_proxy=
2+
- export https_proxy=
3+
- export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
4+
- export RERANK_MODEL_ID="BAAI/bge-reranker-large"
5+
- export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
6+
- export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
7+
- export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
8+
- export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
9+
- export REDIS_URL="redis://${host_ip}:6379"
10+
- export INDEX_NAME="rag-redis"
11+
- export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
12+
- export MEGA_SERVICE_HOST_IP=${host_ip}
13+
- export EMBEDDING_SERVICE_HOST_IP=${host_ip}
14+
- export RETRIEVER_SERVICE_HOST_IP=${host_ip}
15+
- export RERANK_SERVICE_HOST_IP=${host_ip}
16+
- export LLM_SERVICE_HOST_IP=${host_ip}
17+
- export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# Provision an m7i EC2 instance (4th Gen Intel Xeon, Sapphire Rapids) running Ubuntu 22.04 in the default VPC. It is configured to create the EC2 in
2+
# US-East-1 region. The region is provided in variables.tf in this example folder.
3+
4+
# This example also creates an EC2 key pair, associates the public key with the EC2 instance, and creates the private key
5+
# on the local system where terraform apply is run. It also creates a new security group to open up the SSH port
6+
# 22 to a specific IP CIDR block
7+
8+
######### PLEASE NOTE TO CHANGE THE IP CIDR BLOCK TO ALLOW SSH FROM YOUR OWN ALLOWED IP ADDRESS FOR SSH #########
9+
10+
# Renders cloud_init.yml as a Terraform template and packages it as a single
# cloud-config part. The rendered result (gzip-compressed and base64-encoded)
# is passed to the ec2-vm module as user_data.
data "cloudinit_config" "ansible" {
  gzip          = true
  base64_encode = true

  part {
    filename     = "cloud_init"
    content_type = "text/cloud-config"
    content = templatefile(
      # Anchor the template to this module's directory so the lookup does not
      # depend on the directory terraform is invoked from.
      "${path.module}/cloud_init.yml",
      {
        # Substituted into the template wherever HUGGINGFACEHUB_API_TOKEN is
        # interpolated.
        HUGGINGFACEHUB_API_TOKEN = var.huggingface_token
      }
    )
  }
}
25+
26+
data "aws_ami" "ubuntu-linux-2204" {
27+
most_recent = true
28+
owners = ["099720109477"] # Canonical
29+
filter {
30+
name = "name"
31+
values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
32+
}
33+
filter {
34+
name = "virtualization-type"
35+
values = ["hvm"]
36+
}
37+
}
38+
39+
resource "random_id" "rid" {
40+
byte_length = 5
41+
}
42+
43+
# RSA key of size 4096 bits
44+
resource "tls_private_key" "rsa" {
45+
algorithm = "RSA"
46+
rsa_bits = 4096
47+
}
48+
49+
resource "aws_key_pair" "TF_key" {
50+
key_name = "TF_key-${random_id.rid.dec}"
51+
public_key = tls_private_key.rsa.public_key_openssh
52+
}
53+
54+
# Writes the generated RSA private key (PEM) to tfkey.private in the working
# directory so it can be used to SSH into the instance.
resource "local_file" "TF_private_key" {
  content  = tls_private_key.rsa.private_key_pem
  filename = "tfkey.private"
  # Restrict the key to its owner. When file_permission is unset, local_file
  # falls back to a world-accessible mode, and SSH clients reject private keys
  # that other users can read.
  file_permission = "0600"
}
58+
resource "aws_security_group" "ssh_security_group" {
59+
description = "security group to configure ports for ssh"
60+
name_prefix = "ssh_security_group"
61+
}
62+
63+
# Modify the `ingress_rules` variable in the variables.tf file to allow the required ports for your CIDR ranges
64+
resource "aws_security_group_rule" "ingress_rules" {
65+
count = length(var.ingress_rules)
66+
type = "ingress"
67+
security_group_id = aws_security_group.ssh_security_group.id
68+
from_port = var.ingress_rules[count.index].from_port
69+
to_port = var.ingress_rules[count.index].to_port
70+
protocol = var.ingress_rules[count.index].protocol
71+
cidr_blocks = [var.ingress_rules[count.index].cidr_blocks]
72+
}
73+
74+
resource "aws_network_interface_sg_attachment" "sg_attachment" {
75+
count = length(module.ec2-vm)
76+
security_group_id = aws_security_group.ssh_security_group.id
77+
network_interface_id = module.ec2-vm[count.index].primary_network_interface_id
78+
}
79+
80+
# Modify the `vm_count` variable in the variables.tf file to create the required number of EC2 instances
81+
module "ec2-vm" {
82+
count = var.vm_count
83+
source = "intel/aws-vm/intel"
84+
key_name = aws_key_pair.TF_key.key_name
85+
instance_type = "m7i.16xlarge"
86+
availability_zone = "us-east-1d"
87+
ami = data.aws_ami.ubuntu-linux-2204.id
88+
user_data = data.cloudinit_config.ansible.rendered
89+
90+
root_block_device = [{
91+
volume_size = "1000"
92+
}]
93+
94+
tags = {
95+
Name = "my-test-vm-${count.index}-${random_id.rid.dec}"
96+
Owner = "owner-${random_id.rid.dec}",
97+
Duration = "2"
98+
}
99+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# IDs of every instance created by the ec2-vm module (one entry per module
# instance); falls back to an empty string if the expression cannot be
# evaluated.
output "id" {
  description = "The ID of the instance"
  # The same expression was previously passed to try() twice; the duplicate
  # could never succeed where the first attempt failed, so it is removed.
  value = try(module.ec2-vm.*.id, "")
}
5+
6+
output "arn" {
7+
description = "The ARN of the instance"
8+
value = try(module.ec2-vm.*.arn, "")
9+
}
10+
11+
output "capacity_reservation_specification" {
12+
description = "Capacity reservation specification of the instance"
13+
value = try(module.ec2-vm.*.capacity_reservation_specification, "")
14+
}
15+
16+
output "instance_state" {
17+
description = "The state of the instance. One of: `pending`, `running`, `shutting-down`, `terminated`, `stopping`, `stopped`"
18+
value = try(module.ec2-vm.*.instance_state, "")
19+
}
20+
21+
output "outpost_arn" {
22+
description = "The ARN of the Outpost the instance is assigned to"
23+
value = try(module.ec2-vm.*.outpost_arn, "")
24+
}
25+
26+
output "password_data" {
27+
description = "Base-64 encoded encrypted password data for the instance. Useful for getting the administrator password for instances running Microsoft Windows. This attribute is only exported if `get_password_data` is true"
28+
value = try(module.ec2-vm.*.password_data, "")
29+
}
30+
31+
output "primary_network_interface_id" {
32+
description = "The ID of the instance's primary network interface"
33+
value = try(module.ec2-vm.*.primary_network_interface_id, "")
34+
}
35+
36+
output "private_dns" {
37+
description = "The private DNS name assigned to the instance. Can only be used inside the Amazon EC2, and only available if you've enabled DNS hostnames for your VPC"
38+
value = try(module.ec2-vm.*.private_dns, "")
39+
}
40+
41+
output "public_dns" {
42+
description = "The public DNS name assigned to the instance. For EC2-VPC, this is only available if you've enabled DNS hostnames for your VPC"
43+
value = try(module.ec2-vm.*.public_dns, "")
44+
}
45+
46+
output "public_ip" {
47+
description = "The public IP address assigned to the instance, if applicable. NOTE: If you are using an aws_eip with your instance, you should refer to the EIP's address directly and not use `public_ip` as this field will change after the EIP is attached"
48+
value = try(module.ec2-vm.*.public_ip, "")
49+
}
50+
51+
output "private_ip" {
52+
description = "The private IP address assigned to the instance."
53+
value = try(module.ec2-vm.*.private_ip, "")
54+
}
55+
56+
output "ipv6_addresses" {
57+
description = "The IPv6 address assigned to the instance, if applicable."
58+
value = try(module.ec2-vm.*.ipv6_addresses, [])
59+
}
60+
61+
output "tags_all" {
62+
description = "A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block"
63+
value = try(module.ec2-vm.*.tags_all, {})
64+
}
65+
66+
output "spot_bid_status" {
67+
description = "The current bid status of the Spot Instance Request"
68+
value = try(module.ec2-vm.*.spot_bid_status, "")
69+
}
70+
71+
output "spot_request_state" {
72+
description = "The current request state of the Spot Instance Request"
73+
value = try(module.ec2-vm.*.spot_request_state, "")
74+
}
75+
76+
output "spot_instance_id" {
77+
description = "The Instance ID (if any) that is currently fulfilling the Spot Instance request"
78+
value = try(module.ec2-vm.*.spot_instance_id, "")
79+
}
80+
81+
################################################################################
82+
# IAM Role / Instance Profile
83+
################################################################################
84+
85+
output "iam_role_name" {
86+
description = "The name of the IAM role"
87+
value = try(module.ec2-vm.*.aws_iam_role.name, null)
88+
}
89+
90+
output "iam_role_arn" {
91+
description = "The Amazon Resource Name (ARN) specifying the IAM role"
92+
value = try(module.ec2-vm.*.aws_iam_role.arn, null)
93+
}
94+
95+
output "iam_role_unique_id" {
96+
description = "Stable and unique string identifying the IAM role"
97+
value = try(module.ec2-vm.*.aws_iam_role.unique_id, null)
98+
}
99+
100+
output "iam_instance_profile_arn" {
101+
description = "ARN assigned by AWS to the instance profile"
102+
value = try(module.ec2-vm.*.aws_iam_instance_profile.arn, null)
103+
}
104+
105+
output "iam_instance_profile_id" {
106+
description = "Instance profile's ID"
107+
value = try(module.ec2-vm.*.aws_iam_instance_profile.id, null)
108+
}
109+
110+
output "iam_instance_profile_unique" {
111+
description = "Stable and unique string identifying the IAM instance profile"
112+
value = try(module.ec2-vm.*.aws_iam_instance_profile.unique_id, null)
113+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
provider "aws" {
2+
# Environment Variables used for Authentication
3+
region = var.region
4+
}

0 commit comments

Comments
 (0)