Merge pull request #26 from Odraxs/v0.2
Release v0.2
Odraxs authored Apr 24, 2024
2 parents 43c3593 + b1c601b commit a30037e
Showing 13 changed files with 264 additions and 29 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -1,11 +1,12 @@
## GO Z V MAIL

This project consists of 4 folders:
This project consists of 5 folders:

- `data-embedding`: the folder that automates the process of creating and embedding the `emails` index into the zincsearch database.
- `web`: the folder that contains the web application.
- `server`: the folder that contains the go server that handles the requests to the `zincsearch` database and retrieves the results (limited to 200).
- `docker`: the folder that contains the docker-compose file that can be used to run the entire project.
- `terraform`: the folder that contains the terraform code that allows you to deploy the project to an `aws` EC2 instance.

## Requirements:

@@ -14,6 +15,7 @@ This project consist in 4 folders:
- Docker-compose
- Node >= 20.10.0 (recommended)
- Graphviz (if you want to generate the profiling graphs)
- terraform (optional, more info [here](/terraform/README.md))

## I just want to see the project running!

83 changes: 55 additions & 28 deletions data-embedding/main.go
@@ -9,6 +9,7 @@ import (
"net/http"
"os"
"path/filepath"
"runtime/pprof"
"strings"
"sync"
"time"
@@ -32,7 +33,18 @@ var (

func main() {
log.Println("Starting indexer!")
utils.CpuProfiling()

// utils.CpuProfiling() doesn't generate a proper CPU profile
f, err := os.Create("./profs/cpu.prof")
if err != nil {
log.Fatal("could not create CPU profile: ", err)
}
defer f.Close()
if err := pprof.StartCPUProfile(f); err != nil {
log.Fatal("could not start CPU profile: ", err)
}
defer pprof.StopCPUProfile()

indexerData, err := createIndexerFromJsonFile(jsonIndexerPath)
if err != nil {
log.Fatal(err)
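The inline pprof setup above replaces `utils.CpuProfiling()`, which according to the comment did not produce a usable profile. A common cause for that is a helper that both starts and stops the profile inside its own scope, so the profile ends before any real work has run. Below is a minimal sketch of a helper that hands the stop call back to the caller instead; the helper name and signature are illustrative assumptions, not the repository's actual `utils` API.

```go
package utils

import (
	"log"
	"os"
	"runtime/pprof"
)

// StartCPUProfile begins CPU profiling into the given file and returns a stop
// function for the caller to defer; stopping inside the helper itself would
// end the profile before the indexing work has run.
func StartCPUProfile(path string) func() {
	f, err := os.Create(path)
	if err != nil {
		log.Fatal("could not create CPU profile: ", err)
	}
	if err := pprof.StartCPUProfile(f); err != nil {
		f.Close()
		log.Fatal("could not start CPU profile: ", err)
	}
	return func() {
		pprof.StopCPUProfile()
		f.Close()
	}
}
```

With that shape, `main` could keep the setup to a single line, `defer utils.StartCPUProfile("./profs/cpu.prof")()`, and the profile would still cover the whole run.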
@@ -56,6 +68,10 @@ func main() {
var records []utils.EmailData
var locker sync.Mutex
var wg sync.WaitGroup
// This channel was created with the intention of limiting the CPU usage of the goroutines, but after running it,
// it seems like it doesn't work as expected. It is left here since it could be helpful to anyone trying to optimize this code.
routines := make(chan int, 1000)
jobCounter := 0

// Process all the folders contained in the path `dataToIndexRootPath` to obtain all the emails records
err = filepath.Walk(dataToIndexRootPath, func(path string, info os.FileInfo, err error) error {
@@ -64,27 +80,35 @@ }
}
if !info.IsDir() {
wg.Add(1)
go func(p string) {
jobCounter++
go func(p string, routines <-chan int) {
defer wg.Done()
emailData, err := processFile(p)
if err != nil {
log.Println(err)
return
for range routines {
emailData, err := processFile(p)
if err != nil {
log.Println(err)
return
}
locker.Lock()
records = append(records, emailData)
locker.Unlock()
}
locker.Lock()
records = append(records, emailData)
locker.Unlock()
}(path)
}(path, routines)
routines <- jobCounter
}
return nil
})
close(routines)
if err != nil {
log.Fatal(err)
}

wg.Wait()

sendBulkToZincSearch(records)
err = sendBulkToZincSearch(records)
if err != nil {
log.Println("something happened while storing the records: ", err)
}
utils.MemoryProfiling()
duration := time.Since(startTime)
log.Printf("Finished indexing. Time taken: %.2f seconds", duration.Seconds())
@@ -128,22 +152,20 @@ func processFile(path string) (utils.EmailData, error) {
}, nil
}

func sendBulkToZincSearch(records []utils.EmailData) {
func sendBulkToZincSearch(records []utils.EmailData) error {
bulkData := utils.BulkData{
Index: "emails",
Index: indexName,
Records: records,
}

jsonData, err := json.Marshal(bulkData)
if err != nil {
log.Println(err)
return
return fmt.Errorf("failed to decode data into json: %w", err)
}

req, err := http.NewRequest("POST", zincsearchBaseUrl+"/_bulkv2", bytes.NewReader(jsonData))
req, err := http.NewRequest(http.MethodPost, zincsearchBaseUrl+"/_bulkv2", bytes.NewReader(jsonData))
if err != nil {
log.Println(err)
return
return fmt.Errorf("failed to create the http request: %w", err)
}

req.SetBasicAuth(zincUser, zincPassword)
@@ -152,16 +174,21 @@

resp, err := http.DefaultClient.Do(req)
if err != nil {
log.Println(err)
return
return fmt.Errorf("something happened while doing the request to zincsearch: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected status response: %d", resp.StatusCode)
}

_, err = io.ReadAll(resp.Body)
if err != nil {
log.Println(err)
return
return err
}

return nil
}

func createIndexerFromJsonFile(filepath string) (utils.IndexerData, error) {
@@ -185,33 +212,33 @@ func createIndexOnZincSearch(indexerData utils.IndexerData) error {
func createIndexOnZincSearch(indexerData utils.IndexerData) error {
jsonData, err := json.Marshal(indexerData)
if err != nil {
log.Fatal(err)
return fmt.Errorf("failed to encode the index data: %w", err)
}

req, err := http.NewRequest("POST", zincsearchBaseUrl+"/index", bytes.NewBuffer(jsonData))
req, err := http.NewRequest(http.MethodPost, zincsearchBaseUrl+"/index", bytes.NewBuffer(jsonData))
if err != nil {
log.Fatal(err)
return fmt.Errorf("failed to create the index request: %w", err)
}

req.Header.Set("Content-Type", "application/json")
req.SetBasicAuth("admin", "password")
req.SetBasicAuth(zincUser, zincPassword)

client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
log.Fatal(err)
return fmt.Errorf("zincsearch request failed: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
log.Fatalf("failed to create indexer, status code: %d", resp.StatusCode)
return fmt.Errorf("failed to create indexer, status code: %d", resp.StatusCode)
}

return nil
}

func deleteIndexOnZincSearch(indexName string) error {
req, err := http.NewRequest("DELETE", zincsearchBaseUrl+"/index/"+indexName, nil)
req, err := http.NewRequest(http.MethodDelete, zincsearchBaseUrl+"/index/"+indexName, nil)
if err != nil {
return err
}
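Several helpers in this file now return errors wrapped with `fmt.Errorf` and the `%w` verb instead of calling `log.Fatal` or `log.Println` internally, leaving the decision to `main`. A small, self-contained illustration of what the `%w` wrapping buys the caller; the sentinel error and function names below are hypothetical and not part of this repository.

```go
package main

import (
	"errors"
	"fmt"
	"log"
)

// errBulkRejected is a hypothetical sentinel; the real code wraps errors coming
// from the standard library (net/http, encoding/json) in the same way.
var errBulkRejected = errors.New("unexpected status response")

func sendBulk() error {
	// %w keeps the original error in the chain instead of flattening it to text.
	return fmt.Errorf("something happened while storing the records: %w", errBulkRejected)
}

func main() {
	if err := sendBulk(); err != nil {
		// errors.Is walks the %w chain, so the caller can still detect the cause.
		if errors.Is(err, errBulkRejected) {
			log.Println("zincsearch rejected the bulk request:", err)
			return
		}
		log.Println("unexpected failure:", err)
	}
}
```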
Binary file modified data-embedding/profs/cpu.prof
Binary file not shown.
Binary file modified data-embedding/profs/mem.prof
Binary file not shown.
Binary file added data-embedding/profs/profile004.pdf
Binary file not shown.
Binary file added data-embedding/profs/profile005.pdf
Binary file not shown.
Binary file added data-embedding/profs/profile006.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions terraform/.gitignore
@@ -0,0 +1,3 @@
credentials.sh
terraform.tfstate*
.terraform*
55 changes: 55 additions & 0 deletions terraform/README.md
@@ -0,0 +1,55 @@
## Terraform

This directory contains the terraform code that allows you to deploy the project to an AWS EC2 instance.

### Instructions

- Get into the terraform directory
```bash
cd terraform
```

- Set the AWS IAM credentials of a user with programmatic access.
```bash
cp credentials.example credentials.sh
## Set your credentials in the created file
chmod +x credentials.sh
. credentials.sh
```

- Create your ssh key-pair
```bash
ssh-keygen -t rsa -b 2048
...
Generating public/private RSA key pair.
Enter file in which to save the key (/home/path_to_ssh/.ssh/id_rsa): /home/path/to/save/keys/aws_key
...
```

> [!NOTE]
> Change the path to your own key in line 27 of the `main.tf` file and line 3 of the `security.tf` file.

- Initialize terraform
```bash
terraform init
```

- Review the terraform plan and deploy
```bash
terraform plan
...
terraform apply
...
```

- Connect to the instance and check that the containers are running
```bash
ssh -i "your/private/key/path" ubuntu@your_instance_ip
sudo docker ps -a
...
```
If everything went right, all the containers should be listed by the last command.

### Additional notes

Everything was done with a free-tier deployment in mind, so the project images are pulled from a Docker repository. In my opinion, the better approach would be to build the images directly on the EC2 instance, because that way the `.env` file of the web application could be modified so it sends requests to the correct endpoint; however, with a `t2.micro` it was impossible to build the images on the EC2 instance.
2 changes: 2 additions & 0 deletions terraform/credentials.example
@@ -0,0 +1,2 @@
export AWS_SECRET_ACCESS_KEY=SECRET_ACCESS_KEY_EXAMPLE
export AWS_ACCESS_KEY_ID=ACCESS_KEY_ID_EXAMPLE
30 changes: 30 additions & 0 deletions terraform/main.tf
@@ -0,0 +1,30 @@
provider "aws" {
region = "us-east-1"
}

resource "aws_instance" "ec2_example" {
ami = "ami-080e1f13689e07408"
instance_type = "t2.micro"
key_name = "aws_key"
vpc_security_group_ids = [aws_security_group.main.id]
user_data = file("${path.module}/userdata.tpl")

root_block_device {
volume_size = 8
}

provisioner "remote-exec" {
inline = [
"touch hello.txt",
"echo helloworld remote provisioner >> hello.txt",
]
}

connection {
type = "ssh"
host = self.public_ip
user = "ubuntu"
private_key = file("/home/david/.ssh/aws_key")
timeout = "4m"
}
}
66 changes: 66 additions & 0 deletions terraform/secutiry.tf
@@ -0,0 +1,66 @@
resource "aws_key_pair" "deployer" {
key_name = "aws_key"
public_key = file("/home/david/.ssh/aws_key.pub")
}

resource "aws_security_group" "main" {
egress = [
{
cidr_blocks = ["0.0.0.0/0", ]
description = ""
from_port = 0
ipv6_cidr_blocks = []
prefix_list_ids = []
protocol = "-1"
security_groups = []
self = false
to_port = 0
}
]
ingress = [
{
cidr_blocks = ["0.0.0.0/0", ]
description = ""
from_port = 22
ipv6_cidr_blocks = []
prefix_list_ids = []
protocol = "tcp"
security_groups = []
self = false
to_port = 22
},
{
cidr_blocks = ["0.0.0.0/0", ]
description = "Vue page"
from_port = 5173
ipv6_cidr_blocks = []
prefix_list_ids = []
protocol = "tcp"
security_groups = []
self = false
to_port = 5173
},
{
cidr_blocks = ["0.0.0.0/0", ]
description = "Server"
from_port = 3001
ipv6_cidr_blocks = []
prefix_list_ids = []
protocol = "tcp"
security_groups = []
self = false
to_port = 3001
},
{
cidr_blocks = ["0.0.0.0/0", ]
description = "Zincsearch"
from_port = 4080
ipv6_cidr_blocks = []
prefix_list_ids = []
protocol = "tcp"
security_groups = []
self = false
to_port = 4080
}
]
}