Skip to content

Commit

Permalink
Merge pull request #162 from pulibrary/only_run_indexer
Browse files Browse the repository at this point in the history
Run one indexer
  • Loading branch information
hackartisan authored Oct 23, 2024
2 parents 7005d91 + 2d5a885 commit d007c9a
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 11 deletions.
19 changes: 19 additions & 0 deletions architecture-decisions/0003-one-indexer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# 3. Run one indexer

Date: 2024-10-23

## Status

Accepted

## Context

We're using Broadway for indexing, which has no built-in concept of multi-machine distributed indexing. Right now we're unsure whether we'll need to scale past one machine to index our documents.

## Decision

We will use one special machine that can be scaled independently to have more resources for indexing.

## Consequences

If we need to distribute indexing later, and can't just add resources to the one machine, then we'll have to develop a way for Broadway to distribute.
56 changes: 55 additions & 1 deletion config/deploy/staging.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ job "dpulc-staging" {
auto_revert = true
}
group "web" {
count = 1
count = 2
network {
port "http" { to = 4000 }
}
service {
port = "http"
name = "dpulc-staging-web"
tags = ["frontend"]
check {
type = "http"
port = "http"
Expand Down Expand Up @@ -69,6 +71,57 @@ job "dpulc-staging" {
ports = ["http"]
force_pull = true
}
# Doesn't take much just to run a webserver.
resources {
cpu = 2000
memory = 1000
}
template {
destination = "${NOMAD_SECRETS_DIR}/env.vars"
env = true
change_mode = "restart"
data = <<EOF
{{- with nomadVar "nomad/jobs/dpulc-staging" -}}
DATABASE_URL = ecto://{{ .DB_USER }}:{{ .DB_PASSWORD }}@{{ .POSTGRES_HOST }}/{{ .DB_NAME }}
FIGGY_DATABASE_URL = {{ .FIGGY_DATABASE_URL }}
SOLR_URL = {{ .SOLR_URL }}
SECRET_KEY_BASE = {{ .SECRET_KEY_BASE }}
CACHE_VERSION = ${var.cache_version}
PHX_HOST = ${var.host}
{{- end -}}
EOF
}
}
}
group "indexer" {
count = 1
network {
port "http" { to = 4000 }
}
service {
name = "dpulc-staging-web"
tags = ["indexer"]
port = "http"
check {
type = "http"
port = "http"
path = "/"
interval = "10s"
timeout = "1s"
}
}
task "indexer" {
driver = "podman"
config {
image = "ghcr.io/pulibrary/dpul-collections:${ var.branch_or_sha }"
ports = ["http"]
force_pull = true
}
# Reserve a larger CPU and RAM allocation for indexing work.
resources {
cores = 6
memory = 5000
}
template {
destination = "${NOMAD_SECRETS_DIR}/env.vars"
env = true
Expand All @@ -81,6 +134,7 @@ job "dpulc-staging" {
SECRET_KEY_BASE = {{ .SECRET_KEY_BASE }}
CACHE_VERSION = ${var.cache_version}
PHX_HOST = ${var.host}
INDEXER = true
{{- end -}}
EOF
}
Expand Down
6 changes: 6 additions & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ if config_env() == :prod do

config :dpul_collections, :dns_cluster_query, System.get_env("DNS_CLUSTER_QUERY")

# Only nodes launched with the INDEXER environment variable set (to any
# value, including the empty string) start the Broadway indexing pipeline.
config :dpul_collections,
       :start_indexing_pipeline,
       System.get_env("INDEXER") != nil

config :dpul_collections, DpulCollectionsWeb.Endpoint,
url: [host: host, port: 443, scheme: "https"],
http: [
Expand Down
34 changes: 24 additions & 10 deletions lib/dpul_collections/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,37 @@ defmodule DpulCollections.Application do
end

# coveralls-ignore-start
def environment_children(_) do
# In development, start the indexing pipeline only when the Phoenix endpoint
# is actually serving (e.g. `mix phx.server`); otherwise supervise nothing.
#
# Fix: drop the dead `cache_version = Application.fetch_env!(...)` binding —
# its value was never used here (indexing_pipeline_children/0 fetches it
# itself), and fetch_env!/2 would raise needlessly if the key were unset.
def environment_children(:dev) do
  if Phoenix.Endpoint.server?(:dpul_collections, DpulCollectionsWeb.Endpoint) do
    indexing_pipeline_children()
  else
    []
  end
end

[
{DpulCollections.IndexingPipeline.Figgy.IndexingConsumer,
cache_version: cache_version, batch_size: 50},
{DpulCollections.IndexingPipeline.Figgy.TransformationConsumer,
cache_version: cache_version, batch_size: 50},
{DpulCollections.IndexingPipeline.Figgy.HydrationConsumer,
cache_version: cache_version, batch_size: 50}
]
# In production, start the indexing pipeline only on nodes configured with
# :start_indexing_pipeline (set via the INDEXER env var in config/runtime.exs).
def environment_children(:prod) do
  case Application.fetch_env!(:dpul_collections, :start_indexing_pipeline) do
    true -> indexing_pipeline_children()
    _ -> []
  end
end

# Child specs for the three Broadway consumers making up the Figgy indexing
# pipeline, each keyed to the currently configured cache version.
def indexing_pipeline_children() do
  cache_version = Application.fetch_env!(:dpul_collections, :cache_version)

  consumers = [
    DpulCollections.IndexingPipeline.Figgy.IndexingConsumer,
    DpulCollections.IndexingPipeline.Figgy.TransformationConsumer,
    DpulCollections.IndexingPipeline.Figgy.HydrationConsumer
  ]

  for consumer <- consumers do
    {consumer, cache_version: cache_version, batch_size: 50}
  end
end

# coveralls-ignore-end

# Tell Phoenix to update the endpoint configuration
Expand Down

0 comments on commit d007c9a

Please sign in to comment.