Add Semantic Caching #16

Merged · 1 commit · Jul 10, 2024
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.4.0]

### Added
- Added a launcher function `launch` to make it easier to start the app.
- Semantic caching of LLM calls, powered by SemanticCaches.jl. It is enabled by default; disable it by setting `cached=false` in the `launch()` function.

## [0.3.0]

### Added
8 changes: 6 additions & 2 deletions Project.toml
@@ -1,21 +1,25 @@
name = "ProToPortal"
uuid = "f9496bd6-a3bb-4afc-927d-7268532ebfa9"
authors = ["J S <49557684+svilupp@users.noreply.github.com> and contributors"]
version = "0.3.0"
version = "0.4.0"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
GenieFramework = "a59fdf5c-6bf0-4f5d-949c-a137c9e2f353"
GenieSession = "03cc5b98-4f21-4eb6-99f2-22eced81f962"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192"
SemanticCaches = "03ba8f0e-aaaa-4626-a19b-56297996781b"

[compat]
Aqua = "0.7"
Dates = "<0.0.1, 1"
GenieFramework = "2.1"
GenieSession = "1"
PromptingTools = "0.33"
HTTP = "1"
PromptingTools = "0.37.1"
SemanticCaches = "0.2"
Test = "<0.0.1, 1"
julia = "1.10"

6 changes: 5 additions & 1 deletion README.md
@@ -32,11 +32,15 @@ using Pkg; Pkg.activate("."); Pkg.instantiate(".")
```julia
# As a quick hack, if you don't have your environment variables set up, uncomment the line below and add your OpenAI key
# ENV["OPENAI_API_KEY"] = "<your_openai_api_key>"
include("main.jl")
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # required for caching
using ProToPortal
launch(; cached = true)
```

Then head to your browser and go to [http://127.0.0.1:8000](http://127.0.0.1:8000) to see the app.

It will now cache similar LLM requests by default (disable it with `cached=false` in the `launch()` function).
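
If you want to opt out, here is a minimal sketch based on the `launch` API added in this PR (`import HTTP; HTTP.poplayer!()` is the documented way to drop the cache layer from a running session):

```julia
using ProToPortal

# Start the app without the semantic cache layer
launch(; cached = false)

# Or, if the app is already running with caching enabled,
# remove the cache layer from HTTP.jl's global layer stack
import HTTP
HTTP.poplayer!()
```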

For the purists: simply run `julia --project -t auto main.jl` in your terminal (once installed)!

How to start? Type `Say hi!` in the question box on the Chat tab and click Submit (or press CTRL+ENTER).
7 changes: 5 additions & 2 deletions docs/src/index.md
@@ -54,8 +54,11 @@ It's the first Julia-focused GUI (evaluate Julia code, fix it, critique it - or
Clone ProToPortal, instantiate it, enable your desired settings, and streamline your LLM interactions right away:

```julia
using Pkg; Pkg.activate("."); Pkg.instantiate(".")
include("main.jl")
# As a quick hack, if you don't have your environment variables set up, uncomment the line below and add your OpenAI key
# ENV["OPENAI_API_KEY"] = "<your_openai_api_key>"
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # required for caching
using ProToPortal
launch(; cached = true)
```

Then head to your browser and go to [http://127.0.0.1:8000](http://127.0.0.1:8000) to see the app.
6 changes: 5 additions & 1 deletion docs/src/introduction.md
@@ -27,11 +27,15 @@ using Pkg; Pkg.activate("."); Pkg.instantiate(".")
```julia
# As a quick hack, if you don't have your environment variables set up, uncomment the line below and add your OpenAI key
# ENV["OPENAI_API_KEY"] = "<your_openai_api_key>"
include("main.jl")
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # required for caching
using ProToPortal
launch(; cached = true)
```

Then head to your browser and go to [http://127.0.0.1:8000](http://127.0.0.1:8000) to see the app.

It will now cache similar LLM requests by default (disable it with `cached=false` in the `launch()` function).

For the purists: simply run `julia --project -t auto main.jl` in your terminal (once installed)!

How to start? Type `Say hi!` in the question box on the Chat tab and click Submit (or press CTRL+ENTER).
Expand Down
Binary file added docs/src/videos/screen-capture-code-fixing.gif
Binary file added docs/src/videos/screen-capture-plain.webm
Binary file not shown.
11 changes: 4 additions & 7 deletions main.jl
@@ -1,9 +1,6 @@
using Pkg
Pkg.activate(".")
using GenieFramework
ENV["GENIE_HOST"] = "127.0.0.1"
ENV["PORT"] = "8000"
## ENV["GENIE_ENV"] = "prod"
include("app.jl") # hack for hot-reloading when fixing things
Genie.loadapp();
up(async = true);
## Required to support semantic caching
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true"
using ProToPortal
ProToPortal.launch(8000, "0.0.0.0"; async = false, cached = true, cache_verbose = true)
81 changes: 81 additions & 0 deletions src/CacheLayer.jl
@@ -0,0 +1,81 @@
## Define the new caching mechanism as a layer for HTTP
## See documentation [here](https://juliaweb.github.io/HTTP.jl/stable/client/#Quick-Examples)
"""
CacheLayer

A module providing caching of LLM requests for ProToPortal.

It caches 3 URL paths:
- `/v1/chat/completions` (for OpenAI API)
- `/v1/embeddings` (for OpenAI API)
- `/v1/rerank` (for Cohere API)

# How to use
You can use the layer directly
`CacheLayer.get(req)`

You can push the layer globally in all HTTP.jl requests
`HTTP.pushlayer!(CacheLayer.cache_layer)`

You can remove the layer later
`HTTP.poplayer!()`

"""
module CacheLayer

using SemanticCaches, HTTP
using PromptingTools: JSON3

const SEM_CACHE = SemanticCache()
const HASH_CACHE = HashCache()

function cache_layer(handler)
    return function (req; kw...)
        VERBOSE = Base.get(ENV, "CACHES_VERBOSE", "true") == "true"
        if req.method == "POST" && !isempty(req.body)
            body = JSON3.read(copy(req.body))
            ## chat/completions is for OpenAI, v1/messages is for Anthropic
            if occursin("v1/chat/completions", req.target) ||
               occursin("v1/messages", req.target)
                ## We're in chat completion endpoint
                temperature_str = haskey(body, :temperature) ? body[:temperature] : "-"
                cache_key = string("chat-", body[:model], "-", temperature_str)
                input = join([m["content"] for m in body[:messages]], " ")
            elseif occursin("v1/embeddings", req.target)
                ## We're in embedding endpoint
                cache_key = string("emb-", body[:model])
                input = join(body[:input], " ")
            elseif occursin("v1/rerank", req.target)
                cache_key = string("rerank-", body[:model], "-", body[:top_n])
                input = join([body[:query], body[:documents]...], " ")
            else
                ## Skip, unknown API
                VERBOSE && @info "Skipping cache for $(req.method) $(req.target)"
                return handler(req; kw...)
            end
            ## Check the cache
            VERBOSE && @info "Check if we can cache this request ($(length(input)) chars)"
            active_cache = length(input) > 5000 ? HASH_CACHE : SEM_CACHE
            item = active_cache(cache_key, input; verbose = 2 * VERBOSE) # change verbosity to 0 to disable detailed logs
            if !isvalid(item)
                VERBOSE && @info "Cache miss! Pinging the API"
                # pass the request along to the next layer by calling `cache_layer` arg `handler`
                resp = handler(req; kw...)
                item.output = resp
                # Let's remember it for the next time
                push!(active_cache, item)
            end
            ## Return the calculated or cached result
            return item.output
        end
        # pass the request along to the next layer by calling `cache_layer` arg `handler`
        # also pass along the trailing keyword args `kw...`
        return handler(req; kw...)
    end
end

# Create a new HTTP client with the cache layer added
HTTP.@client [cache_layer]

end # module
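
A minimal sketch of how the module is meant to be used outside the app (assuming an `OPENAI_API_KEY` is configured; `aigenerate` comes from PromptingTools, which issues its API calls via HTTP.jl):

```julia
using HTTP, PromptingTools
using ProToPortal: CacheLayer

# Route every HTTP.jl request (including PromptingTools' API calls) through the cache layer
HTTP.pushlayer!(CacheLayer.cache_layer)

# The first call misses the cache and hits the API; a sufficiently similar
# follow-up may be answered from the semantic cache instead
msg1 = aigenerate("What is the capital of France?")
msg2 = aigenerate("What's the capital city of France?")

# Remove the layer when caching is no longer wanted
HTTP.poplayer!()
```

Whether the second call is served from the cache depends on the similarity threshold used by SemanticCaches.jl; inputs longer than 5000 characters fall back to exact-match hashing, as in the code above.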
5 changes: 5 additions & 0 deletions src/ProToPortal.jl
@@ -53,6 +53,11 @@ include("llm.jl")
export meta_prompt_step!
include("meta_prompting.jl")

include("CacheLayer.jl")

export launch
include("server.jl")

function __init__()
## Load extra templates
PT.load_templates!(joinpath(@__DIR__, "..", "templates"); remember_path = true) # add our custom ones
38 changes: 38 additions & 0 deletions src/server.jl
@@ -0,0 +1,38 @@
"""
launch(
port::Int = get(ENV, "PORT", 8000), host::String = get(
ENV, "GENIE_HOST", "127.0.0.1");
async::Bool = true, cached::Bool = true, cache_verbose::Bool = false)

Launches ProToPortal in the browser.

Defaults to: `http://127.0.0.1:8000`.
This is a convenience wrapper around `Genie.up`, to customize the server configuration use `Genie.up()` and `Genie.config`.

# Arguments
- `port::Union{Int, String} = get(ENV, "PORT", "8000")`: The port to launch the server on.
- `host::String = get(ENV, "GENIE_HOST", "127.0.0.1")`: The host to launch the server on.
- `async::Bool = true`: Whether to launch the server asynchronously, ie, in the background.
- `cached::Bool = true`: Whether to use semantic caching of the requests.
- `cache_verbose::Bool = true`: Whether to print verbose information about the caching process.

If you want to remove the cache layer later, you can use `import HTTP; HTTP.poplayer!()`.
"""
function launch(
        port::Union{Int, String} = get(ENV, "PORT", "8000"),
        host::String = get(ENV, "GENIE_HOST", "127.0.0.1");
        async::Bool = true, cached::Bool = true, cache_verbose::Bool = true)
    ## Loads app.jl in the root directory
    Genie.loadapp(pkgdir(ProToPortal))

    ## Enables caching
    ENV["CACHES_VERBOSE"] = cache_verbose ? "true" : "false"
    if cached
        @info "Caching enabled globally (for all requests, see `CacheLayer` module for details). Remove with `HTTP.poplayer!()`"
        HTTP.pushlayer!(CacheLayer.cache_layer)
    end
    ## Convert to INT
    port_ = port isa Integer ? port : tryparse(Int, port)
    @assert port_ isa Integer "Port must be an integer. Provided: $port"
    up(port_, host; async)
end
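
A few illustrative calls against the signature above (the port and host values are arbitrary examples, not defaults):

```julia
using ProToPortal

# Defaults: http://127.0.0.1:8000, caching enabled, server running in the background
launch()

# Custom port and host, blocking the current session, with caching disabled
launch(3000, "0.0.0.0"; async = false, cached = false)

# The port may also be passed as a string (e.g. when taken from ENV)
launch("8080"; cache_verbose = false)

# Drop the cache layer later if it was enabled
import HTTP
HTTP.poplayer!()
```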