feat(openai): use OpenAI to dynamically drive the browser.
j-mendez committed Mar 19, 2024
1 parent ca3c1e1 commit 6a361a4
Showing 13 changed files with 315 additions and 151 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,10 @@

## Unreleased

+## v1.85.0
+
+1. feat(openai): use OpenAI to dynamically drive the browser.

## v1.84.1

1. feat(chrome): add chrome_headless_new flag
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default.

7 changes: 5 additions & 2 deletions examples/README.md
@@ -84,7 +84,10 @@ Add links to gather mid crawl. [Queue](./queue.rs).

- `cargo run --example queue`


-Use OpenAI to get custom Javascript to run in a browser. [OpenAI](./openai.rs).
+Use OpenAI to get custom Javascript to run in a browser. [OpenAI](./openai.rs). Make sure to set OPENAI_API_KEY=$MY_KEY as an env variable or pass it in before the script.

- `cargo run --example openai`

or

- `OPENAI_API_KEY=replace_me_with_key cargo run --example openai`
15 changes: 12 additions & 3 deletions examples/openai.rs
@@ -1,4 +1,4 @@
-//! `cargo run --example openai --features chrome`
+//! `OPENAI_API_KEY=$MYAPI_KEY cargo run --example openai --features openai`
extern crate spider;

use std::time::Duration;
@@ -10,13 +10,22 @@ use spider::website::Website;
#[tokio::main]
async fn main() {
let mut gpt_config = GPTConfigs::default();
-    gpt_config.model = "gpt-4-turbo-preview".into();
+    gpt_config.model = "gpt-4-1106-preview".into();
gpt_config.prompt = "Search for Movies".into();
gpt_config.max_tokens = 800;

let _ = tokio::fs::create_dir_all("./storage/").await;

let screenshot_params =
spider::configuration::ScreenshotParams::new(Default::default(), Some(true), Some(true));
// params that handle the way to take screenshots
let screenshot_config =
spider::configuration::ScreenShotConfig::new(screenshot_params, true, true, None);

let mut website: Website = Website::new("https://google.com")
.with_chrome_intercept(true, true)
.with_wait_for_idle_network(Some(WaitForIdleNetwork::new(Some(Duration::from_secs(30)))))
.with_caching(cfg!(feature = "cache"))
.with_screenshot(Some(screenshot_config))
.with_limit(1)
.with_openai(Some(gpt_config))
.build()
4 changes: 2 additions & 2 deletions spider/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "spider"
-version = "1.85.5"
+version = "1.86.0"
authors = [
"madeindjs <contact@rousseau-alexandre.fr>",
"j-mendez <jeff@a11ywatch.com>",
@@ -119,5 +119,5 @@ http3 = ["reqwest/http3"]
smart = ["chrome", "dep:regex"]
encoding = ["dep:encoding_rs"]
headers = []
-openai = ["chrome", "dep:async-openai", "dep:tiktoken-rs"]
+openai = ["chrome", "chrome_intercept", "dep:async-openai", "dep:tiktoken-rs"]
decentralized_headers = ["dep:const_format", "dep:itertools"]
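With this change the `openai` feature transitively enables `chrome` and `chrome_intercept`, so downstream crates no longer need to list the intercept flag themselves. A hypothetical consumer `Cargo.toml` entry (version number taken from this commit) might look like:

```toml
[dependencies]
# "openai" now implies "chrome" and "chrome_intercept"
spider = { version = "1.86.0", features = ["openai"] }
```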
22 changes: 11 additions & 11 deletions spider/README.md
@@ -16,7 +16,7 @@ This is a basic async example crawling a web page, add spider to your `Cargo.toml`:

```toml
[dependencies]
-spider = "1.85.5"
+spider = "1.86.0"
```

And then the code:
@@ -93,7 +93,7 @@ We have the following optional feature flags.

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["regex", "ua_generator"] }
+spider = { version = "1.86.0", features = ["regex", "ua_generator"] }
```

1. `ua_generator`: Enables auto generating a random real User-Agent.
@@ -135,7 +135,7 @@ Move processing to a worker, drastically increases performance even if worker is

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["decentralized"] }
+spider = { version = "1.86.0", features = ["decentralized"] }
```

```sh
@@ -166,7 +166,7 @@ Use the subscribe method to get a broadcast channel.

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["sync"] }
+spider = { version = "1.86.0", features = ["sync"] }
```

```rust,no_run
@@ -196,7 +196,7 @@ Allow regex for blacklisting routes

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["regex"] }
+spider = { version = "1.86.0", features = ["regex"] }
```

```rust,no_run
@@ -223,7 +223,7 @@ If you are performing large workloads you may need to control the crawler by ena

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["control"] }
+spider = { version = "1.86.0", features = ["control"] }
```

```rust
@@ -293,7 +293,7 @@ Use cron jobs to run crawls continuously at anytime.

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["sync", "cron"] }
+spider = { version = "1.86.0", features = ["sync", "cron"] }
```

```rust,no_run
@@ -332,7 +332,7 @@ the feature flag [`chrome_intercept`] to possibly speed up request using Network

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["chrome", "chrome_intercept"] }
+spider = { version = "1.86.0", features = ["chrome", "chrome_intercept"] }
```

You can use `website.crawl_concurrent_raw` to perform a crawl without chromium when needed. Use the feature flag `chrome_headed` to enable headful browser usage if needed to debug.
@@ -362,7 +362,7 @@ Enabling HTTP cache can be done with the feature flag [`cache`] or [`cache_mem`]

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["cache"] }
+spider = { version = "1.86.0", features = ["cache"] }
```

You need to set `website.cache` to true to enable as well.
@@ -393,7 +393,7 @@ Intelligently run crawls using HTTP and JavaScript Rendering when needed. The be

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["smart"] }
+spider = { version = "1.86.0", features = ["smart"] }
```

```rust,no_run
@@ -419,7 +419,7 @@ Set a depth limit to prevent forwarding.

```toml
[dependencies]
-spider = { version = "1.85.5", features = ["budget"] }
+spider = { version = "1.86.0", features = ["budget"] }
```

```rust,no_run
6 changes: 5 additions & 1 deletion spider/src/configuration.rs
@@ -428,9 +428,13 @@ pub struct GPTConfigs {
/// The API key to use. This defaults to using the env var OPENAI_API_KEY.
pub openai_key: String,
/// The max tokens to use for the request.
-    pub max_tokens: u32,
+    pub max_tokens: u16,
/// The temperature between 0 - 2
pub temperature: Option<f32>,
+    /// The user for the request
+    pub user: Option<String>,
+    /// The top priority for the request
+    pub top_p: Option<f32>,
}

#[cfg(feature = "chrome")]
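One side effect of narrowing `max_tokens` from `u32` to `u16` is that callers holding wider values need a checked conversion. A std-only sketch of a saturating approach (the values here are illustrative, not from the crate):

```rust
fn main() {
    // Hypothetical caller-side budget; GPTConfigs::max_tokens is now u16.
    let old_budget: u32 = 100_000; // too large for a u16
    let new_budget: u16 = u16::try_from(old_budget).unwrap_or(u16::MAX);
    println!("{new_budget}"); // saturates at 65535

    let example_budget: u32 = 800; // the value used in examples/openai.rs
    println!("{}", u16::try_from(example_budget).unwrap_or(u16::MAX));
}
```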
21 changes: 17 additions & 4 deletions spider/src/features/openai.rs
@@ -1,16 +1,29 @@
use async_openai::types::ChatCompletionRequestSystemMessageArgs;
use tiktoken_rs::{get_chat_completion_max_tokens, ChatCompletionRequestMessage};

+const PROMPT: &str = r#"You are tasked with generating pure JavaScript code snippets in response to user-provided scenarios involving web page interactions.\n
+Upon receipt of specific HTML content, the website’s URL, and a detailed user prompt describing the action to be performed, you are to supply an unembellished JavaScript code string.\n
+This code should be immediately executable in a browser's console or script environment, achieving the described objectives without any extraneous formatting or annotations.\n
+Respond exclusively with the raw JavaScript code to ensure seamless functionality and applicability. Ex: window.location.href = 'https://www.google.com/search?q=Movies';"#;

lazy_static! {
-    static ref BROWSER_ACTIONS_SYSTEM_PROMPT: async_openai::types::ChatCompletionRequestMessage = {
+    /// The base system prompt for driving the browser.
+    pub static ref BROWSER_ACTIONS_SYSTEM_PROMPT: async_openai::types::ChatCompletionRequestMessage = {
ChatCompletionRequestSystemMessageArgs::default()
-            .content(r#"You are an expert assistant that is dedicated to curating valid Javascript to use for a website.
-        You will receive the website HTML from an assistant and a user prompt on what actions to do.
-        ONLY RESPOND WITH VALID JAVASCRIPT STRING."#.trim())
+            .content(PROMPT.trim())
.build()
.unwrap()
.into()
};
+    /// The prompt completion for tiktoken token counting.
+    pub static ref BROWSER_ACTIONS_SYSTEM_PROMPT_COMPLETION: tiktoken_rs::ChatCompletionRequestMessage = {
+        tiktoken_rs::ChatCompletionRequestMessage {
+            content: Some(PROMPT.trim().to_string()),
+            role: "system".to_string(),
+            name: None,
+            function_call: None,
+        }
+    };
}

/// calculate the max tokens for a request
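The token budgeting that `get_chat_completion_max_tokens` performs can be illustrated with a std-only stand-in. The four-characters-per-token ratio below is an assumption for illustration only; the real code derives counts from tiktoken-rs's BPE tokenizer:

```rust
/// Crude stand-in for a tokenizer: ~4 characters per token (assumption,
/// not tiktoken; real counts come from the model's BPE vocabulary).
fn estimate_tokens(text: &str) -> usize {
    (text.len() + 3) / 4
}

/// Budget left for the completion after the prompt messages are counted,
/// mirroring the shape of `get_chat_completion_max_tokens`.
fn remaining_completion_tokens(context_window: usize, messages: &[&str]) -> usize {
    let used: usize = messages.iter().map(|m| estimate_tokens(m)).sum();
    context_window.saturating_sub(used)
}

fn main() {
    let msgs = ["You are an expert assistant.", "Search for Movies"];
    println!("{}", remaining_completion_tokens(4096, &msgs));
}
```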
15 changes: 12 additions & 3 deletions spider/src/page.rs
@@ -299,10 +299,18 @@ impl Page {
wait_for: &Option<crate::configuration::WaitFor>,
screenshot: &Option<crate::configuration::ScreenShotConfig>,
page_set: bool,
+        openai_config: &Option<crate::configuration::GPTConfigs>,
) -> Self {
-        let page_resource =
-            crate::utils::fetch_page_html(&url, &client, &page, wait_for, screenshot, page_set)
-                .await;
+        let page_resource = crate::utils::fetch_page_html(
+            &url,
+            &client,
+            &page,
+            wait_for,
+            screenshot,
+            page_set,
+            openai_config,
+        )
+        .await;
let mut p = build(url, page_resource);

// store the chrome page to perform actions like screenshots etc.
@@ -910,6 +918,7 @@
)),
&configuration.screenshot,
false,
+            &configuration.openai_config,
)
.await;
