Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce RAG #31

Merged
merged 18 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
339 changes: 327 additions & 12 deletions Cargo.lock

Large diffs are not rendered by default.

46 changes: 7 additions & 39 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,44 +1,12 @@
[package]
name = "lsp-ai"
version = "0.3.0"
[workspace]
members = [
"crates/*",
]
resolver = "2"

[workspace.package]
edition = "2021"
license = "MIT"
description = "LSP-AI is an open-source language server that serves as a backend for AI-powered functionality, designed to assist and empower software engineers, not replace them."
repository = "https://github.com/SilasMarvin/lsp-ai"
readme = "README.md"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.75"
lsp-server = "0.7.6"
lsp-types = "0.95.0"
ropey = "1.6.1"
serde = "1.0.190"
serde_json = "1.0.108"
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
rand = "0.8.5"
tokenizers = "0.14.1"
parking_lot = "0.12.1"
once_cell = "1.19.0"
directories = "5.0.1"
llama-cpp-2 = { version = "0.1.55", optional = true }
minijinja = { version = "1.0.12", features = ["loader"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing = "0.1.40"
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
ignore = "0.4.22"
pgml = "1.0.4"
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
indexmap = "2.2.5"
async-trait = "0.1.78"

[features]
default = []
llama_cpp = ["dep:llama-cpp-2"]
metal = ["llama-cpp-2/metal"]
cuda = ["llama-cpp-2/cuda"]

[dev-dependencies]
assert_cmd = "2.0.14"
51 changes: 51 additions & 0 deletions crates/lsp-ai/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
[package]
name = "lsp-ai"
version = "0.3.0"

# Shared metadata inherited from the workspace root.
description.workspace = true
repository.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.75"
lsp-server = "0.7.6"
lsp-types = "0.95.0"
ropey = "1.6.1"
serde = "1.0.190"
serde_json = "1.0.108"
hf-hub = { git = "https://github.com/huggingface/hf-hub", version = "0.3.2" }
rand = "0.8.5"
tokenizers = "0.14.1"
parking_lot = "0.12.1"
once_cell = "1.19.0"
directories = "5.0.1"
llama-cpp-2 = { version = "0.1.55", optional = true }
minijinja = { version = "1.0.12", features = ["loader"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing = "0.1.40"
xxhash-rust = { version = "0.8.5", features = ["xxh3"] }
reqwest = { version = "0.11.25", features = ["blocking", "json"] }
ignore = "0.4.22"
pgml = "1.0.4"
tokio = { version = "1.36.0", features = ["rt-multi-thread", "time"] }
indexmap = "2.2.5"
async-trait = "0.1.78"
tree-sitter = "0.22"
utils-tree-sitter = { path = "../utils-tree-sitter", features = ["all"], version = "0.1.0" }
splitter-tree-sitter = { path = "../splitter-tree-sitter", version = "0.1.0" }
text-splitter = { version = "0.13.3" }
md5 = "0.7.0"

[build-dependencies]
# Pinned to the 1.x major line: wildcard ("*") requirements are rejected by
# crates.io on publish and make builds non-reproducible.
cc = "1"

[features]
default = []
llama_cpp = ["dep:llama-cpp-2"]
metal = ["llama-cpp-2/metal"]
cuda = ["llama-cpp-2/cuda"]

[dev-dependencies]
assert_cmd = "2.0.14"
126 changes: 97 additions & 29 deletions src/config.rs → crates/lsp-ai/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,51 @@ impl Default for PostProcess {
}
}

/// Which document-splitting strategy to use when chunking files for embedding.
///
/// Selected in user config via an externally tagged enum, e.g.
/// `{"tree_sitter": {...}}` or `{"text_splitter": {...}}`.
#[derive(Debug, Clone, Deserialize)]
pub enum ValidSplitter {
    #[serde(rename = "tree_sitter")]
    TreeSitter(TreeSitter),
    // Fix: the tag was misspelled "text_sitter". Accept the corrected
    // "text_splitter" spelling while keeping the old one as an alias so
    // existing configs keep working.
    #[serde(rename = "text_splitter", alias = "text_sitter")]
    TextSplitter(TextSplitter),
}

/// When no splitter is configured, fall back to the tree-sitter splitter
/// with its default chunking parameters.
impl Default for ValidSplitter {
    fn default() -> Self {
        Self::TreeSitter(TreeSitter::default())
    }
}

/// Serde default for `chunk_size` when the config omits it.
const fn chunk_size_default() -> usize {
    1500
}

/// Serde default for `chunk_overlap` when the config omits it (no overlap).
const fn chunk_overlap_default() -> usize {
    0
}

/// Settings for the tree-sitter based splitter.
#[derive(Debug, Clone, Deserialize)]
pub struct TreeSitter {
    // Maximum chunk size; defaults to 1500. Presumably measured in
    // characters/bytes — confirm against the splitter implementation.
    #[serde(default = "chunk_size_default")]
    pub chunk_size: usize,
    // Overlap between consecutive chunks; defaults to 0.
    #[serde(default = "chunk_overlap_default")]
    pub chunk_overlap: usize,
}

impl Default for TreeSitter {
fn default() -> Self {
Self {
chunk_size: 1500,
chunk_overlap: 0,
}
}
}

/// Settings for the plain text splitter.
#[derive(Debug, Clone, Deserialize)]
pub struct TextSplitter {
    // Maximum chunk size; defaults to 1500 (see `chunk_size_default`).
    #[serde(default = "chunk_size_default")]
    pub chunk_size: usize,
}

#[derive(Debug, Clone, Deserialize)]
pub enum ValidMemoryBackend {
#[serde(rename = "file_store")]
Expand Down Expand Up @@ -67,15 +112,6 @@ impl ChatMessage {
}
}

#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Chat {
pub completion: Option<Vec<ChatMessage>>,
pub generation: Option<Vec<ChatMessage>>,
pub chat_template: Option<String>,
pub chat_format: Option<String>,
}

#[derive(Clone, Debug, Deserialize)]
#[allow(clippy::upper_case_acronyms)]
#[serde(deny_unknown_fields)]
Expand All @@ -85,27 +121,52 @@ pub struct FIM {
pub end: String,
}

/// Serde default for `Crawl::max_crawl_memory`: 100 MB.
const fn max_crawl_memory_default() -> u64 {
    100_000_000
}

/// Serde default for `Crawl::max_file_size`: 10 MB.
const fn max_crawl_file_size_default() -> u64 {
    10_000_000
}

/// Limits applied while crawling the workspace for files to index.
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Crawl {
    // Files larger than this (bytes) are skipped; defaults to 10 MB.
    #[serde(default = "max_crawl_file_size_default")]
    pub max_file_size: u64,
    // Memory budget (bytes) for the crawl; defaults to 100 MB. Exact
    // enforcement semantics live in the crawler — confirm there.
    #[serde(default = "max_crawl_memory_default")]
    pub max_crawl_memory: u64,
    // When true, crawl all files; when false (default) presumably only a
    // filtered subset is crawled — verify against the crawl implementation.
    #[serde(default)]
    pub all_files: bool,
}

/// Embedding model configuration for the PostgresML memory backend.
#[derive(Clone, Debug, Deserialize)]
pub struct PostgresMLEmbeddingModel {
    // Model identifier passed through to PostgresML.
    pub model: String,
    // Optional parameters forwarded when embedding documents.
    pub embed_parameters: Option<Value>,
    // Optional parameters forwarded when embedding queries.
    pub query_parameters: Option<Value>,
}

#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PostgresML {
pub database_url: Option<String>,
pub crawl: Option<Crawl>,
#[serde(default)]
pub crawl: bool,
pub splitter: ValidSplitter,
pub embedding_model: Option<PostgresMLEmbeddingModel>,
}

#[derive(Clone, Debug, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct FileStore {
#[serde(default)]
pub crawl: bool,
}

const fn n_gpu_layers_default() -> u32 {
1000
pub crawl: Option<Crawl>,
}

const fn n_ctx_default() -> u32 {
1000
impl FileStore {
    /// Builds a `FileStore` configuration with crawling disabled
    /// (`crawl: None`).
    pub fn new_without_crawl() -> Self {
        Self { crawl: None }
    }
}

#[derive(Clone, Debug, Deserialize)]
Expand Down Expand Up @@ -137,6 +198,17 @@ pub struct MistralFIM {
pub max_requests_per_second: f32,
}

// Serde default for the llama.cpp `n_gpu_layers` setting; only compiled
// when the `llama_cpp` feature is enabled.
#[cfg(feature = "llama_cpp")]
const fn n_gpu_layers_default() -> u32 {
    1000
}

// Serde default for the llama.cpp context size (`n_ctx`); only compiled
// when the `llama_cpp` feature is enabled.
#[cfg(feature = "llama_cpp")]
const fn n_ctx_default() -> u32 {
    1000
}

#[cfg(feature = "llama_cpp")]
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LLaMACPP {
Expand Down Expand Up @@ -230,15 +302,14 @@ pub struct ValidConfig {

#[derive(Clone, Debug, Deserialize, Default)]
pub struct ValidClientParams {
#[serde(alias = "rootURI")]
_root_uri: Option<String>,
_workspace_folders: Option<Vec<String>>,
#[serde(alias = "rootUri")]
pub root_uri: Option<String>,
}

#[derive(Clone, Debug)]
pub struct Config {
pub config: ValidConfig,
_client_params: ValidClientParams,
pub client_params: ValidClientParams,
}

impl Config {
Expand All @@ -255,7 +326,7 @@ impl Config {
let client_params: ValidClientParams = serde_json::from_value(args)?;
Ok(Self {
config: valid_args,
_client_params: client_params,
client_params,
})
}

Expand Down Expand Up @@ -300,20 +371,17 @@ impl Config {
}
}

// This makes testing much easier.
// For testing use only
#[cfg(test)]
impl Config {
pub fn default_with_file_store_without_models() -> Self {
Self {
config: ValidConfig {
memory: ValidMemoryBackend::FileStore(FileStore { crawl: false }),
memory: ValidMemoryBackend::FileStore(FileStore { crawl: None }),
models: HashMap::new(),
completion: None,
},
_client_params: ValidClientParams {
_root_uri: None,
_workspace_folders: None,
},
client_params: ValidClientParams { root_uri: None },
}
}
}
Expand Down
Loading