Skip to content

Commit

Permalink
Merge pull request #1 from instructor-ai/struct
Browse files Browse the repository at this point in the history
Initial commit for instructor!
  • Loading branch information
ivanleomk authored Jun 24, 2024
2 parents c30a97a + d2b681b commit a79c30a
Show file tree
Hide file tree
Showing 15 changed files with 621 additions and 17 deletions.
96 changes: 79 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,90 @@
# instructor-rs

Instructor is a RUst library that makes it a breeze to work with structured outputs from large language models (LLMs). it provides a simple and easy API to help maanage LLM Workflows by abstrating away validation, retries and streamning responses.
Instructor is a Rust library that makes it a breeze to work with structured outputs from large language models (LLMs). It provides a simple and easy API to help manage LLM workflows by abstracting away validation, retries and streaming responses.

Now, let's see Instructor in action with a simple example:

```rust
#[derive(InstructModel)]
struct UserInfo{
name: str
age: u8
let client = Client::new(env::var("OPENAI_API_KEY").unwrap().to_string());
let instructor_client = from_openai(client);

// Descriptions must be `///` doc comments: the derive reads `#[doc]` attributes,
// and plain `//` comments are discarded by the compiler before the macro runs.
#[derive(InstructMacro, Debug, Serialize, Deserialize)]
/// This represents a single user
struct UserInfo {
    /// This represents the name of the user
    name: String,
    /// This represents the age of the user
    age: u8,
}

let req = ChatCompletionRequest::new(
GPT3_5_TURBO.to_string(),
vec![chat_completion::ChatCompletionMessage {
role: chat_completion::MessageRole::user,
content: chat_completion::Content::Text(String::from(
"John Doe is a 30 year old software engineer",
)),
name: None,
}],
);

let result = instructor_client
.chat_completion::<UserInfo>(req, 3)
.unwrap();

println!("{}", result.name); // John Doe
println!("{}", result.age); // 30

```

## Structured Validation

We can use native inbuilt serde functions in order to handle validation of specific values.

```rust
let client = Client::new(env::var("OPENAI_API_KEY").unwrap().to_string());
let instructor_client = from_openai(client);

// Descriptions must be `///` doc comments: the derive reads `#[doc]` attributes,
// and plain `//` comments are discarded by the compiler before the macro runs.
#[derive(InstructMacro, Debug, Serialize, Deserialize)]
/// This represents a single user
struct UserInfo {
    /// This represents the name of the user
    #[serde(deserialize_with = "validate_uppercase")]
    name: String,
    /// This represents the age of the user
    age: u8,
}

/// Serde `deserialize_with` hook that only accepts names without lowercase characters.
///
/// Deserializes the field as a `String` and rejects it with a custom serde
/// error if any character is lowercase. The error message spells out the
/// expected format with examples.
///
/// # Errors
///
/// Returns `D::Error` when the value is not a string or contains a lowercase
/// character.
fn validate_uppercase<'de, D>(deserializer: D) -> Result<String, D::Error>
where
    D: Deserializer<'de>,
{
    let s = String::deserialize(deserializer)?;
    if s.chars().any(|c| c.is_lowercase()) {
        return Err(de::Error::custom(format!(
            "Name '{}' should be entirely in uppercase. Examples: 'TIMOTHY', 'JANE SMITH'",
            s
        )));
    }
    Ok(s.to_uppercase())
}

let client = Instructor::from_openai(Client::new(env::var("OPENAI_API_KEY").unwrap().to_string()));
let req = ChatCompletionRequest::new(
GPT3_5_TURBO.to_string(),
vec![chat_completion::ChatCompletionMessage {
role: chat_completion::MessageRole::user,
content: chat_completion::Content::Text(String::from(
"John Doe is a 30 year old software engineer",
)),
name: None,
}],
);

let user = ChatCompletionRequest::new(
GPT3_5_TURBO_0613.to_string(),
vec![chat_completion::ChatCompletionMessage {
role: chat_completion::MessageRole::user,
content: chat_completion::Content::Text(String::from("John Doe is 30 years old")),
name: None,
}],
vec![UserInfo]
)
let result = instructor_client
.chat_completion::<UserInfo>(req, 3)
.unwrap();

println!("{}", UserInfo.name) // John Doe
println!("{}", UserInfo.age) // 30
println!("{}", result.name); // JOHN DOE
println!("{}", result.age); // 30
```
7 changes: 7 additions & 0 deletions instruct-macros-types/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### v0.1.2 (2024-06-22)

Updated the README and CHANGELOG.md so that we can accurately track changes.

### v0.1.1 (2024-06-22)

Published it on crates.io! Implementation is closely tied to the `Instructor` package for now with a goal to decouple this moving forward.
20 changes: 20 additions & 0 deletions instruct-macros-types/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[package]
name = "instruct-macros-types"
version = "0.1.2"
edition = "2021"
authors = ["Ivan Leo <ivanleomk@gmail.com>"]
description = "Instructor Macro Types are a collection of simple types that we export to work with the instruct-macros crate"
readme = "README.md"
homepage = "https://rust.useinstructor.com"
repository = "https://github.com/instructor-ai/instructor-rs"
license = "MIT OR Apache-2.0"

# `changelog` is not a key Cargo recognises under [package]; it lives in the
# free-form metadata table, matching the other crates in this repository.
[package.metadata]
changelog = "CHANGELOG.md"


[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

[lib]
name = "instruct_macros_types"
path = "src/lib.rs"
3 changes: 3 additions & 0 deletions instruct-macros-types/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Instruct Macro Types

Instruct Macro Types are a set of exported types that work nicely with the `instruct-macros` package which provides a set of procedural macros for getting type information at runtime.
19 changes: 19 additions & 0 deletions instruct-macros-types/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use serde::{Deserialize, Serialize};

/// Exposes a static description of a type: its name, description and fields.
///
/// The `InstructMacro` derive in the `instruct-macros` crate generates an
/// implementation of this trait for structs with named fields.
pub trait InstructMacro {
/// Returns the [`StructInfo`] describing the implementing type.
fn get_info() -> StructInfo;
}

/// Description of a struct as returned by [`InstructMacro::get_info`].
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct StructInfo {
/// Name of the struct.
pub name: String,
/// Free-form description of the struct (the derive fills this from the struct's doc comments).
pub description: String,
/// One entry per field of the struct.
pub parameters: Vec<ParameterInfo>,
}

/// Description of a single struct field inside a [`StructInfo`].
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct ParameterInfo {
/// Name of the field.
pub name: String,
/// Type of the field as text (the derive stores the stringified type tokens).
pub r#type: String,
/// Free-form description of the field (the derive fills this from the field's doc comments).
pub comment: String,
}
7 changes: 7 additions & 0 deletions instruct-macros/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### v0.1.2 (2024-06-22)

Abstracted out types for the package to a second crate at `instruct-macros-types`! This allows you to use the procedural macro and get information on the types at runtime.

### v0.1.1 (2024-06-22)

Published our first version to crates.io! Implementation is closely tied to the `Instructor` package for now with a goal to decouple this moving forward.
30 changes: 30 additions & 0 deletions instruct-macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[package]
name = "instruct-macros"
version = "0.1.1"
edition = "2018"
authors = ["Ivan Leo <ivanleomk@gmail.com>"]
description = "instruct-macros are a collection of simple macros that we're using in Instructor-AI to generate json schema from Serde Objects"
readme = "README.md"
# These keys must live under [package]: anything placed after the
# [package.metadata] header belongs to that free-form table and is ignored by Cargo.
homepage = "https://rust.useinstructor.com"
repository = "https://github.com/instructor-ai/instructor-rs"
license = "MIT OR Apache-2.0"

[package.metadata]
changelog = "CHANGELOG.md"
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
instruct-macros-types = { path = "../instruct-macros-types", version = "0.1.0" }

[dependencies.syn]
version = "1.0"
features = ["full"]

[dependencies.quote]
version = "1.0"

[lib]
proc-macro = true
24 changes: 24 additions & 0 deletions instruct-macros/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Instruct Macros

Instruct Macros are a set of procedural macros that expose a `get_info()` method which is meant to help you get reflection in your objects by default, exposing field names and types along with the object name.

You can use it by doing

```rust
use instruct_macros::InstructMacro; // Ensure this is a derive macro
use instruct_macros_types::{ParameterInfo, StructInfo}; // Types referenced by the generated code

#[derive(InstructMacro, Deserialize, Serialize, Debug)]
/// This is a model which represents a single individual user
struct UserInfo {
/// This is the name of the user
#[serde(deserialize_with = "uppercase_name")]
name: String,
/// This is the age of the user
age: u8,
/// This is the city of the user
city: String,
}
```

This in turn will expose a `get_info()` method on your struct that returns a body that looks something like this:
99 changes: 99 additions & 0 deletions instruct-macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
extern crate proc_macro;
use proc_macro::TokenStream;
use quote::quote;
use syn::{parse_macro_input, Data, DeriveInput, Fields, Lit, Meta};

#[proc_macro_derive(InstructMacro)]
pub fn instruct_macro_derive(input: TokenStream) -> TokenStream {
// Parse the input tokens into a syntax tree
let input = parse_macro_input!(input as DeriveInput);

// Used in the quasi-quotation below as `#name`
let name = &input.ident;

// Extract struct-level comment
let struct_comment = input
.attrs
.iter()
.filter_map(|attr| {
if attr.path.is_ident("doc") {
match attr.parse_meta().ok()? {
Meta::NameValue(meta) => {
if let Lit::Str(lit) = meta.lit {
return Some(lit.value());
}
}
_ => {}
}
}
None
})
.collect::<Vec<String>>()
.join(" ");

// Process each field in the struct
let fields = if let Data::Struct(data) = &input.data {
if let Fields::Named(fields) = &data.fields {
fields
} else {
panic!("Unnamed fields are not supported");
}
} else {
panic!("Only structs are supported");
};

let parameters: Vec<_> = fields
.named
.iter()
.map(|field| {
let field_name = &field.ident;
let field_type = &field.ty;

// Extract field-level comment
let field_comment = field
.attrs
.iter()
.filter_map(|attr| {
if attr.path.is_ident("doc") {
match attr.parse_meta().ok()? {
Meta::NameValue(meta) => {
if let Lit::Str(lit) = meta.lit {
return Some(lit.value());
}
}
_ => {}
}
}
None
})
.collect::<Vec<String>>()
.join(" ");

quote! {
parameters.push(ParameterInfo {
name: stringify!(#field_name).to_string(),
r#type: stringify!(#field_type).to_string(),
comment: #field_comment.to_string(),
});
}
})
.collect();

let expanded = quote! {
impl instruct_macros_types::InstructMacro for #name {
fn get_info() -> instruct_macros_types::StructInfo {
let mut parameters = Vec::new();
#(#parameters)*

StructInfo {
name: stringify!(#name).to_string(),
description: #struct_comment.to_string(),
parameters,
}
}
}
};

// Hand the output tokens back to the compiler
TokenStream::from(expanded)
}
38 changes: 38 additions & 0 deletions instruct-macros/tests/integration_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
extern crate instruct_macros_types;

use instruct_macros::InstructMacro; // Add this line
use instruct_macros_types::{InstructMacro, ParameterInfo, StructInfo};

#[cfg(test)]
mod tests {
    use super::*;

    /// The derive must report the struct name, every field name, the
    /// stringified field types and the field doc comments.
    #[test]
    fn test_string_conversion() {
        // The fields are only inspected through the derive and never read
        // directly, so silence the dead-code lint for this fixture.
        #[allow(dead_code)]
        #[derive(InstructMacro, Debug)]
        struct TestStruct {
            // Written without a space after `///` so the expected comment
            // text has no leading whitespace to account for.
            ///This is a test field
            field1: String,
            ///This is a test field
            field2: str,
        }

        let info = TestStruct::get_info();
        let desired_struct = StructInfo {
            name: "TestStruct".to_string(),
            description: "".to_string(),
            parameters: vec![
                ParameterInfo {
                    name: "field1".to_string(),
                    r#type: "String".to_string(),
                    comment: "This is a test field".to_string(),
                },
                ParameterInfo {
                    name: "field2".to_string(),
                    r#type: "str".to_string(),
                    comment: "This is a test field".to_string(),
                },
            ],
        };

        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(info, desired_struct);
    }
}
3 changes: 3 additions & 0 deletions instructor/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### v1.0.1 (2024-06-22)

Published it on crates.io! Implementation is closely tied to the `instructor-ai` package for now with a goal to decouple this moving forward.
24 changes: 24 additions & 0 deletions instructor/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "instructor-ai"
version = "0.1.0"
edition = "2021"
authors = ["Ivan Leo <ivanleomk@gmail.com>"]
description = "instructor-ai is a simple crate that allows for users to do validated structured outputs"
readme = "../README.md"
homepage = "https://rust.useinstructor.com"
repository = "https://github.com/instructor-ai/instructor-rs"
license = "MIT OR Apache-2.0"

[package.metadata]
changelog = "CHANGELOG.md"

[dependencies]
instruct-macros = "0.1.1"
openai-api-rs = "4.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
instruct-macros-types = "0.1.2"

[lib]
name = "instructor_ai"
path = "src/lib.rs"
Loading

0 comments on commit a79c30a

Please sign in to comment.