
Merge branch 'main' into max-iterations
bborn authored Jun 10, 2023
2 parents 59edf8c + 73c00e1 commit c0a682e
Showing 32 changed files with 649 additions and 172 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -61,4 +61,4 @@ jobs:
bundler: default
bundler-cache: true
- name: Build docs
run: bundle exec rake yard
run: bundle exec yardoc
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
## [Unreleased]

## [0.5.3] - 2023-06-09
- 🗣️ LLMs
- Chat message history support for Langchain::LLM::GooglePalm and Langchain::LLM::OpenAI

## [0.5.2] - 2023-06-07
- 🗣️ LLMs
- Auto-calculate the max_tokens: setting to be passed on to OpenAI

## [0.5.1] - 2023-06-06
- 🛠️ Tools
- Modified Tool usage. Agents now accept Tools instances instead of Tool strings.
6 changes: 3 additions & 3 deletions Gemfile.lock
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
langchainrb (0.5.1)
langchainrb (0.5.3)
colorize (~> 0.8.1)
tiktoken_ruby (~> 0.0.5)

@@ -122,7 +122,7 @@ GEM
faraday-retry (1.0.3)
faraday_middleware (1.2.0)
faraday (~> 1.0)
google_palm_api (0.1.0)
google_palm_api (0.1.1)
faraday (>= 1.0.0)
faraday_middleware (>= 1.0.0)
google_search_results (2.0.1)
@@ -310,7 +310,7 @@ DEPENDENCIES
docx (~> 0.8.0)
dotenv-rails (~> 2.7.6)
eqn (~> 1.6.5)
google_palm_api (~> 0.1.0)
google_palm_api (~> 0.1.1)
google_search_results (~> 2.0.0)
hugging-face (~> 0.3.4)
langchainrb!
19 changes: 12 additions & 7 deletions README.md
@@ -1,12 +1,15 @@
🦜️🔗 LangChain.rb
💎🔗 LangChain.rb
---
⚡ Building applications with LLMs through composability ⚡

👨‍💻👩‍💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH

:warning: UNDER ACTIVE AND RAPID DEVELOPMENT (MAY BE BUGGY AND UNTESTED)

![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg)
[![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
[![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
[![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb/blob/main/LICENSE.txt)

Langchain.rb is a library that's an abstraction layer on top of many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.

@@ -33,6 +36,7 @@ require "langchain"
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |

@@ -47,6 +51,7 @@ Pick the vector search database you'll be using and instantiate the client:
client = Langchain::Vectorsearch::Weaviate.new(
url: ENV["WEAVIATE_URL"],
api_key: ENV["WEAVIATE_API_KEY"],
index: "",
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

@@ -55,6 +60,7 @@ client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
client = Langchain::Vectorsearch::Qdrant.new(...) # `gem "qdrant-ruby", "~> 0.9.0"`
client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
client = Langchain::Vectorsearch::Pgvector.new(...) # `gem "pgvector", "~> 0.2"`
```

```ruby
@@ -135,17 +141,17 @@ cohere.complete(prompt: "What is the meaning of life?")
#### HuggingFace
Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
```ruby
cohere = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
hugging_face = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
```

#### Replicate
Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
```ruby
cohere = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
```

#### Google PaLM (Pathways Language Model)
Add `"google_palm_api", "~> 0.1.0"` to your Gemfile.
Add `"google_palm_api", "~> 0.1.1"` to your Gemfile.
```ruby
google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
```
@@ -281,8 +287,7 @@ Add `gem "sequel"` to your Gemfile
```ruby
database = Langchain::Tool::Database.new(connection_string: "postgres://user:password@localhost:5432/db_name")

agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), tools: [database])

agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), db: database)
```
```ruby
agent.run(question: "How many users have a name with length greater than 5 in the users table?")
1 change: 0 additions & 1 deletion Rakefile
@@ -14,5 +14,4 @@ Rake::Task["spec"].enhance do
end

YARD::Rake::YardocTask.new do |t|
t.options = ["--fail-on-warning"]
end
2 changes: 1 addition & 1 deletion langchain.gemspec
@@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "cohere-ruby", "~> 0.9.4"
spec.add_development_dependency "docx", "~> 0.8.0"
spec.add_development_dependency "eqn", "~> 1.6.5"
spec.add_development_dependency "google_palm_api", "~> 0.1.0"
spec.add_development_dependency "google_palm_api", "~> 0.1.1"
spec.add_development_dependency "google_search_results", "~> 2.0.0"
spec.add_development_dependency "hugging-face", "~> 0.3.4"
spec.add_development_dependency "milvus", "~> 0.9.0"
43 changes: 43 additions & 0 deletions lib/langchain.rb
@@ -6,10 +6,53 @@

require_relative "./langchain/version"

# Langchain.rb is a library for building LLM-backed Ruby applications. It is an abstraction layer that sits on top of emerging AI-related tools, making it easy for developers to consume and string those services together.
#
# = Installation
# Install the gem and add to the application's Gemfile by executing:
#
# $ bundle add langchainrb
#
# If bundler is not being used to manage dependencies, install the gem by executing:
#
# $ gem install langchainrb
#
# Require the gem to start using it:
#
# require "langchain"
#
# = Concepts
#
# == Processors
# Processors load and parse/process various data types such as CSVs, PDFs, Word documents, HTML pages, and others.
#
# == Chunkers
# Chunkers split data based on various available options such as delimiters, chunk sizes or custom-defined functions. Chunkers are used when data needs to be split up before being imported in vector databases.
#
# == Prompts
# Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
#
# == Large Language Models (LLMs)
# LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
#
# == Vectorsearch Databases
# Vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
#
# == Embedding
# Word embedding or word vector is an approach with which we represent documents and words. It is defined as a numeric vector input that allows words with similar meanings to have the same representation. It can approximate meaning and represent a word in a lower dimensional space.
#
#
# = Logging
#
# LangChain.rb uses standard logging mechanisms and defaults to :debug level. Most messages are at info level, but we will add debug or warn statements as needed. To show all log messages:
#
# Langchain.logger.level = :info
module Langchain
class << self
# @return [Logger]
attr_accessor :logger

# @return [Pathname]
attr_reader :root
end

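The logging note in the doc comment above can be illustrated with Ruby's standard `Logger`, which behaves the way `Langchain.logger.level = :info` is described: messages below the configured severity are suppressed. This is a standalone sketch using only the stdlib, not the gem's actual setup.

```ruby
require "logger"

# A Logger at :info level filters out debug messages, mirroring the
# effect of `Langchain.logger.level = :info` described above.
log = Logger.new($stdout)
log.level = Logger::INFO

log.debug("hidden: below the configured level") # suppressed
log.info("visible: at or above the configured level") # emitted
```

Raising the level to `Logger::WARN` would additionally suppress the `info` line; the predicate methods (`log.debug?`, `log.info?`) report which severities currently pass the filter.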
8 changes: 8 additions & 0 deletions lib/langchain/agent/base.rb
@@ -1,6 +1,14 @@
# frozen_string_literal: true

module Langchain::Agent
# = Agents
#
  # Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break problems down into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer it responds with it.
#
# Available:
# - {Langchain::Agent::ChainOfThoughtAgent}
#
# @abstract
class Base
end
end
@@ -1,6 +1,20 @@
# frozen_string_literal: true

module Langchain::Agent
# = Chain of Thought Agent
#
# llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
#
# agent = Langchain::Agent::ChainOfThoughtAgent.new(
# llm: llm,
# tools: ["search", "calculator", "wikipedia"]
# )
#
# agent.tools
# # => ["search", "calculator", "wikipedia"]
#
# agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
# #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
class ChainOfThoughtAgent < Base
attr_reader :llm, :tools, :max_iterations

@@ -45,11 +59,8 @@ def run(question:)
end

Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
response = llm.complete(
prompt: prompt,
stop_sequences: ["Observation:"],
max_tokens: 500
)

response = llm.complete(prompt: prompt, stop_sequences: ["Observation:"])

# Append the response to the prompt
prompt += response
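The `run` loop the diff above touches follows the standard chain-of-thought pattern: complete the prompt up to an `"Observation:"` stop sequence, append the response, run the named tool, and repeat until a Final Answer appears. Here is a minimal, self-contained sketch of that loop with a stubbed LLM; the class and method names are illustrative, not the gem's API.

```ruby
# Stub LLM that plays two scripted "turns" of a chain-of-thought exchange.
class StubLLM
  RESPONSES = [
    "Thought: I need a tool.\nAction: calculator\nAction Input: 1916640 / 345\nObservation:",
    "Thought: I now know the final answer.\nFinal Answer: about 3300 fields."
  ].freeze

  def initialize
    @turn = 0
  end

  def complete(prompt:, stop_sequences: [])
    response = RESPONSES[@turn]
    @turn += 1
    response
  end
end

# Minimal reason/act loop: complete, append, check for a Final Answer,
# otherwise append a tool result and iterate (bounded by max_iterations).
def run_agent(llm, question, max_iterations: 5)
  prompt = "Question: #{question}\n"
  max_iterations.times do
    response = llm.complete(prompt: prompt, stop_sequences: ["Observation:"])
    prompt += response
    answer = response[/Final Answer: (.*)/, 1]
    return answer if answer
    prompt += " (a real agent would run the named tool and append its output)\n"
  end
  nil
end
```

Bounding the loop with `max_iterations` (the subject of this branch) is what prevents an agent that never reaches a Final Answer from looping forever.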
4 changes: 2 additions & 2 deletions lib/langchain/agent/sql_query_agent/sql_query_agent.rb
@@ -27,7 +27,7 @@ def run(question:)

# Get the SQL string to execute
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the initial prompt to the #{llm.class} LLM")
sql_string = llm.complete(prompt: prompt, max_tokens: 500)
sql_string = llm.complete(prompt: prompt)

# Execute the SQL string and collect the results
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the SQL to the Database: #{sql_string}")
@@ -36,7 +36,7 @@ def run(question:)
# Pass the results and get the LLM to synthesize the answer to the question
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
llm.complete(prompt: prompt2, max_tokens: 500)
llm.complete(prompt: prompt2)
end

private
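The `SQLQueryAgent#run` method above is a two-round-trip pattern: ask the LLM for SQL, execute it against the database, then ask the LLM to phrase the results as an answer. A self-contained sketch of that flow, with a stubbed LLM and a lambda standing in for the database connection (all names here are illustrative, not the gem's API):

```ruby
# Stub LLM: returns SQL for the first prompt and prose for the second.
class SqlStubLLM
  def complete(prompt:)
    if prompt.start_with?("Write SQL")
      "SELECT COUNT(*) FROM users WHERE LENGTH(name) > 5;"
    else
      "There are 3 such users."
    end
  end
end

# 1) generate SQL, 2) execute it, 3) synthesize a natural-language answer.
def sql_query_agent(llm, db, question)
  sql = llm.complete(prompt: "Write SQL for: #{question}")
  results = db.call(sql) # a real agent executes this via its DB connection
  llm.complete(prompt: "Given the results #{results.inspect}, answer: #{question}")
end
```

The second prompt deliberately carries both the raw results and the original question, so the LLM can ground its phrasing in the actual query output.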
18 changes: 9 additions & 9 deletions lib/langchain/llm/ai21.rb
@@ -1,16 +1,16 @@
# frozen_string_literal: true

module Langchain::LLM
#
# Wrapper around AI21 Studio APIs.
#
# Gem requirements:
# gem "ai21", "~> 0.2.0"
#
# Usage:
# ai21 = Langchain::LLM::AI21.new(api_key:)
#
class AI21 < Base
#
# Wrapper around AI21 Studio APIs.
#
# Gem requirements: gem "ai21", "~> 0.2.0"
#
# Usage:
# ai21 = Langchain::LLM::AI21.new(api_key:)
#

def initialize(api_key:)
depends_on "ai21"
require "ai21"
35 changes: 31 additions & 4 deletions lib/langchain/llm/base.rb
@@ -1,31 +1,58 @@
# frozen_string_literal: true

module Langchain::LLM
# A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
#
# Langchain.rb provides a common interface to interact with all supported LLMs:
#
# - {Langchain::LLM::AI21}
# - {Langchain::LLM::Cohere}
# - {Langchain::LLM::GooglePalm}
# - {Langchain::LLM::HuggingFace}
# - {Langchain::LLM::OpenAI}
# - {Langchain::LLM::Replicate}
#
# @abstract
class Base
include Langchain::DependencyHelper

# A client for communicating with the LLM
attr_reader :client

def default_dimension
self.class.const_get(:DEFAULTS).dig(:dimension)
end

# Method supported by an LLM that generates a response for a given chat-style prompt
#
# Generate a chat completion for a given prompt. Parameters will depend on the LLM
#
# @raise NotImplementedError if not supported by the LLM
def chat(...)
raise NotImplementedError, "#{self.class.name} does not support chat"
end

# Method supported by an LLM that completes a given prompt
#
# Generate a completion for a given prompt. Parameters will depend on the LLM.
#
# @raise NotImplementedError if not supported by the LLM
def complete(...)
raise NotImplementedError, "#{self.class.name} does not support completion"
end

# Method supported by an LLM that generates an embedding for a given text or array of texts
#
    # Generate an embedding for a given text. Parameters depend on the LLM.
#
# @raise NotImplementedError if not supported by the LLM
#
def embed(...)
raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
end

# Method supported by an LLM that summarizes a given text
#
    # Generate a summary for a given text. Parameters depend on the LLM.
#
# @raise NotImplementedError if not supported by the LLM
#
def summarize(...)
raise NotImplementedError, "#{self.class.name} does not support summarization"
end
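The `Langchain::LLM::Base` diff above documents a common abstract-base pattern: the base class raises `NotImplementedError` for every capability, and each provider subclass overrides only the methods its API actually supports. A standalone sketch of the same pattern, independent of the gem (class names here are hypothetical):

```ruby
# Abstract base: every capability raises by default, with a message that
# names the concrete class, exactly as in the diff above.
class AbstractLLM
  def complete(...)
    raise NotImplementedError, "#{self.class.name} does not support completion"
  end

  def embed(...)
    raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
  end
end

# A provider subclass overrides only what it supports; embed still raises.
class EchoLLM < AbstractLLM
  def complete(prompt:)
    "echo: #{prompt}" # a real subclass would call its provider's API here
  end
end
```

The `(...)` argument forwarding (Ruby 2.7+) lets the base class accept any signature without committing to one, since each provider's parameters differ.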
19 changes: 10 additions & 9 deletions lib/langchain/llm/cohere.rb
@@ -1,16 +1,16 @@
# frozen_string_literal: true

module Langchain::LLM
#
# Wrapper around the Cohere API.
#
# Gem requirements:
# gem "cohere-ruby", "~> 0.9.4"
#
# Usage:
# cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
#
class Cohere < Base
#
# Wrapper around the Cohere API.
#
# Gem requirements: gem "cohere-ruby", "~> 0.9.4"
#
# Usage:
# cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
#

DEFAULTS = {
temperature: 0.0,
completion_model_name: "base",
@@ -43,6 +43,7 @@ def embed(text:)
# Generate a completion for a given prompt
#
# @param prompt [String] The prompt to generate a completion for
# @param params[:stop_sequences]
# @return [Hash] The completion
#
def complete(prompt:, **params)