Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename dimension parameter to dimensions everywhere #586

Merged
merged 2 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/langchain/llm/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class Base
# A client for communicating with the LLM
attr_reader :client

def default_dimension
self.class.const_get(:DEFAULTS).dig(:dimension)
def default_dimensions
self.class.const_get(:DEFAULTS).dig(:dimensions)
end

#
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/cohere.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Cohere < Base
temperature: 0.0,
completion_model_name: "command",
embeddings_model_name: "small",
dimension: 1024,
dimensions: 1024,
truncate: "START"
}.freeze

Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/google_palm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module Langchain::LLM
class GooglePalm < Base
DEFAULTS = {
temperature: 0.0,
dimension: 768, # This is what the `embedding-gecko-001` model generates
dimensions: 768, # This is what the `embedding-gecko-001` model generates
completion_model_name: "text-bison-001",
chat_completion_model_name: "chat-bison-001",
embeddings_model_name: "embedding-gecko-001"
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/google_vertex_ai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class GoogleVertexAi < Base
max_output_tokens: 1000,
top_p: 0.8,
top_k: 40,
dimension: 768,
dimensions: 768,
completion_model_name: "text-bison", # Optional: text-bison@001
embeddings_model_name: "textembedding-gecko"
}.freeze
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/hugging_face.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class HuggingFace < Base
DEFAULTS = {
temperature: 0.0,
embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2",
dimension: 384 # Vector size generated by the above model
dimensions: 384 # Vector size generated by the above model
}.freeze

#
Expand Down
4 changes: 2 additions & 2 deletions lib/langchain/llm/ollama.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def initialize(url:, default_options: {})

# Returns the # of vector dimensions for the embeddings
# @return [Integer] The # of vector dimensions
def default_dimension
def default_dimensions
# since Ollama can run multiple models, look it up or generate an embedding and return the size
@default_dimension ||=
@default_dimensions ||=
EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name].to_sym) do
embed(text: "test").embedding.size
end
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/openai.rb
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def summarize(text:)
complete(prompt: prompt)
end

def default_dimension
def default_dimensions
@defaults[:dimensions] || EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name])
end

Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/llm/replicate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Replicate < Base
# TODO: Design the interface to pass and use different models
completion_model_name: "replicate/vicuna-13b",
embeddings_model_name: "creatorrr/all-mpnet-base-v2",
dimension: 384
dimensions: 384
}.freeze

#
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/vectorsearch/elasticsearch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def delete_default_schema
end

def default_vector_settings
{type: "dense_vector", dims: llm.default_dimension}
{type: "dense_vector", dims: llm.default_dimensions}
end

def vector_settings
Expand Down
4 changes: 2 additions & 2 deletions lib/langchain/vectorsearch/epsilla.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def initialize(url:, db_name:, db_path:, index_name:, llm:)
@db_path = db_path
@table_name = index_name

@vector_dimension = llm.default_dimension
@vector_dimensions = llm.default_dimensions

super(llm: llm)
end
Expand All @@ -64,7 +64,7 @@ def create_default_schema
status_code, response = @client.database.create_table(@table_name, [
{"name" => "ID", "dataType" => "STRING", "primaryKey" => true},
{"name" => "Doc", "dataType" => "STRING"},
{"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimension}
{"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimensions}
])
raise "Failed to create table: #{response}" if status_code != 200

Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/vectorsearch/hnswlib.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def initialize(llm:, path_to_index:)

super(llm: llm)

@client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimension)
@client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimensions)
@path_to_index = path_to_index

initialize_index
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/vectorsearch/milvus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def create_default_schema
type_params: [
{
key: "dim",
value: llm.default_dimension.to_s
value: llm.default_dimensions.to_s
}
]
}
Expand Down
4 changes: 2 additions & 2 deletions lib/langchain/vectorsearch/pgvector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ def remove_texts(ids:)
def create_default_schema
db.run "CREATE EXTENSION IF NOT EXISTS vector"
namespace_column = @namespace_column
vector_dimension = llm.default_dimension
vector_dimensions = llm.default_dimensions
db.create_table? table_name.to_sym do
primary_key :id
text :content
column :vectors, "vector(#{vector_dimension})"
column :vectors, "vector(#{vector_dimensions})"
text namespace_column.to_sym, default: nil
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/vectorsearch/pinecone.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def create_default_schema
client.create_index(
metric: DEFAULT_METRIC,
name: index_name,
dimension: llm.default_dimension
dimension: llm.default_dimensions
)
end

Expand Down
2 changes: 1 addition & 1 deletion lib/langchain/vectorsearch/qdrant.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def create_default_schema
collection_name: index_name,
vectors: {
distance: DEFAULT_METRIC.capitalize,
size: llm.default_dimension
size: llm.default_dimensions
}
)
end
Expand Down
6 changes: 3 additions & 3 deletions spec/langchain/llm/cohere_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@
end
end

describe "#default_dimension" do
it "returns the default dimension" do
expect(subject.default_dimension).to eq(1024)
describe "#default_dimensions" do
it "returns the default dimensions" do
expect(subject.default_dimensions).to eq(1024)
end
end

Expand Down
6 changes: 3 additions & 3 deletions spec/langchain/llm/hugging_face_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
end
end

describe "#default_dimension" do
it "returns the default dimension" do
expect(subject.default_dimension).to eq(384)
describe "#default_dimensions" do
it "returns the default dimensions" do
expect(subject.default_dimensions).to eq(384)
end
end
end
18 changes: 9 additions & 9 deletions spec/langchain/llm/ollama_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,52 +68,52 @@
end
end

describe "#default_dimension" do
describe "#default_dimensions" do
it "returns size of llama2 embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "llama2"})

expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of llava embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "llava"})

expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of mistral embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "mistral"})

expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of mixtral embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "mixtral"})

expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of dolphin-mixtral embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "dolphin-mixtral"})
expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of mistral-openorca embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "mistral-openorca"})
expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

it "returns size of codellama embeddings" do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "codellama"})
expect(subject.default_dimension).to eq(4_096)
expect(subject.default_dimensions).to eq(4_096)
end

# this one has not been hardcoded, but will be looked up
# by generating an embedding and checking its size
it "returns size of tinydolphin embeddings", vcr: true do
subject = described_class.new(url: "http://localhost:11434", default_options: {embeddings_model_name: "tinydolphin"})

expect(subject.default_dimension).to eq(2_048)
expect(subject.default_dimensions).to eq(2_048)
end
end
end
30 changes: 15 additions & 15 deletions spec/langchain/llm/openai_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,34 +91,34 @@

describe "the model dimension" do
let(:model) { "text-embedding-3-small" }
let(:dimension_size) { 1536 }
let(:dimensions_size) { 1536 }
let(:parameters) do
{parameters: {input: "Hello World", model: model, dimensions: dimension_size}}
{parameters: {input: "Hello World", model: model, dimensions: dimensions_size}}
end

context "when dimension is not provided" do
it "forwards the models default dimension" do
context "when dimensions is not provided" do
it "forwards the models default dimensions" do
subject.embed(text: "Hello World", model: model)

expect(subject.client).to have_received(:embeddings).with(parameters)
end
end

context "when dimension is provided" do
let(:dimension_size) { 1536 }
context "when dimensions is provided" do
let(:dimensions_size) { 1536 }

let(:parameters) do
{parameters: {input: "Hello World", model: model, dimensions: dimension_size}}
{parameters: {input: "Hello World", model: model, dimensions: dimensions_size}}
end

let(:subject) do
described_class.new(api_key: "123", default_options: {
embeddings_model_name: model,
dimension: dimension_size
dimensions: dimensions_size
})
end

it "forwards the model's default dimension" do
it "forwards the model's default dimensions" do
allow(subject.client).to receive(:embeddings).with(parameters).and_return(response)
subject.embed(text: "Hello World", model: model)

Expand Down Expand Up @@ -369,21 +369,21 @@
end
end

describe "#default_dimension" do
it "returns the default dimension" do
expect(subject.default_dimension).to eq(1536)
describe "#default_dimensions" do
it "returns the default dimensions" do
expect(subject.default_dimensions).to eq(1536)
end

context "when the dimension is passed as an argument" do
context "when the dimensions is passed as an argument" do
let(:subject) do
described_class.new(api_key: "123", default_options: {
embeddings_model_name: "text-embedding-3-small",
dimensions: 512
})
end

it "sets the default_dimension" do
expect(subject.default_dimension).to eq 512
it "sets the default_dimensions" do
expect(subject.default_dimensions).to eq 512
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion spec/langchain/vectorsearch/hnswlib_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
end

before do
allow_any_instance_of(Langchain::LLM::GooglePalm).to receive(:default_dimension).and_return(3)
allow_any_instance_of(Langchain::LLM::GooglePalm).to receive(:default_dimensions).and_return(3)
end

let(:llm) { Langchain::LLM::GooglePalm.new(api_key: "123") }
Expand Down
2 changes: 1 addition & 1 deletion spec/langchain/vectorsearch/pinecone_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
allow(subject.client).to receive(:create_index).with(
metric: described_class::DEFAULT_METRIC,
name: index_name,
dimension: subject.llm.default_dimension
dimension: subject.llm.default_dimensions
).and_return(true)
expect(subject.create_default_schema).to eq(true)
end
Expand Down