elixir-nx · jonatanklosko · Jun 2, 2023 · May 26, 2023 · May 27, 2023 · May 27, 2023
diff --git a/lib/bumblebee/text.ex b/lib/bumblebee/text.ex
@@ -305,6 +305,68 @@ defmodule Bumblebee.Text do
   defdelegate text_classification(model_info, tokenizer, opts \\ []),
     to: Bumblebee.Text.TextClassification
 
+  @type text_embedding_input :: String.t()
+  @type text_embedding_output :: %{embedding: Nx.Tensor.t()}
+
+  @doc """
+  Builds serving for text embeddings.
+
+  The serving accepts `t:text_embedding_input/0` and returns
+  `t:text_embedding_output/0`. A list of inputs is also supported.
+
+  ## Options
+
+    * `:output_attribute` - the attribute of the embedding model output
+      with the desired embedding. Set this option to `nil` in order to
+      directly retrieve the model output rather than choosing an attribute
+      of it. Defaults to `:pooled_state`
+
+    * `:embedding_functions` - a list of the functions to apply to the
+      output embedding, in order. Each element of the list should be
+      one of `:l2_normalization` or `:mean_pooling`. Defaults to `[]`
+
+    * `:compile` - compiles all computations for predefined input shapes
+      during serving initialization. Should be a keyword list with the
+      following keys:
+
+        * `:batch_size` - the maximum batch size of the input. Inputs
+          are optionally padded to always match this batch size
+
+        * `:sequence_length` - the maximum input sequence length. Input
+          sequences are always padded/truncated to match that length
+
+      It is advised to set this option in production and also configure
+      a defn compiler using `:defn_options` to maximally reduce inference
+      time.
+
+    * `:defn_options` - the options for JIT compilation. Defaults to `[]`
+
+  ## Examples
+
+      {:ok, model_info} = Bumblebee.load_model({:hf, "intfloat/e5-large"})
+      {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "intfloat/e5-large"})
+
+      serving = Bumblebee.Text.text_embedding(model_info, tokenizer)
+
+      text = "query: Cats are cute."
+      Nx.Serving.run(serving, text)
+
+      #=> %{
+      #=>   embedding: #Nx.Tensor<
+      #=>     f32[1024]
+      #=>     EXLA.Backend<host:0, 0.124908262.1234305056.185360>
+      #=>     [-0.9789889454841614, -0.9814645051956177, -0.5015208125114441, 0.9867952466011047, 0.9917466640472412, -0.5557178258895874, -0.18618212640285492, 0.797040581703186, 0.8922086954116821, 0.7599573135375977, -0.16524426639080048, -0.8740050792694092, 0.9433475732803345, 0.7217797636985779, 0.9437620639801025, 0.4694959223270416, 0.40594056248664856, -0.20143413543701172, 0.7144518494606018, -0.8689796924591064, 0.94001305103302, 0.17163503170013428, -0.9896315932273865, 0.4455447494983673, 0.41139301657676697, 0.01911175064742565, -0.11275406181812286, -0.734498143196106, -0.6410953402519226, -0.628239095211029, -0.2570168673992157, 0.475137323141098, -0.7534396052360535, -0.9492156505584717, -0.17271563410758972, 0.9081271886825562, -0.4851466119289398, -0.9440935254096985, -0.20976334810256958, -0.684502899646759, -0.11581139266490936, 0.17509342730045319, 0.05547652021050453, 0.31042391061782837, 0.955132007598877, -0.35595986247062683, 0.016105204820632935, -0.3154579997062683, 0.9630348682403564, ...]
+      #=>   >
+      #=> }
+  """
+  @spec text_embedding(
+          Bumblebee.model_info(),
+          Bumblebee.Tokenizer.t(),
+          keyword()
+        ) :: Nx.Serving.t()
+  defdelegate text_embedding(model_info, tokenizer, opts \\ []),
+    to: Bumblebee.Text.TextEmbedding
+
   @type fill_mask_input :: String.t()
   @type fill_mask_output :: %{predictions: list(fill_mask_prediction())}
   @type fill_mask_prediction :: %{score: number(), token: String.t()}

diff --git a/lib/bumblebee/text/text_embedding.ex b/lib/bumblebee/text/text_embedding.ex
@@ -0,0 +1,132 @@
+defmodule Bumblebee.Text.TextEmbedding do
+  @moduledoc false
+
+  alias Bumblebee.Shared
+
+  # Builds an `Nx.Serving` that turns text into embeddings.
+  #
+  # Each input is tokenized, run through the model and returned as
+  # `%{embedding: tensor}`. Options:
+  #
+  #   * `:output_attribute` - key read from the model output, `nil`
+  #     keeps the whole output (default `:pooled_state`)
+  #   * `:embedding_functions` - `:l2_normalization` / `:mean_pooling`
+  #     steps applied in order during postprocessing (default `[]`)
+  #   * `:compile` - keyword list with `:batch_size` and
+  #     `:sequence_length`
+  #   * `:defn_options` - handed to `Nx.Serving.new/2` (default `[]`)
+  #
+  # Raises `ArgumentError` if `:compile` lacks either key, and, during
+  # postprocessing, if an unknown embedding function is listed.
+  def text_embedding(model_info, tokenizer, opts \\ []) do
+    %{model: model, params: params} = model_info
+
+    opts =
+      Keyword.validate!(opts, [
+        :compile,
+        output_attribute: :pooled_state,
+        embedding_functions: [],
+        defn_options: []
+      ])
+
+    output_attribute = opts[:output_attribute]
+    embedding_functions = opts[:embedding_functions]
+    compile = opts[:compile]
+    defn_options = opts[:defn_options]
+
+    # Indexing into a nil `compile` yields nil for both values.
+    batch_size = compile[:batch_size]
+    sequence_length = compile[:sequence_length]
+
+    if compile != nil and (batch_size == nil or sequence_length == nil) do
+      raise ArgumentError,
+            "expected :compile to be a keyword list specifying :batch_size and :sequence_length, got: #{inspect(compile)}"
+    end
+
+    {_init_fun, encoder} = Axon.build(model)
+
+    # The inputs travel with the embedding because mean pooling needs the
+    # attention mask during postprocessing.
+    embedding_fun = fn params, inputs ->
+      output = encoder.(params, inputs)
+      embedding = if output_attribute == nil, do: output, else: output[output_attribute]
+      {inputs, embedding}
+    end
+
+    Nx.Serving.new(
+      fn defn_options ->
+        embedding_fun =
+          Shared.compile_or_jit(embedding_fun, defn_options, compile != nil, fn ->
+            inputs = %{
+              "input_ids" => Nx.template({batch_size, sequence_length}, :u32),
+              "attention_mask" => Nx.template({batch_size, sequence_length}, :u32)
+            }
+
+            [params, inputs]
+          end)
+
+        fn inputs ->
+          inputs = Shared.maybe_pad(inputs, batch_size)
+          embedding_fun.(params, inputs)
+        end
+      end,
+      defn_options
+    )
+    |> Nx.Serving.process_options(batch_size: batch_size)
+    |> Nx.Serving.client_preprocessing(fn input ->
+      {texts, multi?} = Shared.validate_serving_input!(input, &Shared.validate_string/1)
+
+      inputs =
+        Bumblebee.apply_tokenizer(tokenizer, texts,
+          length: sequence_length,
+          return_token_type_ids: false
+        )
+
+      {Nx.Batch.concatenate([inputs]), multi?}
+    end)
+    |> Nx.Serving.client_postprocessing(fn inputs_and_embeddings, _metadata, multi? ->
+      for inputs_and_embedding <- Bumblebee.Utils.Nx.batch_to_list(inputs_and_embeddings) do
+        {inputs, embedding} = inputs_and_embedding
+        %{embedding: apply_embedding_functions(embedding_functions, embedding, inputs)}
+      end
+      |> Shared.normalize_output(multi?)
+    end)
+  end
+
+  # Folds the requested embedding functions over the embedding, in order.
+  defp apply_embedding_functions(functions, embedding, inputs) do
+    Enum.reduce(functions, embedding, &apply_embedding_function(&1, &2, inputs))
+  end
+
+  # Scales the embedding to unit L2 norm; a zero embedding is returned as is.
+  defp apply_embedding_function(:l2_normalization, embedding, _inputs) do
+    norm = Nx.LinAlg.norm(embedding, ord: 2)
+
+    # `norm` is a tensor struct, and comparing a struct with `>` falls back
+    # to term ordering (always true), so read the number out first.
+    if Nx.to_number(norm) > 0 do
+      Nx.divide(embedding, norm)
+    else
+      embedding
+    end
+  end
+
+  # Averages token embeddings, counting only positions the attention mask
+  # keeps.
+  defp apply_embedding_function(:mean_pooling, embedding, inputs) do
+    input_mask_expanded = Nx.new_axis(inputs["attention_mask"], -1)
+
+    # Reduce over the sequence axis, i.e. the one right before the hidden
+    # axis. Assumes token-level embeddings; each item yielded by
+    # `batch_to_list/1` no longer carries a batch axis, so axis 1 is wrong.
+    embedding
+    |> Nx.multiply(input_mask_expanded)
+    |> Nx.sum(axes: [-2])
+    |> Nx.divide(Nx.sum(input_mask_expanded, axes: [-2]))
+  end
+
+  defp apply_embedding_function(other, _embedding, _inputs) do
+    raise ArgumentError,
+          "expected each element of :embedding_functions to be one of :l2_normalization or :mean_pooling, got: #{inspect(other)}"
+  end
+end
diff --git a/test/bumblebee/text/text_embedding_test.exs b/test/bumblebee/text/text_embedding_test.exs
@@ -0,0 +1,27 @@
+defmodule Bumblebee.Text.TextEmbeddingTest do
+  use ExUnit.Case, async: false
+
+  import Bumblebee.TestHelpers
+
+  @moduletag model_test_tags()
+
+  describe "integration" do
+    test "returns E5 embedding for a piece of text" do
+      {:ok, model_info} = Bumblebee.load_model({:hf, "intfloat/e5-large"})
+      {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "intfloat/e5-large"})
+
+      options = [embedding_functions: [:l2_normalization]]
+
+      serving = Bumblebee.Text.text_embedding(model_info, tokenizer, options)
+
+      text = "query: Cats are cute."
+
+      assert %{embedding: embedding} = Nx.Serving.run(serving, text)
+      assert Nx.shape(embedding) == {1024}
+
+      # :l2_normalization was requested, so the result must have unit length.
+      norm = embedding |> Nx.LinAlg.norm(ord: 2) |> Nx.to_number()
+      assert_in_delta norm, 1.0, 1.0e-4
+    end
+  end
+end