26 commits
8adaf2e - Add Qwen3 model support (nyo16, Oct 5, 2025)
0499d71 - Add last token pooling support for Qwen3-Embedding models (nyo16, Oct 5, 2025)
1d92e9e - Add Qwen3 embedding architecture and instruction prompts support (nyo16, Oct 5, 2025)
47c337d - Add .lexical/ to gitignore and IEx usage guide (nyo16, Oct 5, 2025)
6f68d8f - mix format and rebuilding lock (nyo16, Oct 5, 2025)
5641a4f - Add Qwen3-Reranker support and example (nyo16, Oct 5, 2025)
fa592c3 - Organize Qwen3 examples into dedicated folder (nyo16, Oct 5, 2025)
8208efd - Address PR review feedback for Qwen3 support (Oct 6, 2025)
1f24cc6 - Fix Qwen3 layer naming for Layers.Transformer.blocks (Oct 6, 2025)
cb181f3 - Map qwen3 model type to :qwen2 tokenizer type (Oct 6, 2025)
1651488 - Add comprehensive Qwen3 notebook with examples (Oct 6, 2025)
c02c295 - Add instruction format to embeddings example in Qwen3 notebook (Oct 6, 2025)
bd19c79 - Add Qwen3 model tests with reference values (Oct 6, 2025)
8d787ee - Fix Qwen3 embedding pooling to use attention mask instead of pad_toke… (nyo16, Oct 10, 2025)
a1923e1 - Add :for_reranker architecture for Qwen3 (nyo16, Oct 10, 2025)
0f271b5 - Address PR #423 review comments: simple fixes (nyo16, Nov 7, 2025)
66e2a1b - Update lib/bumblebee/text/pre_trained_tokenizer.ex (nyo16, Nov 7, 2025)
81285e7 - Merge branch 'qwen3-dense-support' of github.com:nyo16/bumblebee into… (nyo16, Nov 7, 2025)
1e189b8 - Merge branch 'main' into qwen3-dense-support (nyo16, Nov 7, 2025)
cc92ccc - Rename text_reranking to text_reranking_qwen3 (nyo16, Nov 7, 2025)
660ef1b - Remove :for_reranker architecture, use :for_causal_language_modeling (nyo16, Nov 7, 2025)
9fccfaa - Fix syntax error and document :last_token_pooling option (Nov 16, 2025)
b289b75 - Make query_norm and key_norm always functions (Nov 16, 2025)
7604f42 - Fix duplicate rotary_embedding key in transformer blocks (Nov 16, 2025)
7a7eb93 - Update Qwen3 tests to use bumblebee-testing models (Nov 16, 2025)
bd4f915 - run formatter (Nov 16, 2025)
5 changes: 5 additions & 0 deletions lib/bumblebee.ex
@@ -178,6 +178,10 @@ defmodule Bumblebee do
"Phi3ForCausalLM" => {Bumblebee.Text.Phi3, :for_causal_language_modeling},
"Phi3ForSequenceClassification" => {Bumblebee.Text.Phi3, :for_sequence_classification},
"Phi3ForTokenClassification" => {Bumblebee.Text.Phi3, :for_token_classification},
"Qwen3Model" => {Bumblebee.Text.Qwen3, :base},
"Qwen3ForCausalLM" => {Bumblebee.Text.Qwen3, :for_causal_language_modeling},
"Qwen3ForSequenceClassification" => {Bumblebee.Text.Qwen3, :for_sequence_classification},
"Qwen3ForEmbedding" => {Bumblebee.Text.Qwen3, :for_embedding},
Member review comment: There is no Qwen3ForEmbedding in HF transformers, so we can remove this, and the :for_embedding architecture.

"ResNetForImageClassification" => {Bumblebee.Vision.ResNet, :for_image_classification},
"ResNetModel" => {Bumblebee.Vision.ResNet, :base},
"RobertaForMaskedLM" => {Bumblebee.Text.Roberta, :for_masked_language_modeling},
@@ -258,6 +262,7 @@ defmodule Bumblebee do
"mbart" => :mbart,
"phi" => :code_gen,
"phi3" => :llama,
"qwen3" => :qwen2,
"roberta" => :roberta,
"smollm3" => :smollm3,
"t5" => :t5,
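With these mappings in place, a Qwen3 checkpoint loads through the usual Bumblebee entry points. The sketch below is illustrative only; the repository name "Qwen/Qwen3-0.6B" is an assumption for illustration, not something pinned down by this diff.

    # Hypothetical usage once the Qwen3 mappings are registered.
    {:ok, model_info} = Bumblebee.load_model({:hf, "Qwen/Qwen3-0.6B"})
    {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "Qwen/Qwen3-0.6B"})
    {:ok, generation_config} = Bumblebee.load_generation_config({:hf, "Qwen/Qwen3-0.6B"})

    serving = Bumblebee.Text.generation(model_info, tokenizer, generation_config)
    Nx.Serving.run(serving, "Elixir is a functional language that")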
41 changes: 38 additions & 3 deletions lib/bumblebee/layers/transformer.ex
@@ -53,7 +53,9 @@ defmodule Bumblebee.Layers.Transformer do
:layer_norm,
:block_type,
:attention_window_size,
:scale_attention_weights
:scale_attention_weights,
:query_norm,
:key_norm
]

opts =
@@ -330,7 +332,9 @@ defmodule Bumblebee.Layers.Transformer do
layer_norm: [],
attention_window_size: nil,
scale_attention_weights: true,
rotary_embedding: nil
rotary_embedding: nil,
query_norm: nil,
key_norm: nil
])

name = opts[:name]
@@ -360,6 +364,8 @@
attention_window_size = opts[:attention_window_size]
scale_attention_weights = opts[:scale_attention_weights]
rotary_embedding = opts[:rotary_embedding]
query_norm = opts[:query_norm]
key_norm = opts[:key_norm]

ffn_fun =
case ffn do
@@ -418,6 +424,8 @@
attention_window_size: attention_window_size,
scale_attention_weights: scale_attention_weights,
rotary_embedding: rotary_embedding,
query_norm: query_norm,
key_norm: key_norm,
name: join(name, "self_attention")
)

@@ -703,6 +711,14 @@

* `:max_positions` - the maximum number of distinct positions

* `:query_norm` - a function that applies normalization to the query
projection before rotary embedding. The function should accept two
arguments: the input and a name for the layer. Defaults to `nil`

* `:key_norm` - a function that applies normalization to the key
projection before rotary embedding. The function should accept two
arguments: the input and a name for the layer. Defaults to `nil`

* `:name` - the prefix for layer names

## References
@@ -734,7 +750,9 @@
key_use_bias: true,
value_use_bias: true,
output_use_bias: true,
rotary_embedding: nil
rotary_embedding: nil,
query_norm: nil,
key_norm: nil
])

attention_mask = opts[:attention_mask]
@@ -752,6 +770,8 @@
scale_attention_weights = opts[:scale_attention_weights]
dropout_rate = opts[:dropout_rate]
rotary_embedding = opts[:rotary_embedding]
query_norm = opts[:query_norm]
key_norm = opts[:key_norm]

query_use_bias = opts[:query_use_bias]
key_use_bias = opts[:key_use_bias]
@@ -791,6 +811,21 @@
)
|> Layers.split_heads(num_key_value_heads)

# Apply query and key normalization if configured (before rotary embedding)
query =
if query_norm do
query_norm.(query, join(name, "query_norm"))
else
query
end

key =
if key_norm do
key_norm.(key, join(name, "key_norm"))
else
key
end

{query, key} =
case rotary_embedding do
opts when is_list(opts) ->
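The new `:query_norm` and `:key_norm` options take two-arity functions that receive the projected tensor and a layer name. Below is a minimal sketch of how a model implementation such as Qwen3 might wire them up, assuming `Bumblebee.Layers.rms_norm/2` accepts `:epsilon` and `:name` options as it does elsewhere in Bumblebee; the epsilon value and layer names are placeholders.

    # Two-arity norm functions: input tensor and layer name.
    query_norm = fn hidden_state, name ->
      Bumblebee.Layers.rms_norm(hidden_state, epsilon: 1.0e-6, name: name)
    end

    key_norm = fn hidden_state, name ->
      Bumblebee.Layers.rms_norm(hidden_state, epsilon: 1.0e-6, name: name)
    end

    # These are then passed through the block options, alongside the model's
    # other settings (number of blocks, heads, hidden size, ffn, and so on):
    #
    #     Layers.Transformer.blocks(hidden_state,
    #       ...,
    #       query_norm: query_norm,
    #       key_norm: key_norm,
    #       name: "decoder.blocks"
    #     )

As the diff above shows, the norms are applied to the per-head query and key projections before the rotary embedding.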
46 changes: 46 additions & 0 deletions lib/bumblebee/text.ex
@@ -385,6 +385,9 @@ defmodule Bumblebee.Text do
Note that we currently assume that the CLS token is the first token
in the sequence

* `:last_token_pooling` - takes the embedding for the last non-padding
token in each sequence

By default no pooling is applied

* `:embedding_processor` - a post-processing step to apply to the
@@ -444,6 +447,49 @@ defmodule Bumblebee.Text do
defdelegate text_embedding(model_info, tokenizer, opts \\ []),
to: Bumblebee.Text.TextEmbedding

@type text_reranking_qwen3_input :: {String.t(), String.t()} | [{String.t(), String.t()}]
@type text_reranking_qwen3_output :: %{
scores: text_reranking_qwen3_score() | list(text_reranking_qwen3_score())
}
@type text_reranking_qwen3_score :: %{score: number(), query: String.t(), document: String.t()}

@doc """
Builds a serving for text reranking with Qwen3 reranker models.

The serving expects input in one of the following formats:

* `{query, document}` - a tuple with query and document text
* `[{query1, doc1}, {query2, doc2}, ...]` - a list of query-document pairs

## Options

See `Bumblebee.Text.TextRerankingQwen3.text_reranking_qwen3/3` for available options.
Member review comment: The TextRerankingQwen3 module is private (@moduledoc false), so we should have all the docs and options here.

## Examples

{:ok, model_info} = Bumblebee.load_model({:hf, "Qwen/Qwen3-Reranker-0.6B"})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "Qwen/Qwen3-Reranker-0.6B"})

serving = Bumblebee.Text.text_reranking_qwen3(model_info, tokenizer)

query = "What is the capital of France?"
documents = [
"Paris is the capital of France.",
"Berlin is the capital of Germany."
]

pairs = Enum.map(documents, &{query, &1})
Nx.Serving.run(serving, pairs)

"""
@spec text_reranking_qwen3(
Bumblebee.model_info(),
Bumblebee.Tokenizer.t(),
keyword()
) :: Nx.Serving.t()
defdelegate text_reranking_qwen3(model_info, tokenizer, opts \\ []),
to: Bumblebee.Text.TextRerankingQwen3

@type fill_mask_input :: String.t()
@type fill_mask_output :: %{predictions: list(fill_mask_prediction())}
@type fill_mask_prediction :: %{score: number(), token: String.t()}
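The new `:last_token_pooling` value pairs with the text embedding serving. A hedged example follows, assuming the pooling strategy is selected via the serving's `:output_pool` option (as with the existing `:mean_pooling`) and that "Qwen/Qwen3-Embedding-0.6B" is the intended checkpoint; both are assumptions for illustration.

    {:ok, model_info} = Bumblebee.load_model({:hf, "Qwen/Qwen3-Embedding-0.6B"})
    {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "Qwen/Qwen3-Embedding-0.6B"})

    serving =
      Bumblebee.Text.text_embedding(model_info, tokenizer,
        # Take the embedding of the last non-padding token, then L2-normalize.
        output_pool: :last_token_pooling,
        embedding_processor: :l2_norm
      )

    Nx.Serving.run(serving, "What is the capital of France?")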
7 changes: 7 additions & 0 deletions lib/bumblebee/text/pre_trained_tokenizer.ex
@@ -200,6 +200,13 @@ defmodule Bumblebee.Text.PreTrainedTokenizer do
},
default_template_options: [language_token: "eng_Latn"]
},
qwen2: %{
special_tokens: %{
unk: "<|endoftext|>",
eos: "<|endoftext|>",
pad: "<|endoftext|>"
}
},
roberta: %{
special_tokens: %{
bos: "<s>",
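Together with the `"qwen3" => :qwen2` mapping in `lib/bumblebee.ex`, this entry gives a loaded Qwen3 tokenizer `<|endoftext|>` as its unk/eos/pad token. A small sketch; the repository name is again an assumption for illustration.

    {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "Qwen/Qwen3-0.6B"})

    # Batched inputs are padded with the configured pad token ("<|endoftext|>").
    inputs = Bumblebee.apply_tokenizer(tokenizer, ["short prompt", "a somewhat longer prompt"])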