From 7ddc9ac28bf07f5e8082b351a031997073ef92db Mon Sep 17 00:00:00 2001
From: Bruno Bornsztein
Date: Mon, 21 Oct 2024 10:07:24 -0500
Subject: [PATCH 1/2] cosine similarity

---
 lib/langchain/evals/llm/cosine_similarity.rb  | 20 ++++++++
 lib/langchain/evals/llm/llm.rb                | 36 ++++++++++++++
 .../evals/llm/prompts/expected_answer.yml     | 17 +++++++
 .../evals/llm/cosine_similarity_spec.rb       | 19 ++++++++
 spec/langchain/evals/llm/llm_spec.rb          | 48 +++++++++++++++++++
 5 files changed, 140 insertions(+)
 create mode 100644 lib/langchain/evals/llm/cosine_similarity.rb
 create mode 100644 lib/langchain/evals/llm/llm.rb
 create mode 100644 lib/langchain/evals/llm/prompts/expected_answer.yml
 create mode 100644 spec/langchain/evals/llm/cosine_similarity_spec.rb
 create mode 100644 spec/langchain/evals/llm/llm_spec.rb

diff --git a/lib/langchain/evals/llm/cosine_similarity.rb b/lib/langchain/evals/llm/cosine_similarity.rb
new file mode 100644
index 000000000..bc8e282a8
--- /dev/null
+++ b/lib/langchain/evals/llm/cosine_similarity.rb
@@ -0,0 +1,20 @@
+module Langchain
+  module Evals
+    module LLM
+      class CosineSimilarity
+        attr_reader :llm
+
+        def initialize(llm:, prompt_template: nil)
+          @llm = llm
+        end
+
+        def score(question:, answer:, expected_answer:)
+          question_embedding = llm.embed(text: question).embedding
+          answer_embedding = llm.embed(text: answer).embedding
+
+          Langchain::Utils::CosineSimilarity.new(question_embedding, answer_embedding).calculate_similarity
+        end
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/llm.rb b/lib/langchain/evals/llm/llm.rb
new file mode 100644
index 000000000..c8511e796
--- /dev/null
+++ b/lib/langchain/evals/llm/llm.rb
@@ -0,0 +1,36 @@
+module Langchain
+  module Evals
+    module LLM
+      class LLM
+        attr_reader :llm, :prompt_template
+
+        def initialize(llm:, prompt_template: nil)
+          @llm = llm
+
+          @prompt_template = if prompt_template.nil?
+            Langchain::Prompt.load_from_path(
+              file_path: Langchain.root.join("langchain/evals/llm/prompts/expected_answer.yml")
+            )
+          else
+            prompt_template
+          end
+        end
+
+        def score(question:, answer:, expected_answer:)
+          prompt = prompt_template.format(
+            question: question,
+            answer: answer,
+            expected_answer: expected_answer
+          )
+          completion = llm.complete(prompt: prompt).completion
+
+          if completion.eql?("Y")
+            1.0
+          else
+            0.0
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/prompts/expected_answer.yml b/lib/langchain/evals/llm/prompts/expected_answer.yml
new file mode 100644
index 000000000..ebfb4ab02
--- /dev/null
+++ b/lib/langchain/evals/llm/prompts/expected_answer.yml
@@ -0,0 +1,17 @@
+_type: prompt
+input_variables:
+  - question
+  - answer
+  - expected_answer
+template: |
+  You are assessing a submitted answer on a given task or input. Here is the data:
+  [BEGIN DATA]
+  ***
+  [Input]: {question}
+  ***
+  [Submission]: {answer}
+  ***
+  [Expected Answer]: {expected_answer}
+  ***
+  [END DATA]
+  Does the submission match the Expected Answer? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission matches the expected answer. At the end, repeat just the letter again by itself on a new line.
diff --git a/spec/langchain/evals/llm/cosine_similarity_spec.rb b/spec/langchain/evals/llm/cosine_similarity_spec.rb
new file mode 100644
index 000000000..300af1d22
--- /dev/null
+++ b/spec/langchain/evals/llm/cosine_similarity_spec.rb
@@ -0,0 +1,19 @@
+RSpec.describe Langchain::Evals::LLM::CosineSimilarity do
+  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
+
+  describe "#score" do
+    subject { described_class.new(llm: llm) }
+
+    let(:question) { "What is 2 + 2?" }
+    let(:answer) { "The answer is 4" }
+    let(:expected_answer) { "2 + 2 = 4" }
+
+    before do
+      allow(subject.llm).to receive(:embed).and_return(double("Langchain::LLM::OpenAIResponse", embedding: [1, 0, 0]))
+    end
+
+    it "generates the score" do
+      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+    end
+  end
+end
diff --git a/spec/langchain/evals/llm/llm_spec.rb b/spec/langchain/evals/llm/llm_spec.rb
new file mode 100644
index 000000000..731236b21
--- /dev/null
+++ b/spec/langchain/evals/llm/llm_spec.rb
@@ -0,0 +1,48 @@
+RSpec.describe Langchain::Evals::LLM::LLM do
+  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
+
+  describe "#score" do
+    subject { described_class.new(llm: llm) }
+
+    let(:question) { "What is 2 + 2?" }
+    let(:answer) { "The answer is 4" }
+    let(:expected_answer) { "2 + 2 = 4" }
+
+    before do
+      allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
+    end
+
+    it "generates the score" do
+      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+    end
+  end
+
+  context "with custom prompt template" do
+    let(:prompt_template) {
+      Langchain::Prompt::PromptTemplate.new(
+        template: "Question: {question}. Answer: {answer}. Correct answer: {expected_answer}. Return 'Y' if answer matches correct answer, else 'N'",
+        input_variables: [
+          "question",
+          "answer",
+          "expected_answer"
+        ]
+      )
+    }
+
+    describe "#score" do
+      subject { described_class.new(llm: llm, prompt_template: prompt_template) }
+
+      let(:question) { "What is 2 + 2?" }
+      let(:answer) { "The answer is 4" }
+      let(:expected_answer) { "2 + 2 = 4" }
+
+      before do
+        allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
+      end
+
+      it "generates the score" do
+        expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+      end
+    end
+  end
+end

From c1b014286deb1ae2cc702f79613ae0afa5b3ba91 Mon Sep 17 00:00:00 2001
From: Andrei Bondarev
Date: Wed, 23 Oct 2024 21:10:03 -0400
Subject: [PATCH 2/2] Langchain::Evals::CosineSimilarity to calculate simple
 similarity between actual and expected outputs

---
 lib/langchain/evals/cosine_similarity.rb      | 18 +++++++
 lib/langchain/evals/llm/cosine_similarity.rb  | 20 --------
 lib/langchain/evals/llm/llm.rb                | 36 --------------
 .../evals/llm/prompts/expected_answer.yml     | 17 -------
 .../evals/{llm => }/cosine_similarity_spec.rb |  9 ++--
 spec/langchain/evals/llm/llm_spec.rb          | 48 -------------------
 6 files changed, 22 insertions(+), 126 deletions(-)
 create mode 100644 lib/langchain/evals/cosine_similarity.rb
 delete mode 100644 lib/langchain/evals/llm/cosine_similarity.rb
 delete mode 100644 lib/langchain/evals/llm/llm.rb
 delete mode 100644 lib/langchain/evals/llm/prompts/expected_answer.yml
 rename spec/langchain/evals/{llm => }/cosine_similarity_spec.rb (52%)
 delete mode 100644 spec/langchain/evals/llm/llm_spec.rb

diff --git a/lib/langchain/evals/cosine_similarity.rb b/lib/langchain/evals/cosine_similarity.rb
new file mode 100644
index 000000000..fee8445a5
--- /dev/null
+++ b/lib/langchain/evals/cosine_similarity.rb
@@ -0,0 +1,18 @@
+module Langchain
+  module Evals
+    class CosineSimilarity
+      attr_reader :llm
+
+      def initialize(llm:)
+        @llm = llm
+      end
+
+      def score(actual_output:, expected_output:)
+        vector_a = llm.embed(text: actual_output).embedding
+        vector_b = llm.embed(text: expected_output).embedding
+
+        Langchain::Utils::CosineSimilarity.new(vector_a, vector_b).calculate_similarity
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/cosine_similarity.rb b/lib/langchain/evals/llm/cosine_similarity.rb
deleted file mode 100644
index bc8e282a8..000000000
--- a/lib/langchain/evals/llm/cosine_similarity.rb
+++ /dev/null
@@ -1,20 +0,0 @@
-module Langchain
-  module Evals
-    module LLM
-      class CosineSimilarity
-        attr_reader :llm
-
-        def initialize(llm:, prompt_template: nil)
-          @llm = llm
-        end
-
-        def score(question:, answer:, expected_answer:)
-          question_embedding = llm.embed(text: question).embedding
-          answer_embedding = llm.embed(text: answer).embedding
-
-          Langchain::Utils::CosineSimilarity.new(question_embedding, answer_embedding).calculate_similarity
-        end
-      end
-    end
-  end
-end
diff --git a/lib/langchain/evals/llm/llm.rb b/lib/langchain/evals/llm/llm.rb
deleted file mode 100644
index c8511e796..000000000
--- a/lib/langchain/evals/llm/llm.rb
+++ /dev/null
@@ -1,36 +0,0 @@
-module Langchain
-  module Evals
-    module LLM
-      class LLM
-        attr_reader :llm, :prompt_template
-
-        def initialize(llm:, prompt_template: nil)
-          @llm = llm
-
-          @prompt_template = if prompt_template.nil?
-            Langchain::Prompt.load_from_path(
-              file_path: Langchain.root.join("langchain/evals/llm/prompts/expected_answer.yml")
-            )
-          else
-            prompt_template
-          end
-        end
-
-        def score(question:, answer:, expected_answer:)
-          prompt = prompt_template.format(
-            question: question,
-            answer: answer,
-            expected_answer: expected_answer
-          )
-          completion = llm.complete(prompt: prompt).completion
-
-          if completion.eql?("Y")
-            1.0
-          else
-            0.0
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/lib/langchain/evals/llm/prompts/expected_answer.yml b/lib/langchain/evals/llm/prompts/expected_answer.yml
deleted file mode 100644
index ebfb4ab02..000000000
--- a/lib/langchain/evals/llm/prompts/expected_answer.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-_type: prompt
-input_variables:
-  - question
-  - answer
-  - expected_answer
-template: |
-  You are assessing a submitted answer on a given task or input. Here is the data:
-  [BEGIN DATA]
-  ***
-  [Input]: {question}
-  ***
-  [Submission]: {answer}
-  ***
-  [Expected Answer]: {expected_answer}
-  ***
-  [END DATA]
-  Does the submission match the Expected Answer? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission matches the expected answer. At the end, repeat just the letter again by itself on a new line.
diff --git a/spec/langchain/evals/llm/cosine_similarity_spec.rb b/spec/langchain/evals/cosine_similarity_spec.rb
similarity index 52%
rename from spec/langchain/evals/llm/cosine_similarity_spec.rb
rename to spec/langchain/evals/cosine_similarity_spec.rb
index 300af1d22..999e0858e 100644
--- a/spec/langchain/evals/llm/cosine_similarity_spec.rb
+++ b/spec/langchain/evals/cosine_similarity_spec.rb
@@ -1,19 +1,18 @@
-RSpec.describe Langchain::Evals::LLM::CosineSimilarity do
+RSpec.describe Langchain::Evals::CosineSimilarity do
   let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
 
   describe "#score" do
     subject { described_class.new(llm: llm) }
 
-    let(:question) { "What is 2 + 2?" }
-    let(:answer) { "The answer is 4" }
-    let(:expected_answer) { "2 + 2 = 4" }
+    let(:actual_output) { "The answer is 4" }
+    let(:expected_output) { "2 + 2 = 4" }
 
     before do
       allow(subject.llm).to receive(:embed).and_return(double("Langchain::LLM::OpenAIResponse", embedding: [1, 0, 0]))
     end
 
     it "generates the score" do
-      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+      expect(subject.score(actual_output: actual_output, expected_output: expected_output)).to eq(1.0)
     end
   end
 end
diff --git a/spec/langchain/evals/llm/llm_spec.rb b/spec/langchain/evals/llm/llm_spec.rb
deleted file mode 100644
index 731236b21..000000000
--- a/spec/langchain/evals/llm/llm_spec.rb
+++ /dev/null
@@ -1,48 +0,0 @@
-RSpec.describe Langchain::Evals::LLM::LLM do
-  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
-
-  describe "#score" do
-    subject { described_class.new(llm: llm) }
-
-    let(:question) { "What is 2 + 2?" }
-    let(:answer) { "The answer is 4" }
-    let(:expected_answer) { "2 + 2 = 4" }
-
-    before do
-      allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
-    end
-
-    it "generates the score" do
-      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
-    end
-  end
-
-  context "with custom prompt template" do
-    let(:prompt_template) {
-      Langchain::Prompt::PromptTemplate.new(
-        template: "Question: {question}. Answer: {answer}. Correct answer: {expected_answer}. Return 'Y' if answer matches correct answer, else 'N'",
-        input_variables: [
-          "question",
-          "answer",
-          "expected_answer"
-        ]
-      )
-    }
-
-    describe "#score" do
-      subject { described_class.new(llm: llm, prompt_template: prompt_template) }
-
-      let(:question) { "What is 2 + 2?" }
-      let(:answer) { "The answer is 4" }
-      let(:expected_answer) { "2 + 2 = 4" }
-
-      before do
-        allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
-      end
-
-      it "generates the score" do
-        expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
-      end
-    end
-  end
-end
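
Usage sketch (not part of the patch series): a minimal example of calling the evaluator that PATCH 2/2 introduces, based only on the constructor and #score signatures shown above. The ENV["OPENAI_API_KEY"] variable name and the 0.9 pass threshold are illustrative assumptions, not something the patches define; a real run would issue two embedding requests to the configured LLM, whereas the specs stub #embed to avoid network calls.

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
evaluator = Langchain::Evals::CosineSimilarity.new(llm: llm)

# Embeds both strings with the configured LLM and returns their cosine similarity
# (closer to 1.0 means the actual output is semantically closer to the expected one).
score = evaluator.score(
  actual_output: "The answer is 4",
  expected_output: "2 + 2 = 4"
)

puts "similarity: #{score.round(3)}"
puts "PASS" if score >= 0.9 # illustrative threshold, not part of the patch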