From 7ddc9ac28bf07f5e8082b351a031997073ef92db Mon Sep 17 00:00:00 2001
From: Bruno Bornsztein
Date: Mon, 21 Oct 2024 10:07:24 -0500
Subject: [PATCH 1/2] cosine similarity

---
 lib/langchain/evals/llm/cosine_similarity.rb  | 20 ++++++++
 lib/langchain/evals/llm/llm.rb                | 36 ++++++++++++++
 .../evals/llm/prompts/expected_answer.yml     | 17 +++++++
 .../evals/llm/cosine_similarity_spec.rb       | 19 ++++++++
 spec/langchain/evals/llm/llm_spec.rb          | 48 +++++++++++++++++++
 5 files changed, 140 insertions(+)
 create mode 100644 lib/langchain/evals/llm/cosine_similarity.rb
 create mode 100644 lib/langchain/evals/llm/llm.rb
 create mode 100644 lib/langchain/evals/llm/prompts/expected_answer.yml
 create mode 100644 spec/langchain/evals/llm/cosine_similarity_spec.rb
 create mode 100644 spec/langchain/evals/llm/llm_spec.rb

diff --git a/lib/langchain/evals/llm/cosine_similarity.rb b/lib/langchain/evals/llm/cosine_similarity.rb
new file mode 100644
index 000000000..bc8e282a8
--- /dev/null
+++ b/lib/langchain/evals/llm/cosine_similarity.rb
@@ -0,0 +1,20 @@
+module Langchain
+  module Evals
+    module LLM
+      class CosineSimilarity
+        attr_reader :llm
+
+        def initialize(llm:, prompt_template: nil)
+          @llm = llm
+        end
+
+        def score(question:, answer:, expected_answer:)
+          question_embedding = llm.embed(text: question).embedding
+          answer_embedding = llm.embed(text: answer).embedding
+
+          Langchain::Utils::CosineSimilarity.new(question_embedding, answer_embedding).calculate_similarity
+        end
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/llm.rb b/lib/langchain/evals/llm/llm.rb
new file mode 100644
index 000000000..c8511e796
--- /dev/null
+++ b/lib/langchain/evals/llm/llm.rb
@@ -0,0 +1,36 @@
+module Langchain
+  module Evals
+    module LLM
+      class LLM
+        attr_reader :llm, :prompt_template
+
+        def initialize(llm:, prompt_template: nil)
+          @llm = llm
+
+          @prompt_template = if prompt_template.nil?
+            Langchain::Prompt.load_from_path(
+              file_path: Langchain.root.join("langchain/evals/llm/prompts/expected_answer.yml")
+            )
+          else
+            prompt_template
+          end
+        end
+
+        def score(question:, answer:, expected_answer:)
+          prompt = prompt_template.format(
+            question: question,
+            answer: answer,
+            expected_answer: expected_answer
+          )
+          completion = llm.complete(prompt: prompt).completion
+
+          if completion.eql?("Y")
+            1.0
+          else
+            0.0
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/prompts/expected_answer.yml b/lib/langchain/evals/llm/prompts/expected_answer.yml
new file mode 100644
index 000000000..ebfb4ab02
--- /dev/null
+++ b/lib/langchain/evals/llm/prompts/expected_answer.yml
@@ -0,0 +1,17 @@
+_type: prompt
+input_variables:
+  - question
+  - answer
+  - expected_answer
+template: |
+  You are assessing a submitted answer on a given task or input. Here is the data:
+  [BEGIN DATA]
+  ***
+  [Input]: {question}
+  ***
+  [Submission]: {answer}
+  ***
+  [Expected Answer]: {expected_answer}
+  ***
+  [END DATA]
+  Does the submission match the Expected Answer? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission matches the expected answer. At the end, repeat just the letter again by itself on a new line.
diff --git a/spec/langchain/evals/llm/cosine_similarity_spec.rb b/spec/langchain/evals/llm/cosine_similarity_spec.rb
new file mode 100644
index 000000000..300af1d22
--- /dev/null
+++ b/spec/langchain/evals/llm/cosine_similarity_spec.rb
@@ -0,0 +1,19 @@
+RSpec.describe Langchain::Evals::LLM::CosineSimilarity do
+  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
+
+  describe "#score" do
+    subject { described_class.new(llm: llm) }
+
+    let(:question) { "What is 2 + 2?" }
+    let(:answer) { "The answer is 4" }
+    let(:expected_answer) { "2 + 2 = 4" }
+
+    before do
+      allow(subject.llm).to receive(:embed).and_return(double("Langchain::LLM::OpenAIResponse", embedding: [1, 0, 0]))
+    end
+
+    it "generates the score" do
+      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+    end
+  end
+end
diff --git a/spec/langchain/evals/llm/llm_spec.rb b/spec/langchain/evals/llm/llm_spec.rb
new file mode 100644
index 000000000..731236b21
--- /dev/null
+++ b/spec/langchain/evals/llm/llm_spec.rb
@@ -0,0 +1,48 @@
+RSpec.describe Langchain::Evals::LLM::LLM do
+  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
+
+  describe "#score" do
+    subject { described_class.new(llm: llm) }
+
+    let(:question) { "What is 2 + 2?" }
+    let(:answer) { "The answer is 4" }
+    let(:expected_answer) { "2 + 2 = 4" }
+
+    before do
+      allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
+    end
+
+    it "generates the score" do
+      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+    end
+  end
+
+  context "with custom prompt template" do
+    let(:prompt_template) {
+      Langchain::Prompt::PromptTemplate.new(
+        template: "Question: {question}. Answer: {answer}. Correct answer: {expected_answer}. Return 'Y' if answer matches correct answer, else 'N'",
+        input_variables: [
+          "question",
+          "answer",
+          "expected_answer"
+        ]
+      )
+    }
+
+    describe "#score" do
+      subject { described_class.new(llm: llm, prompt_template: prompt_template) }
+
+      let(:question) { "What is 2 + 2?" }
+      let(:answer) { "The answer is 4" }
+      let(:expected_answer) { "2 + 2 = 4" }
+
+      before do
+        allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
+      end
+
+      it "generates the score" do
+        expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+      end
+    end
+  end
+end

From c1b014286deb1ae2cc702f79613ae0afa5b3ba91 Mon Sep 17 00:00:00 2001
From: Andrei Bondarev
Date: Wed, 23 Oct 2024 21:10:03 -0400
Subject: [PATCH 2/2] Langchain::Evals::CosineSimilarity to calculate simple
 similarity between actual and expected outputs

---
 lib/langchain/evals/cosine_similarity.rb      | 18 +++++++
 lib/langchain/evals/llm/cosine_similarity.rb  | 20 --------
 lib/langchain/evals/llm/llm.rb                | 36 --------------
 .../evals/llm/prompts/expected_answer.yml     | 17 -------
 .../evals/{llm => }/cosine_similarity_spec.rb |  9 ++--
 spec/langchain/evals/llm/llm_spec.rb          | 48 -------------------
 6 files changed, 22 insertions(+), 126 deletions(-)
 create mode 100644 lib/langchain/evals/cosine_similarity.rb
 delete mode 100644 lib/langchain/evals/llm/cosine_similarity.rb
 delete mode 100644 lib/langchain/evals/llm/llm.rb
 delete mode 100644 lib/langchain/evals/llm/prompts/expected_answer.yml
 rename spec/langchain/evals/{llm => }/cosine_similarity_spec.rb (52%)
 delete mode 100644 spec/langchain/evals/llm/llm_spec.rb

diff --git a/lib/langchain/evals/cosine_similarity.rb b/lib/langchain/evals/cosine_similarity.rb
new file mode 100644
index 000000000..fee8445a5
--- /dev/null
+++ b/lib/langchain/evals/cosine_similarity.rb
@@ -0,0 +1,18 @@
+module Langchain
+  module Evals
+    class CosineSimilarity
+      attr_reader :llm
+
+      def initialize(llm:)
+        @llm = llm
+      end
+
+      def score(actual_output:, expected_output:)
+        vector_a = llm.embed(text: actual_output).embedding
+        vector_b = llm.embed(text: expected_output).embedding
+
+        Langchain::Utils::CosineSimilarity.new(vector_a, vector_b).calculate_similarity
+      end
+    end
+  end
+end
diff --git a/lib/langchain/evals/llm/cosine_similarity.rb b/lib/langchain/evals/llm/cosine_similarity.rb
deleted file mode 100644
index bc8e282a8..000000000
--- a/lib/langchain/evals/llm/cosine_similarity.rb
+++ /dev/null
@@ -1,20 +0,0 @@
-module Langchain
-  module Evals
-    module LLM
-      class CosineSimilarity
-        attr_reader :llm
-
-        def initialize(llm:, prompt_template: nil)
-          @llm = llm
-        end
-
-        def score(question:, answer:, expected_answer:)
-          question_embedding = llm.embed(text: question).embedding
-          answer_embedding = llm.embed(text: answer).embedding
-
-          Langchain::Utils::CosineSimilarity.new(question_embedding, answer_embedding).calculate_similarity
-        end
-      end
-    end
-  end
-end
diff --git a/lib/langchain/evals/llm/llm.rb b/lib/langchain/evals/llm/llm.rb
deleted file mode 100644
index c8511e796..000000000
--- a/lib/langchain/evals/llm/llm.rb
+++ /dev/null
@@ -1,36 +0,0 @@
-module Langchain
-  module Evals
-    module LLM
-      class LLM
-        attr_reader :llm, :prompt_template
-
-        def initialize(llm:, prompt_template: nil)
-          @llm = llm
-
-          @prompt_template = if prompt_template.nil?
-            Langchain::Prompt.load_from_path(
-              file_path: Langchain.root.join("langchain/evals/llm/prompts/expected_answer.yml")
-            )
-          else
-            prompt_template
-          end
-        end
-
-        def score(question:, answer:, expected_answer:)
-          prompt = prompt_template.format(
-            question: question,
-            answer: answer,
-            expected_answer: expected_answer
-          )
-          completion = llm.complete(prompt: prompt).completion
-
-          if completion.eql?("Y")
-            1.0
-          else
-            0.0
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/lib/langchain/evals/llm/prompts/expected_answer.yml b/lib/langchain/evals/llm/prompts/expected_answer.yml
deleted file mode 100644
index ebfb4ab02..000000000
--- a/lib/langchain/evals/llm/prompts/expected_answer.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-_type: prompt
-input_variables:
-  - question
-  - answer
-  - expected_answer
-template: |
-  You are assessing a submitted answer on a given task or input. Here is the data:
-  [BEGIN DATA]
-  ***
-  [Input]: {question}
-  ***
-  [Submission]: {answer}
-  ***
-  [Expected Answer]: {expected_answer}
-  ***
-  [END DATA]
-  Does the submission match the Expected Answer? First, write out in a step by step manner your reasoning about each criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print only the single character "Y" or "N" (without quotes or punctuation) on its own line corresponding to the correct answer of whether the submission matches the expected answer. At the end, repeat just the letter again by itself on a new line.
diff --git a/spec/langchain/evals/llm/cosine_similarity_spec.rb b/spec/langchain/evals/cosine_similarity_spec.rb
similarity index 52%
rename from spec/langchain/evals/llm/cosine_similarity_spec.rb
rename to spec/langchain/evals/cosine_similarity_spec.rb
index 300af1d22..999e0858e 100644
--- a/spec/langchain/evals/llm/cosine_similarity_spec.rb
+++ b/spec/langchain/evals/cosine_similarity_spec.rb
@@ -1,19 +1,18 @@
-RSpec.describe Langchain::Evals::LLM::CosineSimilarity do
+RSpec.describe Langchain::Evals::CosineSimilarity do
   let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
 
   describe "#score" do
     subject { described_class.new(llm: llm) }
 
-    let(:question) { "What is 2 + 2?" }
-    let(:answer) { "The answer is 4" }
-    let(:expected_answer) { "2 + 2 = 4" }
+    let(:actual_output) { "The answer is 4" }
+    let(:expected_output) { "2 + 2 = 4" }
 
     before do
       allow(subject.llm).to receive(:embed).and_return(double("Langchain::LLM::OpenAIResponse", embedding: [1, 0, 0]))
     end
 
     it "generates the score" do
-      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
+      expect(subject.score(actual_output: actual_output, expected_output: expected_output)).to eq(1.0)
     end
   end
 end
diff --git a/spec/langchain/evals/llm/llm_spec.rb b/spec/langchain/evals/llm/llm_spec.rb
deleted file mode 100644
index 731236b21..000000000
--- a/spec/langchain/evals/llm/llm_spec.rb
+++ /dev/null
@@ -1,48 +0,0 @@
-RSpec.describe Langchain::Evals::LLM::LLM do
-  let(:llm) { Langchain::LLM::OpenAI.new(api_key: "123") }
-
-  describe "#score" do
-    subject { described_class.new(llm: llm) }
-
-    let(:question) { "What is 2 + 2?" }
-    let(:answer) { "The answer is 4" }
-    let(:expected_answer) { "2 + 2 = 4" }
-
-    before do
-      allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
-    end
-
-    it "generates the score" do
-      expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
-    end
-  end
-
-  context "with custom prompt template" do
-    let(:prompt_template) {
-      Langchain::Prompt::PromptTemplate.new(
-        template: "Question: {question}. Answer: {answer}. Correct answer: {expected_answer}. Return 'Y' if answer matches correct answer, else 'N'",
-        input_variables: [
-          "question",
-          "answer",
-          "expected_answer"
-        ]
-      )
-    }
-
-    describe "#score" do
-      subject { described_class.new(llm: llm, prompt_template: prompt_template) }
-
-      let(:question) { "What is 2 + 2?" }
-      let(:answer) { "The answer is 4" }
-      let(:expected_answer) { "2 + 2 = 4" }
-
-      before do
-        allow(subject.llm).to receive(:complete).and_return(double("Langchain::LLM::OpenAIResponse", completion: "Y"))
-      end
-
-      it "generates the score" do
-        expect(subject.score(question: question, answer: answer, expected_answer: expected_answer)).to eq(1.0)
-      end
-    end
-  end
-end
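
Usage sketch (not part of the patch series): a minimal example of calling the evaluator that PATCH 2/2 introduces, based only on the constructor and #score signatures shown above. The ENV["OPENAI_API_KEY"] variable name and the 0.9 pass threshold are illustrative assumptions, not something the patches define; a real run would issue two embedding requests to the configured LLM, whereas the specs stub #embed to avoid network calls.

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
evaluator = Langchain::Evals::CosineSimilarity.new(llm: llm)

# Embeds both strings with the configured LLM and returns their cosine similarity
# (closer to 1.0 means the actual output is semantically closer to the expected one).
score = evaluator.score(
  actual_output: "The answer is 4",
  expected_output: "2 + 2 = 4"
)

puts "similarity: #{score.round(3)}"
puts "PASS" if score >= 0.9 # illustrative threshold, not part of the patch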