From 26611fc4e6dc44db63221f80cef29ffb0833572f Mon Sep 17 00:00:00 2001
From: capjamesg <jamesg@jamesg.blog>
Date: Thu, 24 Oct 2024 01:28:24 +0000
Subject: [PATCH] Update results

---
 index.html              | 196 +++++++++++++++++++---------------------
 results/2024-10-24.json | 106 ++++++++++++++++++++++
 2 files changed, 198 insertions(+), 104 deletions(-)
 create mode 100644 results/2024-10-24.json
diff --git a/index.html b/index.html
index 0f05381..2ab865f 100644
--- a/index.html
+++ b/index.html
@@ -40,7 +40,7 @@ <h1>How's GPT-4o Doing?</h1>
                     <p>You can contribute your own tests, too! See the <a href="https://github.com/roboflow/gpt-checkup?tab=readme-ov-file#-contribute">GitHub README</a> for contributing instructions.</p>
                 </div>
                 <div class="header_subtitle">
-                    <p>Tests are run every day at 1am PT. Last updated October 23, 2024.</p>
+                    <p>Tests are run every day at 1am PT. Last updated October 24, 2024.</p>
                     <p>Made with ❤️ by the team at <a href="https://roboflow.com">Roboflow</a>.</p>
                 </div>
                 <div class="header_cta">
@@ -58,12 +58,12 @@ <h1>How's GPT-4o Doing?</h1>
                         <div class="feature_header" style="min-height: auto">
                             <div class="feature_header_text" style="gap: var(--spacing-sizing-4)">
                                 <h2>Response Time</h2>
-                                <p style="font-size: 16px; color: var(--gray-700)">Today, the average response time to receive results from our tests was <b>3.95 seconds</b> per request.</p>
+                                <p style="font-size: 16px; color: var(--gray-700)">Today, the average response time to receive results from our tests was <b>3.94 seconds</b> per request.</p>
                                 <p class="subtitle">This number only accounts for requests made by this application.</p>
                             </div>
                             <div class="chart">
                                 <div class="chart_box chart_box_green">
-                                    <p>3.95 s</p>
+                                    <p>3.94 s</p>
                                 </div>
                             </div>
                         </div>
@@ -122,17 +122,17 @@ <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
                                         <img class="test_image" src="images/fruit.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>7</pre>
+                                        <pre>8</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
                             </div>
-                                
+                                      
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Handwriting OCR</h2>
-                                        <p>Can GPT-4V read handwriting?</p>
+                                        <h2>Object Detection</h2>
+                                        <p>Can GPT-4V detect objects in an image?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
@@ -145,48 +145,48 @@ <h2>Handwriting OCR</h2>
                                         <b class="summary_title">Last 7-Day Performance</b>
                                         <div class="summary_squares">
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
                                             <div class="summary_square summary_square_red"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
                                         </div>
                                     </div>
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>86.0%</b> of the time.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We send a image of a handwritten note to determine if it can correctly read the text. If it correctly gets the text, it gets a 100%. Otherwise, it gets a 0%.</pre>
+                                        <pre class="test_method">We provide GPT-4V with an image with a known object. We ask it to provide a normalized bounding box of the object and for scoring, we calculate the intersection over union (IOU) between the predicted bounding box and the correct bounding box.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            Read the text in the image. Return only the text, with punctuation.
+                                            If there are banana in this image, return a JSON object with `x`, `y`, `width` and `height` properties of the banana. All values should be normalized between 0-1 and x&y should be the center point.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/ocr.jpeg" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/fruit.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>The words of songs on the album have been echoing in my head all week. "Fades into the grey of my day, oh tea."</pre>
+                                        <pre>{'x': 0.514, 'y': 0.34, 'width': 0.28, 'height': 0.4}</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
                             </div>
-                                  
+                              
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Object Detection</h2>
-                                        <p>Can GPT-4V detect objects in an image?</p>
+                                        <h2>Graph Understanding</h2>
+                                        <p>Can GPT-4V identify points on a graph?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
@@ -216,21 +216,28 @@ <h2>Object Detection</h2>
                                         </div>
                                     </div>
                                     <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.01</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We provide GPT-4V with an image with a known object. We ask it to provide a normalized bounding box of the object and for scoring, we calculate the intersection over union (IOU) between the predicted bounding box and the correct bounding box.</pre>
+                                        <pre class="test_method">We send a picture of a graph with four labeled points and ask GPT-4V to identify the points. This test is scored by the accuracy of each point. The accuracy is measured by averaging a ratio of the correct values to the answered values.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            If there are banana in this image, return a JSON object with `x`, `y`, `width` and `height` properties of the banana. All values should be normalized between 0-1 and x&y should be the center point.
+                                            State positions of points A through D. Return only a JSON with properties A-D, each having a object with properties for integers matching the respective point: `quantity` and `price`.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/fruit.jpeg" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/graph.png" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>{'x': 0.5, 'y': 0.35, 'width': 0.2, 'height': 0.4}</pre>
+                                        <pre>```json
+{
+    "A": {"quantity": 18, "price": 10},
+    "B": {"quantity": 24, "price": 20},
+    "C": {"quantity": 28, "price": 30},
+    "D": {"quantity": 32, "price": 40}
+}
+```</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
@@ -239,8 +246,8 @@ <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Graph Understanding</h2>
-                                        <p>Can GPT-4V identify points on a graph?</p>
+                                        <h2>Color Recognition</h2>
+                                        <p>Can GPT-4V identify colors accurately?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
@@ -270,38 +277,25 @@ <h2>Graph Understanding</h2>
                                         </div>
                                     </div>
                                     <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.011</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We send a picuture of a graph with four labeled points and ask GPT-4V to identify the points. This test is scored by the accuracy of each point. The accuracy is measured by averaging a ratio of the correct values to the answered values.</pre>
+                                        <pre class="test_method">We provide GPT-4V with an image with multiple shapes with differing colors. We ask it to identify the color of a particular shape in RGB color codes.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            State positions of points A through D. Return only a JSON with properties A-D, each having a object with properties for integers matching the respective point: `quantity` and `price`.
+                                            Guess the RGB color code of the rectangle and return only the result in JSON. The JSON should have three integer properties: 'R', 'G' and 'B'
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/graph.png" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/color.png" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                                         <pre>```json
 {
-    "A": {
-        "quantity": 15,
-        "price": 10
-    },
-    "B": {
-        "quantity": 22,
-        "price": 20
-    },
-    "C": {
-        "quantity": 27,
-        "price": 30
-    },
-    "D": {
-        "quantity": 31,
-        "price": 40
-    }
+  "R": 79,
+  "G": 16,
+  "B": 127
 }
 ```</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
@@ -312,8 +306,8 @@ <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Color Recognition</h2>
-                                        <p>Can GPT-4V identify colors accurately?</p>
+                                        <h2>Annotation Quality Assurance</h2>
+                                        <p>Can GPT-4V identify image labeling mistakes?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
@@ -343,25 +337,23 @@ <h2>Color Recognition</h2>
                                         </div>
                                     </div>
                                     <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We provide GPT-4V with an image with multiple shapes with differing colors. We ask it to identify the color of a particular shape in RGB color codes.</pre>
+                                        <pre class="test_method">We provide an image from a self-driving car dataset with three intentionally missing annotations. We ask GPT-4V to identify the number of missing annotations. We score the result based on the number of missing annotations identified.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            Guess the RGB color code of the rectangle and return only the result in JSON. The JSON should have three integer properties: 'R', 'G' and 'B'
+                                            This is a sample image from a dataset with cars labeled with red bounding boxes. Are there any missing annotations? Return a JSON with a integer property 'missing' for the number of missing annotations.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/color.png" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/annotationqa.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                                         <pre>```json
 {
-  "R": 80,
-  "G": 0,
-  "B": 120
+  "missing": 1
 }
 ```</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
@@ -372,8 +364,8 @@ <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Annotation Quality Assurance</h2>
-                                        <p>Can GPT-4V identify image labeling mistakes?</p>
+                                        <h2>Measurement Test</h2>
+                                        <p>Can GPT-4 Measure Items Using a Reference?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
@@ -403,30 +395,31 @@ <h2>Annotation Quality Assurance</h2>
                                         </div>
                                     </div>
                                     <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We provide a image from a self driving car dataset with intentionally three missing annotations. We ask GPT-4V to identify the number of missing annotations. We score the result based on the number of missing annotations identfied.</pre>
+                                        <pre class="test_method">This test gives GPT-4 an image of a square sticker with a ruler on one side and asks it to provide a number for the length and width. We score this test based on percent error, gauging how far it is from the actual value.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            This is a sample image from a dataset with cars labeled with red bounding boxes. Are there any missing annotations? Return a JSON with a integer property 'missing' for the number of missing annotations.
+                                            What is the length and width of this square sticker, based on the ruler? Return a JSON with float properties for `length` and `width` representing inches.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/annotationqa.jpeg" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/measurement.jpg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                                         <pre>```json
 {
-  "missing": 1
+  "length": 3.0,
+  "width": 3.0
 }
 ```</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
                             </div>
-                                
+                              
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
@@ -651,12 +644,12 @@ <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                                     </div>
                                 </div>
                             </div>
-                                
+                              
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Structured Data OCR</h2>
-                                        <p>Can GPT-4V extract structured data from an image?</p>
+                                        <h2>Handwriting OCR</h2>
+                                        <p>Can GPT-4V read handwriting?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
@@ -675,7 +668,7 @@ <h2>Structured Data OCR</h2>
                                             
                                             <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_green"></div>
+                                            <div class="summary_square summary_square_red"></div>
                                             
                                             <div class="summary_square summary_square_green"></div>
                                             
@@ -685,22 +678,22 @@ <h2>Structured Data OCR</h2>
                                             
                                         </div>
                                     </div>
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.007</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>86.0%</b> of the time.</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We send a picture of a prescription bottle with a label, and ask it to extract pieces of relevant data. This is scored using the Levenshtein ratio between the output and the correct answer, which is based on the number of edits necessary to achieve the correct answer.</pre>
+                                        <pre class="test_method">We send an image of a handwritten note to determine if it can correctly read the text. If it correctly gets the text, it gets a 100%. Otherwise, it gets a 0%.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            Return a JSON array containing information about the prescription in this image. Each object should contain the following: `name` should have the name of the patient. `time_per_day` should have a integer with thetimes the medication should be taken in a day. `medication` should have the brand name of the medication. `dosage` should have a integer in mg units of each tablet. `rx_number` should have the prescription number, also marked Rx. The image is a stock photo which contains no personal information and is all fictional.
+                                            Read the text in the image. Return only the text, with punctuation.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/prescription.png" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/ocr.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]</pre>
+                                        <pre>The words of songs on the album have been echoing in my head all week. "Fades into the grey of my day old tea."</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
@@ -709,8 +702,8 @@ <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Math OCR</h2>
-                                        <p>Can GPT-4V recognize math equations?</p>
+                                        <h2>Structured Data OCR</h2>
+                                        <p>Can GPT-4V extract structured data from an image?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
@@ -740,31 +733,31 @@ <h2>Math OCR</h2>
                                         </div>
                                     </div>
                                     <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.007</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">We provide a image of a math equation and ask it to provide a LaTeX string of the equation. This is scored using the Levenshtein ratio between the output and the correct answer, which is based on the number of edits necessary to achieve the correct answer.</pre>
+                                        <pre class="test_method">We send a picture of a prescription bottle with a label, and ask it to extract pieces of relevant data. This is scored using the Levenshtein ratio between the output and the correct answer, which is based on the number of edits necessary to achieve the correct answer.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            Produce a JSON array with a LaTeX string of each equation in the image.
+                                            Return a JSON array containing information about the prescription in this image. Each object should contain the following: `name` should have the name of the patient. `time_per_day` should have a integer with thetimes the medication should be taken in a day. `medication` should have the brand name of the medication. `dosage` should have a integer in mg units of each tablet. `rx_number` should have the prescription number, also marked Rx. The image is a stock photo which contains no personal information and is all fictional.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/math.jpeg" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/prescription.png" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>3x^2-6x+2</pre>
+                                        <pre>[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
                             </div>
-                                      
+                              
                             <div class="feature_card">
                                 <div class="feature_header">
                                     <div class="feature_header_text">
-                                        <h2>Measurement Test</h2>
-                                        <p>Can GPT-4 Measure Items Using a Reference?</p>
+                                        <h2>Math OCR</h2>
+                                        <p>Can GPT-4V recognize math equations?</p>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
@@ -777,48 +770,43 @@ <h2>Measurement Test</h2>
                                         <b class="summary_title">Last 7-Day Performance</b>
                                         <div class="summary_squares">
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
-                                            <div class="summary_square summary_square_red"></div>
+                                            <div class="summary_square summary_square_green"></div>
                                             
                                         </div>
                                     </div>
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
                                     <div class="explainer">
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
-                                        <pre class="test_method">This test gives GPT-4 a image of a square sticker with a ruler on one side and asks it to provide a number for the length and width. We score this test based on precent error, gauging how far it is from the actual value.</pre>
+                                        <pre class="test_method">We provide an image of a math equation and ask it to provide a LaTeX string of the equation. This is scored using the Levenshtein ratio between the output and the correct answer, which is based on the number of edits necessary to achieve the correct answer.</pre>
                                         <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <pre class="prompt">
-                                            What is the length and width of this square sticker, based on the ruler? Return a JSON with float properties for `length` and `width` representing inches.
+                                            Produce a JSON array with a LaTeX string of each equation in the image.
                                         </pre>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
-                                        <img class="test_image" src="images/measurement.jpg" alt="Image of the input into GPT-4" />
+                                        <img class="test_image" src="images/math.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>```json
-{
-  "length": 3.5,
-  "width": 3.5
-}
-```</pre>
+                                        <pre>3x^2-6x+2</pre>
                                         <p class="subtitle" style="margin-top: 16px; text-align: center">Test submitted by <a href="https://roboflow.com" target="_blank">Roboflow</a></p>
                                     </div>
                                 </div>
                             </div>
-                                 
+                                           
                         </section>
                     </section>
                     <section class="feature_card_wide" id="methodology">
diff --git a/results/2024-10-24.json b/results/2024-10-24.json
new file mode 100644
index 0000000..8519ab8
--- /dev/null
+++ b/results/2024-10-24.json
@@ -0,0 +1,106 @@
+{
+    "zero_shot_classification": {
+        "score": 1,
+        "success": true,
+        "price": 0.00481,
+        "pass_fail": "Pass",
+        "response_time": 3.3009657859802246,
+        "result": "Toyota Camry"
+    },
+    "count_fruit": {
+        "score": 0,
+        "success": false,
+        "price": 0.007870000000000002,
+        "pass_fail": "Fail",
+        "response_time": 2.117178201675415,
+        "result": "8"
+    },
+    "document_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.008539999999999999,
+        "pass_fail": "Pass",
+        "response_time": 2.9634759426116943,
+        "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the Midnights album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times."
+    },
+    "handwriting_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.00876,
+        "pass_fail": "Pass",
+        "response_time": 4.376910209655762,
+        "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\""
+    },
+    "extraction_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.00719,
+        "pass_fail": "Pass",
+        "response_time": 2.957542896270752,
+        "result": "[{'name': 'Mary Thomas', 'time_per_day': 1, 'medication': 'Atenolol', 'dosage': 100, 'rx_number': '1234567-12345'}]"
+    },
+    "math_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.015290000000000002,
+        "pass_fail": "Pass",
+        "response_time": 2.6341590881347656,
+        "result": "3x^2-6x+2"
+    },
+    "object_detection": {
+        "score": 0.512212643678161,
+        "success": false,
+        "price": 0.009490000000000002,
+        "pass_fail": "Fail",
+        "response_time": 3.117370843887329,
+        "result": "{'x': 0.514, 'y': 0.34, 'width': 0.28, 'height': 0.4}"
+    },
+    "graph_understanding": {
+        "score": 0.96,
+        "success": false,
+        "price": 0.01019,
+        "pass_fail": "Fail",
+        "response_time": 3.61852765083313,
+        "result": "```json\n{\n    \"A\": {\"quantity\": 18, \"price\": 10},\n    \"B\": {\"quantity\": 24, \"price\": 20},\n    \"C\": {\"quantity\": 28, \"price\": 30},\n    \"D\": {\"quantity\": 32, \"price\": 40}\n}\n```"
+    },
+    "color_recognition": {
+        "score": 0.9464052287581699,
+        "success": false,
+        "price": 0.008870000000000001,
+        "pass_fail": "Fail",
+        "response_time": 1.9738786220550537,
+        "result": "```json\n{\n  \"R\": 79,\n  \"G\": 16,\n  \"B\": 127\n}\n```"
+    },
+    "annotation_qa": {
+        "score": 0.33333333333333337,
+        "success": false,
+        "price": 0.015300000000000001,
+        "pass_fail": "Fail",
+        "response_time": 2.307623863220215,
+        "result": "```json\n{\n  \"missing\": 1\n}\n```"
+    },
+    "measurement": {
+        "score": 0.8571428571428572,
+        "success": false,
+        "price": 0.00877,
+        "pass_fail": "Fail",
+        "response_time": 4.103639602661133,
+        "result": "```json\n{\n  \"length\": 3.0,\n  \"width\": 3.0\n}\n```"
+    },
+    "easy_captcha": {
+        "score": 0,
+        "success": false,
+        "price": 0.004850000000000001,
+        "pass_fail": "Fail",
+        "response_time": 1.0324516296386719,
+        "result": "i'm sorry, i can't assist with that."
+    },
+    "easy_captcha_persuade": {
+        "score": 0,
+        "success": false,
+        "price": 0.005350000000000001,
+        "pass_fail": "Fail",
+        "response_time": 1.048719882965088,
+        "result": "i'm sorry, i can't assist with that."
+    }
+}
\ No newline at end of file