GammaTauAI · closedLoop · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023 · Apr 12, 2023
diff --git a/.env.example b/.env.example
@@ -0,0 +1,4 @@
+# Copy this file to .env and fill in the values
+
+# Set `LEETCODE_SESSION` to the value of the `LEETCODE_SESSION` cookie from a signed-in LeetCode session
+LEETCODE_SESSION=""
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ env/
 __pycache__/
 SubmissionTest.ipynb
 .vscode/
+
+*.egg-info
diff --git a/README.md b/README.md
@@ -31,10 +31,32 @@ Supports:
   - Reflexion + GPT-4: `???`
 
 ### Setup:
-- pip install requirements
-- Set environment variable `LEETCODE_SESSION` to the cookie `LEETCODE_SESSION` from a signed-in Leetcode session
 
-### Example usage:
+  python3 -m venv env
+  source env/bin/activate
+  pip install -r requirements.txt
+
+- Set the environment variable `LEETCODE_SESSION` to the value of the `LEETCODE_SESSION` cookie from a signed-in LeetCode session, or set it in the `.env` file (copy `.env.example` to `.env` and fill in the value)
+
+
+### Basic Usage
+
+```python
+
+from leetcode_hard_gym.main import run_all
+
+# Define `generate_one_completion` as a function that takes a prompt and returns the generated code as a string
+def generate_one_completion(prompt):
+    return "def hello_world():\n    print('hello world')"
+
+run_all(
+  generate_one_completion,
+  output_file="results.jsonl",
+  lang="python3"
+)
+```
+
+### Detailed Example usage:
 
 We can load the code-snippet annotated dataset like so:
 
@@ -46,15 +68,15 @@ row = data.iloc[0]
 
 Then we can instantiate a submission environment ...
 ```python
-from leetcode_env.environment import LeetCodeEnv
+from leetcode_hard_gym.leetcode_env.environment import LeetCodeEnv
 
 env = LeetCodeEnv()
 ```
 
 ... and build a submission using a row from the dataset ...
 
 ```python
-from leetcode_env.leetcode_types import LeetCodeSubmission
+from leetcode_hard_gym.leetcode_env.leetcode_types import LeetCodeSubmission
 
 code = """
 class Solution:

diff --git a/__init__.py → leetcode_hard_gym/__init__.py b/__init__.py → leetcode_hard_gym/__init__.py
diff --git a/example.py → leetcode_hard_gym/example.py b/example.py → leetcode_hard_gym/example.py
@@ -1,23 +1,26 @@
 from .leetcode_env.environment import LeetCodeEnv
-from .leetcode_env.leetcode_types import LeetCodeSubmission, ProgrammingLanguage
+from .leetcode_env.leetcode_types import (LeetCodeSubmission,
+                                          ProgrammingLanguage)
 from .leetcode_env.utils import id_from_slug
+
 code = """
 class Solution:
     def twoSum(self, nums, target):
         return [0]
 """
 lang = ProgrammingLanguage.PYTHON3
 question_id = 1
-question_slug = 'two-sum'
+question_slug = "two-sum"
 
-sub = LeetCodeSubmission(code=code,
-                         lang=lang,
-                         question_id=id_from_slug(question_slug),
-                         question_slug=question_slug)
+sub = LeetCodeSubmission(
+    code=code,
+    lang=lang,
+    question_id=id_from_slug(question_slug),
+    question_slug=question_slug,
+)
 
 env = LeetCodeEnv()
 
 status, reward, done, submission_result = env.step(sub)
 
 print(status, reward, done, submission_result)
-
diff --git a/leetcode_dataset/BuildDataset.ipynb → ...d_gym/leetcode_dataset/BuildDataset.ipynb b/leetcode_dataset/BuildDataset.ipynb → ...d_gym/leetcode_dataset/BuildDataset.ipynb
diff --git a/leetcode_dataset/BuildHumanEval.ipynb → ...gym/leetcode_dataset/BuildHumanEval.ipynb b/leetcode_dataset/BuildHumanEval.ipynb → ...gym/leetcode_dataset/BuildHumanEval.ipynb
diff --git a/leetcode_dataset/CleanDataset.ipynb → ...d_gym/leetcode_dataset/CleanDataset.ipynb b/leetcode_dataset/CleanDataset.ipynb → ...d_gym/leetcode_dataset/CleanDataset.ipynb
diff --git a/...ned/leetcode_hard_with_snippets_clean.csv → ...ned/leetcode_hard_with_snippets_clean.csv b/...ned/leetcode_hard_with_snippets_clean.csv → ...ned/leetcode_hard_with_snippets_clean.csv
diff --git a/...eval/leetcode-hard-py-100-sample-45.jsonl → ...eval/leetcode-hard-py-100-sample-45.jsonl b/...eval/leetcode-hard-py-100-sample-45.jsonl → ...eval/leetcode-hard-py-100-sample-45.jsonl
diff --git a/...leetcode-hard-py-100-sample-comment.jsonl → ...leetcode-hard-py-100-sample-comment.jsonl b/...leetcode-hard-py-100-sample-comment.jsonl → ...leetcode-hard-py-100-sample-comment.jsonl
diff --git a/...al/leetcode-hard-py-100-sample-desc.jsonl → ...al/leetcode-hard-py-100-sample-desc.jsonl b/...al/leetcode-hard-py-100-sample-desc.jsonl → ...al/leetcode-hard-py-100-sample-desc.jsonl
diff --git a/...maneval/leetcode-hard-py-100-sample.jsonl → ...maneval/leetcode-hard-py-100-sample.jsonl b/...maneval/leetcode-hard-py-100-sample.jsonl → ...maneval/leetcode-hard-py-100-sample.jsonl
diff --git a/.../leetcode-hard-py-40-uncontaminated.jsonl → .../leetcode-hard-py-40-uncontaminated.jsonl b/.../leetcode-hard-py-40-uncontaminated.jsonl → .../leetcode-hard-py-40-uncontaminated.jsonl
diff --git a/...al/leetcode-hard-rs-100-sample-desc.jsonl → ...al/leetcode-hard-rs-100-sample-desc.jsonl b/...al/leetcode-hard-rs-100-sample-desc.jsonl → ...al/leetcode-hard-rs-100-sample-desc.jsonl
diff --git a/...maneval/leetcode-hard-rs-100-sample.jsonl → ...maneval/leetcode-hard-rs-100-sample.jsonl b/...maneval/leetcode-hard-rs-100-sample.jsonl → ...maneval/leetcode-hard-rs-100-sample.jsonl
diff --git a/.../leetcode-hard-rs-40-uncontaminated.jsonl → .../leetcode-hard-rs-40-uncontaminated.jsonl b/.../leetcode-hard-rs-40-uncontaminated.jsonl → .../leetcode-hard-rs-40-uncontaminated.jsonl
diff --git a/...ode_dataset/data/raw/leetcode_dataset.csv → ...ode_dataset/data/raw/leetcode_dataset.csv b/...ode_dataset/data/raw/leetcode_dataset.csv → ...ode_dataset/data/raw/leetcode_dataset.csv
diff --git a/..._snippets/leetcode_hard_with_snippets.csv → ..._snippets/leetcode_hard_with_snippets.csv b/..._snippets/leetcode_hard_with_snippets.csv → ..._snippets/leetcode_hard_with_snippets.csv
diff --git a/...ets/leetcode_hard_with_snippets_clean.csv → ...ets/leetcode_hard_with_snippets_clean.csv b/...ets/leetcode_hard_with_snippets_clean.csv → ...ets/leetcode_hard_with_snippets_clean.csv
diff --git a/...ode_hard_with_snippets_uncontaminated.csv → ...ode_hard_with_snippets_uncontaminated.csv b/...ode_hard_with_snippets_uncontaminated.csv → ...ode_hard_with_snippets_uncontaminated.csv
diff --git a/..._with_snippets_uncontaminated_cleaned.csv → ..._with_snippets_uncontaminated_cleaned.csv b/..._with_snippets_uncontaminated_cleaned.csv → ..._with_snippets_uncontaminated_cleaned.csv
diff --git a/leetcode_dataset/utils.py → leetcode_hard_gym/leetcode_dataset/utils.py b/leetcode_dataset/utils.py → leetcode_hard_gym/leetcode_dataset/utils.py
@@ -1,14 +1,16 @@
 import json
 import time
+
 import requests
 from bs4 import BeautifulSoup
 
+
 def get_question(url):
     """
     Get the question page
     """
     while True:
-        res = requests.get(url) # type: ignore
+        res = requests.get(url)  # type: ignore
         status = res.status_code
         if status == 200:
             return res
@@ -17,12 +19,14 @@ def get_question(url):
         else:
             print(status)
             time.sleep(300)
-
+
+
 def title_slug(title):
     """
     Format the title into a title slug
     """
-    return '-'.join(title.lower().split())
+    return "-".join(title.lower().split())
+
 
 def get_code_snippets(url):
     """
@@ -32,10 +36,14 @@ def get_code_snippets(url):
     if res is None:
         return None
     soup = BeautifulSoup(res.content, "html.parser")
-    script_tag = soup.find('script', {'type': 'application/json'})
+    script_tag = soup.find("script", {"type": "application/json"})
     data = dict(json.loads(script_tag.string))
-    queries = data['props']['pageProps']['dehydratedState']['queries']
-    query = [i for i in queries if 'question' in i['state']['data'] and 'codeSnippets' in i['state']['data']['question']][0]
+    queries = data["props"]["pageProps"]["dehydratedState"]["queries"]
+    query = [
+        i
+        for i in queries
+        if "question" in i["state"]["data"]
+        and "codeSnippets" in i["state"]["data"]["question"]
+    ][0]
     code_snippets = query["state"]["data"]["question"]["codeSnippets"]
     return code_snippets
-
diff --git a/leetcode_env/__init__.py → leetcode_hard_gym/leetcode_env/__init__.py b/leetcode_env/__init__.py → leetcode_hard_gym/leetcode_env/__init__.py
diff --git a/leetcode_env/environment.py → ...code_hard_gym/leetcode_env/environment.py b/leetcode_env/environment.py → ...code_hard_gym/leetcode_env/environment.py
@@ -12,15 +12,16 @@
 
 dotenv.load_dotenv()
 
+
 class LeetCodeEnv(gym.Env):
-    metadata = {'render.modes': ['human']}
+    metadata = {"render.modes": ["human"]}
 
-    def __init__(self, cooldown = 0):
+    def __init__(self, cooldown=0):
         super(LeetCodeEnv, self).__init__()
         self.__configure_leetcode()
         self.reward = False
-        self.last_run = None 
-        self.cooldown = cooldown # To avoid rate limit
+        self.last_run = None
+        self.cooldown = cooldown  # To avoid rate limit
 
     def __configure_leetcode(self):
         configuration = leetcode.Configuration()
@@ -38,7 +39,6 @@ def __configure_leetcode(self):
         self.api_instance = leetcode.DefaultApi(leetcode.ApiClient(configuration))
 
     def step(self, action: LeetCodeSubmission):
-
         submission_result = self.__send_submission(action)
 
         reward, status = self.__calculate_reward(submission_result)
@@ -59,7 +59,11 @@ def __send_submission(self, sub: LeetCodeSubmission):
             sub.question_id = id_from_slug(sub.question_slug, self.api_instance)
 
         submission = leetcode.Submission(
-            judge_type="large", typed_code=sub.code, question_id=sub.question_id, test_mode=False, lang=sub.lang.value
+            judge_type="large",
+            typed_code=sub.code,
+            question_id=sub.question_id,
+            test_mode=False,
+            lang=sub.lang.value,
         )
 
         submission_id = self.api_instance.problems_problem_submit_post(
@@ -75,20 +79,25 @@ def __send_submission(self, sub: LeetCodeSubmission):
         return submission_result
 
     def __calculate_reward(self, submission_result):
-        if submission_result == {'state': 'STARTED'}:
-            status_msg = 'Submission Timed-Out'
+        if submission_result == {"state": "STARTED"}:
+            status_msg = "Submission Timed-Out"
+
+        elif (
+            "status" in submission_result.keys()
+            and submission_result["status"] == "PENDING"
+        ):
+            status_msg = "Submission Timed-Out"
 
-        elif 'status' in submission_result.keys() and submission_result['status'] == 'PENDING':
-            status_msg = 'Submission Timed-Out'
-
-        elif 'status_msg' in submission_result.keys():
-            status_msg = submission_result['status_msg'] # 'Accepted' | 'Runtime Error'| 'Wrong Answer' 
+        elif "status_msg" in submission_result.keys():
+            status_msg = submission_result[
+                "status_msg"
+            ]  # 'Accepted' | 'Runtime Error'| 'Wrong Answer'
 
         else:
-            status_msg = 'Unknown'
-            
-        return status_msg == 'Accepted', status_msg
-    
+            status_msg = "Unknown"
+
+        return status_msg == "Accepted", status_msg
+
     def __wait_for_cooldown(self):
         if self.last_run == None:
             self.last_run = datetime.now()
@@ -99,4 +108,3 @@ def __wait_for_cooldown(self):
 
     def is_done(self):
         return self.reward
-
diff --git a/leetcode_env/leetcode_types.py → ...e_hard_gym/leetcode_env/leetcode_types.py b/leetcode_env/leetcode_types.py → ...e_hard_gym/leetcode_env/leetcode_types.py
@@ -1,11 +1,14 @@
+from enum import Enum
 from typing import Optional
+
 from pydantic import BaseModel
-from enum import Enum
+
 
 class ProgrammingLanguage(Enum):
     """
     Enum for valid LeetCodeSubmission programming languages
     """
+
     CPP = "c++"
     JAVA = "java"
     PYTHON = "python"
@@ -29,10 +32,12 @@ class ProgrammingLanguage(Enum):
     MS_SQL_SERVER = "ms sql server"
     ORACLE = "oracle"
 
+
 class LeetCodeSubmission(BaseModel):
     """
     Model for a Leetcode Code Submission
     """
+
     code: str
     lang: ProgrammingLanguage
     question_id: str

diff --git a/leetcode_env/utils.py → leetcode_hard_gym/leetcode_env/utils.py b/leetcode_env/utils.py → leetcode_hard_gym/leetcode_env/utils.py
@@ -1,9 +1,11 @@
 import ast
+import os
 import re
 from abc import ABC, abstractmethod
-import os
+
 import astunparse
 import leetcode
+
 # import dotenv
 
 # dotenv.load_dotenv()
@@ -21,61 +23,65 @@
 
 # api_instance = leetcode.DefaultApi(leetcode.ApiClient(configuration))
 
+
 def id_from_slug(slug: str, api_instance) -> str:
     """
     Retrieves the id of the question with the given slug
     """
     graphql_request = leetcode.GraphqlQuery(
-      query="""
+        query="""
                   query getQuestionDetail($titleSlug: String!) {
                     question(titleSlug: $titleSlug) {
                       questionId
                     }
                   }
               """,
-              variables={"titleSlug": slug},
-              operation_name="getQuestionDetail",
-      )
+        variables={"titleSlug": slug},
+        operation_name="getQuestionDetail",
+    )
     response = ast.literal_eval(str(api_instance.graphql_post(body=graphql_request)))
-    frontend_id = response['data']['question']['question_id']
+    frontend_id = response["data"]["question"]["question_id"]
     return frontend_id
 
+
 def metadata_from_slug(slug: str, api_instance) -> str:
     """
     Retrieves the metadata of the question with the given slug
     """
     graphql_request = leetcode.GraphqlQuery(
-      query="""
+        query="""
                   query getQuestionDetail($titleSlug: String!) {
                     question(titleSlug: $titleSlug) {
                       metaData
                     }
                   }
               """,
-              variables={"titleSlug": slug},
-              operation_name="getQuestionDetail",
+        variables={"titleSlug": slug},
+        operation_name="getQuestionDetail",
     )
     response = ast.literal_eval(str(api_instance.graphql_post(body=graphql_request)))
-    metadata = response['data']['question']
+    metadata = response["data"]["question"]
     return metadata
 
 
 class SubmissionFormatter(ABC):
     """
     Class that converts between HumanEval and Leetcode submission formats.
     """
+
     @abstractmethod
     def to_leetcode(self, humaneval_snippet: str):
         """
         Convert the string to leetcode format
         """
-    
+
     @abstractmethod
     def to_humaneval(self, leetcode_snippet: str):
         """
         Convert the string to humaneval format
         """
 
+
 class PySubmissionFormatter:
     @staticmethod
     def extract_comments(source: str) -> str:
@@ -107,8 +113,13 @@ def to_humaneval(leetcode_snippet: str) -> str:
     @staticmethod
     def to_leetcode(humaneval_snippet: str, class_name: str = "Solution") -> str:
         comments = PySubmissionFormatter.extract_comments(humaneval_snippet)
-        # Remove imports 
-        humaneval_snippet = re.sub(r"^from\s+\S+\s+import.*|^import.*", "", humaneval_snippet, flags=re.MULTILINE)
+        # Remove imports
+        humaneval_snippet = re.sub(
+            r"^from\s+\S+\s+import.*|^import.*",
+            "",
+            humaneval_snippet,
+            flags=re.MULTILINE,
+        )
         try:
             tree = ast.parse(humaneval_snippet)
         except IndentationError:
@@ -133,6 +144,7 @@ def to_leetcode(humaneval_snippet: str, class_name: str = "Solution") -> str:
         new_tree = ast.Module(body=[class_node], type_ignores=[])
         return f"{comments}\n{astunparse.unparse(new_tree).strip()}\n"
 
+
 class RsSubmissionFormatter:
     @staticmethod
     def extract_comments(source: str) -> str:
@@ -155,4 +167,3 @@ def to_leetcode(humaneval_snippet: str, struct_name: str = "Solution") -> str:
         function_source = function_source.strip()
         function_source = re.sub(r"fn ", "pub fn ", function_source)
         return f"impl {struct_name} {{\n    {function_source}\n}}\n"
-