diff --git a/nemo_skills/inference/eval/swebench.py b/nemo_skills/inference/eval/swebench.py
index 0fc7509ccc..f3610091e8 100644
--- a/nemo_skills/inference/eval/swebench.py
+++ b/nemo_skills/inference/eval/swebench.py
@@ -246,6 +246,10 @@ def __init__(self, cfg: SweBenchGenerationConfig):
"mkdir -p /root/tmux && "
"curl -Lf https://github.com/nelsonenzo/tmux-appimage/releases/download/3.5a/tmux.appimage -o /root/tmux/tmux && "
"chmod 777 /root/tmux/tmux && "
+ # download jq
+ "mkdir -p /root/jq && "
+ "curl -Lf https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64 -o /root/jq/jq && "
+ "chmod 777 /root/jq/jq && "
# clone the openhands repo
"rm -rf /root/OpenHands && "
f"git clone {self.cfg.agent_framework_repo} /root/OpenHands && "
@@ -531,13 +535,16 @@ async def _run_openhands(self, data_point, api_base):
" echo 'This is because OpenHands DELETES EVERYTHING in the /workspace folder if it exists.' && "
" exit 1; "
"fi && "
- # copy installed repo, uv & tmux dirs from /root_mount
+ # copy installed repo, uv, tmux & jq dirs from /root_mount
"cp -r /root_mount/OpenHands /root && "
"cp -r /root_mount/uv /root && "
"cp -r /root_mount/tmux /root && "
+ "cp -r /root_mount/jq /root && "
"cd /root/OpenHands && "
- # add poetry & tmux to PATH
- "export PATH=/root/uv/tool-bin:/root/tmux:$PATH && "
+ # make soft links to poetry, tmux & jq in /usr/local/bin, so OpenHands can run them from the command line
+ "ln -sf /root/uv/tool-bin/poetry /usr/local/bin/poetry && "
+ "ln -sf /root/tmux/tmux /usr/local/bin/tmux && "
+ "ln -sf /root/jq/jq /usr/local/bin/jq && "
# enable tmux appimage to run without fusermount
# https://docs.appimage.org/user-guide/troubleshooting/fuse.html#extract-and-run-type-2-appimages
"export APPIMAGE_EXTRACT_AND_RUN=1 && "
diff --git a/nemo_skills/prompt/config/eval/swe-bench/swe-agent/multilingual.yaml b/nemo_skills/prompt/config/eval/swe-bench/swe-agent/multilingual.yaml
new file mode 100644
index 0000000000..2b0e67de89
--- /dev/null
+++ b/nemo_skills/prompt/config/eval/swe-bench/swe-agent/multilingual.yaml
@@ -0,0 +1,78 @@
+# Based on the default config from the SWE-agent repo:
+# https://github.com/SWE-agent/SWE-agent/blob/1375ec4fa69d300b432b9ca61d6b0e5d7259131c/config/default.yaml
+# but mentions of Python are removed to make the prompt language-agnostic.
+
+# note that this doesn't use nemo-skills prompt logic and instead is passed directly to swe-agent
+
+agent:
+ templates:
+ system_template: |-
+ You are a helpful assistant that can interact with a computer to solve tasks.
+ instance_template: |-
+ <uploaded_files>
+ {{working_dir}}
+ </uploaded_files>
+ I've uploaded a code repository in the directory {{working_dir}}. Consider the following PR description:
+
+ <pr_description>
+ {{problem_statement}}
+ </pr_description>
+
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
+ Your task is to make the minimal changes to non-tests files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
+ Follow these steps to resolve the issue:
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
+ 2. Create a script to reproduce the error and execute it using the bash tool, to confirm the error
+ 3. Edit the sourcecode of the repo to resolve the issue
+ 4. Rerun your reproduce script and confirm that the error is fixed!
+ 5. Think about edgecases and make sure your fix handles them as well
+ Your thinking should be thorough and so it's fine if it's very long.
+ next_step_template: |-
+ OBSERVATION:
+ {{observation}}
+ next_step_no_output_template: |-
+ Your command ran successfully and did not produce any output.
+ tools:
+ env_variables:
+ PAGER: cat
+ MANPAGER: cat
+ LESS: -R
+ PIP_PROGRESS_BAR: 'off'
+ TQDM_DISABLE: '1'
+ GIT_PAGER: cat
+ bundles:
+ - path: tools/registry
+ - path: tools/edit_anthropic
+ - path: tools/review_on_submit_m
+ registry_variables:
+ USE_FILEMAP: 'true'
+ SUBMIT_REVIEW_MESSAGES:
+ - |
+ Thank you for your work on this issue. Please carefully follow the steps below to help review your changes.
+
+ 1. If you made any changes to your code after running the reproduction script, please run the reproduction script again.
+ If the reproduction script is failing, please revisit your changes and make sure they are correct.
+ If you have already removed your reproduction script, please ignore this step.
+ 2. Remove your reproduction script (if you haven't done so already).
+ 3. If you have modified any TEST files, please revert them to the state they had before you started fixing the issue.
+ You can do this with `git checkout -- /path/to/test/file`. Use below <diff> to find the files you need to revert.
+ 4. Run the submit command again to confirm.
+
+ Here is a list of all of your changes:
+
+ <diff>
+ {{diff}}
+ </diff>
+ enable_bash_tool: true
+ parse_function:
+ type: function_calling
+ history_processors: []
+ model:
+ # The following parameters are overridden by Nemo-Skills:
+ # name, api_base, temperature, top_p, completion_kwargs, per_instance_call_limit.
+ # Specifying them here will have no effect! Use Nemo-Skills options instead.
+ per_instance_cost_limit: 0
+ total_cost_limit: 0
+ max_input_tokens: 0
+ max_output_tokens: 0