Skip to content

Commit d7658f9

Browse files
committed
Update leaderboard
1 parent 6a151d1 commit d7658f9

File tree

1 file changed

+131
-3
lines changed

1 file changed

+131
-3
lines changed

data/leaderboards.json

Lines changed: 131 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43140,6 +43140,32 @@
4314043140
{
4314143141
"name": "Test",
4314243142
"results": [
43143+
{
43144+
"name": "Salesforce AI Research SAGE (bash-only)",
43145+
"logo": [
43146+
"https://avatars.githubusercontent.com/u/137096229"
43147+
],
43148+
"site": "https://www.salesforce.com/blog/sage-swe/",
43149+
"folder": "20251027_salesforce_SAGE",
43150+
"resolved": 44.25,
43151+
"date": "2025-10-27",
43152+
"logs": "s3://swe-bench-experiments/test/20251027_salesforce_SAGE/logs",
43153+
"trajs": "s3://swe-bench-experiments/test/20251027_salesforce_SAGE/trajs",
43154+
"trajs_docent": false,
43155+
"cost": null,
43156+
"instance_cost": null,
43157+
"instance_calls": null,
43158+
"os_model": false,
43159+
"os_system": false,
43160+
"checked": false,
43161+
"tags": [
43162+
"Model: claude-sonnet-4.5",
43163+
"Model: gpt-5",
43164+
"Org: Salesforce AI Research",
43165+
"System: Attempts - 2+"
43166+
],
43167+
"warning": null
43168+
},
4314343169
{
4314443170
"name": "Atlassian Rovo Dev (2025-06-05)",
4314543171
"logo": [
@@ -43683,7 +43709,8 @@
4368343709
"https://se-research.bytedance.com/logos/bytedance.jpg"
4368443710
],
4368543711
"site": [
43686-
"https://github.com/bytedance/trae-agent"
43712+
"https://github.com/bytedance/trae-agent",
43713+
"https://seed.bytedance.com/"
4368743714
],
4368843715
"folder": "20250928_trae_doubao_seed_code",
4368943716
"resolved": 78.8,
@@ -43705,6 +43732,31 @@
4370543732
],
4370643733
"warning": null
4370743734
},
43735+
{
43736+
"name": "live-SWE-agent + Gemini 3 Pro Preview (2025-11-18)",
43737+
"logo": [
43738+
"https://brand.illinois.edu/wp-content/uploads/2024/02/Color-Variation-Orange-Block-I-White-Background.png"
43739+
],
43740+
"site": "https://github.com/OpenAutoCoder/live-swe-agent",
43741+
"folder": "20251120_livesweagent_gemini-3-pro-preview",
43742+
"resolved": 77.4,
43743+
"date": "2025-11-20",
43744+
"logs": "s3://swe-bench-experiments/verified/20251120_livesweagent_gemini-3-pro-preview/logs",
43745+
"trajs": "s3://swe-bench-experiments/verified/20251120_livesweagent_gemini-3-pro-preview/trajs",
43746+
"trajs_docent": false,
43747+
"cost": null,
43748+
"instance_cost": null,
43749+
"instance_calls": null,
43750+
"os_model": false,
43751+
"os_system": true,
43752+
"checked": false,
43753+
"tags": [
43754+
"Model: gemini-3-pro-preview",
43755+
"Org: UIUC",
43756+
"System: Attempts - 1"
43757+
],
43758+
"warning": null
43759+
},
4370843760
{
4370943761
"name": "Atlassian Rovo Dev (2025-09-02)",
4371043762
"logo": [
@@ -43863,6 +43915,31 @@
4386343915
],
4386443916
"warning": null
4386543917
},
43918+
{
43919+
"name": "Sonar Foundation Agent + Claude 4.5 Sonnet",
43920+
"logo": [
43921+
"https://assets-eu-01.kc-usercontent.com/55017e37-262d-017b-afd6-daa9468cbc30/8e59bcad-6e39-41dc-abd9-a0e251e8d63f/Sonar%20%282%29.svg?w=128&h=32&dpr=2&fit=crop&q=80"
43922+
],
43923+
"site": "https://www.sonarsource.com",
43924+
"folder": "20251103_sonar-foundation-agent_claude-sonnet-4-5",
43925+
"resolved": 74.8,
43926+
"date": "2025-11-03",
43927+
"logs": "s3://swe-bench-experiments/verified/20251103_sonar-foundation-agent_claude-sonnet-4-5/logs",
43928+
"trajs": "s3://swe-bench-experiments/verified/20251103_sonar-foundation-agent_claude-sonnet-4-5/trajs",
43929+
"trajs_docent": false,
43930+
"cost": null,
43931+
"instance_cost": null,
43932+
"instance_calls": null,
43933+
"os_model": false,
43934+
"os_system": false,
43935+
"checked": false,
43936+
"tags": [
43937+
"Model: claude-sonnet-4-5",
43938+
"Org: Sonar",
43939+
"System: Attempts - 1"
43940+
],
43941+
"warning": null
43942+
},
4386643943
{
4386743944
"name": "Lingxi-v1.5_claude-4-sonnet-20250514",
4386843945
"logo": [
@@ -46496,6 +46573,32 @@
4649646573
}
4649746574
}
4649846575
},
46576+
{
46577+
"name": "Salesforce AI Research SAGE (OpenHands)",
46578+
"logo": [
46579+
"https://avatars.githubusercontent.com/u/137096229"
46580+
],
46581+
"site": "https://www.salesforce.com/blog/sage-swe/",
46582+
"folder": "20251103_SalesforceAIResearch_SAGE_OpenHands",
46583+
"resolved": 73.8,
46584+
"date": "2025-11-03",
46585+
"logs": "s3://swe-bench-experiments/verified/20251103_SalesforceAIResearch_SAGE_OpenHands/logs",
46586+
"trajs": "s3://swe-bench-experiments/verified/20251103_SalesforceAIResearch_SAGE_OpenHands/trajs",
46587+
"trajs_docent": false,
46588+
"cost": null,
46589+
"instance_cost": null,
46590+
"instance_calls": null,
46591+
"os_model": false,
46592+
"os_system": false,
46593+
"checked": false,
46594+
"tags": [
46595+
"Model: claude-sonnet-4.5",
46596+
"Model: gpt-5",
46597+
"Org: Salesforce AI Research",
46598+
"System: Attempts - 2+"
46599+
],
46600+
"warning": null
46601+
},
4649946602
{
4650046603
"name": "Tools + Claude 4 Opus (2025-05-22)",
4650146604
"logo": [
@@ -46585,7 +46688,7 @@
4658546688
"cost": null,
4658646689
"instance_cost": null,
4658746690
"instance_calls": null,
46588-
"os_model": true,
46691+
"os_model": false,
4658946692
"os_system": true,
4659046693
"checked": true,
4659146694
"tags": [
@@ -91964,6 +92067,31 @@
9196492067
],
9196592068
"warning": null
9196692069
},
92070+
{
92071+
"name": "Codefuse_Pycfuse_SVR",
92072+
"logo": [
92073+
"https://raw.githubusercontent.com/codefuse-ai/codefuse-svr/main/assets/codefuse_logo.png"
92074+
],
92075+
"site": "https://github.com/codefuse-ai/codefuse-svr?tab=readme-ov-file",
92076+
"folder": "20251117_codefuse_pycfuse_svr",
92077+
"resolved": 35.98,
92078+
"date": "2025-11-17",
92079+
"logs": null,
92080+
"trajs": null,
92081+
"trajs_docent": false,
92082+
"cost": null,
92083+
"instance_cost": null,
92084+
"instance_calls": null,
92085+
"os_model": false,
92086+
"os_system": false,
92087+
"checked": true,
92088+
"tags": [
92089+
"Model: o3-2025-04-16",
92090+
"Org: Codefuse",
92091+
"System: Attempts - 1"
92092+
],
92093+
"warning": null
92094+
},
9196792095
{
9196892096
"name": "Refact.ai Agent",
9196992097
"logo": [
@@ -92455,4 +92583,4 @@
9245592583
]
9245692584
}
9245792585
]
92458-
}
92586+
}

0 commit comments

Comments
 (0)