4314043140 {
4314143141 "name": "Test",
4314243142 "results": [
43143+ {
43144+ "name": "Salesforce AI Research SAGE (bash-only)",
43145+ "logo": [
43146+ "https://avatars.githubusercontent.com/u/137096229"
43147+ ],
43148+ "site": "https://www.salesforce.com/blog/sage-swe/",
43149+ "folder": "20251027_salesforce_SAGE",
43150+ "resolved": 44.25,
43151+ "date": "2025-10-27",
43152+ "logs": "s3://swe-bench-experiments/test/20251027_salesforce_SAGE/logs",
43153+ "trajs": "s3://swe-bench-experiments/test/20251027_salesforce_SAGE/trajs",
43154+ "trajs_docent": false,
43155+ "cost": null,
43156+ "instance_cost": null,
43157+ "instance_calls": null,
43158+ "os_model": false,
43159+ "os_system": false,
43160+ "checked": false,
43161+ "tags": [
43162+ "Model: claude-sonnet-4.5",
43163+ "Model: gpt-5",
43164+ "Org: Salesforce AI Research",
43165+ "System: Attempts - 2+"
43166+ ],
43167+ "warning": null
43168+ },
4314343169 {
4314443170 "name": "Atlassian Rovo Dev (2025-06-05)",
4314543171 "logo": [
4368343709 "https://se-research.bytedance.com/logos/bytedance.jpg"
4368443710 ],
4368543711 "site": [
43686- "https://github.com/bytedance/trae-agent"
43712+ "https://github.com/bytedance/trae-agent",
43713+ "https://seed.bytedance.com/"
4368743714 ],
4368843715 "folder": "20250928_trae_doubao_seed_code",
4368943716 "resolved": 78.8,
4370543732 ],
4370643733 "warning": null
4370743734 },
43735+ {
43736+ "name": "live-SWE-agent + Gemini 3 Pro Preview (2025-11-18)",
43737+ "logo": [
43738+ "https://brand.illinois.edu/wp-content/uploads/2024/02/Color-Variation-Orange-Block-I-White-Background.png"
43739+ ],
43740+ "site": "https://github.com/OpenAutoCoder/live-swe-agent",
43741+ "folder": "20251120_livesweagent_gemini-3-pro-preview",
43742+ "resolved": 77.4,
43743+ "date": "2025-11-20",
43744+ "logs": "s3://swe-bench-experiments/verified/20251120_livesweagent_gemini-3-pro-preview/logs",
43745+ "trajs": "s3://swe-bench-experiments/verified/20251120_livesweagent_gemini-3-pro-preview/trajs",
43746+ "trajs_docent": false,
43747+ "cost": null,
43748+ "instance_cost": null,
43749+ "instance_calls": null,
43750+ "os_model": false,
43751+ "os_system": true,
43752+ "checked": false,
43753+ "tags": [
43754+ "Model: gemini-3-pro-preview",
43755+ "Org: UIUC",
43756+ "System: Attempts - 1"
43757+ ],
43758+ "warning": null
43759+ },
4370843760 {
4370943761 "name": "Atlassian Rovo Dev (2025-09-02)",
4371043762 "logo": [
4386343915 ],
4386443916 "warning": null
4386543917 },
43918+ {
43919+ "name": "Sonar Foundation Agent + Claude 4.5 Sonnet",
43920+ "logo": [
43921+ "https://assets-eu-01.kc-usercontent.com/55017e37-262d-017b-afd6-daa9468cbc30/8e59bcad-6e39-41dc-abd9-a0e251e8d63f/Sonar%20%282%29.svg?w=128&h=32&dpr=2&fit=crop&q=80"
43922+ ],
43923+ "site": "https://www.sonarsource.com",
43924+ "folder": "20251103_sonar-foundation-agent_claude-sonnet-4-5",
43925+ "resolved": 74.8,
43926+ "date": "2025-11-03",
43927+ "logs": "s3://swe-bench-experiments/verified/20251103_sonar-foundation-agent_claude-sonnet-4-5/logs",
43928+ "trajs": "s3://swe-bench-experiments/verified/20251103_sonar-foundation-agent_claude-sonnet-4-5/trajs",
43929+ "trajs_docent": false,
43930+ "cost": null,
43931+ "instance_cost": null,
43932+ "instance_calls": null,
43933+ "os_model": false,
43934+ "os_system": false,
43935+ "checked": false,
43936+ "tags": [
43937+ "Model: claude-sonnet-4-5",
43938+ "Org: Sonar",
43939+ "System: Attempts - 1"
43940+ ],
43941+ "warning": null
43942+ },
4386643943 {
4386743944 "name": "Lingxi-v1.5_claude-4-sonnet-20250514",
4386843945 "logo": [
4649646573 }
4649746574 }
4649846575 },
46576+ {
46577+ "name": "Salesforce AI Research SAGE (OpenHands)",
46578+ "logo": [
46579+ "https://avatars.githubusercontent.com/u/137096229"
46580+ ],
46581+ "site": "https://www.salesforce.com/blog/sage-swe/",
46582+ "folder": "20251103_SalesforceAIResearch_SAGE_OpenHands",
46583+ "resolved": 73.8,
46584+ "date": "2025-11-03",
46585+ "logs": "s3://swe-bench-experiments/verified/20251103_SalesforceAIResearch_SAGE_OpenHands/logs",
46586+ "trajs": "s3://swe-bench-experiments/verified/20251103_SalesforceAIResearch_SAGE_OpenHands/trajs",
46587+ "trajs_docent": false,
46588+ "cost": null,
46589+ "instance_cost": null,
46590+ "instance_calls": null,
46591+ "os_model": false,
46592+ "os_system": false,
46593+ "checked": false,
46594+ "tags": [
46595+ "Model: claude-sonnet-4.5",
46596+ "Model: gpt-5",
46597+ "Org: Salesforce AI Research",
46598+ "System: Attempts - 2+"
46599+ ],
46600+ "warning": null
46601+ },
4649946602 {
4650046603 "name": "Tools + Claude 4 Opus (2025-05-22)",
4650146604 "logo": [
4658546688 "cost": null,
4658646689 "instance_cost": null,
4658746690 "instance_calls": null,
46588- "os_model": true ,
46691+ "os_model": false,
4658946692 "os_system": true,
4659046693 "checked": true,
4659146694 "tags": [
9196492067 ],
9196592068 "warning": null
9196692069 },
92070+ {
92071+ "name": "Codefuse_Pycfuse_SVR",
92072+ "logo": [
92073+ "https://github.com/codefuse-ai/codefuse-svr/blob/main/assets/codefuse_logo.png?raw=true"
92074+ ],
92075+ "site": "https://github.com/codefuse-ai/codefuse-svr?tab=readme-ov-file",
92076+ "folder": "20251117_codefuse_pycfuse_svr",
92077+ "resolved": 35.98,
92078+ "date": "2025-11-17",
92079+ "logs": null,
92080+ "trajs": null,
92081+ "trajs_docent": false,
92082+ "cost": null,
92083+ "instance_cost": null,
92084+ "instance_calls": null,
92085+ "os_model": false,
92086+ "os_system": false,
92087+ "checked": true,
92088+ "tags": [
92089+ "Model: o3-2025-04-16",
92090+ "Org: Codefuse",
92091+ "System: Attempts - 1"
92092+ ],
92093+ "warning": null
92094+ },
9196792095 {
9196892096 "name": "Refact.ai Agent",
9196992097 "logo": [
9245592583 ]
9245692584 }
9245792585 ]
92458- }
92586+ }
0 commit comments