diff --git a/index.html b/index.html index 25e3ff3..0ad364f 100644 --- a/index.html +++ b/index.html @@ -167,16 +167,12 @@

Leaderboard

2024-08-20

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -202,16 +198,12 @@

Leaderboard

2024-07-21

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -237,9 +229,7 @@

Leaderboard

2024-06-17

- - 🔗 - + ✓

@@ -269,9 +259,7 @@

Leaderboard

2024-06-28

- - 🔗 - + ✓

@@ -301,16 +289,12 @@

Leaderboard

2024-06-20

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -333,9 +317,7 @@

Leaderboard

2024-06-15

- - 🔗 - + ✓

@@ -365,9 +347,7 @@

Leaderboard

2024-05-09

- - 🔗 - + ✓

@@ -397,16 +377,12 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -431,16 +407,12 @@

Leaderboard

2024-07-28

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -465,16 +437,12 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -497,9 +465,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -529,9 +495,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -559,9 +523,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -589,9 +551,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -619,9 +579,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -649,9 +607,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -691,6 +647,68 @@

Leaderboard

+ Tools + Claude 3.5 Sonnet (2024-10-22) +

+ +

49.00

+

2024-10-22

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ 🥈 + + + + Solver (2024-09-12) +

+ +

45.40

+

2024-09-24

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ 🥉 + + + Gru(2024-08-24)

@@ -698,22 +716,78 @@

Leaderboard

2024-08-24

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ - 🔗 + Solver (2024-09-12) +

+ +

43.60

+

2024-09-20

+ +

+ ✓

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ - 🔗 + Tools + Claude 3.5 Haiku +

+ +

40.60

+

2024-10-22

+ +

+ ✓ +

+ + +

+ ✓

- 🔗 + 🔗

@@ -722,7 +796,6 @@

Leaderboard

- 🥈 @@ -731,24 +804,50 @@

Leaderboard

40.60

2024-08-20

+ +

+ ✓ +

+ + +

+ ✓ +

+

- 🔗 + 🔗

+ + + -

+

- 🔗 + 🤠 + Composio SWEkit + Claude 3.5 Sonnet (2024-10-16) +

+ +

40.60

+

2024-10-16

+ +

+ ✓ +

+ + +

+ ✓

- 🔗 + 🔗

@@ -757,7 +856,6 @@

Leaderboard

- 🥉 @@ -768,16 +866,12 @@

Leaderboard

2024-07-21

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -802,9 +896,7 @@

Leaderboard

2024-06-28

- - 🔗 - + ✓

@@ -834,9 +926,7 @@

Leaderboard

2024-06-17

- - 🔗 - + ✓

@@ -866,21 +956,107 @@

Leaderboard

2024-06-20

+ ✓ +

+ + +

+ ✓ +

+ + +

+ - +

+ + + + + +

- 🔗 + + nFactorial (2024-10-07) +

+ +

31.60

+

2024-10-07

+ +

+ ✓

+ ✓ +

+ + +

+ + 🔗 - 🔗 +

+ + + + + +

+ + 🤠 + Lingma Agent + Lingma SWE-GPT 72b (v0925) +

+ +

28.80

+

2024-10-02

+ +

+ ✓

- - + ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ + + + EPAM AI/Run Developer Agent + GPT4o +

+ +

27.00

+

2024-10-16

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 +

@@ -898,9 +1074,7 @@

Leaderboard

2024-06-15

- - 🔗 - + ✓

@@ -923,6 +1097,36 @@

Leaderboard

+ nFactorial (2024-10-01) +

+ +

25.80

+

2024-10-01

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ + + Amazon Q Developer Agent (v20240430-dev)

@@ -930,9 +1134,7 @@

Leaderboard

2024-05-09

- - 🔗 - + ✓

@@ -949,6 +1151,36 @@

Leaderboard

+ + +

+ + 🤠 + + Lingma Agent + Lingma SWE-GPT 72b (v0918) +

+ +

25.00

+

2024-09-18

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + +

@@ -962,22 +1194,78 @@

Leaderboard

2024-08-20

- - 🔗 - + ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ + 🤠 + ✅ + SWE-agent + GPT 4o (2024-05-13) +

+ +

23.20

+

2024-07-28

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + + + + +

+ + 🤠 + ✅ + SWE-agent + GPT 4 (1106) +

+ +

22.40

+

2024-04-02

+ +

+ ✓

- - 🔗 - + ✓

- 🔗 + 🔗

@@ -989,30 +1277,24 @@

Leaderboard

🤠 ✅ - SWE-agent + GPT 4o (2024-05-13) + SWE-agent + Claude 3 Opus

-

23.20

-

2024-07-28

+

18.20

+

2024-04-02

- - 🔗 - + ✓

- - 🔗 - + ✓

- - 🔗 - + -

@@ -1022,30 +1304,26 @@

Leaderboard

🤠 - ✅ - SWE-agent + GPT 4 (1106) + + Lingma Agent + Lingma SWE-GPT 7b (v0925)

-

22.40

-

2024-04-02

+

18.20

+

2024-10-02

- - 🔗 - + ✓

- - 🔗 - + ✓

- 🔗 + 🔗

@@ -1056,29 +1334,27 @@

Leaderboard

🤠 - ✅ - SWE-agent + Claude 3 Opus + + Lingma Agent + Lingma SWE-GPT 7b (v0918)

-

18.20

-

2024-04-02

+

10.20

+

2024-09-18

- - 🔗 - + ✓

- - 🔗 - + ✓

- - + + 🔗 +

@@ -1096,9 +1372,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -1128,9 +1402,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -1158,9 +1430,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -1188,9 +1458,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -1218,9 +1486,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -1248,9 +1514,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -1297,9 +1561,7 @@

Leaderboard

2024-07-02

- - 🔗 - + ✓

@@ -1323,23 +1585,50 @@

Leaderboard

- Honeycomb + Bytedance MarsCode Agent

-

38.33

-

2024-08-20

+

39.33

+

2024-09-12

+ +

+ ✓ +

+ + +

+ ✓ +

+

- 🔗 + 🔗

+ + + -

+

+ 🥉 + - 🔗 + Honeycomb +

+ +

38.33

+

2024-08-20

+ +

+ ✓ +

+ + +

+ ✓

@@ -1354,7 +1643,6 @@

Leaderboard

- 🥉 @@ -1365,9 +1653,7 @@

Leaderboard

2024-06-27

- - 🔗 - + ✓

@@ -1397,16 +1683,12 @@

Leaderboard

2024-08-11

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1431,16 +1713,12 @@

Leaderboard

2024-08-29

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1465,16 +1743,12 @@

Leaderboard

2024-08-06

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1499,9 +1773,7 @@

Leaderboard

2024-07-23

- - 🔗 - + ✓

@@ -1531,16 +1803,12 @@

Leaderboard

2024-06-22

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1565,9 +1833,7 @@

Leaderboard

2024-06-17

- - 🔗 - + ✓

@@ -1595,24 +1861,50 @@

Leaderboard

30.67

2024-06-21

+ +

+ ✓ +

+ + +

+ ✓ +

+

- 🔗 + 🔗

+ + + -

+

- 🔗 + + AIGCode Infant-Coder(2024-08-30) +

+ +

30.00

+

2024-09-08

+ +

+ ✓ +

+ + +

+ ✓

- 🔗 + 🔗

@@ -1631,16 +1923,12 @@

Leaderboard

2024-07-21

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1665,16 +1953,12 @@

Leaderboard

2024-08-08

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1699,9 +1983,7 @@

Leaderboard

2024-06-04

- - 🔗 - + ✓

@@ -1731,9 +2013,7 @@

Leaderboard

2024-06-12

- - 🔗 - + ✓

@@ -1763,16 +2043,12 @@

Leaderboard

2024-07-06

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1797,9 +2073,7 @@

Leaderboard

2024-06-30

- - 🔗 - + ✓

@@ -1829,16 +2103,12 @@

Leaderboard

2024-06-23

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1863,16 +2133,12 @@

Leaderboard

2024-07-25

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -1897,9 +2163,7 @@

Leaderboard

2024-06-12

- - 🔗 - + ✓

@@ -1929,9 +2193,7 @@

Leaderboard

2024-05-23

- - 🔗 - + ✓

@@ -1948,6 +2210,36 @@

Leaderboard

+ + +

+ + + + HyperAgent +

+ +

25.33

+

2024-09-25

+ +

+ ✓ +

+ + +

+ ✓ +

+ + +

+ + 🔗 + +

+ + +

@@ -1959,24 +2251,50 @@

Leaderboard

24.67

2024-06-17

+ +

+ ✓ +

+ + +

+ ✓ +

+

- 🔗 + 🔗

+ + + -

+

- 🔗 + + IBM SWE-1.0 (with open LLMs) +

+ +

23.67

+

2024-10-16

+ +

+ ✓ +

+ + +

+ ✓

- 🔗 + 🔗

@@ -1995,9 +2313,7 @@

Leaderboard

2024-05-24

- - 🔗 - + ✓

@@ -2027,16 +2343,12 @@

Leaderboard

2024-06-20

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -2059,9 +2371,7 @@

Leaderboard

2024-06-15

- - 🔗 - + ✓

@@ -2091,16 +2401,12 @@

Leaderboard

2024-08-28

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -2123,9 +2429,7 @@

Leaderboard

2024-05-09

- - 🔗 - + ✓

@@ -2155,9 +2459,7 @@

Leaderboard

2024-05-30

- - 🔗 - + ✓

@@ -2187,16 +2489,12 @@

Leaderboard

2024-07-28

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -2221,16 +2519,12 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -2255,16 +2549,12 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

- - 🔗 - + ✓

@@ -2287,9 +2577,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -2319,14 +2607,12 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

- - + ✓

@@ -2349,9 +2635,7 @@

Leaderboard

2024-04-02

- - 🔗 - + ✓

@@ -2379,9 +2663,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -2409,9 +2691,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

@@ -2439,9 +2719,7 @@

Leaderboard

2023-10-10

- - 🔗 - + ✓

diff --git a/template/data.json b/template/data.json index f0a2e1b..2c811a0 100644 --- a/template/data.json +++ b/template/data.json @@ -8,8 +8,8 @@ "folder": "20240820_honeycomb", "resolved": 22.06, "date": "2024-08-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240820_honeycomb/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240820_honeycomb/trajs", + "logs": true, + "trajs": true, "site": "https://honeycomb.sh/", "verified": false, "oss": false @@ -19,8 +19,8 @@ "folder": "20240721_amazon-q-developer-agent-20240719-dev", "resolved": 19.75, "date": "2024-07-21", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240721_amazon-q-developer-agent-20240719-dev/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "logs": true, + "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false @@ -30,8 +30,8 @@ "folder": "20240617_factory_code_droid", "resolved": 19.27, "date": "2024-06-17", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240617_factory_code_droid/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://www.factory.ai/", "verified": false, "oss": false @@ -41,8 +41,8 @@ "folder": "20240628_autocoderover-v20240620", "resolved": 18.83, "date": "2024-06-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240628_autocoderover-v20240620/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://autocoderover.dev/", "verified": false, "oss": false @@ -52,8 +52,8 @@ "folder": "20240620_sweagent_claude3.5sonnet", "resolved": 18.13, "date": "2024-06-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240620_sweagent_claude3.5sonnet/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240620_sweagent_claude3.5sonnet/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true @@ -63,8 +63,8 @@ "folder": "20240615_appmap-navie_gpt4o", "resolved": 14.6, "date": "2024-06-15", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240615_appmap-navie_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://appmap.io/navie", "verified": true, "oss": true @@ -74,8 +74,8 @@ "folder": "20240509_amazon-q-developer-agent-20240430-dev", "resolved": 13.82, "date": "2024-05-09", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240509_amazon-q-developer-agent-20240430-dev/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false @@ -85,8 +85,8 @@ "folder": "20240402_sweagent_gpt4", "resolved": 12.47, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_sweagent_gpt4/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_sweagent_gpt4/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -96,8 +96,8 @@ "folder": "20240728_sweagent_gpt4o", "resolved": 11.99, "date": "2024-07-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240728_sweagent_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240728_sweagent_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -107,8 +107,8 @@ "folder": "20240402_sweagent_claude3opus", "resolved": 10.51, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_sweagent_claude3opus/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_sweagent_claude3opus/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true @@ -118,8 +118,8 @@ "folder": "20240402_rag_claude3opus", "resolved": 3.79, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_rag_claude3opus/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, "oss": true @@ -129,8 +129,8 @@ "folder": "20231010_rag_claude2", "resolved": 1.96, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20231010_rag_claude2/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -140,8 +140,8 @@ "folder": "20240402_rag_gpt4", "resolved": 1.31, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20240402_rag_gpt4/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -151,8 +151,8 @@ "folder": "20231010_rag_swellama13b", "resolved": 0.7, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20231010_rag_swellama13b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -162,8 +162,8 @@ "folder": "20231010_rag_swellama7b", "resolved": 0.7, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20231010_rag_swellama7b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -173,8 +173,8 @@ "folder": "20231010_rag_gpt35", "resolved": 0.17, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/test/20231010_rag_gpt35/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -184,35 +184,90 @@ { "name": "Verified", "results": [ + { + "name": "Tools + Claude 3.5 Sonnet (2024-10-22)", + "folder": "20241022_tools_claude-3-5-sonnet-updated", + "resolved": 49.0, + "date": "2024-10-22", + "logs": true, + "trajs": true, + "site": "https://www.anthropic.com/", + "verified": false, + "oss": false + }, + { + "name": "Solver (2024-09-12)", + "folder": "20240924_solver", + "resolved": 45.4, + "date": "2024-09-24", + "logs": true, + "trajs": true, + "site": "https://laredolabs.com/", + "verified": false, + "oss": false + }, { "name": "Gru(2024-08-24)", "folder": "20240824_gru", "resolved": 45.2, "date": "2024-08-24", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240824_gru/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240824_gru/trajs", + "logs": true, + "trajs": true, "site": "https://gru.ai", "verified": false, "oss": false }, + { + "name": "Solver (2024-09-12)", + "folder": "20240920_solver", + "resolved": 43.6, + "date": "2024-09-20", + "logs": true, + "trajs": true, + "site": "https://laredolabs.com/", + "verified": false, + "oss": false + }, + { + "name": "Tools + Claude 3.5 Haiku", + "folder": "20241022_tools_claude-3-5-haiku", + "resolved": 40.6, + "date": "2024-10-22", + "logs": true, + "trajs": true, + "site": "https://www.anthropic.com/", + "verified": false, + "oss": false + }, { "name": "Honeycomb", "folder": "20240820_honeycomb", "resolved": 40.6, "date": "2024-08-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240820_honeycomb/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240820_honeycomb/trajs", + "logs": true, + "trajs": true, "site": "https://honeycomb.sh/", "verified": false, "oss": false }, + { + "name": "Composio SWEkit + Claude 3.5 Sonnet (2024-10-16)", + "folder": "20241016_composio_swekit", + "resolved": 40.6, + "date": "2024-10-16", + "logs": true, + "trajs": true, + "site": "https://github.com/ComposioHQ/composio/tree/master/python/swe/agent", + "verified": false, + "oss": true + }, { "name": "Amazon Q Developer Agent (v20240719-dev)", "folder": "20240721_amazon-q-developer-agent-20240719-dev", "resolved": 38.8, "date": "2024-07-21", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240721_amazon-q-developer-agent-20240719-dev/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "logs": true, + "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false @@ -222,8 +277,8 @@ "folder": "20240628_autocoderover-v20240620", "resolved": 38.4, "date": "2024-06-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240628_autocoderover-v20240620/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://autocoderover.dev/", "verified": false, "oss": false @@ -233,8 +288,8 @@ "folder": "20240617_factory_code_droid", "resolved": 37.0, "date": "2024-06-17", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240617_factory_code_droid/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://www.factory.ai/", "verified": false, "oss": false @@ -244,41 +299,96 @@ "folder": "20240620_sweagent_claude3.5sonnet", "resolved": 33.6, "date": "2024-06-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240620_sweagent_claude3.5sonnet/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240620_sweagent_claude3.5sonnet/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true }, + { + "name": "nFactorial (2024-10-07)", + "folder": "20241007_nfactorial", + "resolved": 31.6, + "date": "2024-10-07", + "logs": true, + "trajs": true, + "site": "https://nfactorial.dev/", + "verified": false, + "oss": false + }, + { + "name": "Lingma Agent + Lingma SWE-GPT 72b (v0925)", + "folder": "20241002_lingma-agent_lingma-swe-gpt-72b", + "resolved": 28.8, + "date": "2024-10-02", + "logs": true, + "trajs": true, + "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT (https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT-v20240925)", + "verified": false, + "oss": true + }, + { + "name": "EPAM AI/Run Developer Agent + GPT4o", + "folder": "20241016_epam-ai-run-gpt-4o", + "resolved": 27.0, + "date": "2024-10-16", + "logs": true, + "trajs": true, + "site": "https://www.epam.com/services/artificial-intelligence", + "verified": false, + "oss": false + }, { "name": "AppMap Navie + GPT 4o (2024-05-13)", "folder": "20240615_appmap-navie_gpt4o", "resolved": 26.2, "date": "2024-06-15", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240615_appmap-navie_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://appmap.io/navie", "verified": true, "oss": true }, + { + "name": "nFactorial (2024-10-01)", + "folder": "20241001_nfactorial", + "resolved": 25.8, + "date": "2024-10-01", + "logs": true, + "trajs": true, + "site": "https://nfactorial.dev/", + "verified": false, + "oss": false + }, { "name": "Amazon Q Developer Agent (v20240430-dev)", "folder": "20240509_amazon-q-developer-agent-20240430-dev", "resolved": 25.6, "date": "2024-05-09", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240509_amazon-q-developer-agent-20240430-dev/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false }, + { + "name": "Lingma Agent + Lingma SWE-GPT 72b (v0918)", + "folder": "20240918_lingma-agent_lingma-swe-gpt-72b", + "resolved": 25.0, + "date": "2024-09-18", + "logs": true, + "trajs": true, + "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT", + "verified": false, + "oss": true + }, { "name": "EPAM AI/Run Developer Agent + GPT4o", "folder": "20240820_epam-ai-run-gpt-4o", "resolved": 24.0, "date": "2024-08-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240820_epam-ai-run-gpt-4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240820_epam-ai-run-gpt-4o/trajs", + "logs": true, + "trajs": true, "site": "https://www.epam.com/services/artificial-intelligence", "verified": false, "oss": false @@ -288,8 +398,8 @@ "folder": "20240728_sweagent_gpt4o", "resolved": 23.2, "date": "2024-07-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240728_sweagent_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240728_sweagent_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -299,8 +409,8 @@ "folder": "20240402_sweagent_gpt4", "resolved": 22.4, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_sweagent_gpt4/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_sweagent_gpt4/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -310,19 +420,41 @@ "folder": "20240402_sweagent_claude3opus", "resolved": 18.2, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_sweagent_claude3opus/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_sweagent_claude3opus/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true }, + { + "name": "Lingma Agent + Lingma SWE-GPT 7b (v0925)", + "folder": "20241002_lingma-agent_lingma-swe-gpt-7b", + "resolved": 18.2, + "date": "2024-10-02", + "logs": true, + "trajs": true, + "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT (https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT-v20240925)", + "verified": false, + "oss": true + }, + { + "name": "Lingma Agent + Lingma SWE-GPT 7b (v0918)", + "folder": "20240918_lingma-agent_lingma-swe-gpt-7b", + "resolved": 10.2, + "date": "2024-09-18", + "logs": true, + "trajs": true, + "site": "https://www.modelscope.cn/models/yingwei/Lingma-SWE-GPT", + "verified": false, + "oss": true + }, { "name": "RAG + Claude 3 Opus", "folder": "20240402_rag_claude3opus", "resolved": 7.0, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_rag_claude3opus/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, "oss": true @@ -332,8 +464,8 @@ "folder": "20231010_rag_claude2", "resolved": 4.4, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20231010_rag_claude2/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -343,8 +475,8 @@ "folder": "20240402_rag_gpt4", "resolved": 2.8, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20240402_rag_gpt4/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -354,8 +486,8 @@ "folder": "20231010_rag_swellama7b", "resolved": 1.4, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20231010_rag_swellama7b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -365,8 +497,8 @@ "folder": "20231010_rag_swellama13b", "resolved": 1.2, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20231010_rag_swellama13b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -376,8 +508,8 @@ "folder": "20231010_rag_gpt35", "resolved": 0.4, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/verified/20231010_rag_gpt35/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -392,19 +524,30 @@ "folder": "20240702_codestory_aide_mixed", "resolved": 43.0, "date": "2024-07-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240702_codestory_aide_mixed/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://aide.dev/", "verified": false, "oss": false }, + { + "name": "Bytedance MarsCode Agent", + "folder": "20240912_marscode-agent-dev", + "resolved": 39.33, + "date": "2024-09-12", + "logs": true, + "trajs": true, + "site": "https://www.marscode.com/", + "verified": false, + "oss": false + }, { "name": "Honeycomb", "folder": "20240820_honeycomb", "resolved": 38.33, "date": "2024-08-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240820_honeycomb/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240820_honeycomb/trajs", + "logs": true, + "trajs": true, "site": "https://honeycomb.sh/", "verified": false, "oss": false @@ -414,8 +557,8 @@ "folder": "20240627_abanteai_mentatbot_gpt4o", "resolved": 38.0, "date": "2024-06-27", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240627_abanteai_mentatbot_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://mentat.ai/blog/mentatbot-sota-coding-agent", "verified": false, "oss": false @@ -425,8 +568,8 @@ "folder": "20240811_gru", "resolved": 35.67, "date": "2024-08-11", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240811_gru/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240811_gru/trajs", + "logs": true, + "trajs": true, "site": "https://gru.ai", "verified": false, "oss": false @@ -436,8 +579,8 @@ "folder": "20240829_Isoform", "resolved": 35.0, "date": "2024-08-29", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240829_Isoform/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240829_Isoform/trajs", + "logs": true, + "trajs": true, "site": "https://isoform.ai", "verified": false, "oss": false @@ -447,8 +590,8 @@ "folder": "20240806_SuperCoder2.0", "resolved": 34.0, "date": "2024-08-06", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240806_SuperCoder2.0/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240806_SuperCoder2.0/trajs", + "logs": true, + "trajs": true, "site": "https://superagi.com/supercoder/", "verified": false, "oss": false @@ -458,8 +601,8 @@ "folder": "20240723_marscode-agent-dev", "resolved": 34.0, "date": "2024-07-23", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240723_marscode-agent-dev/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://www.marscode.com/", "verified": false, "oss": false @@ -469,8 +612,8 @@ "folder": "20240622_Lingma_Agent", "resolved": 33.0, "date": "2024-06-22", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240622_Lingma_Agent/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240622_Lingma_Agent/trajs", + "logs": true, + "trajs": true, "site": "https://arxiv.org/abs/2406.01422", "verified": false, "oss": false @@ -480,8 +623,8 @@ "folder": "20240617_factory_code_droid", "resolved": 31.33, "date": "2024-06-17", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240617_factory_code_droid/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://www.factory.ai/", "verified": false, "oss": false @@ -491,19 +634,30 @@ "folder": "20240621_autocoderover-v20240620", "resolved": 30.67, "date": "2024-06-21", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240621_autocoderover-v20240620/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240621_autocoderover-v20240620/trajs", + "logs": true, + "trajs": true, "site": "https://autocoderover.dev/", "verified": false, "oss": true }, + { + "name": "AIGCode Infant-Coder(2024-08-30)", + "folder": "20240908_infant_gpt4o", + "resolved": 30.0, + "date": "2024-09-08", + "logs": true, + "trajs": true, + "site": "https://aigcode.net/", + "verified": false, + "oss": false + }, { "name": "Amazon Q Developer Agent (v20240719-dev)", "folder": "20240721_amazon-q-developer-agent-20240719-dev", "resolved": 29.67, "date": "2024-07-21", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240721_amazon-q-developer-agent-20240719-dev/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240721_amazon-q-developer-agent-20240719-dev/trajs", + "logs": true, + "trajs": true, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false @@ -513,8 +667,8 @@ "folder": "20240808_RepoGraph_gpt4o", "resolved": 29.67, "date": "2024-08-08", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240808_RepoGraph_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240808_RepoGraph_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/ozyyshr/RepoGraph", "verified": false, "oss": true @@ -524,8 +678,8 @@ "folder": "20240604_CodeR", "resolved": 28.33, "date": "2024-06-04", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240604_CodeR/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/NL2Code/CodeR", "verified": false, "oss": false @@ -535,8 +689,8 @@ "folder": "20240612_MASAI_gpt4o", "resolved": 28.0, "date": "2024-06-12", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240612_MASAI_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/masai-dev-agent/masai", "verified": false, "oss": false @@ -546,8 +700,8 @@ "folder": "20240706_sima_gpt4o", "resolved": 27.67, "date": "2024-07-06", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240706_sima_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240706_sima_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240706_sima_gpt4o", "verified": false, "oss": false @@ -557,8 +711,8 @@ "folder": "20240630_agentless_gpt4o", "resolved": 27.33, "date": "2024-06-30", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240630_agentless_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/OpenAutoCoder/Agentless", "verified": false, "oss": true @@ -568,8 +722,8 @@ "folder": "20240623_moatless_claude35sonnet", "resolved": 26.67, "date": "2024-06-23", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240623_moatless_claude35sonnet/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240623_moatless_claude35sonnet/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/aorwall/moatless-tools", "verified": true, "oss": true @@ -579,8 +733,8 @@ "folder": "20240725_opendevin_codeact_v1.8_claude35sonnet", "resolved": 26.67, "date": "2024-07-25", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240725_opendevin_codeact_v1.8_claude35sonnet/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240725_opendevin_codeact_v1.8_claude35sonnet/trajs", + "logs": true, + "trajs": true, "site": "https://docs.all-hands.dev/", "verified": true, "oss": true @@ -590,8 +744,8 @@ "folder": "20240612_IBM_Research_Agent101", "resolved": 26.67, "date": "2024-06-12", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240612_IBM_Research_Agent101/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240612_IBM_Research_Agent101", "verified": false, "oss": false @@ -601,30 +755,52 @@ "folder": "20240523_aider", "resolved": 26.33, "date": "2024-05-23", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240523_aider/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/paul-gauthier/aider", "verified": false, "oss": true }, + { + "name": "HyperAgent", + "folder": "20240925_hyperagent_lite1", + "resolved": 25.33, + "date": "2024-09-25", + "logs": true, + "trajs": true, + "site": "https://arxiv.org/abs/2409.16299", + "verified": false, + "oss": false + }, { "name": "Moatless Tools + GPT 4o (2024-05-13)", "folder": "20240617_moatless_gpt4o", "resolved": 24.67, "date": "2024-06-17", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240617_moatless_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240617_moatless_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/aorwall/moatless-tools", "verified": true, "oss": true }, + { + "name": "IBM SWE-1.0 (with open LLMs)", + "folder": "20241016_IBM-SWE-1.0", + "resolved": 23.67, + "date": "2024-10-16", + "logs": true, + "trajs": true, + "site": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20241016_IBM-SWE-1.0", + "verified": false, + "oss": false + }, { "name": "OpenCSG StarShip CodeGenAgent + GPT 4 (0613)", "folder": "20240524_opencsg_starship_gpt4", "resolved": 23.67, "date": "2024-05-24", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240524_opencsg_starship_gpt4/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://opencsg.com/product?class=StarShip", "verified": false, "oss": false @@ -634,8 +810,8 @@ "folder": "20240620_sweagent_claude3.5sonnet", "resolved": 23.0, "date": "2024-06-20", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240620_sweagent_claude3.5sonnet/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240620_sweagent_claude3.5sonnet/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true @@ -645,8 +821,8 @@ "folder": "20240615_appmap-navie_gpt4o", "resolved": 21.67, "date": "2024-06-15", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240615_appmap-navie_gpt4o/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://appmap.io/navie", "verified": true, "oss": true @@ -656,8 +832,8 @@ "folder": "20240828_autose_mixed", "resolved": 21.67, "date": "2024-08-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240828_autose_mixed/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240828_autose_mixed/trajs", + "logs": true, + "trajs": true, "site": null, "verified": false, "oss": false @@ -667,8 +843,8 @@ "folder": "20240509_amazon-q-developer-agent-20240430-dev", "resolved": 20.33, "date": "2024-05-09", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240509_amazon-q-developer-agent-20240430-dev/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://aws.amazon.com/q/developer/", "verified": false, "oss": false @@ -678,8 +854,8 @@ "folder": "20240530_autocoderover-v20240408", "resolved": 19.0, "date": "2024-05-30", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240530_autocoderover-v20240408/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/nus-apr/auto-code-rover", "verified": false, "oss": true @@ -689,8 +865,8 @@ "folder": "20240728_sweagent_gpt4o", "resolved": 18.33, "date": "2024-07-28", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240728_sweagent_gpt4o/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240728_sweagent_gpt4o/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -700,8 +876,8 @@ "folder": "20240402_sweagent_gpt4", "resolved": 18.0, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_sweagent_gpt4/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_sweagent_gpt4/trajs", + "logs": true, + "trajs": true, "site": "https://github.com/princeton-nlp/SWE-agent", "verified": true, "oss": true @@ -711,8 +887,8 @@ "folder": "20240402_sweagent_claude3opus", "resolved": 11.67, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_sweagent_claude3opus/logs", - "trajs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_sweagent_claude3opus/trajs", + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true @@ -722,8 +898,8 @@ "folder": "20240402_rag_claude3opus", "resolved": 4.33, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_rag_claude3opus/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": "https://github.com/princeton-nlp/SWE-bench/tree/main/swebench/inference", "verified": true, "oss": true @@ -733,8 +909,8 @@ "folder": "20231010_rag_claude2", "resolved": 3.0, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20231010_rag_claude2/logs", - "trajs": null, + "logs": true, + "trajs": true, "site": null, "verified": true, "oss": true @@ -744,8 +920,8 @@ "folder": "20240402_rag_gpt4", "resolved": 2.67, "date": "2024-04-02", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20240402_rag_gpt4/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -755,8 +931,8 @@ "folder": "20231010_rag_swellama7b", "resolved": 1.33, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20231010_rag_swellama7b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -766,8 +942,8 @@ "folder": "20231010_rag_swellama13b", "resolved": 1.0, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20231010_rag_swellama13b/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true @@ -777,8 +953,8 @@ "folder": "20231010_rag_gpt35", "resolved": 0.33, "date": "2023-10-10", - "logs": "https://github.com/swe-bench/experiments/tree/main/evaluation/lite/20231010_rag_gpt35/logs", - "trajs": null, + "logs": true, + "trajs": false, "site": null, "verified": true, "oss": true diff --git a/template/template_index.html b/template/template_index.html index 00a26f2..a440605 100644 --- a/template/template_index.html +++ b/template/template_index.html @@ -169,16 +169,12 @@

Leaderboard

{{item.date}}

- {% if item.logs %} - 🔗 - {% else %} - {% endif %} + {% if item.logs %}✓{% else %} - {% endif %}

- {% if item.trajs %} - 🔗 - {% else %} - {% endif %} + {% if item.trajs %}✓{% else %} - {% endif %}

diff --git a/viewer.html b/viewer.html index 6daed9a..620d138 100644 --- a/viewer.html +++ b/viewer.html @@ -115,18 +115,30 @@

SWE-bench Analysis

+ @@ -147,6 +160,7 @@

SWE-bench Analysis

+ @@ -157,7 +171,9 @@

SWE-bench Analysis

+ +