{"rows":[{"split":"cli","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":104,"tasks":104,"splitTasks":105,"passes":26,"passRate":0.24761904761904763,"avgScore":0.4808490601677813,"totalDurationS":82506.89451687399,"totalInputTokens":77212132,"totalOutputTokens":1334925},{"split":"cli","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":103,"tasks":103,"splitTasks":105,"passes":27,"passRate":0.2571428571428571,"avgScore":0.44992902037376187,"totalDurationS":127482.48767670355,"totalInputTokens":375083014,"totalOutputTokens":2384819},{"split":"cli","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":97,"tasks":97,"splitTasks":105,"passes":23,"passRate":0.21904761904761905,"avgScore":0.4472829042528059,"totalDurationS":109765.0964193521,"totalInputTokens":215438714,"totalOutputTokens":2552388},{"split":"cli","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":102,"tasks":102,"splitTasks":105,"passes":25,"passRate":0.23809523809523808,"avgScore":0.44433530114186853,"totalDurationS":132520.17433097732,"totalInputTokens":213060708,"totalOutputTokens":2188072},{"split":"cli","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":104,"tasks":104,"splitTasks":105,"passes":21,"passRate":0.2,"avgScore":0.4326975564273962,"totalDurationS":179218.6959300012,"totalInputTokens":400987904,"totalOutputTokens":3054603},{"split":"cli","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":132,"tasks":102,"splitTasks":105,"passes":32,"passRate":0.22539682539682537,"avgScore":0.41848205145060874,"totalDurationS":263248.3473799315,"totalInputTokens":337555947,"totalOutputTokens":6154837},{"split":"cli","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":101,"tasks":101,"splitTasks":105,"passes":23,"passRate":0.21904761904761905,"avgScore":0.41807970372457365,"totalDurationS":67128.53749491161,"totalInputTokens":68935221,"totalOutputTokens":1091037},{"split":"cli","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":101,"tasks":101,"splitTasks":105,"passes":20,"passRate":0.19047619047619047,"avgScore":0.4077768843542395,"totalDurationS":659199.0199999997,"totalInputTokens":153192576,"totalOutputTokens":1763306},{"split":"cli","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":102,"tasks":102,"splitTasks":105,"passes":20,"passRate":0.19047619047619047,"avgScore":0.39529517222257876,"totalDurationS":175143.5352045538,"totalInputTokens":118077445,"totalOutputTokens":1499561},{"split":"cli","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":102,"tasks":102,"splitTasks":105,"passes":17,"passRate":0.1619047619047619,"avgScore":0.3928079937703214,"totalDurationS":1151355.1900000002,"totalInputTokens":112447742,"totalOutputTokens":1784448},{"split":"cli","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":97,"tasks":97,"splitTasks":105,"passes":15,"passRate":0.14285714285714285,"avgScore":0.3795336029767581,"totalDurationS":109581.98021228705,"totalInputTokens":280978346,"totalOutputTokens":2723233},{"split":"cli","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":105,"tasks":104,"splitTasks":105,"passes":17,"passRate":0.1619047619047619,"avgScore":0.3778698318916953,"totalDurationS":234362.016194176,"totalInputTokens":541904804,"totalOutputTokens":2880069},{"split":"cli","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":74,"tasks":70,"splitTasks":105,"passes":16,"passRate":0.14285714285714285,"avgScore":0.33741012872327425,"totalDurationS":83831.48496117,"totalInputTokens":179881747,"totalOutputTokens":1701334},{"split":"cli","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":132,"tasks":101,"splitTasks":105,"passes":21,"passRate":0.14761904761904762,"avgScore":0.3365996634357178,"totalDurationS":304460.4724225318,"totalInputTokens":250376450,"totalOutputTokens":2605125},{"split":"cli","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":104,"tasks":104,"splitTasks":105,"passes":14,"passRate":0.13333333333333333,"avgScore":0.3314411094946031,"totalDurationS":114351.31370857747,"totalInputTokens":541674804,"totalOutputTokens":1138083},{"split":"cli","harness":"hermes","model":"claude-sonnet-4-6","harnessVariant":null,"runs":251,"tasks":97,"splitTasks":105,"passes":36,"passRate":0.13174603174603175,"avgScore":0.32036709192716567,"totalDurationS":261658.25242491878,"totalInputTokens":504461026,"totalOutputTokens":5191898},{"split":"cli","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":109,"tasks":101,"splitTasks":105,"passes":17,"passRate":0.15714285714285714,"avgScore":0.3167154139627384,"totalDurationS":299209.1330697639,"totalInputTokens":1627517076,"totalOutputTokens":2705587},{"split":"cli","harness":"openclaw","model":"claude-sonnet-4-6","harnessVariant":null,"runs":88,"tasks":88,"splitTasks":105,"passes":12,"passRate":0.11428571428571428,"avgScore":0.30973594188319803,"totalDurationS":134071.73485428994,"totalInputTokens":247046472,"totalOutputTokens":1875276},{"split":"cli","harness":"terminus_2","model":"claude-sonnet-4-6","harnessVariant":null,"runs":181,"tasks":98,"splitTasks":105,"passes":24,"passRate":0.11904761904761904,"avgScore":0.3094266498739773,"totalDurationS":437629.945897243,"totalInputTokens":1312497434,"totalOutputTokens":5402732},{"split":"cli","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":215,"tasks":105,"splitTasks":105,"passes":32,"passRate":0.14126984126984127,"avgScore":0.3084164065884847,"totalDurationS":463172.9330586301,"totalInputTokens":619704885,"totalOutputTokens":6568842},{"split":"cli","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":195,"tasks":101,"splitTasks":105,"passes":27,"passRate":0.12857142857142856,"avgScore":0.308411998290028,"totalDurationS":574617.2573029955,"totalInputTokens":766044562,"totalOutputTokens":6979519},{"split":"cli","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":203,"tasks":105,"splitTasks":105,"passes":21,"passRate":0.10476190476190476,"avgScore":0.28577483998342657,"totalDurationS":503718.6721035085,"totalInputTokens":706044872,"totalOutputTokens":8744870},{"split":"cli","harness":"forgecode","model":"claude-sonnet-4-6","harnessVariant":null,"runs":247,"tasks":98,"splitTasks":105,"passes":38,"passRate":0.13333333333333333,"avgScore":0.28521060449931346,"totalDurationS":418234.56122257595,"totalInputTokens":338857366,"totalOutputTokens":4486075},{"split":"cli","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":198,"tasks":105,"splitTasks":105,"passes":20,"passRate":0.1,"avgScore":0.26465217947397845,"totalDurationS":401958.0322236653,"totalInputTokens":554924354,"totalOutputTokens":5624880},{"split":"cli","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":107,"tasks":102,"splitTasks":105,"passes":8,"passRate":0.0761904761904762,"avgScore":0.2425099290969929,"totalDurationS":178505.05761796318,"totalInputTokens":168361864,"totalOutputTokens":1773830},{"split":"cli","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":190,"tasks":105,"splitTasks":105,"passes":16,"passRate":0.08095238095238096,"avgScore":0.2123825941816749,"totalDurationS":614615.709558196,"totalInputTokens":366113437,"totalOutputTokens":7410284},{"split":"cli","harness":"openhands","model":"claude-sonnet-4-6","harnessVariant":null,"runs":288,"tasks":98,"splitTasks":105,"passes":28,"passRate":0.09047619047619047,"avgScore":0.19774111108214873,"totalDurationS":370600.7453296188,"totalInputTokens":652142074,"totalOutputTokens":8105599},{"split":"cli","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":198,"tasks":105,"splitTasks":105,"passes":9,"passRate":0.04285714285714286,"avgScore":0.17504517169766134,"totalDurationS":314984.70812184596,"totalInputTokens":252605365,"totalOutputTokens":3954386},{"split":"cli","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":102,"tasks":101,"splitTasks":105,"passes":5,"passRate":0.047619047619047616,"avgScore":0.1605153704145524,"totalDurationS":98312.85780064017,"totalInputTokens":52985885,"totalOutputTokens":475139},{"split":"cli","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":169,"tasks":105,"splitTasks":105,"passes":9,"passRate":0.05714285714285714,"avgScore":0.1464903265101336,"totalDurationS":294701.9608578151,"totalInputTokens":323457982,"totalOutputTokens":4560919},{"split":"cli","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":103,"tasks":102,"splitTasks":105,"passes":7,"passRate":0.06666666666666667,"avgScore":0.12355059079728772,"totalDurationS":57414.76344731846,"totalInputTokens":88032495,"totalOutputTokens":1906591},{"split":"cli","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":166,"tasks":99,"splitTasks":105,"passes":9,"passRate":0.047619047619047616,"avgScore":0.08569743251483314,"totalDurationS":242658.52943160804,"totalInputTokens":563922591,"totalOutputTokens":7075985},{"split":"full/full-spectrum","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":55,"tasks":55,"splitTasks":55,"passes":12,"passRate":0.21818181818181817,"avgScore":0.4085431620504688,"totalDurationS":48441.414872081004,"totalInputTokens":54547177,"totalOutputTokens":752039},{"split":"full/full-spectrum","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":54,"tasks":54,"splitTasks":55,"passes":11,"passRate":0.2,"avgScore":0.3914654812672938,"totalDurationS":62503.063174276846,"totalInputTokens":200133131,"totalOutputTokens":1766374},{"split":"full/full-spectrum","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":54,"tasks":54,"splitTasks":55,"passes":10,"passRate":0.18181818181818182,"avgScore":0.3814243126815272,"totalDurationS":68508.71212921255,"totalInputTokens":294813811,"totalOutputTokens":1812841},{"split":"full/full-spectrum","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":54,"tasks":54,"splitTasks":55,"passes":11,"passRate":0.2,"avgScore":0.3613239613858418,"totalDurationS":82556.86488501911,"totalInputTokens":154637383,"totalOutputTokens":1331024},{"split":"full/full-spectrum","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":52,"tasks":52,"splitTasks":55,"passes":9,"passRate":0.16363636363636364,"avgScore":0.3499781212135704,"totalDurationS":75367.69921550892,"totalInputTokens":58095095,"totalOutputTokens":823427},{"split":"full/full-spectrum","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":73,"tasks":55,"splitTasks":55,"passes":13,"passRate":0.19393939393939394,"avgScore":0.3479601586565538,"totalDurationS":228883.8851823085,"totalInputTokens":281279117,"totalOutputTokens":3856389},{"split":"full/full-spectrum","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":54,"tasks":54,"splitTasks":55,"passes":11,"passRate":0.2,"avgScore":0.34401547489775935,"totalDurationS":92324.64622803553,"totalInputTokens":38302176,"totalOutputTokens":574101},{"split":"full/full-spectrum","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":53,"tasks":53,"splitTasks":55,"passes":10,"passRate":0.18181818181818182,"avgScore":0.3380852575306984,"totalDurationS":98570.44920987956,"totalInputTokens":116041374,"totalOutputTokens":1132752},{"split":"full/full-spectrum","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":53,"tasks":53,"splitTasks":55,"passes":10,"passRate":0.18181818181818182,"avgScore":0.31388820961370295,"totalDurationS":141403.38,"totalInputTokens":143045069,"totalOutputTokens":1108678},{"split":"full/full-spectrum","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":52,"tasks":52,"splitTasks":55,"passes":7,"passRate":0.12727272727272726,"avgScore":0.30638518208324883,"totalDurationS":52403.701492876746,"totalInputTokens":196311306,"totalOutputTokens":1517056},{"split":"full/full-spectrum","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":67,"tasks":51,"splitTasks":55,"passes":8,"passRate":0.13636363636363635,"avgScore":0.2998336003956633,"totalDurationS":209848.90211759624,"totalInputTokens":93605453,"totalOutputTokens":1329895},{"split":"full/full-spectrum","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":52,"tasks":52,"splitTasks":55,"passes":6,"passRate":0.10909090909090909,"avgScore":0.29000507300408107,"totalDurationS":96473.71219665789,"totalInputTokens":143266727,"totalOutputTokens":1139355},{"split":"full/full-spectrum","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":54,"tasks":54,"splitTasks":55,"passes":6,"passRate":0.10909090909090909,"avgScore":0.28188342946448086,"totalDurationS":267756.61999999994,"totalInputTokens":108825195,"totalOutputTokens":1072300},{"split":"full/full-spectrum","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":99,"tasks":55,"splitTasks":55,"passes":12,"passRate":0.10909090909090909,"avgScore":0.24299048181818186,"totalDurationS":219042.53427295314,"totalInputTokens":198718666,"totalOutputTokens":2725718},{"split":"full/full-spectrum","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":57,"tasks":53,"splitTasks":55,"passes":6,"passRate":0.10909090909090909,"avgScore":0.23948693939393942,"totalDurationS":247630.1694970429,"totalInputTokens":2050991374,"totalOutputTokens":1960021},{"split":"full/full-spectrum","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":100,"tasks":55,"splitTasks":55,"passes":8,"passRate":0.08181818181818182,"avgScore":0.2380152355272775,"totalDurationS":295030.69267301576,"totalInputTokens":414663811,"totalOutputTokens":4928726},{"split":"full/full-spectrum","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":55,"tasks":55,"splitTasks":55,"passes":5,"passRate":0.09090909090909091,"avgScore":0.23666190046044558,"totalDurationS":75286.5860719539,"totalInputTokens":496796484,"totalOutputTokens":841085},{"split":"full/full-spectrum","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":92,"tasks":50,"splitTasks":55,"passes":10,"passRate":0.09090909090909091,"avgScore":0.23034064419481948,"totalDurationS":394669.9254975276,"totalInputTokens":585358730,"totalOutputTokens":4809142},{"split":"full/full-spectrum","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":100,"tasks":55,"splitTasks":55,"passes":9,"passRate":0.09090909090909091,"avgScore":0.2111816877410493,"totalDurationS":219488.03866662446,"totalInputTokens":323045755,"totalOutputTokens":2554178},{"split":"full/full-spectrum","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":90,"tasks":55,"splitTasks":55,"passes":6,"passRate":0.06363636363636363,"avgScore":0.1861677123846492,"totalDurationS":397438.48187087814,"totalInputTokens":129493873,"totalOutputTokens":3498110},{"split":"full/full-spectrum","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":53,"tasks":51,"splitTasks":55,"passes":4,"passRate":0.07272727272727272,"avgScore":0.17410652121212122,"totalDurationS":73150.57581158825,"totalInputTokens":72355051,"totalOutputTokens":909562},{"split":"full/full-spectrum","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":53,"tasks":50,"splitTasks":55,"passes":3,"passRate":0.05454545454545454,"avgScore":0.12991377341047367,"totalDurationS":43291.61047739489,"totalInputTokens":21245879,"totalOutputTokens":184094},{"split":"full/full-spectrum","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":97,"tasks":55,"splitTasks":55,"passes":4,"passRate":0.03636363636363636,"avgScore":0.1290553212121212,"totalDurationS":183534.46326976,"totalInputTokens":96326146,"totalOutputTokens":1845993},{"split":"full/full-spectrum","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":21,"tasks":21,"splitTasks":55,"passes":2,"passRate":0.03636363636363636,"avgScore":0.12386467742457974,"totalDurationS":15990.469136312604,"totalInputTokens":39004391,"totalOutputTokens":484550},{"split":"full/full-spectrum","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":88,"tasks":55,"splitTasks":55,"passes":2,"passRate":0.03636363636363636,"avgScore":0.08604545454545455,"totalDurationS":229523.98397234306,"totalInputTokens":145793775,"totalOutputTokens":2138319},{"split":"full/full-spectrum","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":53,"tasks":51,"splitTasks":55,"passes":2,"passRate":0.03636363636363636,"avgScore":0.07652873704683727,"totalDurationS":52230.21029007761,"totalInputTokens":68693164,"totalOutputTokens":1101230},{"split":"full/full-spectrum","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":81,"tasks":46,"splitTasks":55,"passes":0,"passRate":0,"avgScore":0.021946492765842873,"totalDurationS":150972.69021566678,"totalInputTokens":335273828,"totalOutputTokens":3784823},{"split":"full/last-exam","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":34,"tasks":34,"splitTasks":35,"passes":3,"passRate":0.08571428571428572,"avgScore":0.15172881620788128,"totalDurationS":64920.14367862504,"totalInputTokens":131080206,"totalOutputTokens":763054},{"split":"full/last-exam","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":32,"tasks":30,"splitTasks":35,"passes":3,"passRate":0.08571428571428572,"avgScore":0.14338251428571427,"totalDurationS":147767.35185883916,"totalInputTokens":100954745,"totalOutputTokens":608007},{"split":"full/last-exam","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":32,"tasks":32,"splitTasks":35,"passes":3,"passRate":0.08571428571428572,"avgScore":0.1383974050420168,"totalDurationS":103121.93911079323,"totalInputTokens":216163055,"totalOutputTokens":1248079},{"split":"full/last-exam","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":29,"tasks":29,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.11449708571428573,"totalDurationS":239754.90000000005,"totalInputTokens":116951382,"totalOutputTokens":881474},{"split":"full/last-exam","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":29,"tasks":29,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.1073438924369748,"totalDurationS":103582.21928197237,"totalInputTokens":168928878,"totalOutputTokens":1007407},{"split":"full/last-exam","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":28,"tasks":28,"splitTasks":35,"passes":2,"passRate":0.05714285714285714,"avgScore":0.10557654078379619,"totalDurationS":78832.50030040718,"totalInputTokens":147286321,"totalOutputTokens":1361447},{"split":"full/last-exam","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":31,"tasks":30,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.08659868571428571,"totalDurationS":73553.90128463766,"totalInputTokens":68113501,"totalOutputTokens":508315},{"split":"full/last-exam","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":36,"tasks":33,"splitTasks":35,"passes":2,"passRate":0.05714285714285714,"avgScore":0.08203714285714285,"totalDurationS":178180.71653543902,"totalInputTokens":181818017,"totalOutputTokens":2317299},{"split":"full/last-exam","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":33,"tasks":33,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.07924857142857142,"totalDurationS":174367.63322783523,"totalInputTokens":765029913,"totalOutputTokens":2243316},{"split":"full/last-exam","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":24,"tasks":24,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.0765538,"totalDurationS":51441.97419417533,"totalInputTokens":175794610,"totalOutputTokens":1018333},{"split":"full/last-exam","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":30,"tasks":30,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.07062,"totalDurationS":441356.8599999999,"totalInputTokens":188985932,"totalOutputTokens":974566},{"split":"full/last-exam","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":60,"tasks":34,"splitTasks":35,"passes":2,"passRate":0.02857142857142857,"avgScore":0.06533465714285713,"totalDurationS":380513.95101711166,"totalInputTokens":557772743,"totalOutputTokens":4078203},{"split":"full/last-exam","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":38,"tasks":35,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.060267085714285705,"totalDurationS":228334.72421820485,"totalInputTokens":162296402,"totalOutputTokens":1293105},{"split":"full/last-exam","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":33,"tasks":33,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.04466554285714286,"totalDurationS":95344.92498020912,"totalInputTokens":428880361,"totalOutputTokens":652899},{"split":"full/last-exam","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":57,"tasks":35,"splitTasks":35,"passes":1,"passRate":0.014285714285714285,"avgScore":0.042001142857142855,"totalDurationS":233927.21708044902,"totalInputTokens":254613775,"totalOutputTokens":2113143},{"split":"full/last-exam","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":35,"tasks":35,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.04,"totalDurationS":258900.88108133106,"totalInputTokens":519951247,"totalOutputTokens":1638336},{"split":"full/last-exam","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":66,"tasks":35,"splitTasks":35,"passes":2,"passRate":0.02857142857142857,"avgScore":0.03851714285714286,"totalDurationS":296621.47959489573,"totalInputTokens":415242050,"totalOutputTokens":2785465},{"split":"full/last-exam","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":32,"tasks":27,"splitTasks":35,"passes":1,"passRate":0.02857142857142857,"avgScore":0.02857142857142857,"totalDurationS":47087.921259280294,"totalInputTokens":151514303,"totalOutputTokens":821051},{"split":"full/last-exam","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":46,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.026745714285714286,"totalDurationS":191226.30371885662,"totalInputTokens":112698455,"totalOutputTokens":1923729},{"split":"full/last-exam","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":30,"tasks":30,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.02051142857142857,"totalDurationS":67771.59994092351,"totalInputTokens":140536186,"totalOutputTokens":1127904},{"split":"full/last-exam","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":63,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.020497814285714285,"totalDurationS":239852.80029139307,"totalInputTokens":98233311,"totalOutputTokens":1272682},{"split":"full/last-exam","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":35,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.019786028571428572,"totalDurationS":35020.98048710427,"totalInputTokens":8700406,"totalOutputTokens":103394},{"split":"full/last-exam","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":38,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.01976822857142857,"totalDurationS":71843.9983822346,"totalInputTokens":76397741,"totalOutputTokens":583202},{"split":"full/last-exam","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":64,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.015338814285714287,"totalDurationS":291254.6018178351,"totalInputTokens":440322211,"totalOutputTokens":3375097},{"split":"full/last-exam","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":52,"tasks":35,"splitTasks":35,"passes":1,"passRate":0.014285714285714285,"avgScore":0.015142857142857144,"totalDurationS":303918.51698727557,"totalInputTokens":153591423,"totalOutputTokens":2554497},{"split":"full/last-exam","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":36,"tasks":34,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0.0037285714285714287,"totalDurationS":199149.40209261794,"totalInputTokens":1024184526,"totalOutputTokens":1210970},{"split":"full/last-exam","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":35,"tasks":35,"splitTasks":35,"passes":0,"passRate":0,"avgScore":0,"totalDurationS":57123.511981076095,"totalInputTokens":89770420,"totalOutputTokens":1264366},{"split":"full/near-term","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":21,"passRate":0.3559322033898305,"avgScore":0.7404314376115096,"totalDurationS":58053.733442712,"totalInputTokens":149490626,"totalOutputTokens":868142},{"split":"full/near-term","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":57,"tasks":57,"splitTasks":59,"passes":23,"passRate":0.3898305084745763,"avgScore":0.7101409827202967,"totalDurationS":134644.0298788912,"totalInputTokens":187696525,"totalOutputTokens":1208418},{"split":"full/near-term","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":60,"tasks":59,"splitTasks":59,"passes":25,"passRate":0.423728813559322,"avgScore":0.7066704716213894,"totalDurationS":106073.53405749508,"totalInputTokens":206885970,"totalOutputTokens":1277834},{"split":"full/near-term","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":60,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.3644067796610169,"avgScore":0.6810493947246481,"totalDurationS":130706.10355556315,"totalInputTokens":48373541,"totalOutputTokens":676817},{"split":"full/near-term","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":58,"tasks":58,"splitTasks":59,"passes":19,"passRate":0.3220338983050847,"avgScore":0.6670294719224472,"totalDurationS":122202.24336090288,"totalInputTokens":94050574,"totalOutputTokens":1436390},{"split":"full/near-term","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":62,"tasks":56,"splitTasks":59,"passes":19,"passRate":0.2966101694915254,"avgScore":0.6669075593430003,"totalDurationS":62710.933380717644,"totalInputTokens":142990215,"totalOutputTokens":1324353},{"split":"full/near-term","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.3728813559322034,"avgScore":0.6650790265166685,"totalDurationS":529283.74,"totalInputTokens":79565421,"totalOutputTokens":922017},{"split":"full/near-term","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":18,"passRate":0.3050847457627119,"avgScore":0.6576604053409585,"totalDurationS":74992.17798316792,"totalInputTokens":296034802,"totalOutputTokens":1710300},{"split":"full/near-term","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":17,"passRate":0.288135593220339,"avgScore":0.6535437199716331,"totalDurationS":937424.2199999997,"totalInputTokens":155487361,"totalOutputTokens":1769273},{"split":"full/near-term","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":58,"tasks":57,"splitTasks":59,"passes":17,"passRate":0.288135593220339,"avgScore":0.6246614353923106,"totalDurationS":164075.0484052577,"totalInputTokens":170394470,"totalOutputTokens":1319098},{"split":"full/near-term","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":75,"tasks":57,"splitTasks":59,"passes":28,"passRate":0.3474576271186441,"avgScore":0.6224835646484256,"totalDurationS":179165.28083586786,"totalInputTokens":83656969,"totalOutputTokens":2567220},{"split":"full/near-term","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":55,"tasks":55,"splitTasks":59,"passes":14,"passRate":0.23728813559322035,"avgScore":0.6165351861901069,"totalDurationS":60097.522464826005,"totalInputTokens":112715211,"totalOutputTokens":1006357},{"split":"full/near-term","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":18,"passRate":0.3050847457627119,"avgScore":0.6145934653744958,"totalDurationS":94972.81659283297,"totalInputTokens":84717050,"totalOutputTokens":843403},{"split":"full/near-term","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":79,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.23728813559322035,"avgScore":0.5508555454954842,"totalDurationS":173597.14907350275,"totalInputTokens":185740668,"totalOutputTokens":1606391},{"split":"full/near-term","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":59,"tasks":57,"splitTasks":59,"passes":18,"passRate":0.2966101694915254,"avgScore":0.5490195892426949,"totalDurationS":182036.45653694542,"totalInputTokens":554077511,"totalOutputTokens":852385},{"split":"full/near-term","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":14,"passRate":0.23728813559322035,"avgScore":0.5033958266441436,"totalDurationS":65706.78400124719,"totalInputTokens":149147582,"totalOutputTokens":585048},{"split":"full/near-term","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":114,"tasks":59,"splitTasks":59,"passes":25,"passRate":0.211864406779661,"avgScore":0.4894211136030381,"totalDurationS":386351.5623639928,"totalInputTokens":256880461,"totalOutputTokens":2618556},{"split":"full/near-term","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":116,"tasks":59,"splitTasks":59,"passes":26,"passRate":0.2090395480225989,"avgScore":0.4805737147733599,"totalDurationS":331607.71330186,"totalInputTokens":290728838,"totalOutputTokens":3287436},{"split":"full/near-term","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":114,"tasks":59,"splitTasks":59,"passes":17,"passRate":0.1440677966101695,"avgScore":0.40682991850254424,"totalDurationS":347806.2566155931,"totalInputTokens":364369239,"totalOutputTokens":4387375},{"split":"full/near-term","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":104,"tasks":57,"splitTasks":59,"passes":18,"passRate":0.1694915254237288,"avgScore":0.39042603639259404,"totalDurationS":359750.1327385056,"totalInputTokens":171713880,"totalOutputTokens":3319440},{"split":"full/near-term","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":109,"tasks":59,"splitTasks":59,"passes":15,"passRate":0.1271186440677966,"avgScore":0.38997662996643717,"totalDurationS":251042.72691141142,"totalInputTokens":154645562,"totalOutputTokens":2564959},{"split":"full/near-term","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":61,"tasks":59,"splitTasks":59,"passes":6,"passRate":0.1016949152542373,"avgScore":0.34471464004101215,"totalDurationS":83012.58024813654,"totalInputTokens":84686127,"totalOutputTokens":893625},{"split":"full/near-term","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":107,"tasks":58,"splitTasks":59,"passes":9,"passRate":0.07627118644067797,"avgScore":0.2775967442449337,"totalDurationS":215692.61779075128,"totalInputTokens":118584588,"totalOutputTokens":1971713},{"split":"full/near-term","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":95,"tasks":59,"splitTasks":59,"passes":12,"passRate":0.11864406779661017,"avgScore":0.269443216579498,"totalDurationS":271849.77140135574,"totalInputTokens":110943900,"totalOutputTokens":1970826},{"split":"full/near-term","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":60,"tasks":59,"splitTasks":59,"passes":9,"passRate":0.15254237288135594,"avgScore":0.2586922170827128,"totalDurationS":68306.41423679003,"totalInputTokens":52724479,"totalOutputTokens":977202},{"split":"full/near-term","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":59,"tasks":58,"splitTasks":59,"passes":4,"passRate":0.06779661016949153,"avgScore":0.20201263315172804,"totalDurationS":38331.24149998929,"totalInputTokens":26212608,"totalOutputTokens":237127},{"split":"full/near-term","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":98,"tasks":55,"splitTasks":59,"passes":10,"passRate":0.0847457627118644,"avgScore":0.1525108470949625,"totalDurationS":189770.2337840125,"totalInputTokens":305751192,"totalOutputTokens":3568457},{"split":"full/overall","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":148,"tasks":148,"splitTasks":149,"passes":36,"passRate":0.24161073825503357,"avgScore":0.4796364919404745,"totalDurationS":171415.29199341807,"totalInputTokens":335118009,"totalOutputTokens":2383235},{"split":"full/overall","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":146,"tasks":145,"splitTasks":149,"passes":39,"passRate":0.26174496644295303,"avgScore":0.4457066099218381,"totalDurationS":291752.3380533074,"totalInputTokens":577686408,"totalOutputTokens":3856937},{"split":"full/overall","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":140,"tasks":140,"splitTasks":149,"passes":32,"passRate":0.21476510067114093,"avgScore":0.4334263036279089,"totalDurationS":263537.8068355869,"totalInputTokens":441470026,"totalOutputTokens":4564211},{"split":"full/overall","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":139,"tasks":139,"splitTasks":149,"passes":34,"passRate":0.22818791946308725,"avgScore":0.4312083448320807,"totalDurationS":336796.6983707431,"totalInputTokens":472666777,"totalOutputTokens":3348577},{"split":"full/overall","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":146,"tasks":146,"splitTasks":149,"passes":29,"passRate":0.19463087248322147,"avgScore":0.4198255108228225,"totalDurationS":317868.52334021573,"totalInputTokens":1355878526,"totalOutputTokens":5766457},{"split":"full/overall","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":145,"tasks":142,"splitTasks":149,"passes":34,"passRate":0.22483221476510068,"avgScore":0.4170048282424899,"totalDurationS":296584.65106823633,"totalInputTokens":154789218,"totalOutputTokens":1759233},{"split":"full/overall","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":142,"tasks":140,"splitTasks":149,"passes":30,"passRate":0.20134228187919462,"avgScore":0.4062295243210848,"totalDurationS":318107.867667181,"totalInputTokens":243766890,"totalOutputTokens":2274837},{"split":"full/overall","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":140,"tasks":140,"splitTasks":149,"passes":33,"passRate":0.2214765100671141,"avgScore":0.406113503981457,"totalDurationS":910442.0199999998,"totalInputTokens":339561872,"totalOutputTokens":2912169},{"split":"full/overall","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":184,"tasks":145,"splitTasks":149,"passes":43,"passRate":0.22259507829977626,"avgScore":0.3941989197340106,"totalDurationS":586229.8825536154,"totalInputTokens":546754103,"totalOutputTokens":8740908},{"split":"full/overall","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":143,"tasks":143,"splitTasks":149,"passes":24,"passRate":0.1610738255033557,"avgScore":0.37942528925417984,"totalDurationS":1646537.699999999,"totalInputTokens":453298488,"totalOutputTokens":3816139},{"split":"full/overall","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":145,"tasks":144,"splitTasks":149,"passes":24,"passRate":0.1610738255033557,"avgScore":0.363793984586381,"totalDurationS":519449.6416832467,"totalInputTokens":833612444,"totalOutputTokens":4096789},{"split":"full/overall","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":137,"tasks":137,"splitTasks":149,"passes":21,"passRate":0.14093959731543623,"avgScore":0.36204470469661065,"totalDurationS":180272.82389862626,"totalInputTokens":449562703,"totalOutputTokens":3651317},{"split":"full/overall","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":184,"tasks":144,"splitTasks":149,"passes":31,"passRate":0.15100671140939598,"avgScore":0.34295753829526865,"totalDurationS":611780.7754093038,"totalInputTokens":441642523,"totalOutputTokens":4229391},{"split":"full/overall","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":107,"tasks":101,"splitTasks":149,"passes":22,"passRate":0.13758389261744966,"avgScore":0.3277817869771067,"totalDurationS":130143.37671120558,"totalInputTokens":357789216,"totalOutputTokens":2827236},{"split":"full/overall","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":152,"tasks":144,"splitTasks":149,"passes":24,"passRate":0.15771812080536912,"avgScore":0.306674076724736,"totalDurationS":628816.0281266063,"totalInputTokens":3629253411,"totalOutputTokens":4023376},{"split":"full/overall","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":146,"tasks":146,"splitTasks":149,"passes":19,"passRate":0.12751677852348994,"avgScore":0.2971815590424764,"totalDurationS":236338.29505341017,"totalInputTokens":1074824427,"totalOutputTokens":2079032},{"split":"full/overall","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":266,"tasks":143,"splitTasks":149,"passes":37,"passRate":0.12416107382550336,"avgScore":0.2941697592838544,"totalDurationS":1161535.438878632,"totalInputTokens":1400011934,"totalOutputTokens":11505901},{"split":"full/overall","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":281,"tasks":149,"splitTasks":149,"passes":40,"passRate":0.1297539149888143,"avgScore":0.28903641390354523,"totalDurationS":847271.7271697088,"totalInputTokens":904689554,"totalOutputTokens":8798619},{"split":"full/overall","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":278,"tasks":149,"splitTasks":149,"passes":25,"passRate":0.087248322147651,"avgScore":0.2525547761453045,"totalDurationS":934091.5511064444,"totalInputTokens":1219355261,"totalOutputTokens":12691198},{"split":"full/overall","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":266,"tasks":149,"splitTasks":149,"passes":25,"passRate":0.087248322147651,"avgScore":0.24223928854884233,"totalDurationS":704457.982658485,"totalInputTokens":732305092,"totalOutputTokens":7232280},{"split":"full/overall","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":246,"tasks":147,"splitTasks":149,"passes":25,"passRate":0.09395973154362416,"avgScore":0.22687490153234072,"totalDurationS":1061107.1315966593,"totalInputTokens":454799176,"totalOutputTokens":9372047},{"split":"full/overall","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":152,"tasks":145,"splitTasks":149,"passes":10,"passRate":0.06711409395973154,"avgScore":0.2054087948260832,"totalDurationS":228007.15444195934,"totalInputTokens":233438919,"totalOutputTokens":2386389},{"split":"full/overall","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":267,"tasks":148,"splitTasks":149,"passes":13,"passRate":0.0436241610738255,"avgScore":0.16237365152428027,"totalDurationS":639079.8813519047,"totalInputTokens":313144045,"totalOutputTokens":5090388},{"split":"full/overall","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":229,"tasks":149,"splitTasks":149,"passes":14,"passRate":0.06040268456375839,"avgScore":0.1447365756925529,"totalDurationS":692600.0590925557,"totalInputTokens":369436130,"totalOutputTokens":6032874},{"split":"full/overall","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":147,"tasks":143,"splitTasks":149,"passes":7,"passRate":0.04697986577181208,"avgScore":0.132594052976698,"totalDurationS":116643.83246448846,"totalInputTokens":56158893,"totalOutputTokens":524615},{"split":"full/overall","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":148,"tasks":145,"splitTasks":149,"passes":11,"passRate":0.0738255033557047,"avgScore":0.13068403587554428,"totalDurationS":177660.13650794374,"totalInputTokens":211188063,"totalOutputTokens":3342798},{"split":"full/overall","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":211,"tasks":128,"splitTasks":149,"passes":11,"passRate":0.040268456375838924,"avgScore":0.07520266497130298,"totalDurationS":387830.84525895957,"totalInputTokens":792539323,"totalOutputTokens":8174331},{"split":"unlicensed/full-spectrum","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":50,"tasks":50,"splitTasks":50,"passes":11,"passRate":0.22,"avgScore":0.40152714009093793,"totalDurationS":37901.38413824699,"totalInputTokens":37966347,"totalOutputTokens":649363},{"split":"unlicensed/full-spectrum","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":49,"tasks":49,"splitTasks":50,"passes":11,"passRate":0.22,"avgScore":0.399929002531715,"totalDurationS":51051.74856577627,"totalInputTokens":132395542,"totalOutputTokens":1420691},{"split":"unlicensed/full-spectrum","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":50,"tasks":50,"splitTasks":50,"passes":9,"passRate":0.18,"avgScore":0.3721347439496799,"totalDurationS":58111.784729670755,"totalInputTokens":227247771,"totalOutputTokens":1618320},{"split":"unlicensed/full-spectrum","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":49,"tasks":49,"splitTasks":50,"passes":11,"passRate":0.22,"avgScore":0.36659841553466704,"totalDurationS":55356.216603215886,"totalInputTokens":116559608,"totalOutputTokens":1192679},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":68,"tasks":50,"splitTasks":50,"passes":12,"passRate":0.19333333333333333,"avgScore":0.35358321851917374,"totalDurationS":180268.4490979833,"totalInputTokens":214153364,"totalOutputTokens":3372060},{"split":"unlicensed/full-spectrum","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":48,"tasks":48,"splitTasks":50,"passes":8,"passRate":0.16,"avgScore":0.3383439333349274,"totalDurationS":71418.70456005765,"totalInputTokens":47747004,"totalOutputTokens":740396},{"split":"unlicensed/full-spectrum","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":49,"tasks":49,"splitTasks":50,"passes":10,"passRate":0.2,"avgScore":0.33014943333333335,"totalDurationS":52773.9532048766,"totalInputTokens":32724010,"totalOutputTokens":508279},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":49,"tasks":49,"splitTasks":50,"passes":9,"passRate":0.18,"avgScore":0.32062378328376817,"totalDurationS":77908.58097421432,"totalInputTokens":71924602,"totalOutputTokens":967267},{"split":"unlicensed/full-spectrum","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":48,"tasks":48,"splitTasks":50,"passes":9,"passRate":0.18,"avgScore":0.3116770305750731,"totalDurationS":117657.03000000003,"totalInputTokens":90363256,"totalOutputTokens":923898},{"split":"unlicensed/full-spectrum","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":50,"tasks":50,"splitTasks":50,"passes":6,"passRate":0.12,"avgScore":0.29247177241092903,"totalDurationS":239030.98,"totalInputTokens":87892469,"totalOutputTokens":1004376},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":49,"tasks":49,"splitTasks":50,"passes":5,"passRate":0.1,"avgScore":0.28473197076864204,"totalDurationS":86560.86541658566,"totalInputTokens":122093481,"totalOutputTokens":1061008},{"split":"unlicensed/full-spectrum","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":47,"tasks":47,"splitTasks":50,"passes":6,"passRate":0.12,"avgScore":0.28246770029157375,"totalDurationS":41494.61762869381,"totalInputTokens":131778275,"totalOutputTokens":1305862},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":62,"tasks":46,"splitTasks":50,"passes":7,"passRate":0.13,"avgScore":0.2741536401039251,"totalDurationS":169700.86793887336,"totalInputTokens":91516741,"totalOutputTokens":987041},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":93,"tasks":50,"splitTasks":50,"passes":11,"passRate":0.11,"avgScore":0.25488953000000003,"totalDurationS":183051.95904689992,"totalInputTokens":178238163,"totalOutputTokens":2581594},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":92,"tasks":50,"splitTasks":50,"passes":8,"passRate":0.09,"avgScore":0.24758475908000527,"totalDurationS":252864.5129532107,"totalInputTokens":341990928,"totalOutputTokens":4446278},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":84,"tasks":45,"splitTasks":50,"passes":10,"passRate":0.1,"avgScore":0.23460225925256942,"totalDurationS":308892.93326904054,"totalInputTokens":394267024,"totalOutputTokens":3808838},{"split":"unlicensed/full-spectrum","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":50,"tasks":50,"splitTasks":50,"passes":4,"passRate":0.08,"avgScore":0.2331280905064901,"totalDurationS":49010.803296454986,"totalInputTokens":299762808,"totalOutputTokens":610760},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":52,"tasks":48,"splitTasks":50,"passes":5,"passRate":0.1,"avgScore":0.23143563333333333,"totalDurationS":203091.6520314219,"totalInputTokens":1614859273,"totalOutputTokens":1787785},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":92,"tasks":50,"splitTasks":50,"passes":9,"passRate":0.1,"avgScore":0.221446,"totalDurationS":199928.7054519036,"totalInputTokens":250676343,"totalOutputTokens":2264732},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":85,"tasks":50,"splitTasks":50,"passes":6,"passRate":0.07,"avgScore":0.19278448362311412,"totalDurationS":335295.1560987772,"totalInputTokens":127970868,"totalOutputTokens":3431655},{"split":"unlicensed/full-spectrum","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":48,"tasks":46,"splitTasks":50,"passes":4,"passRate":0.08,"avgScore":0.18911717333333333,"totalDurationS":69259.63338095816,"totalInputTokens":71479467,"totalOutputTokens":897788},{"split":"unlicensed/full-spectrum","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":48,"tasks":46,"splitTasks":50,"passes":3,"passRate":0.06,"avgScore":0.140505150751521,"totalDurationS":41583.81237957394,"totalInputTokens":20932064,"totalOutputTokens":181917},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":90,"tasks":50,"splitTasks":50,"passes":4,"passRate":0.04,"avgScore":0.13956085333333335,"totalDurationS":138771.15821689696,"totalInputTokens":84055435,"totalOutputTokens":1651052},{"split":"unlicensed/full-spectrum","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":18,"tasks":18,"splitTasks":50,"passes":1,"passRate":0.02,"avgScore":0.10201958811261162,"totalDurationS":13155.027410391252,"totalInputTokens":33143039,"totalOutputTokens":398088},{"split":"unlicensed/full-spectrum","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":82,"tasks":50,"splitTasks":50,"passes":2,"passRate":0.04,"avgScore":0.08425,"totalDurationS":188897.40388108304,"totalInputTokens":142618380,"totalOutputTokens":1824797},{"split":"unlicensed/full-spectrum","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":48,"tasks":46,"splitTasks":50,"passes":2,"passRate":0.04,"avgScore":0.06738161075152099,"totalDurationS":26623.453760936623,"totalInputTokens":32596183,"totalOutputTokens":892835},{"split":"unlicensed/full-spectrum","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":76,"tasks":42,"splitTasks":50,"passes":0,"passRate":0,"avgScore":0.02174114204242716,"totalDurationS":126215.45193624147,"totalInputTokens":297522060,"totalOutputTokens":3660852},{"split":"unlicensed/last-exam","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":30,"tasks":28,"splitTasks":32,"passes":3,"passRate":0.09375,"avgScore":0.156824625,"totalDurationS":126723.11619016809,"totalInputTokens":91005068,"totalOutputTokens":576865},{"split":"unlicensed/last-exam","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":31,"tasks":31,"splitTasks":32,"passes":2,"passRate":0.0625,"avgScore":0.13470339272737014,"totalDurationS":57783.61611758302,"totalInputTokens":105801785,"totalOutputTokens":674079},{"split":"unlicensed/last-exam","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":29,"tasks":29,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.1252311875,"totalDurationS":239754.9,"totalInputTokens":116951382,"totalOutputTokens":881474},{"split":"unlicensed/last-exam","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":30,"tasks":30,"splitTasks":32,"passes":2,"passRate":0.0625,"avgScore":0.12012216176470589,"totalDurationS":95760.78000609901,"totalInputTokens":197790476,"totalOutputTokens":1161557},{"split":"unlicensed/last-exam","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":27,"tasks":27,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.11740738235294117,"totalDurationS":96586.36003749321,"totalInputTokens":146513141,"totalOutputTokens":953366},{"split":"unlicensed/last-exam","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":25,"tasks":25,"splitTasks":32,"passes":2,"passRate":0.0625,"avgScore":0.11547434148227709,"totalDurationS":67417.0252969123,"totalInputTokens":99245519,"totalOutputTokens":1115901},{"split":"unlicensed/last-exam","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":29,"tasks":28,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.09471731249999998,"totalDurationS":68286.89574431368,"totalInputTokens":53899454,"totalOutputTokens":477463},{"split":"unlicensed/last-exam","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":33,"tasks":30,"splitTasks":32,"passes":2,"passRate":0.0625,"avgScore":0.089728125,"totalDurationS":140854.82617858774,"totalInputTokens":178272927,"totalOutputTokens":2268610},{"split":"unlicensed/last-exam","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":31,"tasks":31,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.086678125,"totalDurationS":154357.00314350135,"totalInputTokens":509615565,"totalOutputTokens":1916172},{"split":"unlicensed/last-exam","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":22,"tasks":22,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.08373071875,"totalDurationS":49828.114969140384,"totalInputTokens":173510816,"totalOutputTokens":1002171},{"split":"unlicensed/last-exam","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":28,"tasks":28,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.07724062500000001,"totalDurationS":426324.1599999999,"totalInputTokens":64294628,"totalOutputTokens":646992},{"split":"unlicensed/last-exam","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":57,"tasks":32,"splitTasks":32,"passes":2,"passRate":0.03125,"avgScore":0.07145978124999999,"totalDurationS":347964.0967674026,"totalInputTokens":502978316,"totalOutputTokens":3882691},{"split":"unlicensed/last-exam","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":35,"tasks":32,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.065917125,"totalDurationS":183853.8755876089,"totalInputTokens":161951527,"totalOutputTokens":1225424},{"split":"unlicensed/last-exam","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":31,"tasks":31,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.0488529375,"totalDurationS":74274.7901808761,"totalInputTokens":322823762,"totalOutputTokens":536633},{"split":"unlicensed/last-exam","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":53,"tasks":32,"splitTasks":32,"passes":1,"passRate":0.015625,"avgScore":0.04593875,"totalDurationS":209208.2983938779,"totalInputTokens":237157265,"totalOutputTokens":2074572},{"split":"unlicensed/last-exam","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":32,"tasks":32,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.04375,"totalDurationS":217781.42886026998,"totalInputTokens":455433307,"totalOutputTokens":1546576},{"split":"unlicensed/last-exam","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":62,"tasks":32,"splitTasks":32,"passes":2,"passRate":0.03125,"avgScore":0.042128125,"totalDurationS":278742.66685064265,"totalInputTokens":395048020,"totalOutputTokens":2669438},{"split":"unlicensed/last-exam","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":32,"tasks":27,"splitTasks":32,"passes":1,"passRate":0.03125,"avgScore":0.03125,"totalDurationS":47087.921259280294,"totalInputTokens":151514303,"totalOutputTokens":821051},{"split":"unlicensed/last-exam","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":43,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.029253125,"totalDurationS":144858.54272869843,"totalInputTokens":112047962,"totalOutputTokens":1573119},{"split":"unlicensed/last-exam","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":28,"tasks":28,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.022434375,"totalDurationS":66186.72876456357,"totalInputTokens":134278645,"totalOutputTokens":1111061},{"split":"unlicensed/last-exam","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":59,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.022419484375,"totalDurationS":217286.0878194772,"totalInputTokens":95949021,"totalOutputTokens":1226214},{"split":"unlicensed/last-exam","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":32,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.02164096875,"totalDurationS":33412.92309264722,"totalInputTokens":8700406,"totalOutputTokens":103394},{"split":"unlicensed/last-exam","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":35,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.0216215,"totalDurationS":70652.35492910666,"totalInputTokens":76397741,"totalOutputTokens":583202},{"split":"unlicensed/last-exam","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":60,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.016776828125,"totalDurationS":261031.08721421525,"totalInputTokens":352137998,"totalOutputTokens":3035233},{"split":"unlicensed/last-exam","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":49,"tasks":32,"splitTasks":32,"passes":1,"passRate":0.015625,"avgScore":0.0165625,"totalDurationS":265082.9437490376,"totalInputTokens":152227749,"totalOutputTokens":2528648},{"split":"unlicensed/last-exam","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":33,"tasks":31,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0.004078125,"totalDurationS":160041.63884501578,"totalInputTokens":1004747537,"totalOutputTokens":1170102},{"split":"unlicensed/last-exam","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":32,"tasks":32,"splitTasks":32,"passes":0,"passRate":0,"avgScore":0,"totalDurationS":54185.68929080991,"totalInputTokens":87731576,"totalOutputTokens":1211698},{"split":"unlicensed/near-term","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":21,"passRate":0.3559322033898305,"avgScore":0.74043143761151,"totalDurationS":58053.733442712,"totalInputTokens":149490626,"totalOutputTokens":868142},{"split":"unlicensed/near-term","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":57,"tasks":57,"splitTasks":59,"passes":23,"passRate":0.3898305084745763,"avgScore":0.7101409827202968,"totalDurationS":134644.0298788912,"totalInputTokens":187696525,"totalOutputTokens":1208418},{"split":"unlicensed/near-term","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":60,"tasks":59,"splitTasks":59,"passes":25,"passRate":0.423728813559322,"avgScore":0.7066704716213895,"totalDurationS":106073.53405749508,"totalInputTokens":206885970,"totalOutputTokens":1277834},{"split":"unlicensed/near-term","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":60,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.3644067796610169,"avgScore":0.6810493947246483,"totalDurationS":130706.10355556315,"totalInputTokens":48373541,"totalOutputTokens":676817},{"split":"unlicensed/near-term","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":58,"tasks":58,"splitTasks":59,"passes":19,"passRate":0.3220338983050847,"avgScore":0.6670294719224473,"totalDurationS":122202.24336090288,"totalInputTokens":94050574,"totalOutputTokens":1436390},{"split":"unlicensed/near-term","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":62,"tasks":56,"splitTasks":59,"passes":19,"passRate":0.2966101694915254,"avgScore":0.6669075593429997,"totalDurationS":62710.933380717644,"totalInputTokens":142990215,"totalOutputTokens":1324353},{"split":"unlicensed/near-term","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.3728813559322034,"avgScore":0.6650790265166682,"totalDurationS":529283.74,"totalInputTokens":79565421,"totalOutputTokens":922017},{"split":"unlicensed/near-term","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":18,"passRate":0.3050847457627119,"avgScore":0.6576604053409587,"totalDurationS":74992.17798316793,"totalInputTokens":296034802,"totalOutputTokens":1710300},{"split":"unlicensed/near-term","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":59,"tasks":59,"splitTasks":59,"passes":17,"passRate":0.288135593220339,"avgScore":0.6535437199716331,"totalDurationS":937424.2199999999,"totalInputTokens":155487361,"totalOutputTokens":1769273},{"split":"unlicensed/near-term","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":58,"tasks":57,"splitTasks":59,"passes":17,"passRate":0.288135593220339,"avgScore":0.6246614353923104,"totalDurationS":164075.04840525772,"totalInputTokens":170394470,"totalOutputTokens":1319098},{"split":"unlicensed/near-term","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":75,"tasks":57,"splitTasks":59,"passes":28,"passRate":0.3474576271186441,"avgScore":0.6224835646484255,"totalDurationS":179165.28083586786,"totalInputTokens":83656969,"totalOutputTokens":2567220},{"split":"unlicensed/near-term","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":55,"tasks":55,"splitTasks":59,"passes":14,"passRate":0.23728813559322035,"avgScore":0.6165351861901066,"totalDurationS":60097.522464826005,"totalInputTokens":112715211,"totalOutputTokens":1006357},{"split":"unlicensed/near-term","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":18,"passRate":0.3050847457627119,"avgScore":0.6145934653744957,"totalDurationS":94972.81659283297,"totalInputTokens":84717050,"totalOutputTokens":843403},{"split":"unlicensed/near-term","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":79,"tasks":58,"splitTasks":59,"passes":22,"passRate":0.23728813559322035,"avgScore":0.5508555454954841,"totalDurationS":173597.14907350275,"totalInputTokens":185740668,"totalOutputTokens":1606391},{"split":"unlicensed/near-term","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":59,"tasks":57,"splitTasks":59,"passes":18,"passRate":0.2966101694915254,"avgScore":0.5490195892426949,"totalDurationS":182036.45653694542,"totalInputTokens":554077511,"totalOutputTokens":852385},{"split":"unlicensed/near-term","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":58,"tasks":58,"splitTasks":59,"passes":14,"passRate":0.23728813559322035,"avgScore":0.5033958266441435,"totalDurationS":65706.78400124719,"totalInputTokens":149147582,"totalOutputTokens":585048},{"split":"unlicensed/near-term","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":114,"tasks":59,"splitTasks":59,"passes":25,"passRate":0.211864406779661,"avgScore":0.489421113603038,"totalDurationS":386351.5623639928,"totalInputTokens":256880461,"totalOutputTokens":2618556},{"split":"unlicensed/near-term","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":116,"tasks":59,"splitTasks":59,"passes":26,"passRate":0.2090395480225989,"avgScore":0.4805737147733599,"totalDurationS":331607.71330185997,"totalInputTokens":290728838,"totalOutputTokens":3287436},{"split":"unlicensed/near-term","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":114,"tasks":59,"splitTasks":59,"passes":17,"passRate":0.1440677966101695,"avgScore":0.40682991850254435,"totalDurationS":347806.2566155931,"totalInputTokens":364369239,"totalOutputTokens":4387375},{"split":"unlicensed/near-term","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":104,"tasks":57,"splitTasks":59,"passes":18,"passRate":0.1694915254237288,"avgScore":0.390426036392594,"totalDurationS":359750.1327385056,"totalInputTokens":171713880,"totalOutputTokens":3319440},{"split":"unlicensed/near-term","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":109,"tasks":59,"splitTasks":59,"passes":15,"passRate":0.1271186440677966,"avgScore":0.3899766299664371,"totalDurationS":251042.72691141142,"totalInputTokens":154645562,"totalOutputTokens":2564959},{"split":"unlicensed/near-term","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":61,"tasks":59,"splitTasks":59,"passes":6,"passRate":0.1016949152542373,"avgScore":0.3447146400410123,"totalDurationS":83012.58024813654,"totalInputTokens":84686127,"totalOutputTokens":893625},{"split":"unlicensed/near-term","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":107,"tasks":58,"splitTasks":59,"passes":9,"passRate":0.07627118644067797,"avgScore":0.2775967442449337,"totalDurationS":215692.61779075128,"totalInputTokens":118584588,"totalOutputTokens":1971713},{"split":"unlicensed/near-term","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":95,"tasks":59,"splitTasks":59,"passes":12,"passRate":0.11864406779661017,"avgScore":0.269443216579498,"totalDurationS":271849.77140135574,"totalInputTokens":110943900,"totalOutputTokens":1970826},{"split":"unlicensed/near-term","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":60,"tasks":59,"splitTasks":59,"passes":9,"passRate":0.15254237288135594,"avgScore":0.2586922170827128,"totalDurationS":68306.41423679003,"totalInputTokens":52724479,"totalOutputTokens":977202},{"split":"unlicensed/near-term","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":59,"tasks":58,"splitTasks":59,"passes":4,"passRate":0.06779661016949153,"avgScore":0.20201263315172804,"totalDurationS":38331.24149998929,"totalInputTokens":26212608,"totalOutputTokens":237127},{"split":"unlicensed/near-term","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":98,"tasks":55,"splitTasks":59,"passes":10,"passRate":0.0847457627118644,"avgScore":0.1525108470949625,"totalDurationS":189770.2337840125,"totalInputTokens":305751192,"totalOutputTokens":3568457},{"split":"unlicensed/overall","harness":"ale_claw","model":"gpt-5-5","harnessVariant":null,"runs":140,"tasks":140,"splitTasks":141,"passes":34,"passRate":0.24113475177304963,"avgScore":0.4827824141198713,"totalDurationS":153738.73369854194,"totalInputTokens":293258758,"totalOutputTokens":2191584},{"split":"unlicensed/overall","harness":"codex","model":"gpt-5-5","harnessVariant":null,"runs":139,"tasks":138,"splitTasks":141,"passes":38,"passRate":0.2695035460992908,"avgScore":0.4529601970132335,"totalDurationS":257190.53066681005,"totalInputTokens":521236054,"totalOutputTokens":3632070},{"split":"unlicensed/overall","harness":"cursor_cli","model":"claude-opus-4-7","harnessVariant":"thinking-high","runs":132,"tasks":132,"splitTasks":141,"passes":32,"passRate":0.22695035460992907,"avgScore":0.44713736097477313,"totalDurationS":240671.01722359145,"totalInputTokens":325691635,"totalOutputTokens":3972982},{"split":"unlicensed/overall","harness":"openclaw","model":"gpt-5-5","harnessVariant":null,"runs":133,"tasks":133,"splitTasks":141,"passes":33,"passRate":0.23404255319148937,"avgScore":0.4374932154608513,"totalDurationS":309138.97089059866,"totalInputTokens":406134268,"totalOutputTokens":3129051},{"split":"unlicensed/overall","harness":"ale_claw","model":"claude-opus-4-7","harnessVariant":null,"runs":140,"tasks":140,"splitTasks":141,"passes":28,"passRate":0.19858156028368795,"avgScore":0.4268255398056778,"totalDurationS":287460.96585634,"totalInputTokens":1032898138,"totalOutputTokens":5244792},{"split":"unlicensed/overall","harness":"cursor_cli","model":"gpt-5-5","harnessVariant":null,"runs":138,"tasks":135,"splitTasks":141,"passes":33,"passRate":0.23049645390070922,"avgScore":0.4235485103221341,"totalDurationS":251766.95250475354,"totalInputTokens":134997005,"totalOutputTokens":1662559},{"split":"unlicensed/overall","harness":"cursor_cli","model":"composer-2-5","harnessVariant":null,"runs":135,"tasks":135,"splitTasks":141,"passes":32,"passRate":0.22695035460992907,"avgScore":0.41724051129955386,"totalDurationS":886695.67,"totalInputTokens":286880059,"totalOutputTokens":2727389},{"split":"unlicensed/overall","harness":"droid","model":"gpt-5-5","harnessVariant":null,"runs":136,"tasks":134,"splitTasks":141,"passes":29,"passRate":0.20567375886524822,"avgScore":0.41274183775774204,"totalDurationS":293114.6373430586,"totalInputTokens":223469122,"totalOutputTokens":2160664},{"split":"unlicensed/overall","harness":"openclaw","model":"gpt-5-4","harnessVariant":null,"runs":176,"tasks":137,"splitTasks":141,"passes":42,"passRate":0.2281323877068558,"avgScore":0.40621979602989944,"totalDurationS":500288.5561124389,"totalInputTokens":476083260,"totalOutputTokens":8207890},{"split":"unlicensed/overall","harness":"claude_code","model":"claude-opus-4-8","harnessVariant":null,"runs":137,"tasks":137,"splitTasks":141,"passes":24,"passRate":0.1702127659574468,"avgScore":0.3947118304884597,"totalDurationS":1602779.36,"totalInputTokens":307674458,"totalOutputTokens":3420641},{"split":"unlicensed/overall","harness":"openclaw","model":"claude-opus-4-7","harnessVariant":null,"runs":139,"tasks":138,"splitTasks":141,"passes":23,"passRate":0.16312056737588654,"avgScore":0.3722810157913363,"totalDurationS":468417.34268211323,"totalInputTokens":747921258,"totalOutputTokens":3926682},{"split":"unlicensed/overall","harness":"claude_code","model":"claude-opus-4-7","harnessVariant":null,"runs":130,"tasks":130,"splitTasks":141,"passes":20,"passRate":0.14184397163120568,"avgScore":0.36324014893471623,"totalDurationS":167778.86885808338,"totalInputTokens":378772131,"totalOutputTokens":3423280},{"split":"unlicensed/overall","harness":"openclaw","model":"claude-opus-4-6","harnessVariant":null,"runs":176,"tasks":136,"splitTasks":141,"passes":30,"passRate":0.1524822695035461,"avgScore":0.3426773559534029,"totalDurationS":527151.892599985,"totalInputTokens":439208936,"totalOutputTokens":3818856},{"split":"unlicensed/overall","harness":"droid","model":"claude-opus-4-7","harnessVariant":null,"runs":102,"tasks":96,"splitTasks":141,"passes":21,"passRate":0.13829787234042554,"avgScore":0.33424048515508914,"totalDurationS":125694.07576024928,"totalInputTokens":349644070,"totalOutputTokens":2724612},{"split":"unlicensed/overall","harness":"openclaw","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":144,"tasks":136,"splitTasks":141,"passes":23,"passRate":0.1595744680851064,"avgScore":0.3127265066098273,"totalDurationS":545169.7474133831,"totalInputTokens":3173684321,"totalOutputTokens":3810272},{"split":"unlicensed/overall","harness":"ale_claw","model":"gpt-5-4","harnessVariant":null,"runs":139,"tasks":139,"splitTasks":141,"passes":18,"passRate":0.1276595744680851,"avgScore":0.30439753402360975,"totalDurationS":188992.37747857836,"totalInputTokens":771734152,"totalOutputTokens":1732441},{"split":"unlicensed/overall","harness":"openclaw","model":"glm-5-1","harnessVariant":null,"runs":255,"tasks":136,"splitTasks":141,"passes":37,"passRate":0.13120567375886524,"avgScore":0.3042033451433171,"totalDurationS":1043208.592400436,"totalInputTokens":1154125801,"totalOutputTokens":10310085},{"split":"unlicensed/overall","harness":"openclaw","model":"deepseek-v4-pro","harnessVariant":null,"runs":271,"tasks":141,"splitTasks":141,"passes":39,"passRate":0.1335697399527187,"avgScore":0.3010384799406259,"totalDurationS":793402.3391994024,"totalInputTokens":864015021,"totalOutputTokens":8538468},{"split":"unlicensed/overall","harness":"openclaw","model":"qwen3-6-plus","harnessVariant":null,"runs":266,"tasks":141,"splitTasks":141,"passes":25,"passRate":0.09219858156028368,"avgScore":0.2618373166358184,"totalDurationS":861701.8567830193,"totalInputTokens":1058498165,"totalOutputTokens":11868886},{"split":"unlicensed/overall","harness":"openclaw","model":"mimo-v2-5","harnessVariant":null,"runs":254,"tasks":141,"splitTasks":141,"passes":25,"passRate":0.09219858156028368,"avgScore":0.2521344763689346,"totalDurationS":660179.7307571927,"totalInputTokens":642479170,"totalOutputTokens":6904263},{"split":"unlicensed/overall","harness":"openclaw","model":"kimi-k2-6","harnessVariant":null,"runs":238,"tasks":139,"splitTasks":141,"passes":25,"passRate":0.09929078014184398,"avgScore":0.23549191722211874,"totalDurationS":960128.2325863205,"totalInputTokens":451912497,"totalOutputTokens":9279743},{"split":"unlicensed/overall","harness":"grok_cli","model":"grok-4-3","harnessVariant":null,"runs":144,"tasks":137,"splitTasks":141,"passes":10,"passRate":0.07092198581560284,"avgScore":0.21621213070274037,"totalDurationS":222924.56855820137,"totalInputTokens":232563335,"totalOutputTokens":2374615},{"split":"unlicensed/overall","harness":"openclaw","model":"grok-4-3","harnessVariant":null,"runs":256,"tasks":140,"splitTasks":141,"passes":13,"passRate":0.04609929078014184,"avgScore":0.17073527714267908,"totalDurationS":571749.8638271254,"totalInputTokens":298589044,"totalOutputTokens":4848979},{"split":"unlicensed/overall","harness":"openclaw","model":"minimax-m2-7","harnessVariant":null,"runs":220,"tasks":141,"splitTasks":141,"passes":14,"passRate":0.06382978723404255,"avgScore":0.14926063672475443,"totalDurationS":605605.7180111373,"totalInputTokens":365610242,"totalOutputTokens":5368742},{"split":"unlicensed/overall","harness":"grok_cli","model":"grok-3","harnessVariant":null,"runs":139,"tasks":136,"splitTasks":141,"passes":7,"passRate":0.04964539007092199,"avgScore":0.13926605598246808,"totalDurationS":113327.97697221045,"totalInputTokens":55845078,"totalOutputTokens":522438},{"split":"unlicensed/overall","harness":"codex","model":"gpt-5-4","harnessVariant":null,"runs":140,"tasks":137,"splitTasks":141,"passes":11,"passRate":0.07801418439716312,"avgScore":0.13214128613798654,"totalDurationS":149115.55728853657,"totalInputTokens":173052238,"totalOutputTokens":3081735},{"split":"unlicensed/overall","harness":"gemini_cli","model":"gemini-3-1-pro-preview","harnessVariant":null,"runs":206,"tasks":124,"splitTasks":141,"passes":11,"passRate":0.0425531914893617,"avgScore":0.07861841901222798,"totalDurationS":363073.60697953426,"totalInputTokens":754787555,"totalOutputTokens":8050360}]}