DontPlanToEnd commited on
Commit
bc0f069
·
verified ·
1 Parent(s): bad13a9

Upload ugi-leaderboard-data.csv

Browse files
Files changed (1) hide show
  1. ugi-leaderboard-data.csv +13 -0
ugi-leaderboard-data.csv CHANGED
@@ -751,3 +751,16 @@ openai/gpt-5.2-2025-12-11 (reasoning_effort=low),,12/11/2025,12/11/2025,,,,,FALS
751
  openai/gpt-5.2-2025-12-11 (reasoning_effort=none),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,39.45,24.3,1.8,2.8,3.2,1.8,2.0,1.5,53.09,64.51,50.69,44.07,26.43,0.062,0.4795,0.6288,0.5941,0.4391,-20.8%,68.3%,46.4%,46.3%,59.6%,40.0%,61.7%,40.8%,27.7%,38.8%,28.8%,51.0%,51.7%,36.2%,54.6%,58.5%,65.6%,Liberalism,False,0,0,,29.1,0.9,12.1,4.7,0.377,120.0,100.0,0.815,0.448,0.336,1.18,0.404,0.378,116.7,2640.0,71.3,19.65,3.3,3.7
752
  openai/gpt-5.2-2025-12-11 (reasoning_effort=medium),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,46.07,32.24,0.0,4.1,6.2,1.8,2.0,1.5,56.65,73.34,56.21,40.39,36.84,0.0977,0.4185,0.5807,0.5757,0.3471,-24.2%,68.2%,47.6%,45.6%,63.7%,43.5%,57.9%,44.4%,27.7%,39.6%,28.1%,49.6%,52.9%,34.4%,60.4%,63.1%,67.5%,Liberalism,True,0,0,,29.8,0.89,11.9,4.6,0.381,88.0,90.0,0.834,0.445,0.335,1.303,0.396,0.311,91.2,3113.0,76.3,19.82,2.4,3.9
753
  openai/gpt-5.2-2025-12-11 (reasoning_effort=high),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,44.3,34.62,0.0,4.5,6.2,2.2,3.0,1.5,53.89,67.94,55.86,37.88,38.41,0.0999,0.3885,0.6049,0.462,0.3385,-24.4%,67.2%,47.4%,49.5%,63.3%,43.5%,57.7%,43.3%,30.8%,36.7%,30.8%,57.9%,52.1%,38.5%,55.4%,62.5%,71.9%,Liberalism,True,0,0,,30.7,0.87,11.6,4.7,0.382,82.0,82.0,0.842,0.439,0.333,1.32,0.398,0.308,90.0,3383.0,73.7,20.85,2.6,3.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
751
  openai/gpt-5.2-2025-12-11 (reasoning_effort=none),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,39.45,24.3,1.8,2.8,3.2,1.8,2.0,1.5,53.09,64.51,50.69,44.07,26.43,0.062,0.4795,0.6288,0.5941,0.4391,-20.8%,68.3%,46.4%,46.3%,59.6%,40.0%,61.7%,40.8%,27.7%,38.8%,28.8%,51.0%,51.7%,36.2%,54.6%,58.5%,65.6%,Liberalism,False,0,0,,29.1,0.9,12.1,4.7,0.377,120.0,100.0,0.815,0.448,0.336,1.18,0.404,0.378,116.7,2640.0,71.3,19.65,3.3,3.7
752
  openai/gpt-5.2-2025-12-11 (reasoning_effort=medium),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,46.07,32.24,0.0,4.1,6.2,1.8,2.0,1.5,56.65,73.34,56.21,40.39,36.84,0.0977,0.4185,0.5807,0.5757,0.3471,-24.2%,68.2%,47.6%,45.6%,63.7%,43.5%,57.9%,44.4%,27.7%,39.6%,28.1%,49.6%,52.9%,34.4%,60.4%,63.1%,67.5%,Liberalism,True,0,0,,29.8,0.89,11.9,4.6,0.381,88.0,90.0,0.834,0.445,0.335,1.303,0.396,0.311,91.2,3113.0,76.3,19.82,2.4,3.9
753
  openai/gpt-5.2-2025-12-11 (reasoning_effort=high),,12/11/2025,12/11/2025,,,,,FALSE,FALSE,TRUE,44.3,34.62,0.0,4.5,6.2,2.2,3.0,1.5,53.89,67.94,55.86,37.88,38.41,0.0999,0.3885,0.6049,0.462,0.3385,-24.4%,67.2%,47.4%,49.5%,63.3%,43.5%,57.7%,43.3%,30.8%,36.7%,30.8%,57.9%,52.1%,38.5%,55.4%,62.5%,71.9%,Liberalism,True,0,0,,30.7,0.87,11.6,4.7,0.382,82.0,82.0,0.842,0.439,0.333,1.32,0.398,0.308,90.0,3383.0,73.7,20.85,2.6,3.3
754
+ kawasumi/Tema_Q-R3.1,https://huggingface.co/kawasumi/Tema_Q-R3.1,12/6/2025,12/13/2025,gemma-2,9.0,9.0,9.0,True,False,False,28.29,35.86,4.1,1.5,1.5,8.0,9.0,7.0,16.7,14.97,16.21,18.92,22.06,0.2698,0.1383,0.1272,0.1496,0.2612,8.6%,47.0%,53.6%,49.2%,61.1%,42.7%,62.3%,65.8%,57.1%,43.3%,58.5%,42.3%,52.7%,52.5%,61.9%,48.1%,73.3%,Centrism,False,0,0,Gemma2ForCausalLM,30.2,0.77,12.9,7.6,0.3,20.0,22.0,0.901,0.478,0.281,1.427,0.371,0.229,49.3,8465.0,161.5,24.87,4.5,6.4
755
+ mistralai/Devstral-Small-2-24B-Instruct-2512,https://huggingface.co/mistralai/Devstral-Small-2-24B-Instruct-2512,12/9/2025,12/13/2025,mistral V7-Tekken,24.0,24.0,24.0,False,False,True,31.59,42.11,4.7,2.2,3.9,6.8,5.0,8.5,20.9,28.88,12.76,21.07,34.18,0.3411,0.0897,0.1344,0.1574,0.3311,-13.6%,69.2%,43.8%,38.8%,59.0%,41.0%,68.1%,40.6%,31.0%,31.7%,29.6%,53.1%,31.9%,31.2%,52.3%,57.5%,67.3%,Liberalism,False,0,0,Unknown,37.1,0.94,12.9,4.9,0.346,26.0,70.0,0.881,0.48,0.296,1.297,0.557,0.293,41.7,11721.0,158.1,24.7,7.0,5.1
756
+ huihui-ai/Huihui-Orchestrator-8B-abliterated (thinking=False),https://huggingface.co/huihui-ai/Huihui-Orchestrator-8B-abliterated,11/30/2025,12/13/2025,chatml,8.0,8.0,8.0,True,False,False,12.51,26.86,1.2,1.1,1.5,7.2,8.0,6.5,13.14,21.36,5.17,12.9,12.6,0.0441,0.1601,0.1058,0.097,0.2381,-7.2%,56.5%,45.7%,44.5%,55.8%,48.3%,62.5%,47.9%,40.6%,47.9%,42.1%,47.1%,45.6%,40.8%,45.4%,56.5%,65.6%,Liberalism,False,0,1,Qwen3ForCausalLM,25.8,0.86,11.0,5.4,0.372,15.0,4.0,0.867,0.499,0.35,1.417,0.888,0.223,139.6,7547.0,173.1,26.27,3.3,0.8
757
+ huihui-ai/Huihui-Orchestrator-8B-abliterated (thinking=True),https://huggingface.co/huihui-ai/Huihui-Orchestrator-8B-abliterated,11/30/2025,12/13/2025,chatml,8.0,8.0,8.0,True,False,False,8.47,26.5,1.8,0.8,1.1,7.5,8.0,7.0,12.58,21.74,3.45,12.55,11.35,0.1336,0.1737,0.0772,0.0936,0.1493,-9.7%,51.9%,48.3%,47.7%,54.4%,54.4%,56.7%,56.0%,47.5%,55.0%,41.7%,49.4%,49.6%,44.2%,43.1%,56.7%,63.3%,Centrism,True,3074,3,Qwen3ForCausalLM,28.8,0.65,10.4,6.1,0.352,45.0,96.0,0.884,0.493,0.378,1.523,0.817,0.103,76.4,7072.0,194.3,26.38,3.4,1.4
758
+ yamatazen/Qwen3-V-Science-14B (thinking=False),https://huggingface.co/yamatazen/Qwen3-V-Science-14B,12/9/2025,12/13/2025,chatml,14.0,14.0,14.0,True,True,False,32.41,30.89,2.4,1.3,2.0,7.2,6.0,8.5,19.67,29.08,7.93,21.99,17.89,0.0785,0.1726,0.3094,0.2855,0.2536,-16.9%,62.7%,48.3%,48.0%,61.2%,41.5%,58.1%,44.4%,40.2%,39.4%,32.3%,54.4%,47.7%,41.9%,55.8%,56.7%,71.2%,Liberalism,False,0,0,Qwen3ForCausalLM,36.6,1.16,12.6,4.1,0.371,13.0,39.0,0.884,0.462,0.31,1.407,0.524,0.221,102.8,7107.0,111.1,22.7,4.4,4.6
759
+ yamatazen/Qwen3-V-Science-14B (thinking=True),https://huggingface.co/yamatazen/Qwen3-V-Science-14B,12/9/2025,12/13/2025,chatml,14.0,14.0,14.0,True,True,False,31.1,34.04,1.8,1.5,3.4,7.2,6.0,8.5,16.72,29.79,5.17,15.2,22.02,0.1542,0.1627,0.1643,0.1236,0.1554,-18.7%,62.2%,45.4%,49.7%,62.6%,49.0%,57.9%,43.1%,37.5%,42.3%,33.8%,53.3%,49.4%,46.5%,55.6%,60.4%,71.7%,Liberalism,True,2125,0,Qwen3ForCausalLM,33.9,0.91,12.7,5.1,0.338,33.0,90.0,0.897,0.44,0.316,1.59,0.675,0.142,70.3,7452.0,146.2,25.48,4.1,5.3
760
+ MegaScience/Qwen3-14B-MegaScience (thinking=False),https://huggingface.co/MegaScience/Qwen3-14B-MegaScience,7/19/2025,12/13/2025,chatml,14.0,14.0,14.0,True,False,False,34.33,30.86,2.4,2.2,3.0,5.0,4.0,6.0,18.92,25.66,6.21,24.88,24.87,0.2276,0.1667,0.2746,0.3603,0.2147,-22.6%,69.6%,47.2%,46.4%,61.5%,46.5%,62.1%,50.2%,33.8%,29.8%,27.7%,51.5%,45.4%,42.3%,56.0%,63.1%,65.2%,Liberalism,False,0,2,Qwen3ForCausalLM,31.1,0.97,13.7,5.4,0.295,21.0,44.0,0.882,0.451,0.28,1.537,0.247,0.176,55.3,7310.0,117.5,21.85,3.1,5.4
761
+ MegaScience/Qwen3-14B-MegaScience (thinking=True),https://huggingface.co/MegaScience/Qwen3-14B-MegaScience,7/19/2025,12/13/2025,chatml,14.0,14.0,14.0,True,False,False,34.45,35.66,2.9,2.2,3.5,6.0,7.0,5.0,19.46,26.46,7.93,23.99,28.05,0.379,0.1557,0.2315,0.2197,0.2137,-20.8%,66.7%,45.2%,47.6%,62.5%,47.5%,62.1%,45.2%,35.4%,32.7%,31.7%,53.5%,46.2%,42.9%,59.0%,62.9%,65.6%,Liberalism,False,0,0,Qwen3ForCausalLM,31.0,0.98,13.7,5.3,0.301,24.0,54.0,0.883,0.452,0.274,1.577,0.247,0.195,38.4,7715.0,126.8,23.6,3.6,7.5
762
+ Vortex5/Poetic-Rune-12B,https://huggingface.co/Vortex5/Poetic-Rune-12B,10/8/2025,12/13/2025,chatml,12.0,12.0,12.0,True,True,False,NA,27.45,2.4,1.2,1.9,6.0,6.0,6.0,21.89,24.04,20.69,20.93,17.28,0.1554,0.2154,0.1851,0.2165,0.2741,-19.4%,62.2%,44.6%,44.6%,62.6%,44.2%,67.3%,45.2%,42.3%,38.5%,32.5%,47.3%,50.0%,36.5%,59.8%,62.9%,65.2%,Liberalism,False,0,15,MistralForCausalLM,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.437,0.354,0.261,69.9,5918.0,139.3,23.65,NA,NA
763
+ cgato/Nemo-12b-Humanize-SFT-v0.2.5-KTO,https://huggingface.co/cgato/Nemo-12b-Humanize-SFT-v0.2.5-KTO,6/27/2025,12/13/2025,chatml,12.0,12.0,12.0,True,False,False,26.62,24.52,1.8,1.2,1.6,5.5,7.0,4.0,16.66,15.99,16.21,17.79,15.0,0.1842,0.1459,0.1993,0.0684,0.2917,-6.7%,55.1%,44.6%,42.6%,57.2%,45.4%,61.2%,40.4%,39.0%,53.8%,41.9%,40.2%,52.9%,34.6%,57.9%,55.4%,58.3%,Liberalism,False,0,2,MistralForCausalLM,37.0,1.21,14.2,6.1,0.292,35.0,9.0,0.906,0.496,0.281,1.43,0.298,0.282,63.1,8121.0,135.1,27.4,9.3,6.9
764
+ zelk12/MT-Gen4_gemma-3-12B_flatten,https://huggingface.co/zelk12/MT-Gen4_gemma-3-12B_flatten,12/13/2025,12/13/2025,gemma-3,12.0,12.0,12.0,True,True,False,32.14,27.87,1.8,1.4,1.3,7.0,7.0,7.0,13.2,12.32,7.24,20.04,14.7,0.2369,0.2089,0.201,0.1282,0.2269,-8.0%,57.8%,48.0%,42.4%,64.5%,39.6%,59.6%,43.1%,45.2%,42.7%,38.8%,44.0%,42.1%,41.0%,59.2%,57.7%,76.7%,Liberalism,False,0,0,Gemma3ForConditionalGeneration,31.6,0.81,13.7,6.8,0.312,14.0,70.0,0.868,0.433,0.279,1.689,0.139,0.241,53.8,6074.0,134.6,25.37,9.2,6.7
765
+ Vortex5/Red-Synthesis-12B,https://huggingface.co/Vortex5/Red-Synthesis-12B,12/8/2025,12/13/2025,chatml,12.0,12.0,12.0,True,True,False,39.3,31.72,3.5,1.9,1.7,6.0,7.0,5.0,22.9,28.06,18.97,21.67,22.89,0.2041,0.2047,0.1235,0.2882,0.2629,-19.2%,67.2%,44.2%,41.9%,64.4%,46.5%,65.2%,44.4%,38.3%,27.3%,32.9%,43.1%,46.9%,35.8%,62.1%,66.7%,64.6%,Liberalism,False,0,0,MistralForCausalLM,46.9,0.96,13.4,5.6,0.317,20.0,56.0,0.906,0.481,0.279,1.443,0.315,0.234,59.2,6177.0,163.4,22.67,7.0,6.6
766
+ Vortex5/Shining-Prism-12B,https://huggingface.co/Vortex5/Shining-Prism-12B,12/6/2025,12/13/2025,chatml,12.0,12.0,12.0,True,True,False,39.06,25.37,2.9,1.2,1.3,5.2,6.0,4.5,23.73,24.96,23.45,22.77,16.89,0.2041,0.1741,0.1657,0.3266,0.2682,-20.3%,64.8%,43.4%,44.8%,64.4%,51.5%,65.6%,47.3%,41.7%,28.8%,35.2%,49.8%,48.3%,36.2%,64.6%,66.7%,61.9%,Liberalism,False,0,0,MistralForCausalLM,47.1,0.92,13.1,5.6,0.325,20.0,54.0,0.907,0.479,0.285,1.423,0.339,0.236,59.2,7059.0,145.7,22.22,6.1,5.8