krishnateja95 committed on
Commit
d53b96b
·
verified ·
1 Parent(s): de181c1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +115 -3
README.md CHANGED
@@ -1,3 +1,115 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+
6
+ ### Accuracy
7
+ <table>
8
+ <thead>
9
+ <tr>
10
+ <th>Category</th>
11
+ <th>Metric</th>
12
+ <th>meta-llama/Llama-3.3-70B-Instruct</th>
13
+ <th>nm-testing/Llama-3.3-70B-Instruct-FP8-block</th>
14
+ <th>Recovery (%)</th>
15
+ </tr>
16
+ </thead>
17
+ <tbody>
18
+ <!-- OpenLLM Leaderboard V1 -->
19
+ <tr>
20
+ <td rowspan="7"><b>OpenLLM V1</b></td>
21
+ <td>ARC-Challenge (Acc-Norm, 25-shot)</td>
22
+ <td>72.53</td>
23
+ <td>72.61</td>
24
+ <td>100.12</td>
25
+ </tr>
26
+ <tr>
27
+ <td>GSM8K (Strict-Match, 5-shot)</td>
28
+ <td>76.35</td>
29
+ <td>73.16</td>
30
+ <td>95.83</td>
31
+ </tr>
32
+ <tr>
33
+ <td>HellaSwag (Acc-Norm, 10-shot)</td>
34
+ <td>86.65</td>
35
+ <td>86.56</td>
36
+ <td>99.90</td>
37
+ </tr>
38
+ <tr>
39
+ <td>MMLU (Acc, 5-shot)</td>
40
+ <td>82.51</td>
41
+ <td>82.38</td>
42
+ <td>99.84</td>
43
+ </tr>
44
+ <tr>
45
+ <td>TruthfulQA (MC2, 0-shot)</td>
46
+ <td>62.83</td>
47
+ <td>62.64</td>
48
+ <td>99.69</td>
49
+ </tr>
50
+ <tr>
51
+ <td>Winogrande (Acc, 5-shot)</td>
52
+ <td>83.50</td>
53
+ <td>83.27</td>
54
+ <td>99.72</td>
55
+ </tr>
56
+ <tr>
57
+ <td><b>Average Score</b></td>
58
+ <td><b>77.39</b></td>
59
+ <td><b>76.77</b></td>
60
+ <td><b>99.20</b></td>
61
+ </tr>
62
+ <!-- OpenLLM Leaderboard V2 -->
63
+ <tr>
64
+ <td rowspan="7"><b>OpenLLM V2</b></td>
65
+ <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
66
+ <td>92.57</td>
67
+ <td>92.57</td>
68
+ <td>100.00</td>
69
+ </tr>
70
+ <tr>
71
+ <td>BBH (Acc-Norm, 3-shot)</td>
72
+ <td>69.03</td>
73
+ <td>68.98</td>
74
+ <td>99.92</td>
75
+ </tr>
76
+ <tr>
77
+ <td>Math-Hard (Exact-Match, 4-shot)</td>
78
+ <td>49.24</td>
79
+ <td>49.47</td>
80
+ <td>100.46</td>
81
+ </tr>
82
+ <tr>
83
+ <td>GPQA (Acc-Norm, 0-shot)</td>
84
+ <td>32.63</td>
85
+ <td>32.63</td>
86
+ <td>100.00</td>
87
+ </tr>
88
+ <tr>
89
+ <td>MuSR (Acc-Norm, 0-shot)</td>
90
+ <td>44.31</td>
91
+ <td>43.92</td>
92
+ <td>99.10</td>
93
+ </tr>
94
+ <tr>
95
+ <td>MMLU-Pro (Acc, 5-shot)</td>
96
+ <td>53.55</td>
97
+ <td>53.56</td>
98
+ <td>100.02</td>
99
+ </tr>
100
+ <tr>
101
+ <td><b>Average Score</b></td>
102
+ <td><b>56.89</b></td>
103
+ <td><b>56.85</b></td>
104
+ <td><b>99.93</b></td>
105
+ </tr>
106
+ <!-- Coding -->
107
+ <tr>
108
+ <td rowspan="1"><b>Coding</b></td>
109
+ <td>HumanEval Pass@1</td>
110
+ <td>N/A</td>
111
+ <td>N/A</td>
112
+ <td>N/A</td>
113
+ </tr>
114
+ </tbody>
115
+ </table>