adriabama06 commited on
Commit
3fb1a1e
·
verified ·
1 Parent(s): 65bb5e4

Add files using upload-large-folder tool

Browse files
Files changed (44) hide show
  1. .gitattributes +12 -0
  2. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/law_result.json +3 -0
  3. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/math_result.json +3 -0
  4. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/physics_result.json +3 -0
  5. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/chemistry_result.json +3 -0
  6. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/economics_result.json +0 -0
  7. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/engineering_result.json +3 -0
  8. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/health_result.json +0 -0
  9. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/history_result.json +0 -0
  10. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_result.json +3 -0
  11. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_summary.json +16 -0
  12. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/math_result.json +3 -0
  13. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/math_summary.json +16 -0
  14. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/other_result.json +0 -0
  15. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/other_summary.json +16 -0
  16. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/philosophy_result.json +0 -0
  17. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/philosophy_summary.json +16 -0
  18. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_result.json +3 -0
  19. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_summary.json +16 -0
  20. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/psychology_result.json +0 -0
  21. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/psychology_summary.json +16 -0
  22. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/report.txt +106 -0
  23. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/biology_result.json +0 -0
  24. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/biology_summary.json +16 -0
  25. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/business_result.json +0 -0
  26. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/business_summary.json +16 -0
  27. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/chemistry_result.json +0 -0
  28. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/chemistry_summary.json +16 -0
  29. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/computer science_result.json +0 -0
  30. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/computer science_summary.json +16 -0
  31. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/economics_result.json +0 -0
  32. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/economics_summary.json +16 -0
  33. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/engineering_result.json +3 -0
  34. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/engineering_summary.json +16 -0
  35. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/health_result.json +0 -0
  36. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/health_summary.json +16 -0
  37. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/history_summary.json +16 -0
  38. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/law_result.json +3 -0
  39. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/law_summary.json +16 -0
  40. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/math_result.json +3 -0
  41. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/math_summary.json +16 -0
  42. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/other_summary.json +16 -0
  43. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/philosophy_summary.json +16 -0
  44. Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/physics_result.json +3 -0
.gitattributes CHANGED
@@ -45,3 +45,15 @@ mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/physics_result.json filter=lfs
45
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/engineering_result.json filter=lfs diff=lfs merge=lfs -text
46
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_result.json filter=lfs diff=lfs merge=lfs -text
47
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_result.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/engineering_result.json filter=lfs diff=lfs merge=lfs -text
46
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_result.json filter=lfs diff=lfs merge=lfs -text
47
  mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_result.json filter=lfs diff=lfs merge=lfs -text
48
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/engineering_result.json filter=lfs diff=lfs merge=lfs -text
49
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/math_result.json filter=lfs diff=lfs merge=lfs -text
50
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/physics_result.json filter=lfs diff=lfs merge=lfs -text
51
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/law_result.json filter=lfs diff=lfs merge=lfs -text
52
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/law_result.json filter=lfs diff=lfs merge=lfs -text
53
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/math_result.json filter=lfs diff=lfs merge=lfs -text
54
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/chemistry_result.json filter=lfs diff=lfs merge=lfs -text
55
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/physics_result.json filter=lfs diff=lfs merge=lfs -text
56
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/engineering_result.json filter=lfs diff=lfs merge=lfs -text
57
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/math_result.json filter=lfs diff=lfs merge=lfs -text
58
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_result.json filter=lfs diff=lfs merge=lfs -text
59
+ Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_result.json filter=lfs diff=lfs merge=lfs -text
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/law_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b23241512638db98e31d6bec4e5f802a58c924fae3d3f8da02aed47364ce5874
3
+ size 15355396
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/math_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af1044d18a7b913be681d6f5289e143b2921ab0f5140ffb929c941830f09e87
3
+ size 11484819
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-f16/physics_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5877396e5be250783dc31073624b302d20b2b8f98de83dd817d3edf302905bce
3
+ size 11763238
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/chemistry_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfcb447b430b9b7140154f7944a72c00d583cbaf2bbf9c7471182793b19e13c8
3
+ size 10838451
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/economics_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/engineering_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81d54e2714a78dec12c7fb06304987261fecd8a9cdb87159f7ca2533d3f086b8
3
+ size 10894258
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/health_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/history_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9645f52a597b87325312f0aa3cd020ca0287430cb5b3b86f06675d478a03cc43
3
+ size 15956924
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/law_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "law": {
3
+ "corr": 345.0,
4
+ "wrong": 756.0,
5
+ "acc": 0.3133514986376022
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 3.0
10
+ },
11
+ "total": {
12
+ "corr": 345.0,
13
+ "wrong": 756.0,
14
+ "acc": 0.3133514986376022
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/math_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e825c423d8d24b2207a7b60dd5171f799f9b83dc3dd00facecc0bdef6a22c34
3
+ size 11774643
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/math_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "math": {
3
+ "corr": 1117.0,
4
+ "wrong": 234.0,
5
+ "acc": 0.8267949666913398
6
+ },
7
+ "random": {
8
+ "corr": 8.0,
9
+ "wrong": 43.0
10
+ },
11
+ "total": {
12
+ "corr": 1117.0,
13
+ "wrong": 234.0,
14
+ "acc": 0.8267949666913398
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/other_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/other_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "other": {
3
+ "corr": 520.0,
4
+ "wrong": 404.0,
5
+ "acc": 0.5627705627705628
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 1.0
10
+ },
11
+ "total": {
12
+ "corr": 520.0,
13
+ "wrong": 404.0,
14
+ "acc": 0.5627705627705628
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/philosophy_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/philosophy_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "philosophy": {
3
+ "corr": 266.0,
4
+ "wrong": 233.0,
5
+ "acc": 0.533066132264529
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 266.0,
13
+ "wrong": 233.0,
14
+ "acc": 0.533066132264529
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5867a38a1ff261ff46d64db7c5e09f61827bac6a69a62046b523a6280fc8e6b
3
+ size 12196030
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/physics_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "physics": {
3
+ "corr": 931.0,
4
+ "wrong": 368.0,
5
+ "acc": 0.7167051578137028
6
+ },
7
+ "random": {
8
+ "corr": 1.0,
9
+ "wrong": 9.0
10
+ },
11
+ "total": {
12
+ "corr": 931.0,
13
+ "wrong": 368.0,
14
+ "acc": 0.7167051578137028
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/psychology_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/psychology_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "psychology": {
3
+ "corr": 539.0,
4
+ "wrong": 259.0,
5
+ "acc": 0.6754385964912281
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 539.0,
13
+ "wrong": 259.0,
14
+ "acc": 0.6754385964912281
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0/report.txt ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-12 12:17:14.006249
2
+ {
3
+ "comment": "",
4
+ "server": {
5
+ "url": "http://localhost:8000/v1",
6
+ "model": "Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q4_0",
7
+ "timeout": 120.0
8
+ },
9
+ "inference": {
10
+ "temperature": 0.7,
11
+ "top_p": 0.8,
12
+ "max_tokens": 4096,
13
+ "system_prompt": "The following are multiple choice questions (with answers) about {subject}. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice.",
14
+ "style": "multi_chat"
15
+ },
16
+ "test": {
17
+ "subset": 1.0,
18
+ "parallel": 8
19
+ },
20
+ "log": {
21
+ "verbosity": 0,
22
+ "log_prompt": true
23
+ }
24
+ }
25
+ Finished testing biology in .
26
+ Total, 569/717, 79.36%
27
+ Random Guess Attempts, 3/717, 0.42%
28
+ Correct Random Guesses, 0/3, 0.00%
29
+ Adjusted Score Without Random Guesses, 569/714, 79.69%
30
+ Finished testing business in .
31
+ Total, 570/789, 72.24%
32
+ Random Guess Attempts, 9/789, 1.14%
33
+ Correct Random Guesses, 2/9, 22.22%
34
+ Adjusted Score Without Random Guesses, 568/780, 72.82%
35
+ Finished testing chemistry in .
36
+ Total, 794/1132, 70.14%
37
+ Random Guess Attempts, 11/1132, 0.97%
38
+ Correct Random Guesses, 2/11, 18.18%
39
+ Adjusted Score Without Random Guesses, 792/1121, 70.65%
40
+ Finished testing computer science in .
41
+ Total, 284/410, 69.27%
42
+ Random Guess Attempts, 6/410, 1.46%
43
+ Correct Random Guesses, 0/6, 0.00%
44
+ Adjusted Score Without Random Guesses, 284/404, 70.30%
45
+ Finished testing economics in .
46
+ Total, 610/844, 72.27%
47
+ Random Guess Attempts, 5/844, 0.59%
48
+ Correct Random Guesses, 1/5, 20.00%
49
+ Adjusted Score Without Random Guesses, 609/839, 72.59%
50
+ Finished testing engineering in .
51
+ Total, 420/969, 43.34%
52
+ Random Guess Attempts, 12/969, 1.24%
53
+ Correct Random Guesses, 2/12, 16.67%
54
+ Adjusted Score Without Random Guesses, 418/957, 43.68%
55
+ Finished testing health in .
56
+ Total, 515/818, 62.96%
57
+ Random Guess Attempts, 1/818, 0.12%
58
+ Correct Random Guesses, 0/1, 0.00%
59
+ Adjusted Score Without Random Guesses, 515/817, 63.04%
60
+ Finished testing history in .
61
+ Total, 184/381, 48.29%
62
+ Random Guess Attempts, 0/381, 0.00%
63
+ Correct Random Guesses, division by zero error
64
+ Adjusted Score Without Random Guesses, 184/381, 48.29%
65
+ Finished testing law in 38 minutes 33 seconds.
66
+ Total, 345/1101, 31.34%
67
+ Random Guess Attempts, 3/1101, 0.27%
68
+ Correct Random Guesses, 0/3, 0.00%
69
+ Adjusted Score Without Random Guesses, 345/1098, 31.42%
70
+ Finished testing math in 1 hours 25 minutes 28 seconds.
71
+ Total, 1117/1351, 82.68%
72
+ Random Guess Attempts, 51/1351, 3.77%
73
+ Correct Random Guesses, 8/51, 15.69%
74
+ Adjusted Score Without Random Guesses, 1109/1300, 85.31%
75
+ Finished testing philosophy in 19 minutes 20 seconds.
76
+ Total, 266/499, 53.31%
77
+ Random Guess Attempts, 0/499, 0.00%
78
+ Correct Random Guesses, division by zero error
79
+ Adjusted Score Without Random Guesses, 266/499, 53.31%
80
+ Finished testing physics in 1 hours 33 minutes 5 seconds.
81
+ Total, 931/1299, 71.67%
82
+ Random Guess Attempts, 10/1299, 0.77%
83
+ Correct Random Guesses, 1/10, 10.00%
84
+ Adjusted Score Without Random Guesses, 930/1289, 72.15%
85
+ Finished testing psychology in 24 minutes 38 seconds.
86
+ Total, 539/798, 67.54%
87
+ Random Guess Attempts, 0/798, 0.00%
88
+ Correct Random Guesses, division by zero error
89
+ Adjusted Score Without Random Guesses, 539/798, 67.54%
90
+ Finished testing other in 27 minutes 35 seconds.
91
+ Total, 520/924, 56.28%
92
+ Random Guess Attempts, 1/924, 0.11%
93
+ Correct Random Guesses, 0/1, 0.00%
94
+ Adjusted Score Without Random Guesses, 520/923, 56.34%
95
+ Finished the benchmark in 4 hours 48 minutes 49 seconds.
96
+ Total, 7664/12032, 63.70%
97
+ Random Guess Attempts, 112/12032, 0.93%
98
+ Correct Random Guesses, 16/112, 14.29%
99
+ Adjusted Score Without Random Guesses, 7648/11920, 64.16%
100
+ Token Usage:
101
+ Prompt tokens: min 902, average 1348, max 2396, total 7363918, tk/s 424.95
102
+ Completion tokens: min 55, average 833, max 3187, total 4550365, tk/s 262.59
103
+ Markdown Table:
104
+ | overall | biology | business | chemistry | computer science | economics | engineering | health | history | law | math | philosophy | physics | psychology | other |
105
+ | ------- | ------- | -------- | --------- | ---------------- | --------- | ----------- | ------ | ------- | --- | ---- | ---------- | ------- | ---------- | ----- |
106
+ | 63.70 | 79.36 | 72.24 | 70.14 | 69.27 | 72.27 | 43.34 | 62.96 | 48.29 | 31.34 | 82.68 | 53.31 | 71.67 | 67.54 | 56.28 |
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/biology_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/biology_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "biology": {
3
+ "corr": 584.0,
4
+ "wrong": 133.0,
5
+ "acc": 0.8145048814504882
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 2.0
10
+ },
11
+ "total": {
12
+ "corr": 584.0,
13
+ "wrong": 133.0,
14
+ "acc": 0.8145048814504882
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/business_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/business_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "business": {
3
+ "corr": 572.0,
4
+ "wrong": 217.0,
5
+ "acc": 0.7249683143219265
6
+ },
7
+ "random": {
8
+ "corr": 1.0,
9
+ "wrong": 3.0
10
+ },
11
+ "total": {
12
+ "corr": 572.0,
13
+ "wrong": 217.0,
14
+ "acc": 0.7249683143219265
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/chemistry_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/chemistry_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chemistry": {
3
+ "corr": 825.0,
4
+ "wrong": 307.0,
5
+ "acc": 0.7287985865724381
6
+ },
7
+ "random": {
8
+ "corr": 1.0,
9
+ "wrong": 11.0
10
+ },
11
+ "total": {
12
+ "corr": 825.0,
13
+ "wrong": 307.0,
14
+ "acc": 0.7287985865724381
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/computer science_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/computer science_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "computer science": {
3
+ "corr": 301.0,
4
+ "wrong": 109.0,
5
+ "acc": 0.7341463414634146
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 2.0
10
+ },
11
+ "total": {
12
+ "corr": 301.0,
13
+ "wrong": 109.0,
14
+ "acc": 0.7341463414634146
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/economics_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/economics_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "economics": {
3
+ "corr": 643.0,
4
+ "wrong": 201.0,
5
+ "acc": 0.7618483412322274
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 2.0
10
+ },
11
+ "total": {
12
+ "corr": 643.0,
13
+ "wrong": 201.0,
14
+ "acc": 0.7618483412322274
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/engineering_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd18b7a0dc848c6b928ce25e554819d3094f5d627846a136faf399f9073c6147
3
+ size 10504544
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/engineering_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "engineering": {
3
+ "corr": 461.0,
4
+ "wrong": 508.0,
5
+ "acc": 0.4757481940144479
6
+ },
7
+ "random": {
8
+ "corr": 2.0,
9
+ "wrong": 13.0
10
+ },
11
+ "total": {
12
+ "corr": 461.0,
13
+ "wrong": 508.0,
14
+ "acc": 0.4757481940144479
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/health_result.json ADDED
The diff for this file is too large to render. See raw diff
 
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/health_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "health": {
3
+ "corr": 541.0,
4
+ "wrong": 277.0,
5
+ "acc": 0.6613691931540342
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 541.0,
13
+ "wrong": 277.0,
14
+ "acc": 0.6613691931540342
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/history_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "history": {
3
+ "corr": 196.0,
4
+ "wrong": 185.0,
5
+ "acc": 0.5144356955380578
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 196.0,
13
+ "wrong": 185.0,
14
+ "acc": 0.5144356955380578
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/law_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:999fd473fb2469e5171b80b8418ee25a03e10819d9db7ee781f1bccc59abb353
3
+ size 15183561
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/law_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "law": {
3
+ "corr": 370.0,
4
+ "wrong": 731.0,
5
+ "acc": 0.33605812897366033
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 1.0
10
+ },
11
+ "total": {
12
+ "corr": 370.0,
13
+ "wrong": 731.0,
14
+ "acc": 0.33605812897366033
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/math_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17e5af99024292f2744f6d00fdb1759c06a0bb44ede074d6d75a2666fd7226a6
3
+ size 11443321
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/math_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "math": {
3
+ "corr": 1142.0,
4
+ "wrong": 209.0,
5
+ "acc": 0.845299777942265
6
+ },
7
+ "random": {
8
+ "corr": 4.0,
9
+ "wrong": 30.0
10
+ },
11
+ "total": {
12
+ "corr": 1142.0,
13
+ "wrong": 209.0,
14
+ "acc": 0.845299777942265
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/other_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "other": {
3
+ "corr": 528.0,
4
+ "wrong": 396.0,
5
+ "acc": 0.5714285714285714
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 528.0,
13
+ "wrong": 396.0,
14
+ "acc": 0.5714285714285714
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/philosophy_summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "philosophy": {
3
+ "corr": 275.0,
4
+ "wrong": 224.0,
5
+ "acc": 0.5511022044088176
6
+ },
7
+ "random": {
8
+ "corr": 0.0,
9
+ "wrong": 0.0
10
+ },
11
+ "total": {
12
+ "corr": 275.0,
13
+ "wrong": 224.0,
14
+ "acc": 0.5511022044088176
15
+ }
16
+ }
Qwen3_4B_kv_cache_f16_vs_q8_vs_q4/mmlu-pro/Qwen3-4B-Instruct-2507-UD-Q4_K_XL-kv-q8_0/physics_result.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4d07d1894f995eb47aa3b0868d74336beb3341e3772032a187cd52039505b96
3
+ size 11849014