Update README.md
Browse files
README.md
CHANGED
|
@@ -55,4 +55,26 @@ Conversational AI.
|
|
| 55 |
|
| 56 |
## Evaluations
|
| 57 |
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
## Evaluations
|
| 57 |
|
| 58 |
+
| Tasks |Version| Filter |n-shot| Metric |Value | |Stderr|
|
| 59 |
+
|---------------------------------|-------|----------------|-----:|-----------|-----:|---|-----:|
|
| 60 |
+
|truthfulqa_mc2 | 2|none | 0|acc |0.5627|± |0.0154|
|
| 61 |
+
|gsm8k | 3|strict-match | 5|exact_match|0.5481|± |0.0137|
|
| 62 |
+
| | |flexible-extract| 5|exact_match|0.5557|± |0.0137|
|
| 63 |
+
|agieval_nous |N/A |none | 0|acc |0.3763|± |0.0093|
|
| 64 |
+
| | |none | 0|acc_norm |0.3665|± |0.0093|
|
| 65 |
+
| - agieval_aqua_rat | 1|none | 0|acc |0.2087|± |0.0255|
|
| 66 |
+
| | |none | 0|acc_norm |0.2047|± |0.0254|
|
| 67 |
+
| - agieval_logiqa_en | 1|none | 0|acc |0.3456|± |0.0187|
|
| 68 |
+
| | |none | 0|acc_norm |0.3594|± |0.0188|
|
| 69 |
+
| - agieval_lsat_ar | 1|none | 0|acc |0.1826|± |0.0255|
|
| 70 |
+
| | |none | 0|acc_norm |0.1783|± |0.0253|
|
| 71 |
+
| - agieval_lsat_lr | 1|none | 0|acc |0.3549|± |0.0212|
|
| 72 |
+
| | |none | 0|acc_norm |0.3451|± |0.0211|
|
| 73 |
+
| - agieval_lsat_rc | 1|none | 0|acc |0.5242|± |0.0305|
|
| 74 |
+
| | |none | 0|acc_norm |0.5130|± |0.0305|
|
| 75 |
+
| - agieval_sat_en | 1|none | 0|acc |0.6650|± |0.0330|
|
| 76 |
+
| | |none | 0|acc_norm |0.6505|± |0.0333|
|
| 77 |
+
| - agieval_sat_en_without_passage| 1|none | 0|acc |0.4175|± |0.0344|
|
| 78 |
+
| | |none | 0|acc_norm |0.3738|± |0.0338|
|
| 79 |
+
| - agieval_sat_math | 1|none | 0|acc |0.4227|± |0.0334|
|
| 80 |
+
| | |none | 0|acc_norm |0.3682|± |0.0326|
|