Update README.md
Browse files
README.md
CHANGED
@@ -238,11 +238,9 @@ The model was evaluated in 21 languages on ARC, GSM8K, HellaSwag, TruthfulQA, Tr
|
|
238 |
| Optimizer | AdamW |
|
239 |
| Beta1 | 0.9 |
|
240 |
| Beta2 | 0.95 |
|
241 |
-
| Sequence-parallelism
|
242 |
| Data-type | bf16 |
|
243 |
| Recompute-activations | yes |
|
244 |
| Distributed-optimizers | yes |
|
245 |
-
| Model Initialization | |
|
246 |
|
247 |
### Compute Infrastructure
|
248 |
|
|
|
238 |
| Optimizer | AdamW |
|
239 |
| Beta1 | 0.9 |
|
240 |
| Beta2 | 0.95 |
|
|
|
241 |
| Data-type | bf16 |
|
242 |
| Recompute-activations | yes |
|
243 |
| Distributed-optimizers | yes |
|
|
|
244 |
|
245 |
### Compute Infrastructure
|
246 |
|