versae committed on
Commit
a1f93c9
1 Parent(s): d988382

Updating run script

Browse files
Files changed (1) hide show
  1. run.sh +13 -5
run.sh CHANGED
@@ -1,22 +1,30 @@
 
1
  HUB_TOKEN=`cat $HOME/.huggingface/token`
2
  ./run_mlm_flax.py \
3
  --output_dir="./" \
4
  --model_type="roberta" \
5
  --config_name="./" \
6
  --tokenizer_name="./" \
7
- --dataset_name="large_spanish_corpus" \
8
- --dataset_config_name="combined" \
 
9
  --max_seq_length="128" \
10
  --pad_to_max_length \
11
  --per_device_train_batch_size="128" \
12
  --per_device_eval_batch_size="128" \
13
- --learning_rate="3e-4" \
 
 
 
 
14
  --save_strategy="steps" \
15
  --save_steps="10000" \
16
  --save_total_limit="5" \
17
- --warmup_steps="1000" \
18
  --overwrite_output_dir \
19
- --num_train_epochs="8" \
 
 
20
  --dtype="bfloat16" \
21
  --push_to_hub_model_id="flax-community/bertin-roberta-large-spanish" \
22
  --push_to_hub_token="$HUB_TOKEN"
 
1
+ # Optimizer and schedule hyperparameters follow the RoBERTa paper: https://arxiv.org/pdf/1907.11692.pdf
2
  HUB_TOKEN=`cat $HOME/.huggingface/token`
3
  ./run_mlm_flax.py \
4
  --output_dir="./" \
5
  --model_type="roberta" \
6
  --config_name="./" \
7
  --tokenizer_name="./" \
8
+ --dataset_name="mc4" \
9
+ --dataset_config_name="es" \
10
+ --dataset_streaming \
11
  --max_seq_length="128" \
12
  --pad_to_max_length \
13
  --per_device_train_batch_size="128" \
14
  --per_device_eval_batch_size="128" \
15
+ --adam_beta1="0.9" \
16
+ --adam_beta2="0.98" \
17
+ --adam_epsilon="1e-6" \
18
+ --learning_rate="4e-4" \
19
+ --weight_decay="0.01" \
20
  --save_strategy="steps" \
21
  --save_steps="10000" \
22
  --save_total_limit="5" \
23
+ --warmup_steps="30000" \
24
  --overwrite_output_dir \
25
+ --num_train_steps="500000" \
26
+ --eval_steps="10000" \
27
+ --logging_steps="500" \
28
  --dtype="bfloat16" \
29
  --push_to_hub_model_id="flax-community/bertin-roberta-large-spanish" \
30
  --push_to_hub_token="$HUB_TOKEN"