framework: bart
data_dir: ../../dataset
train_data: msd_balence
text_type: all
arch: transformer
workers: 8                       # data-loading worker processes
epochs: 100
warmup_epochs: 20
start_epoch: 0
batch_size: 64
world_size: 1                    # number of processes for distributed training
lr: 0.0001                       # initial learning rate
min_lr: 1.0e-09                  # floor for the learning-rate schedule
rank: 0                          # process rank for distributed training
dist_url: tcp://localhost:12312  # URL used to initialize distributed training
dist_backend: nccl
seed: null                       # no fixed random seed
gpu: 1                           # GPU id to use
print_freq: 10                   # log every N iterations
multiprocessing_distributed: false
cos: true                        # use a cosine learning-rate schedule
bart_pretrain: false
label_smoothing: 0.1
use_early_stopping: false
eval_sample: 64
max_length: 128                  # maximum sequence length
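
# A minimal loading sketch, kept in comments so this file stays valid YAML.
# It assumes the config is read with PyYAML into an argparse-style namespace;
# the file name `config.yaml` is hypothetical, not confirmed by this repo:
#
#   import yaml
#   from types import SimpleNamespace
#
#   with open("config.yaml") as f:            # hypothetical path
#       args = SimpleNamespace(**yaml.safe_load(f))
#   assert args.framework == "bart"
#   print(args.lr, args.batch_size)           # -> 0.0001 64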