framework: bart
data_dir: ../../dataset
train_data: msd_balence
text_type: all
arch: transformer
workers: 8                       # data-loading worker processes
epochs: 100
warmup_epochs: 20
start_epoch: 0
batch_size: 64
world_size: 1                    # number of processes for distributed training
lr: 0.0001                       # initial learning rate
min_lr: 1.0e-09                  # floor for the learning-rate schedule
rank: 0                          # process rank for distributed training
dist_url: tcp://localhost:12312  # URL used to initialize distributed training
dist_backend: nccl
seed: null                       # no fixed random seed
gpu: 1                           # GPU id to use
print_freq: 10                   # log every N iterations
multiprocessing_distributed: false
cos: true                        # use a cosine learning-rate schedule
bart_pretrain: false
label_smoothing: 0.1
use_early_stopping: false
eval_sample: 64
max_length: 128                  # maximum sequence length
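
# A minimal loading sketch, kept in comments so this file stays valid YAML.
# It assumes the config is read with PyYAML into an argparse-style namespace;
# the file name `config.yaml` is hypothetical, not confirmed by this repo:
#
#   import yaml
#   from types import SimpleNamespace
#
#   with open("config.yaml") as f:            # hypothetical path
#       args = SimpleNamespace(**yaml.safe_load(f))
#   assert args.framework == "bart"
#   print(args.lr, args.batch_size)           # -> 0.0001 64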