Kotek37 committed (verified)
Commit 2d86f08 · 1 Parent(s): a79b007

Upload 3 files

Files changed (3)
  1. config (1).yaml +101 -0
  2. source.vocab +0 -0
  3. target.vocab +0 -0
config (1).yaml ADDED
@@ -0,0 +1,101 @@
+ # config.yaml
+
+
+ ## Where the samples will be written
+ save_data: run
+
+ # Training files
+ data:
+     corpus_1:
+         path_src: tr-os.tr-filtered.tr.subword.train
+         path_tgt: tr-os.os-filtered.os.subword.train
+         transforms: [filtertoolong]
+     valid:
+         path_src: tr-os.tr-filtered.tr.subword.dev
+         path_tgt: tr-os.os-filtered.os.subword.dev
+         transforms: [filtertoolong]
+
+
+ # Vocabulary files, generated by onmt_build_vocab
+ src_vocab: run/source.vocab
+ tgt_vocab: run/target.vocab
+
+ # Vocabulary size - should be the same as in SentencePiece
+ src_vocab_size: 50000
+ tgt_vocab_size: 50000
+
+ # Filter out source/target segments longer than n if [filtertoolong] is enabled
+ src_seq_length: 150
+ tgt_seq_length: 150
+
+ # Tokenization options
+ src_subword_model: source.model
+ tgt_subword_model: target.model
+
+ # Where to save the log file and the output models/checkpoints
+ log_file: train.log
+ save_model: models/model.tros
+
+ # Stop training if it does not improve after n validations
+ early_stopping: 4
+
+ # Default: 5000 - Save a model checkpoint every n steps
+ save_checkpoint_steps: 1500
+
+ # To save space, keep only the last n checkpoints
+ # keep_checkpoint: 6
+
+ seed: 3435
+
+ # Default: 100000 - Train the model for at most n steps
+ # Increase to 200000 or more for large datasets
+ # For fine-tuning, add the required steps on top of the original train_steps
+ train_steps: 100000
+
+ # Default: 10000 - Run validation every n steps
+ valid_steps: 10000
+
+ report_every: 100
+
+ # Number of GPUs, and IDs of GPUs
+ world_size: 1
+ gpu_ranks: [0]
+
+ # Batching
+ bucket_size: 262144
+ num_workers: 2  # Default: 2; set to 0 if you run out of RAM
+ batch_type: "tokens"
+ batch_size: 4096  # Tokens per batch; reduce if CUDA runs out of memory
+ valid_batch_size: 2048
+ max_generator_batches: 2
+ accum_count: [4]
+ accum_steps: [0]
+
+ # Optimization
+ model_dtype: "fp16"
+ optim: "adam"
+ learning_rate: 2
+ # Default: 4000 - for large datasets, try up to 8000
+ warmup_steps: 8000
+ decay_method: "noam"
+ adam_beta2: 0.998
+ max_grad_norm: 0
+ label_smoothing: 0.1
+ param_init: 0
+ param_init_glorot: true
+ normalization: "tokens"
+
+ # Model
+ encoder_type: transformer
+ decoder_type: transformer
+ position_encoding: true
+ enc_layers: 6
+ dec_layers: 6
+ heads: 8
+ hidden_size: 512
+ word_vec_size: 512
+ transformer_ff: 2048
+ dropout_steps: [0]
+ dropout: [0.1]
+ attention_dropout: [0.1]
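
The config follows the usual OpenNMT-py layout, so the vocabularies it points to can be generated and training launched directly from it. A minimal shell sketch, assuming OpenNMT-py is installed and the subword .train/.dev files listed under data: sit in the working directory; onmt_build_vocab is named in the config comments above, while onmt_train and the -n_sample flag are standard OpenNMT-py tooling rather than part of this commit:

    # Generate run/source.vocab and run/target.vocab (the src_vocab/tgt_vocab paths above);
    # -n_sample -1 builds the vocabulary over the full corpus
    onmt_build_vocab -config "config (1).yaml" -n_sample -1

    # Train the Transformer; checkpoints are written as models/model.tros_step_<N>.pt
    onmt_train -config "config (1).yaml"
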
source.vocab ADDED
The diff for this file is too large to render. See raw diff
 
target.vocab ADDED
The diff for this file is too large to render. See raw diff
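
Note that the config expects the vocabularies under run/ (src_vocab: run/source.vocab, tgt_vocab: run/target.vocab), while source.vocab and target.vocab are committed at the top level of the repository. One way to reuse the committed files instead of rebuilding them, sketched under the assumption that the paths in the config are left unchanged:

    # Assumption: reuse the committed vocab files rather than rerunning onmt_build_vocab
    mkdir -p run
    cp source.vocab run/source.vocab
    cp target.vocab run/target.vocab
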