Upload 3 files
- config (1).yaml +101 -0
- source.vocab +0 -0
- target.vocab +0 -0

config (1).yaml (ADDED)
@@ -0,0 +1,101 @@
+# config.yaml
+
+
+## Where the samples will be written
+save_data: run
+
+# Training files
+data:
+    corpus_1:
+        path_src: tr-os.tr-filtered.tr.subword.train
+        path_tgt: tr-os.os-filtered.os.subword.train
+        transforms: [filtertoolong]
+    valid:
+        path_src: tr-os.tr-filtered.tr.subword.dev
+        path_tgt: tr-os.os-filtered.os.subword.dev
+        transforms: [filtertoolong]
+
+
+# Vocabulary files, generated by onmt_build_vocab
+src_vocab: run/source.vocab
+tgt_vocab: run/target.vocab
+
+# Vocabulary size - should be the same as in SentencePiece
+src_vocab_size: 50000
+tgt_vocab_size: 50000
+
+# Filter out source/target sequences longer than n if [filtertoolong] is enabled
+src_seq_length: 150
+tgt_seq_length: 150
+
+# Tokenization options
+src_subword_model: source.model
+tgt_subword_model: target.model
+
+# Where to save the log file and the output models/checkpoints
+log_file: train.log
+save_model: models/model.tros
+
+# Stop training if it does not improve after n validations
+early_stopping: 4
+
+# Default: 5000 - Save a model checkpoint every n steps
+save_checkpoint_steps: 1500
+
+# To save space, keep only the last n checkpoints
+# keep_checkpoint: 6
+
+seed: 3435
+
+# Default: 100000 - Train the model for at most n steps
+# Increase to 200000 or more for large datasets
+# For fine-tuning, add the required steps to the original steps
+train_steps: 100000
+
+# Default: 10000 - Run validation every n steps
+valid_steps: 10000
+
+# Default: 4000 - for large datasets, try up to 8000
+# warmup_steps: 4000  # superseded by warmup_steps under Optimization below
+report_every: 100
+
+# Number of GPUs, and IDs of GPUs
+world_size: 1
+gpu_ranks: [0]
+
+# Batching
+bucket_size: 262144
+num_workers: 2  # Default: 2, set to 0 when running out of RAM
+batch_type: "tokens"
+batch_size: 4096  # Tokens per batch; reduce when CUDA runs out of memory
+valid_batch_size: 2048
+max_generator_batches: 2
+accum_count: [4]
+accum_steps: [0]
+
+# Optimization
+model_dtype: "fp16"
+optim: "adam"
+learning_rate: 2
+warmup_steps: 8000
+decay_method: "noam"
+adam_beta2: 0.998
+max_grad_norm: 0
+label_smoothing: 0.1
+param_init: 0
+param_init_glorot: true
+normalization: "tokens"
+
+# Model
+encoder_type: transformer
+decoder_type: transformer
+position_encoding: true
+enc_layers: 6
+dec_layers: 6
+heads: 8
+hidden_size: 512
+word_vec_size: 512
+transformer_ff: 2048
+dropout_steps: [0]
+dropout: [0.1]
+attention_dropout: [0.1]
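This is an OpenNMT-py training configuration. A minimal sketch of how such a config is typically used, assuming OpenNMT-py is installed and the *.subword.* training files already exist; the source.vocab and target.vocab files in this upload appear to be the ones onmt_build_vocab writes (the config expects them under run/):

    # Build run/source.vocab and run/target.vocab from the training corpora
    # (-n_sample -1 computes the vocabulary over the full corpus)
    onmt_build_vocab -config config.yaml -n_sample -1

    # Train; checkpoints are saved as models/model.tros_step_<n>.pt, logs go to train.log
    onmt_train -config config.yaml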
source.vocab (ADDED)
The diff for this file is too large to render; see the raw file.

target.vocab (ADDED)
The diff for this file is too large to render; see the raw file.
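The subword model entries (source.model, target.model) and the matching 50000 vocabulary sizes suggest the corpora were encoded with SentencePiece before training. A hedged sketch of that preprocessing step, where the raw corpus names tr-os.tr-filtered.tr and tr-os.os-filtered.os are assumptions inferred from the paths in the config:

    # Train the subword models; --vocab_size matches src_vocab_size/tgt_vocab_size
    # in the config (input file names are assumed, not given in this upload)
    spm_train --input=tr-os.tr-filtered.tr --model_prefix=source --vocab_size=50000
    spm_train --input=tr-os.os-filtered.os --model_prefix=target --vocab_size=50000

    # Encode the raw text into the .subword files referenced by the config
    spm_encode --model=source.model < tr-os.tr-filtered.tr > tr-os.tr-filtered.tr.subword.train
    spm_encode --model=target.model < tr-os.os-filtered.os > tr-os.os-filtered.os.subword.train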