| Parameter | Value |
| --- | --- |
| batch_size_training | '1' |
| batching_strategy | padding |
| checkpoint_interval | '5' |
| checkpoint_type | StateDictType.SHARDED_STATE_DICT |
| context_length | '4096' |
| dataset | custom_dataset |
| dist_checkpoint_folder | fine-tuned |
| dist_checkpoint_root_folder | ./finetuned_model |
| enable_fsdp | 'True' |
| flop_counter | 'False' |
| flop_counter_start | '3' |
| freeze_layers | 'False' |
| from_peft_checkpoint | '' |
| fsdp_activation_checkpointing | 'True' |
| fsdp_cpu_offload | 'False' |
| gamma | '0.85' |
| gradient_accumulation_steps | '1' |
| gradient_clipping | 'False' |
| gradient_clipping_threshold | '1.0' |
| hsdp | 'False' |
| low_cpu_fsdp | 'False' |
| lr | 1e-05 |
| max_checkpoints_to_keep | '2' |
| max_eval_step | '0' |
| max_train_step | '0' |
| mixed_precision | 'True' |
| model_name | meta-llama/Llama-3.2-11B-Vision-Instruct |
| num_epochs | '1' |
| num_freeze_layers | '1' |
| num_workers_dataloader | '16' |
| one_gpu | 'False' |
| optimizer | AdamW |
| output_dir | ./finetuned_model/PEFT/model |
| peft_method | lora |
| profiler_dir | ./finetuned_model/profiler/results |
| pure_bf16 | 'False' |
| quantization | None |
| replica_group_size | '0' |
| run_validation | 'True' |
| save_metrics | 'True' |
| save_model | 'True' |
| save_optimizer | 'True' |
| seed | '42' |
| sharding_group_size | '0' |
| sharding_strategy | ShardingStrategy.FULL_SHARD |
| tokenizer_name | None |
| use_fast_kernels | 'True' |
| use_fp16 | 'False' |
| use_peft | 'False' |
| use_profiler | 'False' |
| use_wandb | 'True' |
| val_batch_size | '1' |
| weight_decay | '0.0' |
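Note that most values above are serialized as quoted strings ('1', 'True') rather than native types, as is typical of a logged run config. The sketch below shows one way to load such a config and coerce the values back to usable Python types; it assumes the table has been exported as a `config.yaml` file with the same keys, and the `coerce` helper is illustrative rather than part of the original training code.

```python
# Minimal sketch, assuming the config above is saved as config.yaml with the
# same keys. The coerce() helper is illustrative, not part of the original
# training code.
import yaml

def coerce(value):
    """Map stringified scalars ('True', '1', '0.85', 'None') back to Python types."""
    if not isinstance(value, str):
        return value  # already a native type, e.g. lr: 1e-05
    if value in ("True", "False"):
        return value == "True"
    if value == "None":
        return None
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value  # paths, enum names, and other genuine strings

with open("config.yaml") as f:
    cfg = {key: coerce(val) for key, val in yaml.safe_load(f).items()}

assert cfg["batch_size_training"] == 1
assert cfg["enable_fsdp"] is True
assert cfg["quantization"] is None
```

Coercing at the boundary like this keeps the logged config human-readable while letting downstream code consume native types.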
|
|