qkv512_ff512
Browse files- finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/epoch3/consolidated.00-of-01.model.pth +3 -0
- finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/llamaPeft_normBiasLora.json +1 -0
- finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/log.txt +4 -0
- finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/output.log +0 -0
finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/epoch3/consolidated.00-of-01.model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e2fa79fe371973aab9bed65f564ed2d47ac3e7bea12d01532f1065ef30e9256
|
3 |
+
size 16038412523
|
finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/llamaPeft_normBiasLora.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lora_rank": 512, "lora_rank_feedforward":512, "bias_tuning": true}
|
finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/log.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"train_lr": 4.993842364532024e-05, "train_closs": 1.5090417658842485, "train_grad_norm": 16.975479762248806, "epoch": 0, "val_lr": 4.993842364532024e-05, "val_closs": 1.5090417658842485, "val_grad_norm": 16.975479762248806}
|
2 |
+
{"train_lr": 9.179680273851965e-05, "train_closs": 1.0572295954575408, "train_grad_norm": 1.5426717105463807, "epoch": 1, "val_lr": 9.179680273851965e-05, "val_closs": 1.0572295954575408, "val_grad_norm": 1.5426717105463807}
|
3 |
+
{"train_lr": 5.252924876847267e-05, "train_closs": 0.6000296613694464, "train_grad_norm": 1.281603565210192, "epoch": 2, "val_lr": 5.252924876847267e-05, "val_closs": 0.6000296613694464, "val_grad_norm": 1.281603565210192}
|
4 |
+
{"train_lr": 1.3232446029953387e-05, "train_closs": 0.23724095073488718, "train_grad_norm": 1.0203073024749756, "epoch": 3, "val_lr": 1.3232446029953387e-05, "val_closs": 0.23724095073488718, "val_grad_norm": 1.0203073024749756}
|
finetune/sg/effiLLaMA/alpaca_llamaPeft_normBiasLora_qkv512_ff512_learninit_7B/output.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|