{ "activation": "silu", "attn_type": "mha", "bias": false, "d_model": 1024, "dropout": 0.2, "hidden_dim": 1536, "kv_lora_rank": null, "mlp": "GLU", "nope_head_dim": 32, "num_heads": 16, "num_kv_heads": 16, "num_layers": 4, "q_lora_rank": null, "rope_head_dim": null, "seq_len": 256, "v_head_dim": null, "vocab_size": 50257, "weight_tying": true }