{ "metadata": { "ParamSize": 966, "ParamBytes": 2207600640.0, "BitsPerParam": 5.765853952079593 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "model.embeddings.weight", "shape": [ 65536, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "439a4f0be329282b033885e3e016e060" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 31416320, "records": [ { "name": "model.blocks.0.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 0 }, { "name": "model.blocks.0.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 5120 }, { "name": "model.blocks.0.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10240 }, { "name": "model.blocks.0.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15360 }, { "name": "model.blocks.0.pre_ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 20480 }, { "name": "model.blocks.0.pre_ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25600 }, { "name": "model.blocks.0.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 30720 }, { "name": "model.blocks.0.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 35840 }, { "name": "model.blocks.0.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 40960 }, { "name": "model.blocks.0.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 46080 }, { "name": "model.blocks.0.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 51200 }, { "name": "model.blocks.0.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 56320 }, { "name": "model.blocks.0.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 61440 }, { "name": "model.blocks.0.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3338240 }, { "name": "model.blocks.0.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3747840 }, { "name": "model.blocks.0.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 7024640 }, { "name": "model.blocks.0.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7434240 }, { "name": "model.blocks.0.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10711040 }, { "name": "model.blocks.0.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11120640 }, { "name": "model.blocks.0.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14397440 }, { "name": "model.blocks.0.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14807040 }, { "name": "model.blocks.0.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18083840 }, { "name": "model.blocks.0.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18493440 }, { "name": "model.blocks.0.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18498560 }, { "name": "model.blocks.0.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18503680 }, { "name": "model.blocks.0.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18508800 }, { "name": "model.blocks.0.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18513920 }, { "name": "model.blocks.0.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29982720 } ], "md5sum": "f4aac722e4ec7a5e3557ecd8200d6265" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.0.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.0.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.0.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.0.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.1.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.1.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.1.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.1.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.1.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.1.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.1.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.1.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.1.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.1.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.1.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.1.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.1.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.1.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.1.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.1.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.1.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.1.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "c700be7a3e45f239b5f21e2566e05d18" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.1.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.1.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.1.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.1.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.1.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.1.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.1.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.1.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.1.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.1.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.1.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.1.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.2.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.2.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.2.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.2.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.2.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.2.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.2.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.2.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.2.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.2.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "8d7c3c9fe2f1bd2fb793c72004fd2d1a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.2.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.2.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.2.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.2.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.2.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.2.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.2.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.2.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.2.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.2.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.2.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.2.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.2.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.2.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.2.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.2.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "25bccd158f2b3887f176eabf2e78c103" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.2.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.2.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.2.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.2.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.3.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.3.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.3.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.3.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.3.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.3.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.3.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.3.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.3.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.3.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.3.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.3.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.3.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.3.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.3.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.3.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.3.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.3.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "b23974c16e6ec730dda1aa5e2c4b6a49" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.3.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.3.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.3.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.3.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.3.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.3.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.3.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.3.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.3.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.3.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.3.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.3.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.4.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.4.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.4.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.4.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.4.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.4.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.4.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.4.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.4.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.4.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "2160ac8626380fa83e3f7abd094ba080" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.4.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.4.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.4.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.4.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.4.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.4.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.4.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.4.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.4.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.4.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.4.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.4.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.4.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.4.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.4.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.4.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "48514b2d6ef702b70517488824f962f2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.4.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.4.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.4.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.4.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.5.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.5.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.5.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.5.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.5.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.5.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.5.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.5.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.5.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.5.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.5.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.5.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.5.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.5.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.5.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.5.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.5.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.5.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "f35d86ef51b2854a1d4da2a99194e07a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.5.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.5.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.5.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.5.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.5.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.5.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.5.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.5.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.5.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.5.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.5.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.5.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.6.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.6.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.6.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.6.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.6.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.6.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.6.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.6.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.6.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.6.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "648e500a75eac9c1daa88d0ce1407232" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.6.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.6.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.6.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.6.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.6.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.6.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.6.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.6.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.6.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.6.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.6.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.6.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.6.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.6.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.6.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.6.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "33bb5a5b32c3b1f77c7cecb8e5a0513a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.6.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.6.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.6.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.6.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.7.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.7.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.7.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.7.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.7.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.7.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.7.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.7.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.7.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.7.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.7.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.7.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.7.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.7.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.7.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.7.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.7.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.7.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "d1324f2926aec5cc47bd446e8ad2ff3c" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.7.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.7.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.7.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.7.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.7.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.7.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.7.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.7.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.7.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.7.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.7.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.7.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.8.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.8.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.8.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.8.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.8.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.8.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.8.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.8.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.8.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.8.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "b263f3afd180d02e16704e182ba6e063" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.8.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.8.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.8.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.8.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.8.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.8.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.8.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.8.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.8.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.8.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.8.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.8.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.8.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.8.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.8.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.8.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "3ccde97377a8d3d88f0a77ec51bbf8a6" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.8.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.8.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.8.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.8.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.9.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.9.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.9.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.9.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.9.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.9.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.9.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.9.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.9.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.9.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.9.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.9.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.9.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.9.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.9.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.9.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.9.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.9.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "71e482049f6b892f9c96e8c997df8866" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.9.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.9.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.9.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.9.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.9.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.9.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.9.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.9.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.9.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.9.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.9.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.9.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.10.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.10.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.10.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.10.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.10.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.10.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.10.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.10.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.10.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.10.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "99fa8b1c5f5ba6685dee6500c3ae0a11" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.10.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.10.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.10.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.10.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.10.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.10.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.10.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.10.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.10.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.10.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.10.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.10.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.10.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.10.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.10.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.10.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "ddf8d386c1f99dde6a6d23918d51b3d7" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.10.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.10.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.10.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.10.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.11.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.11.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.11.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.11.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.11.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.11.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.11.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.11.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.11.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.11.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.11.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.11.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.11.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.11.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.11.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.11.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.11.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.11.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "3447190a0bfd3f0d479085a0930bea24" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.11.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.11.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.11.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.11.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.11.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.11.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.11.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.11.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.11.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.11.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.11.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.11.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.12.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.12.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.12.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.12.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.12.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.12.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.12.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.12.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.12.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.12.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "3a8d9297a69b2919426c0ffcac53c76c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.12.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.12.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.12.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.12.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.12.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.12.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.12.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.12.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.12.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.12.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.12.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.12.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.12.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.12.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.12.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.12.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "3a7a2813c030b5dcc780d2c7be8b91bf" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.12.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.12.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.12.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.12.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.13.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.13.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.13.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.13.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.13.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.13.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.13.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.13.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.13.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.13.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.13.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.13.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.13.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.13.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.13.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.13.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.13.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.13.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "2c32648a25d7ed659e2efa24bb818fb8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.13.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.13.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.13.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.13.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.13.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.13.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.13.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.13.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.13.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.13.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.13.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.13.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.14.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.14.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.14.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.14.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.14.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.14.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.14.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.14.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.14.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.14.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "8d55ca6797871be24e7439577be2e09a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.14.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.14.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.14.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.14.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.14.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.14.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.14.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.14.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.14.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.14.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.14.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.14.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.14.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.14.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.14.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.14.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "b1a5efe7553b5285ecbf54783f346d6e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.14.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.14.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.14.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.14.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.15.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.15.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.15.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.15.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.15.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.15.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.15.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.15.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.15.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.15.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.15.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.15.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.15.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.15.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.15.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.15.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.15.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.15.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "7dc3e5ebc52b9c2df180bf34d7fef968" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.15.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.15.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.15.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.15.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.15.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.15.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.15.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.15.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.15.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.15.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.15.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.15.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.16.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.16.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.16.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.16.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.16.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.16.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.16.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.16.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.16.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.16.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "256af593e1e319c7f48129bf32560002" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.16.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.16.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.16.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.16.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.16.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.16.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.16.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.16.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.16.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.16.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.16.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.16.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.16.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.16.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.16.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.16.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "fb4aee4dbeab3c7f204a3de414907499" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.16.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.16.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.16.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.16.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.17.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.17.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.17.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.17.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.17.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.17.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.17.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.17.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.17.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.17.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.17.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.17.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.17.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.17.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.17.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.17.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.17.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.17.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "6eda28290f32eac6a26925bec1e32594" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.17.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.17.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.17.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.17.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.17.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.17.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.17.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.17.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.17.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.17.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.17.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.17.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.18.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.18.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.18.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.18.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.18.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.18.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.18.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.18.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.18.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.18.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "9ba611ec21166ec76f35c18cf0c42f85" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.18.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.18.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.18.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.18.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.18.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.18.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.18.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.18.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.18.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.18.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.18.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.18.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.18.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.18.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.18.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.18.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "7023c7b891a0756eea9d070e169be2f0" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.18.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.18.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.18.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.18.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.19.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.19.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.19.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.19.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.19.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.19.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.19.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.19.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.19.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.19.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.19.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.19.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.19.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.19.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.19.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.19.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.19.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.19.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "6185d688330d29607bf74cd8dead0ef1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.19.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.19.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.19.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.19.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.19.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.19.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.19.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.19.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.19.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.19.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.19.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.19.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.20.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.20.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.20.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.20.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.20.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.20.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.20.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.20.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.20.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.20.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "c8d795a2f4139ac5c94ee98c378c06b1" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.20.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.20.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.20.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.20.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.20.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.20.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.20.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.20.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.20.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.20.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.20.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.20.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.20.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.20.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.20.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.20.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "b753b369d3c578376871d43708c894db" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.20.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.20.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.20.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.20.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.21.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.21.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.21.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.21.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.21.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.21.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.21.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.21.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.21.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.21.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.21.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.21.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.21.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.21.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.21.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.21.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.21.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.21.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "fdc968c8003dd6dc210c9d8bf0f7c003" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.21.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.21.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.21.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.21.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.21.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.21.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.21.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.21.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.21.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.21.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.21.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.21.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.22.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.22.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.22.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.22.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.22.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.22.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.22.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.22.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.22.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.22.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "6f2a54db82db6ef06c811e5d92f321d9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.22.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.22.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.22.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.22.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.22.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.22.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.22.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.22.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.22.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.22.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.22.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.22.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.22.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.22.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.22.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.22.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "18ec1b005f0a6bd2550b1170746658ec" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.22.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.22.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.22.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.22.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.23.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.23.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.23.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.23.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.23.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.23.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.23.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.23.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.23.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.23.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.23.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.23.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.23.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.23.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.23.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.23.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.23.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.23.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "c9413dcc7cd6e89c6c7161550bce8278" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.23.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.23.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.23.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.23.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.23.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.23.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.23.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.23.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.23.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.23.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.23.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.23.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.24.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.24.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.24.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.24.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.24.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.24.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.24.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.24.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.24.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.24.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "58df3e4e1f9f92ba741df2189ac22b58" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.24.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.24.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.24.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.24.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.24.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.24.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.24.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.24.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.24.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.24.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.24.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.24.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.24.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.24.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.24.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.24.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "db77cd1485c52dbc3c7f6589501759df" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.24.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.24.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.24.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.24.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.25.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.25.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.25.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.25.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.25.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.25.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.25.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.25.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.25.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.25.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.25.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.25.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.25.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.25.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.25.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.25.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.25.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.25.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "bcf1787ccffc7bcf8746ebcafb890f70" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.25.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.25.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.25.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.25.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.25.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.25.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.25.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.25.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.25.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.25.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.25.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.25.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.26.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.26.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.26.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.26.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.26.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.26.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.26.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.26.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.26.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.26.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "8fa9caf205105cc61f1503180db8e027" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.26.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.26.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.26.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.26.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.26.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.26.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.26.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.26.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.26.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.26.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.26.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.26.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.26.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.26.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.26.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.26.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "e5078a97ad26fef719b15c202c0885d3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.26.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.26.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.26.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.26.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.27.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.27.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.27.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.27.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.27.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.27.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.27.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.27.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.27.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.27.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.27.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.27.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.27.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.27.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.27.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.27.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.27.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.27.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "9c753a270e8f2072db41f6494debff1f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.27.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.27.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.27.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.27.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.27.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.27.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.27.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.27.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.27.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.27.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.27.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.27.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.28.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.28.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.28.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.28.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.28.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.28.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.28.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.28.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.28.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.28.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "a4e581c571673ee20f64f9ad27c23dfb" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.28.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.28.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.28.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.28.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.28.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.28.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.28.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.28.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.28.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.28.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.28.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.28.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.28.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.28.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.28.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.28.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "f4c73cc83717fd20ac6a728d49c58301" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.28.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.28.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.28.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.28.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.29.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.29.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.29.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.29.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.29.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.29.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.29.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.29.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.29.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.29.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.29.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.29.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.29.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.29.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.29.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.29.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.29.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.29.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "2bbad61fb0f077215dedf117ae8dd3cb" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33249280, "records": [ { "name": "model.blocks.29.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.29.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.29.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.29.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.29.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.29.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.29.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.29.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.29.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.29.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.29.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.29.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.blocks.30.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.blocks.30.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 }, { "name": "model.blocks.30.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33208320 }, { "name": "model.blocks.30.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33213440 }, { "name": "model.blocks.30.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33218560 }, { "name": "model.blocks.30.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33223680 }, { "name": "model.blocks.30.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33228800 }, { "name": "model.blocks.30.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33233920 }, { "name": "model.blocks.30.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33239040 }, { "name": "model.blocks.30.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 33244160 } ], "md5sum": "8530124960c92d3fe2192b78d78d2783" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31354880, "records": [ { "name": "model.blocks.30.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.30.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.30.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3686400 }, { "name": "model.blocks.30.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 6963200 }, { "name": "model.blocks.30.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7372800 }, { "name": "model.blocks.30.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 10649600 }, { "name": "model.blocks.30.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11059200 }, { "name": "model.blocks.30.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14336000 }, { "name": "model.blocks.30.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "model.blocks.30.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 18022400 }, { "name": "model.blocks.30.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18432000 }, { "name": "model.blocks.30.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18437120 }, { "name": "model.blocks.30.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18442240 }, { "name": "model.blocks.30.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18447360 }, { "name": "model.blocks.30.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 18452480 }, { "name": "model.blocks.30.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 29921280 } ], "md5sum": "347b4deba2f555eb20d27804bcf31373" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31385600, "records": [ { "name": "model.blocks.30.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.30.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.30.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3686400 }, { "name": "model.blocks.30.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15155200 }, { "name": "model.blocks.31.ln1.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16588800 }, { "name": "model.blocks.31.ln1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16593920 }, { "name": "model.blocks.31.ln2.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16599040 }, { "name": "model.blocks.31.ln2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16604160 }, { "name": "model.blocks.31.attention.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16609280 }, { "name": "model.blocks.31.attention.time_mix_value", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16614400 }, { "name": "model.blocks.31.attention.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16619520 }, { "name": "model.blocks.31.attention.time_mix_gate", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 16624640 }, { "name": "model.blocks.31.attention.time_decay", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16629760 }, { "name": "model.blocks.31.attention.time_faaaa", "shape": [ 40, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 16634880 }, { "name": "model.blocks.31.attention.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16640000 }, { "name": "model.blocks.31.attention.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19916800 }, { "name": "model.blocks.31.attention.key.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20326400 }, { "name": "model.blocks.31.attention.key.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 23603200 }, { "name": "model.blocks.31.attention.value.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24012800 }, { "name": "model.blocks.31.attention.value.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 27289600 }, { "name": "model.blocks.31.attention.output.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 27699200 }, { "name": "model.blocks.31.attention.output.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 30976000 } ], "md5sum": "937b672cabc7d3b19ba6ac8a877be4fe" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 335544320, "records": [ { "name": "head.weight", "shape": [ 65536, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 335544320, "byteOffset": 0 } ], "md5sum": "2d00841a61bdefc4609153fed21b8c4c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "model.blocks.31.attention.gate.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.blocks.31.attention.gate.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 3276800 }, { "name": "model.blocks.31.attention.ln_x.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3686400 }, { "name": "model.blocks.31.attention.ln_x.bias", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 3691520 }, { "name": "model.blocks.31.feed_forward.time_mix_key", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3696640 }, { "name": "model.blocks.31.feed_forward.time_mix_receptance", "shape": [ 1, 1, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 3701760 }, { "name": "model.blocks.31.feed_forward.key.q_weight", "shape": [ 8960, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 3706880 }, { "name": "model.blocks.31.feed_forward.key.q_scale", "shape": [ 8960, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 15175680 }, { "name": "model.blocks.31.feed_forward.receptance.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16609280 }, { "name": "model.blocks.31.feed_forward.receptance.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 19886080 }, { "name": "model.blocks.31.feed_forward.value.q_weight", "shape": [ 2560, 1120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11468800, "byteOffset": 20295680 }, { "name": "model.blocks.31.feed_forward.value.q_scale", "shape": [ 2560, 280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1433600, "byteOffset": 31764480 }, { "name": "model.ln_out.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33198080 }, { "name": "model.ln_out.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 33203200 } ], "md5sum": "2ea58339264c276429f91b951c47ceb3" } ] }