{ "metadata": { "ParamSize": 267, "ParamBytes": 256240384.0, "BitsPerParam": 4.149366610435039 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 68067328, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 68067328, "byteOffset": 0 } ], "md5sum": "493b5641a854a7800c7fa11483aad4f7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33148928, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 28 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8508416, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 8508416 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 8510208 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 10689280 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 10691072 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 15049216 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 15068672 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15070464 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 15072768 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15588864 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 15591168 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 15992576 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 15994368 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 15996160 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 18175232 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 18177024 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 22535168 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22554624 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22556416 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 22558720 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23074816 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 23077120 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 23478528 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 23480320 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 23482112 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 25661184 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 25662976 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 30021120 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 30040576 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30042368 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 30044672 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30560768 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 30563072 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 30964480 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 30966272 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 30968064 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 33147136 } ], "md5sum": "79de2844d07fb882ecf1b911c8715a73" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 29943808, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5305088 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 7484160 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 7485952 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 11844096 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 11863552 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11865344 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 11867648 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12383744 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 12386048 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12787456 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12789248 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 12791040 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 14970112 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 14971904 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 19330048 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 19349504 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19351296 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 19353600 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19869696 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 19872000 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20273408 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20275200 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 20276992 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22456064 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 22457856 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 26816000 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 26835456 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26837248 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 26839552 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27355648 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 27357952 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27759360 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27761152 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 27762944 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 29942016 } ], "md5sum": "00349c95b004533b308aa434e6339956" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29943808, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5305088 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 7484160 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 7485952 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 11844096 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 11863552 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11865344 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 11867648 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12383744 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 12386048 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12787456 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12789248 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 12791040 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 14970112 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 14971904 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 19330048 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 19349504 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19351296 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 19353600 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19869696 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 19872000 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20273408 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20275200 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 20276992 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22456064 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 22457856 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 26816000 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 26835456 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26837248 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 26839552 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27355648 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 27357952 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27759360 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27761152 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 27762944 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 29942016 } ], "md5sum": "52731040d6f319dd34d5011be1af4d3f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29943808, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5305088 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 7484160 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 7485952 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 11844096 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 11863552 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11865344 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 11867648 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12383744 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 12386048 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12787456 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12789248 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 12791040 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 14970112 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 14971904 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 19330048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 19349504 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19351296 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 19353600 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19869696 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 19872000 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20273408 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20275200 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 20276992 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22456064 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 22457856 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 26816000 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 26835456 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26837248 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 26839552 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27355648 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 27357952 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27759360 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27761152 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 27762944 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 29942016 } ], "md5sum": "44d6e6019f28b01e599723635e71685b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29943808, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5305088 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 7484160 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 7485952 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 11844096 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 11863552 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11865344 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 11867648 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12383744 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 12386048 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12787456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12789248 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 12791040 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 14970112 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 14971904 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 19330048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 19349504 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19351296 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 19353600 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19869696 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 19872000 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20273408 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20275200 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 20276992 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22456064 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 22457856 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 26816000 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 26835456 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26837248 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 26839552 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27355648 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 27357952 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27759360 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27761152 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 27762944 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 29942016 } ], "md5sum": "cc10d2253448fe43dfb3f0c211e5c061" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29943808, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5305088 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 7484160 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 7485952 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 11844096 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 11863552 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11865344 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 11867648 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12383744 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 12386048 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12787456 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 12789248 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 12791040 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 14970112 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 14971904 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 19330048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 19349504 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19351296 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 19353600 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19869696 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 19872000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20273408 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 20275200 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 20276992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 22456064 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 22457856 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 26816000 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 26835456 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26837248 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 26839552 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27355648 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 27357952 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27759360 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 27761152 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4864, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 27762944 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 29942016 } ], "md5sum": "4fa52e2ed2c35a460ecc65ab6362cce7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 5305088, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 896, 4864 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 9728 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 19456, "byteOffset": 4358144 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 4377600 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4379392 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 896, 576 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4381696 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 1, 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4897792 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 896, 448 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 4900096 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5301504 }, { "name": "model.norm.weight", "shape": [ 896 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1792, "byteOffset": 5303296 } ], "md5sum": "23c92d7d241c6bbfd75c4f7a331d6fd7" } ] }