{ "metadata": { "total_size": 11116003968 }, "weight_map": { "blocks.0.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.0.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.0.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.0.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.0.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.1.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.1.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.1.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.1.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.2.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.2.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.2.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.2.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.3.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.3.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.3.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.3.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.4.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.4.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.4.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.4.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.5.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.5.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.5.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.5.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.6.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.6.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.6.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.6.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin", "blocks.7.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.7.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.7.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.7.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin", "blocks.8.FeedForward.norm.bias": "pytorch_model-00002-of-00002.bin", "blocks.8.FeedForward.norm.weight": "pytorch_model-00002-of-00002.bin", "blocks.8.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00002-of-00002.bin", "blocks.8.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin", "blocks.8.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin", "blocks.8.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin", "blocks.8.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.mlp.router.classifier.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.norm.bias": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.norm.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00002-of-00002.bin", "blocks.9.SelfAttention.SelfAttention.o": "pytorch_model-00002-of-00002.bin", "blocks.9.SelfAttention.SelfAttention.qkv": "pytorch_model-00002-of-00002.bin", "blocks.9.SelfAttention.norm.bias": "pytorch_model-00002-of-00002.bin", "blocks.9.SelfAttention.norm.weight": "pytorch_model-00002-of-00002.bin", "embed_tokens.weight": "pytorch_model-00001-of-00002.bin", "logits.bias": "pytorch_model-00001-of-00002.bin", "logits.weight": "pytorch_model-00001-of-00002.bin", "position_embeddings.weight": "pytorch_model-00001-of-00002.bin", "spout.0.weight": "pytorch_model-00002-of-00002.bin", "spout.10.weight": "pytorch_model-00002-of-00002.bin", "spout.12.weight": "pytorch_model-00002-of-00002.bin", "spout.14.weight": "pytorch_model-00002-of-00002.bin", "spout.16.weight": "pytorch_model-00002-of-00002.bin", "spout.2.weight": "pytorch_model-00002-of-00002.bin", "spout.4.weight": "pytorch_model-00002-of-00002.bin", "spout.6.weight": "pytorch_model-00002-of-00002.bin", "spout.8.weight": "pytorch_model-00002-of-00002.bin", "token_bias": "pytorch_model-00001-of-00002.bin" } }