Now it will convert the weights of 24 encoder layers rather than 12

6effaee almost 2 years ago

5.81 kB

	# Copied from https://github.com/nghuyong/ERNIE-Pytorch/blob/master/convert.py
	# with some modifications for ernie-m


	#!/usr/bin/env python
	# encoding: utf-8
	"""
	File Description:
	ernie3.0 series model conversion based on paddlenlp repository
	ernie2.0 series model conversion based on paddlenlp repository
	official repo: https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo
	Author: nghuyong liushu
	Mail: [email protected] [email protected]
	Created Time: 2022/8/17
	"""
	import collections
	import os
	import json
	import paddle.fluid.dygraph as D
	import torch
	from paddle import fluid
	import numpy as np

	def build_params_map(attention_num=24):
	    """
	    Build the ordered parameter-name map used when converting a Paddle checkpoint.

	    Keys are the parameter names looked up in the Paddle state dict and values
	    are the names written to the converted state dict. Every enabled entry
	    maps a name to itself.

	    :param attention_num: number of encoder layers to generate entries for
	        (layers are numbered ``0 .. attention_num - 1``)
	    :return: ``collections.OrderedDict`` ordered as embeddings, then each
	        encoder layer in index order, then the pooler
	    """
	    embedding_names = (
	        'embeddings.word_embeddings.weight',
	        'embeddings.position_embeddings.weight',
	        # Disabled entries, kept for reference:
	        # 'ernie.embeddings.token_type_embeddings.weight'
	        # 'ernie.embeddings.task_type_embeddings.weight'
	        'embeddings.layer_norm.weight',
	        'embeddings.layer_norm.bias',
	    )
	    # Per-layer parameter suffixes; the order here fixes the order of the map.
	    layer_suffixes = (
	        'self_attn.q_proj.weight',
	        'self_attn.q_proj.bias',
	        'self_attn.k_proj.weight',
	        'self_attn.k_proj.bias',
	        'self_attn.v_proj.weight',
	        'self_attn.v_proj.bias',
	        'self_attn.out_proj.weight',
	        'self_attn.out_proj.bias',
	        'norm1.weight',
	        'norm1.bias',
	        'linear1.weight',
	        'linear1.bias',
	        'linear2.weight',
	        'linear2.bias',
	        'norm2.weight',
	        'norm2.bias',
	    )
	    pooler_names = (
	        'pooler.dense.weight',
	        'pooler.dense.bias',
	        # Disabled entries, kept for reference (source name -> target name):
	        # 'cls.predictions.transform.weight' -> 'cls.predictions.transform.dense.weight'
	        # 'cls.predictions.transform.bias' -> 'cls.predictions.transform.dense.bias'
	        # 'cls.predictions.layer_norm.weight' -> 'cls.predictions.transform.LayerNorm.gamma'
	        # 'cls.predictions.layer_norm.bias' -> 'cls.predictions.transform.LayerNorm.beta'
	        # 'cls.predictions.decoder_bias' -> 'cls.predictions.bias'
	    )

	    weight_map = collections.OrderedDict((name, name) for name in embedding_names)
	    # add attention layers
	    for i in range(attention_num):
	        for suffix in layer_suffixes:
	            name = f'encoder.layers.{i}.{suffix}'
	            weight_map[name] = name
	    for name in pooler_names:
	        weight_map[name] = name
	    return weight_map


	def extract_and_convert(input_dir, output_dir):
	    """
	    Extract a Paddle checkpoint from ``input_dir`` and write a converted copy.

	    Three files are read from ``input_dir`` and written to ``output_dir``:

	    * ``config.json`` -- copied with ``layer_norm_eps`` set to ``1e-5``;
	    * ``vocab.txt`` -- each line is truncated at its first tab character;
	    * ``model_state.pdparams`` -- every parameter named in the map returned
	      by ``build_params_map`` is stored as a ``torch.FloatTensor`` and saved
	      to ``pytorch_model.bin``; other parameters are reported and skipped.

	    :param input_dir: directory containing the Paddle files listed above
	    :param output_dir: destination directory, created if it does not exist
	    :return: None
	    """
	    # exist_ok avoids the check-then-create race of exists() + makedirs().
	    os.makedirs(output_dir, exist_ok=True)

	    print('=' * 20 + 'save config file' + '=' * 20)
	    with open(os.path.join(input_dir, 'config.json'), 'rt', encoding='utf-8') as f:
	        config = json.load(f)
	    # Disabled config rewrites, kept for reference:
	    # if 'init_args' in config:
	    #     config = config['init_args'][0]
	    # del config['init_class']
	    config['layer_norm_eps'] = 1e-5
	    # config['model_type'] = 'ernie'
	    # config['architectures'] = ["ErnieForMaskedLM"] # or 'BertModel'
	    # config['intermediate_size'] = 4 * config['hidden_size']
	    # Context managers make sure the config is flushed and the handle closed.
	    with open(os.path.join(output_dir, 'config.json'), 'wt', encoding='utf-8') as f:
	        json.dump(config, f, indent=4)

	    print('=' * 20 + 'save vocab file' + '=' * 20)
	    with open(os.path.join(input_dir, 'vocab.txt'), 'rt', encoding='utf-8') as f:
	        words = f.read().splitlines()
	    # Keep only the token itself; anything after the first tab is dropped.
	    words = [word.split('\t')[0] for word in words]
	    with open(os.path.join(output_dir, 'vocab.txt'), 'wt', encoding='utf-8') as f:
	        f.writelines(word + "\n" for word in words)

	    print('=' * 20 + 'extract weights' + '=' * 20)
	    state_dict = collections.OrderedDict()
	    weight_map = build_params_map(attention_num=config['num_hidden_layers'])
	    with fluid.dygraph.guard():
	        paddle_paddle_params, _ = D.load_dygraph(os.path.join(input_dir, 'model_state.pdparams'))
	    # NOTE(review): the argument-less .transpose() below is the numpy call, so
	    # the loaded values are assumed to be numpy arrays -- confirm.
	    for weight_name, weight_value in paddle_paddle_params.items():
	        # Skip first so unmapped weights are not transposed for nothing.
	        if weight_name not in weight_map:
	            print('=' * 20, '[SKIP]', weight_name, '=' * 20)
	            continue
	        if 'weight' in weight_name:
	            if 'encoder' in weight_name or 'pooler' in weight_name or 'cls.' in weight_name:
	                # Presumably needed because Paddle linear layers store their
	                # weight as (in, out) while torch.nn.Linear uses (out, in);
	                # 1-D weights (e.g. layer norm) are unchanged by a transpose.
	                weight_value = weight_value.transpose()
	        state_dict[weight_map[weight_name]] = torch.FloatTensor(weight_value)
	        print(weight_name, '->', weight_map[weight_name], weight_value.shape)

	    # Report expected parameters the checkpoint did not provide, so an
	    # incomplete pytorch_model.bin is not produced silently.
	    for expected_name in weight_map:
	        if expected_name not in paddle_paddle_params:
	            print('=' * 20, '[MISSING]', expected_name, '=' * 20)

	    torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin"))


	if __name__ == '__main__':
	    # Script entry point. Both directories may be given on the command line;
	    # with no arguments the original hard-coded ERNIE-M large paths are used.
	    import argparse

	    parser = argparse.ArgumentParser(
	        description='Convert an ERNIE-M Paddle checkpoint to a PyTorch state dict.'
	    )
	    parser.add_argument(
	        'input_dir', nargs='?', default="./ernie_m_large_paddle/",
	        help='directory with config.json, vocab.txt and model_state.pdparams',
	    )
	    parser.add_argument(
	        'output_dir', nargs='?', default="./ernie_m_large_torch/",
	        help='directory that receives config.json, vocab.txt and pytorch_model.bin',
	    )
	    args = parser.parse_args()
	    extract_and_convert(args.input_dir, args.output_dir)