aapot committed
Commit 6d2b0f2
1 Parent(s): 658d98d

move cleaned data tokenizer to main

Browse files
- cleaned_data/config.json +0 -25
- cleaned_data/tokenizer.json +0 -0
- flax_model_to_pytorch.py +30 -0
- tokenizer.json +0 -0
cleaned_data/config.json
DELETED
@@ -1,25 +0,0 @@
-{
-  "architectures": [
-    "RobertaForMaskedLM"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 1024,
-  "initializer_range": 0.02,
-  "intermediate_size": 4096,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
-  "num_attention_heads": 16,
-  "num_hidden_layers": 24,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.9.0.dev0",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 50265
-}
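For reference, the deleted file describes a RoBERTa-large-sized masked-LM configuration (24 layers, hidden size 1024, 16 attention heads, vocab size 50265). A minimal, hypothetical sketch of recreating an equivalent config object with transformers, with the key values copied from the diff above (the save path is only an example, not part of the commit):

from transformers import RobertaConfig

# Key values copied from the deleted cleaned_data/config.json shown above.
config = RobertaConfig(
    vocab_size=50265,
    hidden_size=1024,
    num_hidden_layers=24,
    num_attention_heads=16,
    intermediate_size=4096,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=514,
    type_vocab_size=1,
    initializer_range=0.02,
    layer_norm_eps=1e-05,
    pad_token_id=1,
    bos_token_id=0,
    eos_token_id=2,
    position_embedding_type="absolute",
)

# config.save_pretrained("cleaned_data")  # example path; would rewrite a file like the deleted one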
cleaned_data/tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
flax_model_to_pytorch.py
ADDED
@@ -0,0 +1,30 @@
+from transformers import RobertaForMaskedLM, FlaxRobertaForMaskedLM, AutoTokenizer
+import torch
+import numpy as np
+import jax
+import jax.numpy as jnp
+
+MODEL_PATH = "./"
+
+model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_PATH)
+
+def to_f32(t):
+    return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
+
+model.params = to_f32(model.params)
+model.save_pretrained(MODEL_PATH)
+
+pt_model = RobertaForMaskedLM.from_pretrained(MODEL_PATH, from_flax=True)
+
+input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
+input_ids_pt = torch.tensor(input_ids)
+
+logits_pt = pt_model(input_ids_pt).logits
+print(logits_pt)
+logits_fx = model(input_ids).logits
+print(logits_fx)
+pt_model.save_pretrained(MODEL_PATH)
+
+# also save tokenizer
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+tokenizer.save_pretrained(MODEL_PATH)
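The added script casts any bfloat16 Flax parameters to float32, reloads the weights into a PyTorch RobertaForMaskedLM, and prints both logit tensors for manual comparison. A minimal, hypothetical check one could append to the script (assumptions: it runs in the same session so logits_fx and logits_pt from above are still in scope, and the 1e-3 tolerance is an arbitrary choice, not part of the commit):

import numpy as np

# Hypothetical sanity check: the Flax and PyTorch logits should agree closely
# if the weight conversion worked.
max_diff = np.max(np.abs(np.asarray(logits_fx) - logits_pt.detach().numpy()))
print(f"max abs diff between Flax and PyTorch logits: {max_diff:.2e}")
assert max_diff < 1e-3, "Flax -> PyTorch conversion mismatch"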
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff