crochereau committed on
Commit
f98078b
·
verified ·
1 Parent(s): 068bde6

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -0
  2. tokenizer_config.json +17 -0
  3. vocab.txt +68 -0
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<bos>",
3
+ "cls_token": "<cls>",
4
+ "eos_token": "<eos>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "<sep>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {},
3
+ "bos_token": "<bos>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_df": "<cls_sf>",
6
+ "cls_dna": "<cls_dna>",
7
+ "cls_prot": "<cls_prot>",
8
+ "cls_token": "<cls>",
9
+ "do_lower_case": false,
10
+ "eos_token": "<eos>",
11
+ "mask_token": "<mask>",
12
+ "model_max_length": 1000000000000000019884624838656,
13
+ "pad_token": "<pad>",
14
+ "sep_token": "<sep>",
15
+ "tokenizer_class": "MgmTokenizer",
16
+ "unk_token": "<unk>"
17
+ }
vocab.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <bos>
2
+ <cls>
3
+ <cls_aa>
4
+ <cls_nt>
5
+ <cls_sf>
6
+ <cls_struct>
7
+ <eos>
8
+ <mask>
9
+ <null_1>
10
+ <pad>
11
+ <reserved>
12
+ <sep>
13
+ <unk>
14
+ L
15
+ A
16
+ G
17
+ V
18
+ S
19
+ E
20
+ R
21
+ T
22
+ I
23
+ D
24
+ P
25
+ K
26
+ Q
27
+ N
28
+ F
29
+ Y
30
+ M
31
+ H
32
+ W
33
+ C
34
+ X
35
+ B
36
+ U
37
+ Z
38
+ O
39
+ .
40
+ -
41
+ a
42
+ c
43
+ g
44
+ t
45
+ n
46
+ u
47
+ [Branch3]
48
+ [C@@H1]
49
+ [P]
50
+ [#Branch1]
51
+ [C]
52
+ [Branch2]
53
+ [O]
54
+ [S]
55
+ [Branch1]
56
+ [=O]
57
+ [#C]
58
+ [C@H1]
59
+ [=Branch1]
60
+ [=N]
61
+ [#Branch2]
62
+ [N]
63
+ [=C]
64
+ [Ring1]
65
+ [NH1]
66
+ [Branch4]
67
+ [Ring2]
68
+ [=Branch2]