nicoladecao committed on
Commit
daae92b
1 Parent(s): 5d59a3f

Initial commit

Browse files
README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+
3
+ language:
4
+ - multilingual
5
+ - af
6
+ - am
7
+ - ar
8
+ - as
9
+ - az
10
+ - be
11
+ - bg
12
+ - bm
13
+ - bn
14
+ - br
15
+ - bs
16
+ - ca
17
+ - cs
18
+ - cy
19
+ - da
20
+ - de
21
+ - el
22
+ - en
23
+ - eo
24
+ - es
25
+ - et
26
+ - eu
27
+ - fa
28
+ - ff
29
+ - fi
30
+ - fr
31
+ - fy
32
+ - ga
33
+ - gd
34
+ - gl
35
+ - gn
36
+ - gu
37
+ - ha
38
+ - he
39
+ - hi
40
+ - hr
41
+ - ht
42
+ - hu
43
+ - hy
44
+ - id
45
+ - ig
46
+ - is
47
+ - it
48
+ - ja
49
+ - jv
50
+ - ka
51
+ - kg
52
+ - kk
53
+ - km
54
+ - kn
55
+ - ko
56
+ - ku
57
+ - ky
58
+ - la
59
+ - lg
60
+ - ln
61
+ - lo
62
+ - lt
63
+ - lv
64
+ - mg
65
+ - mk
66
+ - ml
67
+ - mn
68
+ - mr
69
+ - ms
70
+ - my
71
+ - ne
72
+ - nl
73
+ - no
74
+ - om
75
+ - or
76
+ - pa
77
+ - pl
78
+ - ps
79
+ - pt
80
+ - qu
81
+ - ro
82
+ - ru
83
+ - sa
84
+ - sd
85
+ - si
86
+ - sk
87
+ - sl
88
+ - so
89
+ - sq
90
+ - sr
91
+ - ss
92
+ - su
93
+ - sv
94
+ - sw
95
+ - ta
96
+ - te
97
+ - th
98
+ - ti
99
+ - tl
100
+ - tn
101
+ - tr
102
+ - uk
103
+ - ur
104
+ - uz
105
+ - vi
106
+ - wo
107
+ - xh
108
+ - yo
109
+ - zh
110
+
111
+
112
+ tags:
113
+ - retrieval
114
+ - entity-retrieval
115
+ - named-entity-disambiguation
116
+ - entity-disambiguation
117
+ - named-entity-linking
118
+ - entity-linking
119
+ - text2text-generation
120
+ ---
121
+
122
+
123
+ # mGENRE
124
+
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/mgenre-wiki",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "BartForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.0,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 1024,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 4096,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_normalize_before": true,
20
+ "decoder_start_token_id": 2,
21
+ "dropout": 0.1,
22
+ "early_stopping": true,
23
+ "encoder_attention_heads": 16,
24
+ "encoder_ffn_dim": 4096,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 12,
27
+ "encoder_normalize_before": true,
28
+ "eos_token_id": 2,
29
+ "eos_token_ids": [
30
+ 2
31
+ ],
32
+ "forced_eos_token_id": 2,
33
+ "gradient_checkpointing": false,
34
+ "init_std": 0.02,
35
+ "is_encoder_decoder": true,
36
+ "max_length": 1024,
37
+ "max_position_embeddings": 1024,
38
+ "min_length": 0,
39
+ "model_type": "bart",
40
+ "normalize_embedding": false,
41
+ "num_beams": 5,
42
+ "num_hidden_layers": 12,
43
+ "pad_token_id": 1,
44
+ "scale_embedding": true,
45
+ "static_position_embeddings": false,
46
+ "student_decoder_layers": null,
47
+ "student_encoder_layers": null,
48
+ "task_specific_params": {},
49
+ "transformers_version": "4.19.2",
50
+ "use_cache": true,
51
+ "vocab_size": 256001
52
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c1a05450995f9b3b1ddb146271c77f71574833346c7fbc10322328a3719d6a2
3
+ size 2469065849
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee4dc054a17c18fe81f76c0b1cda00e9fc1cfd9e0f1a16cb6d77009e2076653
3
+ size 4870365
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5368e45f810725f023451207956a99f324c0253d334f4bdaaeb2ae3dcd331f3d
3
+ size 2469461956
titles_lang_all105_marisa_trie_with_redirect.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a35a904ee44b95e5e10d63f066fddcc5117cb5daf4e637ba5fadced92707756
3
+ size 581590386
titles_lang_all105_trie_with_redirect.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af6069f11ec1d2f2ce1ef8f034eec761c0bee5971f663f6355442683e533935
3
+ size 1754513408
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sp_model_kwargs": {}, "tokenizer_class": "XLMRobertaTokenizer"}