diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json"
--- "a/EXTERNAL_MODEL_RESULTS.json"
+++ "b/EXTERNAL_MODEL_RESULTS.json"
@@ -1,29 +1,294 @@
{
"Alibaba-NLP__gte-Qwen1.5-7B-instruct": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "BornholmBitextMining": 40.62,
+ "Tatoeba (bos-eng)": 85.59,
+ "Tatoeba (ile-eng)": 74.66,
+ "Tatoeba (ukr-eng)": 88.42,
+ "Tatoeba (heb-eng)": 57.62,
+ "Tatoeba (eus-eng)": 12.59,
+ "Tatoeba (lvs-eng)": 68.21,
+ "Tatoeba (hrv-eng)": 88.6,
+ "Tatoeba (mhr-eng)": 8.29,
+ "Tatoeba (lfn-eng)": 57.45,
+ "Tatoeba (uig-eng)": 3.12,
+ "Tatoeba (zsm-eng)": 92.46,
+ "Tatoeba (tur-eng)": 83.9,
+ "Tatoeba (max-eng)": 41.02,
+ "Tatoeba (spa-eng)": 98.42,
+ "Tatoeba (pes-eng)": 83.46,
+ "Tatoeba (wuu-eng)": 90.55,
+ "Tatoeba (lat-eng)": 67.43,
+ "Tatoeba (hin-eng)": 88.22,
+ "Tatoeba (tel-eng)": 25.05,
+ "Tatoeba (ceb-eng)": 19.11,
+ "Tatoeba (dsb-eng)": 35.02,
+ "Tatoeba (amh-eng)": 0.6,
+ "Tatoeba (slk-eng)": 86.07,
+ "Tatoeba (gle-eng)": 24.33,
+ "Tatoeba (xho-eng)": 12.02,
+ "Tatoeba (ben-eng)": 65.12,
+ "Tatoeba (dan-eng)": 92.04,
+ "Tatoeba (cor-eng)": 4.07,
+ "Tatoeba (fao-eng)": 44.17,
+ "Tatoeba (pol-eng)": 95.56,
+ "Tatoeba (kor-eng)": 87.91,
+ "Tatoeba (pms-eng)": 41.88,
+ "Tatoeba (urd-eng)": 51.26,
+ "Tatoeba (glg-eng)": 87.85,
+ "Tatoeba (tha-eng)": 89.15,
+ "Tatoeba (tgl-eng)": 75.03,
+ "Tatoeba (nov-eng)": 61.22,
+ "Tatoeba (epo-eng)": 72.91,
+ "Tatoeba (nob-eng)": 94.97,
+ "Tatoeba (ina-eng)": 92.02,
+ "Tatoeba (ita-eng)": 92.62,
+ "Tatoeba (cmn-eng)": 96.35,
+ "Tatoeba (fra-eng)": 95.19,
+ "Tatoeba (fin-eng)": 85.6,
+ "Tatoeba (kur-eng)": 10.97,
+ "Tatoeba (gsw-eng)": 50.97,
+ "Tatoeba (awa-eng)": 41.64,
+ "Tatoeba (nds-eng)": 61.72,
+ "Tatoeba (csb-eng)": 25.18,
+ "Tatoeba (mar-eng)": 64.38,
+ "Tatoeba (kat-eng)": 39.63,
+ "Tatoeba (nno-eng)": 79.71,
+ "Tatoeba (ang-eng)": 62.42,
+ "Tatoeba (bre-eng)": 5.79,
+ "Tatoeba (ido-eng)": 62.61,
+ "Tatoeba (afr-eng)": 84.39,
+ "Tatoeba (bel-eng)": 57.68,
+ "Tatoeba (nld-eng)": 95.88,
+ "Tatoeba (ber-eng)": 5.17,
+ "Tatoeba (est-eng)": 56.72,
+ "Tatoeba (hye-eng)": 5.03,
+ "Tatoeba (mkd-eng)": 77.95,
+ "Tatoeba (ell-eng)": 85.69,
+ "Tatoeba (tat-eng)": 9.8,
+ "Tatoeba (ast-eng)": 80.38,
+ "Tatoeba (ind-eng)": 91.29,
+ "Tatoeba (por-eng)": 93.62,
+ "Tatoeba (arq-eng)": 22.07,
+ "Tatoeba (fry-eng)": 50.29,
+ "Tatoeba (tam-eng)": 52.97,
+ "Tatoeba (oci-eng)": 45.81,
+ "Tatoeba (sqi-eng)": 33.81,
+ "Tatoeba (ron-eng)": 93.03,
+ "Tatoeba (orv-eng)": 25.13,
+ "Tatoeba (cbk-eng)": 65.47,
+ "Tatoeba (cym-eng)": 20.45,
+ "Tatoeba (khm-eng)": 24.03,
+ "Tatoeba (cat-eng)": 88.4,
+ "Tatoeba (yid-eng)": 2.21,
+ "Tatoeba (slv-eng)": 80.4,
+ "Tatoeba (arz-eng)": 62.12,
+ "Tatoeba (kaz-eng)": 39.43,
+ "Tatoeba (aze-eng)": 63.53,
+ "Tatoeba (swg-eng)": 52.96,
+ "Tatoeba (bul-eng)": 85.33,
+ "Tatoeba (hsb-eng)": 46.82,
+ "Tatoeba (kab-eng)": 1.43,
+ "Tatoeba (jpn-eng)": 93.32,
+ "Tatoeba (hun-eng)": 81.38,
+ "Tatoeba (lit-eng)": 74.97,
+ "Tatoeba (rus-eng)": 92.95,
+ "Tatoeba (yue-eng)": 91.94,
+ "Tatoeba (ces-eng)": 91.46,
+ "Tatoeba (kzj-eng)": 5.69,
+ "Tatoeba (tzl-eng)": 40.33,
+ "Tatoeba (pam-eng)": 6.6,
+ "Tatoeba (vie-eng)": 94.92,
+ "Tatoeba (isl-eng)": 49.03,
+ "Tatoeba (ara-eng)": 85.19,
+ "Tatoeba (deu-eng)": 99.08,
+ "Tatoeba (swh-eng)": 38.89,
+ "Tatoeba (gla-eng)": 11.86,
+ "Tatoeba (war-eng)": 17.44,
+ "Tatoeba (dtp-eng)": 4.93,
+ "Tatoeba (swe-eng)": 92.5,
+ "Tatoeba (mal-eng)": 70.18,
+ "Tatoeba (uzb-eng)": 15.49,
+ "Tatoeba (srp-eng)": 85.04,
+ "Tatoeba (jav-eng)": 21.06,
+ "Tatoeba (mon-eng)": 33.92,
+ "Tatoeba (cha-eng)": 22.36,
+ "Tatoeba (tuk-eng)": 18.04
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "gte-Qwen1.5-7B-instruct",
+ "AllegroReviews": 52.72,
+ "AmazonCounterfactualClassification (en-ext)": 80.55,
"AmazonCounterfactualClassification (en)": 83.16,
+ "AmazonCounterfactualClassification (de)": 74.94,
+ "AmazonCounterfactualClassification (ja)": 81.6,
"AmazonPolarityClassification": 96.7,
"AmazonReviewsClassification (en)": 62.17,
+ "AmazonReviewsClassification (de)": 58.19,
+ "AmazonReviewsClassification (es)": 56.05,
+ "AmazonReviewsClassification (fr)": 54.59,
+ "AmazonReviewsClassification (ja)": 54.39,
"AmazonReviewsClassification (zh)": 52.95,
+ "AngryTweetsClassification": 66.74,
"Banking77Classification": 81.68,
+ "CBD": 70.61,
+ "DanishPoliticalCommentsClassification": 46.89,
"EmotionClassification": 54.53,
+ "GeoreviewClassification": 59.31,
+ "HeadlineClassification": 82.32,
"IFlyTek": 53.77,
"ImdbClassification": 95.58,
+ "InappropriatenessClassification": 73.26,
"JDReview": 88.2,
+ "KinopoiskClassification": 67.17,
+ "LccSentimentClassification": 69.4,
"MTOPDomainClassification (en)": 95.75,
+ "MTOPDomainClassification (de)": 91.56,
+ "MTOPDomainClassification (es)": 91.96,
+ "MTOPDomainClassification (fr)": 89.66,
+ "MTOPDomainClassification (hi)": 86.31,
+ "MTOPDomainClassification (th)": 80.72,
"MTOPIntentClassification (en)": 84.26,
- "MassiveIntentClassification (zh-CN)": 76.25,
+ "MTOPIntentClassification (de)": 79.15,
+ "MTOPIntentClassification (es)": 81.06,
+ "MTOPIntentClassification (fr)": 77.69,
+ "MTOPIntentClassification (hi)": 67.88,
+ "MTOPIntentClassification (th)": 67.46,
+ "MasakhaNEWSClassification (amh)": 31.41,
+ "MasakhaNEWSClassification (eng)": 82.71,
+ "MasakhaNEWSClassification (fra)": 81.78,
+ "MasakhaNEWSClassification (hau)": 66.8,
+ "MasakhaNEWSClassification (ibo)": 68.62,
+ "MasakhaNEWSClassification (lin)": 80.74,
+ "MasakhaNEWSClassification (lug)": 65.34,
+ "MasakhaNEWSClassification (orm)": 62.31,
+ "MasakhaNEWSClassification (pcm)": 94.03,
+ "MasakhaNEWSClassification (run)": 79.53,
+ "MasakhaNEWSClassification (sna)": 79.76,
+ "MasakhaNEWSClassification (som)": 62.42,
+ "MasakhaNEWSClassification (swa)": 77.18,
+ "MasakhaNEWSClassification (tir)": 27.61,
+ "MasakhaNEWSClassification (xho)": 78.01,
+ "MasakhaNEWSClassification (yor)": 79.66,
+ "MassiveIntentClassification (ru)": 73.15,
+ "MassiveIntentClassification (fi)": 63.41,
+ "MassiveIntentClassification (sl)": 61.7,
+ "MassiveIntentClassification (lv)": 52.65,
+ "MassiveIntentClassification (te)": 33.68,
+ "MassiveIntentClassification (ka)": 38.5,
+ "MassiveIntentClassification (ko)": 67.71,
+ "MassiveIntentClassification (sq)": 39.98,
+ "MassiveIntentClassification (my)": 34.12,
+ "MassiveIntentClassification (jv)": 41.73,
+ "MassiveIntentClassification (zh-TW)": 70.87,
+ "MassiveIntentClassification (bn)": 60.69,
+ "MassiveIntentClassification (ta)": 39.49,
+ "MassiveIntentClassification (fa)": 66.15,
+ "MassiveIntentClassification (ar)": 57.09,
+ "MassiveIntentClassification (pl)": 69.2,
+ "MassiveIntentClassification (mn)": 38.69,
+ "MassiveIntentClassification (ms)": 64.78,
+ "MassiveIntentClassification (nl)": 72.74,
+ "MassiveIntentClassification (el)": 60.04,
+ "MassiveIntentClassification (fr)": 72.88,
+ "MassiveIntentClassification (km)": 33.4,
+ "MassiveIntentClassification (sv)": 70.44,
+ "MassiveIntentClassification (kn)": 29.46,
+ "MassiveIntentClassification (hi)": 65.42,
+ "MassiveIntentClassification (ml)": 49.24,
+ "MassiveIntentClassification (da)": 68.09,
+ "MassiveIntentClassification (ro)": 64.01,
+ "MassiveIntentClassification (it)": 72.46,
+ "MassiveIntentClassification (th)": 60.01,
+ "MassiveIntentClassification (nb)": 64.16,
+ "MassiveIntentClassification (sw)": 42.72,
+ "MassiveIntentClassification (ur)": 44.88,
+ "MassiveIntentClassification (tl)": 60.08,
+ "MassiveIntentClassification (tr)": 64.52,
+ "MassiveIntentClassification (cy)": 31.04,
+ "MassiveIntentClassification (is)": 37.19,
+ "MassiveIntentClassification (es)": 71.68,
"MassiveIntentClassification (en)": 78.47,
+ "MassiveIntentClassification (hy)": 22.14,
+ "MassiveIntentClassification (he)": 45.05,
+ "MassiveIntentClassification (hu)": 60.33,
+ "MassiveIntentClassification (id)": 68.91,
+ "MassiveIntentClassification (pt)": 73.42,
+ "MassiveIntentClassification (ja)": 73.31,
+ "MassiveIntentClassification (vi)": 67.44,
+ "MassiveIntentClassification (af)": 62.62,
+ "MassiveIntentClassification (de)": 70.88,
+ "MassiveIntentClassification (zh-CN)": 76.25,
+ "MassiveIntentClassification (az)": 56.78,
+ "MassiveIntentClassification (am)": 19.46,
+ "MassiveScenarioClassification (ru)": 75.46,
+ "MassiveScenarioClassification (ml)": 56.2,
+ "MassiveScenarioClassification (km)": 46.63,
+ "MassiveScenarioClassification (te)": 42.86,
+ "MassiveScenarioClassification (zh-TW)": 74.5,
+ "MassiveScenarioClassification (hy)": 34.0,
+ "MassiveScenarioClassification (kn)": 41.11,
+ "MassiveScenarioClassification (tr)": 68.72,
+ "MassiveScenarioClassification (hu)": 67.01,
+ "MassiveScenarioClassification (lv)": 61.15,
+ "MassiveScenarioClassification (ja)": 74.96,
+ "MassiveScenarioClassification (pt)": 73.99,
+ "MassiveScenarioClassification (he)": 57.06,
+ "MassiveScenarioClassification (vi)": 71.88,
+ "MassiveScenarioClassification (af)": 69.27,
"MassiveScenarioClassification (en)": 78.19,
+ "MassiveScenarioClassification (sl)": 66.4,
+ "MassiveScenarioClassification (fa)": 70.72,
+ "MassiveScenarioClassification (is)": 52.15,
+ "MassiveScenarioClassification (ro)": 68.82,
+ "MassiveScenarioClassification (pl)": 72.18,
+ "MassiveScenarioClassification (fi)": 66.32,
+ "MassiveScenarioClassification (es)": 74.89,
+ "MassiveScenarioClassification (az)": 61.39,
+ "MassiveScenarioClassification (nl)": 74.39,
+ "MassiveScenarioClassification (jv)": 52.93,
+ "MassiveScenarioClassification (de)": 75.34,
+ "MassiveScenarioClassification (fr)": 74.64,
+ "MassiveScenarioClassification (sq)": 52.41,
+ "MassiveScenarioClassification (hi)": 69.71,
"MassiveScenarioClassification (zh-CN)": 77.26,
+ "MassiveScenarioClassification (mn)": 47.0,
+ "MassiveScenarioClassification (id)": 72.61,
+ "MassiveScenarioClassification (ms)": 71.61,
+ "MassiveScenarioClassification (sv)": 74.26,
+ "MassiveScenarioClassification (th)": 69.09,
+ "MassiveScenarioClassification (ta)": 50.26,
+ "MassiveScenarioClassification (nb)": 68.59,
+ "MassiveScenarioClassification (da)": 72.84,
+ "MassiveScenarioClassification (cy)": 44.52,
+ "MassiveScenarioClassification (am)": 27.95,
+ "MassiveScenarioClassification (ka)": 50.49,
+ "MassiveScenarioClassification (sw)": 54.13,
+ "MassiveScenarioClassification (ko)": 72.65,
+ "MassiveScenarioClassification (it)": 75.7,
+ "MassiveScenarioClassification (my)": 41.89,
+ "MassiveScenarioClassification (tl)": 66.9,
+ "MassiveScenarioClassification (ar)": 66.1,
+ "MassiveScenarioClassification (el)": 67.87,
+ "MassiveScenarioClassification (ur)": 54.9,
+ "MassiveScenarioClassification (bn)": 67.18,
"MultilingualSentiment": 77.42,
+ "NoRecClassification": 55.81,
+ "NordicLangClassification": 72.94,
"OnlineShopping": 94.48,
+ "PAC": 66.38,
+ "PolEmo2.0-IN": 75.48,
+ "PolEmo2.0-OUT": 47.04,
+ "RuReviewsClassification": 72.89,
+ "RuSciBenchGRNTIClassification": 67.35,
+ "RuSciBenchOECDClassification": 54.38,
"TNews": 51.24,
"ToxicConversationsClassification": 78.75,
"TweetSentimentExtractionClassification": 66.0,
@@ -35,18 +300,68 @@
"v_measure": [
{
"Model": "gte-Qwen1.5-7B-instruct",
+ "AlloProfClusteringP2P": 70.99,
+ "AlloProfClusteringS2S": 58.27,
"ArxivClusteringP2P": 56.4,
"ArxivClusteringS2S": 51.45,
"BiorxivClusteringP2P": 49.01,
"BiorxivClusteringS2S": 45.06,
+ "BlurbsClusteringP2P": 45.28,
+ "BlurbsClusteringS2S": 20.1,
"CLSClusteringP2P": 47.21,
"CLSClusteringS2S": 45.79,
+ "GeoreviewClusteringP2P": 74.9,
+ "HALClusteringS2S": 28.35,
+ "MLSUMClusteringP2P (de)": 50.2,
+ "MLSUMClusteringP2P (fr)": 48.3,
+ "MLSUMClusteringP2P (ru)": 58.75,
+ "MLSUMClusteringP2P (es)": 50.48,
+ "MLSUMClusteringS2S (de)": 50.14,
+ "MLSUMClusteringS2S (fr)": 46.91,
+ "MLSUMClusteringS2S (ru)": 58.21,
+ "MLSUMClusteringS2S (es)": 48.13,
+ "MasakhaNEWSClusteringP2P (amh)": 42.33,
+ "MasakhaNEWSClusteringP2P (eng)": 63.81,
+ "MasakhaNEWSClusteringP2P (fra)": 68.07,
+ "MasakhaNEWSClusteringP2P (hau)": 45.04,
+ "MasakhaNEWSClusteringP2P (ibo)": 57.78,
+ "MasakhaNEWSClusteringP2P (lin)": 73.24,
+ "MasakhaNEWSClusteringP2P (lug)": 50.8,
+ "MasakhaNEWSClusteringP2P (orm)": 28.08,
+ "MasakhaNEWSClusteringP2P (pcm)": 93.82,
+ "MasakhaNEWSClusteringP2P (run)": 66.18,
+ "MasakhaNEWSClusteringP2P (sna)": 77.87,
+ "MasakhaNEWSClusteringP2P (som)": 41.27,
+ "MasakhaNEWSClusteringP2P (swa)": 42.45,
+ "MasakhaNEWSClusteringP2P (tir)": 41.57,
+ "MasakhaNEWSClusteringP2P (xho)": 50.68,
+ "MasakhaNEWSClusteringP2P (yor)": 55.25,
+ "MasakhaNEWSClusteringS2S (amh)": 41.39,
+ "MasakhaNEWSClusteringS2S (eng)": 59.16,
+ "MasakhaNEWSClusteringS2S (fra)": 70.81,
+ "MasakhaNEWSClusteringS2S (hau)": 17.93,
+ "MasakhaNEWSClusteringS2S (ibo)": 45.7,
+ "MasakhaNEWSClusteringS2S (lin)": 75.44,
+ "MasakhaNEWSClusteringS2S (lug)": 42.41,
+ "MasakhaNEWSClusteringS2S (orm)": 24.36,
+ "MasakhaNEWSClusteringS2S (pcm)": 81.96,
+ "MasakhaNEWSClusteringS2S (run)": 58.82,
+ "MasakhaNEWSClusteringS2S (sna)": 42.33,
+ "MasakhaNEWSClusteringS2S (som)": 31.87,
+ "MasakhaNEWSClusteringS2S (swa)": 35.13,
+ "MasakhaNEWSClusteringS2S (tir)": 42.06,
+ "MasakhaNEWSClusteringS2S (xho)": 27.07,
+ "MasakhaNEWSClusteringS2S (yor)": 49.06,
"MedrxivClusteringP2P": 44.37,
"MedrxivClusteringS2S": 42.0,
"RedditClustering": 73.37,
"RedditClusteringP2P": 72.51,
+ "RuSciBenchGRNTIClusteringP2P": 62.53,
+ "RuSciBenchOECDClusteringP2P": 53.89,
"StackExchangeClustering": 79.07,
"StackExchangeClusteringP2P": 49.57,
+ "TenKGnadClusteringP2P": 53.6,
+ "TenKGnadClusteringS2S": 39.3,
"ThuNewsClusteringP2P": 87.43,
"ThuNewsClusteringS2S": 87.9,
"TwentyNewsgroupsClustering": 51.31
@@ -57,17 +372,56 @@
"max_ap": [
{
"Model": "gte-Qwen1.5-7B-instruct",
- "Cmnli": 91.81,
- "Ocnli": 85.22,
- "SprintDuplicateQuestions": 95.99,
- "TwitterSemEval2015": 79.36,
- "TwitterURLCorpus": 86.79
+ "CDSC-E": 74.9,
+ "FalseFriendsGermanEnglish": 52.61,
+ "OpusparcusPC (de)": 97.61,
+ "OpusparcusPC (en)": 99.1,
+ "OpusparcusPC (fi)": 93.32,
+ "OpusparcusPC (fr)": 95.29,
+ "OpusparcusPC (ru)": 89.72,
+ "OpusparcusPC (sv)": 95.65,
+ "PSC": 98.05,
+ "PawsXPairClassification (de)": 74.44,
+ "PawsXPairClassification (en)": 84.33,
+ "PawsXPairClassification (es)": 76.88,
+ "PawsXPairClassification (fr)": 78.51,
+ "PawsXPairClassification (ja)": 66.5,
+ "PawsXPairClassification (ko)": 64.0,
+ "PawsXPairClassification (zh)": 75.39,
+ "SICK-E-PL": 79.81,
+ "TERRa": 69.44
},
{
"Model": "gte-Qwen1.5-7B-instruct",
+ "CDSC-E": 74.94,
"Cmnli": 91.85,
+ "FalseFriendsGermanEnglish": 52.61,
"Ocnli": 85.28,
+ "OpusparcusPC (de)": 97.62,
+ "OpusparcusPC (en)": 99.1,
+ "OpusparcusPC (fi)": 93.32,
+ "OpusparcusPC (fr)": 95.29,
+ "OpusparcusPC (ru)": 89.76,
+ "OpusparcusPC (sv)": 95.68,
+ "PSC": 98.05,
+ "PawsXPairClassification (de)": 74.49,
+ "PawsXPairClassification (en)": 84.37,
+ "PawsXPairClassification (es)": 76.94,
+ "PawsXPairClassification (fr)": 78.51,
+ "PawsXPairClassification (ja)": 66.53,
+ "PawsXPairClassification (ko)": 64.07,
+ "PawsXPairClassification (zh)": 75.39,
+ "SICK-E-PL": 79.9,
"SprintDuplicateQuestions": 96.07,
+ "TERRa": 69.5,
+ "TwitterSemEval2015": 79.36,
+ "TwitterURLCorpus": 86.79
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "Cmnli": 91.81,
+ "Ocnli": 85.22,
+ "SprintDuplicateQuestions": 95.99,
"TwitterSemEval2015": 79.36,
"TwitterURLCorpus": 86.79
}
@@ -77,13 +431,37 @@
"map": [
{
"Model": "gte-Qwen1.5-7B-instruct",
+ "AlloprofReranking": 75.56,
"AskUbuntuDupQuestions": 66.0,
"CMedQAv1": 86.37,
"CMedQAv2": 87.41,
"MindSmallReranking": 32.71,
+ "RuBQReranking": 70.28,
"SciDocsRR": 87.89,
"StackOverflowDupQuestions": 53.93,
+ "SyntecReranking": 86.98,
"T2Reranking": 68.11
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "MIRACLReranking (ru)": 55.34,
+ "MIRACLReranking (ar)": 46.94,
+ "MIRACLReranking (bn)": 52.66,
+ "MIRACLReranking (de)": 49.61,
+ "MIRACLReranking (en)": 57.4,
+ "MIRACLReranking (es)": 55.89,
+ "MIRACLReranking (fa)": 33.31,
+ "MIRACLReranking (fi)": 60.99,
+ "MIRACLReranking (fr)": 47.51,
+ "MIRACLReranking (hi)": 46.49,
+ "MIRACLReranking (id)": 48.16,
+ "MIRACLReranking (ja)": 47.3,
+ "MIRACLReranking (ko)": 41.37,
+ "MIRACLReranking (sw)": 45.44,
+ "MIRACLReranking (te)": 50.46,
+ "MIRACLReranking (th)": 56.03,
+ "MIRACLReranking (yo)": 58.02,
+ "MIRACLReranking (zh)": 46.1
}
]
},
@@ -91,7 +469,14 @@
"ndcg_at_10": [
{
"Model": "gte-Qwen1.5-7B-instruct",
+ "AILACasedocs": 34.07,
+ "AILAStatutes": 31.46,
+ "ARCChallenge": 26.34,
+ "AlloprofRetrieval": 51.11,
+ "AlphaNLI": 33.84,
+ "AppsRetrieval": 30.19,
"ArguAna": 62.65,
+ "BSARDRetrieval": 23.58,
"BrightRetrieval (stackoverflow)": 19.85,
"BrightRetrieval (earth_science)": 36.22,
"BrightRetrieval (leetcode)": 25.46,
@@ -107,25 +492,121 @@
"CQADupstackRetrieval": 40.64,
"ClimateFEVER": 44.0,
"CmedqaRetrieval": 43.47,
+ "CodeFeedbackMT": 52.15,
+ "CodeFeedbackST": 82.57,
+ "CodeSearchNetCCRetrieval (python)": 70.64,
+ "CodeSearchNetCCRetrieval (javascript)": 63.28,
+ "CodeSearchNetCCRetrieval (go)": 59.7,
+ "CodeSearchNetCCRetrieval (ruby)": 61.95,
+ "CodeSearchNetCCRetrieval (java)": 60.42,
+ "CodeSearchNetCCRetrieval (php)": 49.08,
+ "CodeSearchNetRetrieval (python)": 91.17,
+ "CodeSearchNetRetrieval (javascript)": 78.19,
+ "CodeSearchNetRetrieval (go)": 91.95,
+ "CodeSearchNetRetrieval (ruby)": 84.58,
+ "CodeSearchNetRetrieval (java)": 85.39,
+ "CodeSearchNetRetrieval (php)": 81.35,
+ "CodeTransOceanContest": 77.71,
+ "CodeTransOceanDL": 29.84,
+ "CosQA": 31.74,
"CovidRetrieval": 80.87,
"DBPedia": 48.04,
"DuRetrieval": 86.01,
"EcomRetrieval": 66.46,
"FEVER": 93.35,
"FiQA2018": 55.31,
+ "GerDaLIR": 6.51,
+ "GerDaLIRSmall": 14.81,
+ "GermanQuAD-Retrieval": 91.52,
+ "HellaSwag": 31.92,
"HotpotQA": 72.25,
+ "LEMBNarrativeQARetrieval": 35.21,
+ "LEMBQMSumRetrieval": 27.03,
+ "LEMBSummScreenFDRetrieval": 70.43,
+ "LEMBWikimQARetrieval": 50.97,
+ "LeCaRDv2": 62.12,
+ "LegalBenchConsumerContractsQA": 76.06,
+ "LegalBenchCorporateLobbying": 94.51,
+ "LegalQuAD": 37.99,
+ "LegalSummarization": 61.94,
"MMarcoRetrieval": 73.83,
"MSMARCO": 41.68,
"MedicalRetrieval": 61.33,
+ "MintakaRetrieval (ar)": 17.77,
+ "MintakaRetrieval (de)": 40.73,
+ "MintakaRetrieval (es)": 39.93,
+ "MintakaRetrieval (fr)": 42.44,
+ "MintakaRetrieval (hi)": 18.89,
+ "MintakaRetrieval (it)": 42.28,
+ "MintakaRetrieval (ja)": 24.74,
+ "MintakaRetrieval (pt)": 38.98,
"NFCorpus": 38.25,
"NQ": 61.79,
+ "PIQA": 40.98,
+ "Quail": 18.73,
"QuoraRetrieval": 89.61,
+ "RARbCode": 87.67,
+ "RARbMath": 74.56,
+ "RiaNewsRetrieval": 61.1,
+ "RuBQRetrieval": 65.17,
"SCIDOCS": 27.69,
+ "SIQA": 5.21,
"SciFact": 75.31,
+ "SciFact-PL": 57.38,
+ "SpartQA": 16.95,
+ "StackOverflowQA": 91.02,
+ "SyntecRetrieval": 86.47,
+ "SyntheticText2SQL": 55.6,
"T2Retrieval": 83.58,
"TRECCOVID": 72.72,
+ "TRECCOVID-PL": 69.14,
+ "TempReasonL1": 1.84,
+ "TempReasonL2Fact": 44.81,
+ "TempReasonL2Pure": 5.34,
+ "TempReasonL3Fact": 33.49,
+ "TempReasonL3Pure": 9.01,
"Touche2020": 20.3,
- "VideoRetrieval": 69.41
+ "VideoRetrieval": 69.41,
+ "WinoGrande": 64.8,
+ "XMarket (de)": 23.15,
+ "XMarket (en)": 36.25,
+ "XMarket (es)": 22.93,
+ "XPQARetrieval (ara-ara)": 36.85,
+ "XPQARetrieval (eng-ara)": 25.84,
+ "XPQARetrieval (ara-eng)": 37.12,
+ "XPQARetrieval (deu-deu)": 71.07,
+ "XPQARetrieval (eng-deu)": 41.29,
+ "XPQARetrieval (deu-eng)": 66.7,
+ "XPQARetrieval (spa-spa)": 55.62,
+ "XPQARetrieval (eng-spa)": 30.69,
+ "XPQARetrieval (spa-eng)": 55.47,
+ "XPQARetrieval (fra-fra)": 61.01,
+ "XPQARetrieval (eng-fra)": 40.46,
+ "XPQARetrieval (fra-eng)": 58.94,
+ "XPQARetrieval (hin-hin)": 64.15,
+ "XPQARetrieval (eng-hin)": 18.76,
+ "XPQARetrieval (hin-eng)": 50.65,
+ "XPQARetrieval (ita-ita)": 65.84,
+ "XPQARetrieval (eng-ita)": 35.49,
+ "XPQARetrieval (ita-eng)": 61.3,
+ "XPQARetrieval (jpn-jpn)": 66.64,
+ "XPQARetrieval (eng-jpn)": 38.88,
+ "XPQARetrieval (jpn-eng)": 64.6,
+ "XPQARetrieval (kor-kor)": 30.69,
+ "XPQARetrieval (eng-kor)": 27.28,
+ "XPQARetrieval (kor-eng)": 28.51,
+ "XPQARetrieval (pol-pol)": 39.11,
+ "XPQARetrieval (eng-pol)": 23.3,
+ "XPQARetrieval (pol-eng)": 37.17,
+ "XPQARetrieval (por-por)": 41.84,
+ "XPQARetrieval (eng-por)": 19.63,
+ "XPQARetrieval (por-eng)": 43.75,
+ "XPQARetrieval (tam-tam)": 23.55,
+ "XPQARetrieval (eng-tam)": 5.04,
+ "XPQARetrieval (tam-eng)": 13.74,
+ "XPQARetrieval (cmn-cmn)": 64.98,
+ "XPQARetrieval (eng-cmn)": 36.15,
+ "XPQARetrieval (cmn-eng)": 60.31
}
],
"recall_at_1": [
@@ -164,6 +645,80 @@
"STS22 (zh)": 67.36,
"STSB": 81.37,
"STSBenchmark": 87.35
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "CDSC-R": 85.32,
+ "GermanSTSBenchmark": 81.1,
+ "RUParaPhraserSTS": 73.65,
+ "RuSTSBenchmarkSTS": 80.03,
+ "SICK-R-PL": 73.6,
+ "SICKFr": 76.53,
+ "STS22 (ru)": 60.37,
+ "STS22 (pl)": 40.2,
+ "STS22 (fr)": 81.74,
+ "STS22 (de-pl)": 53.46,
+ "STS22 (it)": 77.46,
+ "STS22 (de-fr)": 67.08,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-en)": 77.16,
+ "STS22 (es)": 67.35,
+ "STS22 (tr)": 65.5,
+ "STS22 (de)": 55.27,
+ "STS22 (zh)": 67.9,
+ "STS22 (de-en)": 53.99,
+ "STS22 (zh-en)": 69.8,
+ "STS22 (es-it)": 74.55,
+ "STS22 (ar)": 58.84,
+ "STS22 (en)": 67.1,
+ "STS22 (pl-en)": 76.53,
+ "STSBenchmarkMultilingualSTS (en)": 87.64,
+ "STSBenchmarkMultilingualSTS (zh)": 81.33,
+ "STSBenchmarkMultilingualSTS (es)": 83.68,
+ "STSBenchmarkMultilingualSTS (pl)": 77.17,
+ "STSBenchmarkMultilingualSTS (fr)": 82.69,
+ "STSBenchmarkMultilingualSTS (it)": 81.59,
+ "STSBenchmarkMultilingualSTS (nl)": 78.86,
+ "STSBenchmarkMultilingualSTS (de)": 82.29,
+ "STSBenchmarkMultilingualSTS (ru)": 80.48,
+ "STSBenchmarkMultilingualSTS (pt)": 81.98
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "CDSC-R": 85.32,
+ "GermanSTSBenchmark": 81.1,
+ "RUParaPhraserSTS": 73.65,
+ "RuSTSBenchmarkSTS": 80.03,
+ "SICK-R-PL": 73.6,
+ "SICKFr": 76.53,
+ "STS22 (ru)": 60.37,
+ "STS22 (pl)": 40.2,
+ "STS22 (fr)": 81.74,
+ "STS22 (de-pl)": 53.46,
+ "STS22 (it)": 77.46,
+ "STS22 (de-fr)": 67.08,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-en)": 77.16,
+ "STS22 (es)": 67.35,
+ "STS22 (tr)": 65.5,
+ "STS22 (de)": 55.27,
+ "STS22 (zh)": 67.9,
+ "STS22 (de-en)": 53.99,
+ "STS22 (zh-en)": 69.8,
+ "STS22 (es-it)": 74.55,
+ "STS22 (ar)": 58.84,
+ "STS22 (en)": 67.1,
+ "STS22 (pl-en)": 76.53,
+ "STSBenchmarkMultilingualSTS (en)": 87.64,
+ "STSBenchmarkMultilingualSTS (zh)": 81.33,
+ "STSBenchmarkMultilingualSTS (es)": 83.68,
+ "STSBenchmarkMultilingualSTS (pl)": 77.17,
+ "STSBenchmarkMultilingualSTS (fr)": 82.69,
+ "STSBenchmarkMultilingualSTS (it)": 81.59,
+ "STSBenchmarkMultilingualSTS (nl)": 78.86,
+ "STSBenchmarkMultilingualSTS (de)": 82.29,
+ "STSBenchmarkMultilingualSTS (ru)": 80.48,
+ "STSBenchmarkMultilingualSTS (pt)": 81.98
}
]
},
@@ -172,36 +727,460 @@
{
"Model": "gte-Qwen1.5-7B-instruct",
"SummEval": 31.46
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "SummEvalFr": 30.04
+ },
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "SummEvalFr": 30.04
}
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "CEDRClassification": 54.77,
+ "SensitiveTopicsClassification": 35.62
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "gte-Qwen1.5-7B-instruct",
+ "Core17InstructionRetrieval": 8.37,
+ "News21InstructionRetrieval": 1.82,
+ "Robust04InstructionRetrieval": 5.88
+ }
+ ]
}
},
"Alibaba-NLP__gte-Qwen2-7B-instruct": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "BornholmBitextMining": 50.16,
+ "Tatoeba (dan-eng)": 94.29,
+ "Tatoeba (ara-eng)": 92.36,
+ "Tatoeba (vie-eng)": 96.7,
+ "Tatoeba (pam-eng)": 16.08,
+ "Tatoeba (lat-eng)": 81.91,
+ "Tatoeba (jpn-eng)": 94.72,
+ "Tatoeba (ina-eng)": 95.2,
+ "Tatoeba (kab-eng)": 5.1,
+ "Tatoeba (hun-eng)": 86.36,
+ "Tatoeba (bel-eng)": 91.13,
+ "Tatoeba (fao-eng)": 73.0,
+ "Tatoeba (glg-eng)": 93.65,
+ "Tatoeba (swg-eng)": 79.91,
+ "Tatoeba (yue-eng)": 92.0,
+ "Tatoeba (ell-eng)": 92.78,
+ "Tatoeba (kur-eng)": 39.98,
+ "Tatoeba (spa-eng)": 98.68,
+ "Tatoeba (fin-eng)": 92.1,
+ "Tatoeba (amh-eng)": 25.63,
+ "Tatoeba (pes-eng)": 93.35,
+ "Tatoeba (eus-eng)": 37.91,
+ "Tatoeba (mar-eng)": 81.77,
+ "Tatoeba (awa-eng)": 68.02,
+ "Tatoeba (zsm-eng)": 94.15,
+ "Tatoeba (mon-eng)": 69.42,
+ "Tatoeba (arq-eng)": 55.37,
+ "Tatoeba (tur-eng)": 95.67,
+ "Tatoeba (arz-eng)": 76.51,
+ "Tatoeba (tat-eng)": 44.61,
+ "Tatoeba (lfn-eng)": 78.37,
+ "Tatoeba (jav-eng)": 38.98,
+ "Tatoeba (hrv-eng)": 94.05,
+ "Tatoeba (csb-eng)": 65.8,
+ "Tatoeba (orv-eng)": 62.42,
+ "Tatoeba (swe-eng)": 93.82,
+ "Tatoeba (kat-eng)": 78.69,
+ "Tatoeba (hin-eng)": 95.8,
+ "Tatoeba (tgl-eng)": 91.13,
+ "Tatoeba (oci-eng)": 64.81,
+ "Tatoeba (pms-eng)": 73.33,
+ "Tatoeba (mkd-eng)": 86.9,
+ "Tatoeba (dsb-eng)": 68.89,
+ "Tatoeba (mhr-eng)": 17.17,
+ "Tatoeba (ceb-eng)": 39.57,
+ "Tatoeba (cor-eng)": 9.1,
+ "Tatoeba (gle-eng)": 65.0,
+ "Tatoeba (sqi-eng)": 61.21,
+ "Tatoeba (tzl-eng)": 50.98,
+ "Tatoeba (kaz-eng)": 70.45,
+ "Tatoeba (swh-eng)": 52.93,
+ "Tatoeba (tel-eng)": 55.11,
+ "Tatoeba (kor-eng)": 91.41,
+ "Tatoeba (gla-eng)": 48.68,
+ "Tatoeba (mal-eng)": 87.48,
+ "Tatoeba (yid-eng)": 58.32,
+ "Tatoeba (ile-eng)": 85.28,
+ "Tatoeba (dtp-eng)": 11.63,
+ "Tatoeba (ang-eng)": 80.97,
+ "Tatoeba (ukr-eng)": 92.95,
+ "Tatoeba (ron-eng)": 94.18,
+ "Tatoeba (ita-eng)": 94.42,
+ "Tatoeba (epo-eng)": 92.14,
+ "Tatoeba (deu-eng)": 99.07,
+ "Tatoeba (cha-eng)": 45.44,
+ "Tatoeba (est-eng)": 81.14,
+ "Tatoeba (nld-eng)": 97.0,
+ "Tatoeba (kzj-eng)": 13.24,
+ "Tatoeba (max-eng)": 65.98,
+ "Tatoeba (khm-eng)": 58.6,
+ "Tatoeba (bul-eng)": 92.65,
+ "Tatoeba (nds-eng)": 82.73,
+ "Tatoeba (xho-eng)": 31.28,
+ "Tatoeba (hye-eng)": 48.49,
+ "Tatoeba (bos-eng)": 92.23,
+ "Tatoeba (bre-eng)": 14.41,
+ "Tatoeba (lvs-eng)": 87.21,
+ "Tatoeba (por-eng)": 94.28,
+ "Tatoeba (nno-eng)": 90.4,
+ "Tatoeba (ast-eng)": 85.83,
+ "Tatoeba (cmn-eng)": 96.15,
+ "Tatoeba (cym-eng)": 65.3,
+ "Tatoeba (ben-eng)": 86.77,
+ "Tatoeba (tam-eng)": 72.01,
+ "Tatoeba (fry-eng)": 70.23,
+ "Tatoeba (slk-eng)": 92.82,
+ "Tatoeba (war-eng)": 38.19,
+ "Tatoeba (nob-eng)": 97.7,
+ "Tatoeba (pol-eng)": 97.65,
+ "Tatoeba (afr-eng)": 91.77,
+ "Tatoeba (rus-eng)": 93.87,
+ "Tatoeba (cbk-eng)": 82.18,
+ "Tatoeba (lit-eng)": 89.38,
+ "Tatoeba (ind-eng)": 94.17,
+ "Tatoeba (tha-eng)": 97.2,
+ "Tatoeba (ido-eng)": 83.79,
+ "Tatoeba (nov-eng)": 72.49,
+ "Tatoeba (wuu-eng)": 92.68,
+ "Tatoeba (srp-eng)": 90.92,
+ "Tatoeba (isl-eng)": 79.5,
+ "Tatoeba (cat-eng)": 92.59,
+ "Tatoeba (fra-eng)": 95.16,
+ "Tatoeba (ber-eng)": 9.88,
+ "Tatoeba (ces-eng)": 94.92,
+ "Tatoeba (urd-eng)": 89.88,
+ "Tatoeba (slv-eng)": 86.48,
+ "Tatoeba (tuk-eng)": 47.64,
+ "Tatoeba (uzb-eng)": 58.58,
+ "Tatoeba (hsb-eng)": 79.21,
+ "Tatoeba (heb-eng)": 88.88,
+ "Tatoeba (gsw-eng)": 55.2,
+ "Tatoeba (uig-eng)": 71.71,
+ "Tatoeba (aze-eng)": 88.29
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "AmazonCounterfactualClassification (en-ext)": 93.04,
+ "AmazonCounterfactualClassification (en)": 91.33,
+ "AmazonCounterfactualClassification (de)": 75.62,
+ "AmazonCounterfactualClassification (ja)": 83.59,
+ "AmazonReviewsClassification (en)": 62.11,
+ "AmazonReviewsClassification (de)": 59.37,
+ "AmazonReviewsClassification (es)": 55.85,
+ "AmazonReviewsClassification (fr)": 55.54,
+ "AmazonReviewsClassification (ja)": 57.01,
+ "AmazonReviewsClassification (zh)": 53.55,
+ "AngryTweetsClassification": 64.4,
+ "DanishPoliticalCommentsClassification": 48.46,
+ "GeoreviewClassification": 60.01,
+ "HeadlineClassification": 76.38,
+ "InappropriatenessClassification": 76.41,
+ "KinopoiskClassification": 71.04,
+ "LccSentimentClassification": 76.87,
+ "MTOPDomainClassification (en)": 99.04,
+ "MTOPDomainClassification (de)": 97.17,
+ "MTOPDomainClassification (es)": 97.77,
+ "MTOPDomainClassification (fr)": 96.69,
+ "MTOPDomainClassification (hi)": 96.25,
+ "MTOPDomainClassification (th)": 93.25,
+ "MTOPIntentClassification (en)": 91.78,
+ "MTOPIntentClassification (de)": 88.18,
+ "MTOPIntentClassification (es)": 91.14,
+ "MTOPIntentClassification (fr)": 87.47,
+ "MTOPIntentClassification (hi)": 84.08,
+ "MTOPIntentClassification (th)": 84.48,
+ "MasakhaNEWSClassification (amh)": 72.71,
+ "MasakhaNEWSClassification (eng)": 81.78,
+ "MasakhaNEWSClassification (fra)": 80.78,
+ "MasakhaNEWSClassification (hau)": 79.45,
+ "MasakhaNEWSClassification (ibo)": 75.87,
+ "MasakhaNEWSClassification (lin)": 83.6,
+ "MasakhaNEWSClassification (lug)": 74.75,
+ "MasakhaNEWSClassification (orm)": 83.17,
+ "MasakhaNEWSClassification (pcm)": 94.95,
+ "MasakhaNEWSClassification (run)": 83.11,
+ "MasakhaNEWSClassification (sna)": 88.64,
+ "MasakhaNEWSClassification (som)": 68.06,
+ "MasakhaNEWSClassification (swa)": 77.94,
+ "MasakhaNEWSClassification (tir)": 55.51,
+ "MasakhaNEWSClassification (xho)": 83.94,
+ "MasakhaNEWSClassification (yor)": 86.06,
+ "MassiveIntentClassification (ar)": 71.16,
+ "MassiveIntentClassification (ja)": 81.92,
+ "MassiveIntentClassification (zh-TW)": 76.96,
+ "MassiveIntentClassification (bn)": 75.95,
+ "MassiveIntentClassification (hy)": 42.18,
+ "MassiveIntentClassification (lv)": 69.49,
+ "MassiveIntentClassification (sw)": 52.05,
+ "MassiveIntentClassification (el)": 75.35,
+ "MassiveIntentClassification (pt)": 81.94,
+ "MassiveIntentClassification (tr)": 77.81,
+ "MassiveIntentClassification (ru)": 82.1,
+ "MassiveIntentClassification (my)": 56.7,
+ "MassiveIntentClassification (it)": 81.58,
+ "MassiveIntentClassification (nb)": 78.71,
+ "MassiveIntentClassification (kn)": 52.18,
+ "MassiveIntentClassification (af)": 76.01,
+ "MassiveIntentClassification (vi)": 79.89,
+ "MassiveIntentClassification (te)": 57.4,
+ "MassiveIntentClassification (az)": 72.24,
+ "MassiveIntentClassification (fa)": 77.86,
+ "MassiveIntentClassification (th)": 74.09,
+ "MassiveIntentClassification (sq)": 50.92,
+ "MassiveIntentClassification (fr)": 81.9,
+ "MassiveIntentClassification (es)": 81.29,
+ "MassiveIntentClassification (da)": 78.15,
+ "MassiveIntentClassification (sv)": 79.13,
+ "MassiveIntentClassification (jv)": 58.24,
+ "MassiveIntentClassification (ta)": 52.74,
+ "MassiveIntentClassification (cy)": 49.37,
+ "MassiveIntentClassification (tl)": 73.98,
+ "MassiveIntentClassification (ur)": 72.4,
+ "MassiveIntentClassification (en)": 85.43,
+ "MassiveIntentClassification (ro)": 76.41,
+ "MassiveIntentClassification (hi)": 78.0,
+ "MassiveIntentClassification (id)": 80.11,
+ "MassiveIntentClassification (he)": 74.39,
+ "MassiveIntentClassification (km)": 49.36,
+ "MassiveIntentClassification (nl)": 81.59,
+ "MassiveIntentClassification (sl)": 75.0,
+ "MassiveIntentClassification (ms)": 76.4,
+ "MassiveIntentClassification (ko)": 79.24,
+ "MassiveIntentClassification (ml)": 64.98,
+ "MassiveIntentClassification (pl)": 80.89,
+ "MassiveIntentClassification (fi)": 74.58,
+ "MassiveIntentClassification (zh-CN)": 81.09,
+ "MassiveIntentClassification (hu)": 72.1,
+ "MassiveIntentClassification (is)": 56.6,
+ "MassiveIntentClassification (am)": 32.29,
+ "MassiveIntentClassification (mn)": 53.96,
+ "MassiveIntentClassification (de)": 80.49,
+ "MassiveIntentClassification (ka)": 54.94,
+ "MassiveScenarioClassification (ru)": 87.43,
+ "MassiveScenarioClassification (is)": 64.96,
+ "MassiveScenarioClassification (en)": 89.7,
+ "MassiveScenarioClassification (fi)": 79.5,
+ "MassiveScenarioClassification (sw)": 61.58,
+ "MassiveScenarioClassification (af)": 83.1,
+ "MassiveScenarioClassification (hu)": 78.12,
+ "MassiveScenarioClassification (ms)": 82.99,
+ "MassiveScenarioClassification (ka)": 63.42,
+ "MassiveScenarioClassification (kn)": 64.91,
+ "MassiveScenarioClassification (id)": 86.1,
+ "MassiveScenarioClassification (it)": 87.3,
+ "MassiveScenarioClassification (de)": 86.82,
+ "MassiveScenarioClassification (km)": 56.31,
+ "MassiveScenarioClassification (pl)": 85.54,
+ "MassiveScenarioClassification (vi)": 84.95,
+ "MassiveScenarioClassification (pt)": 86.3,
+ "MassiveScenarioClassification (th)": 81.19,
+ "MassiveScenarioClassification (zh-CN)": 85.74,
+ "MassiveScenarioClassification (sv)": 85.01,
+ "MassiveScenarioClassification (es)": 87.02,
+ "MassiveScenarioClassification (ta)": 57.91,
+ "MassiveScenarioClassification (sq)": 57.85,
+ "MassiveScenarioClassification (ur)": 77.18,
+ "MassiveScenarioClassification (he)": 80.12,
+ "MassiveScenarioClassification (cy)": 57.57,
+ "MassiveScenarioClassification (jv)": 69.44,
+ "MassiveScenarioClassification (nb)": 84.15,
+ "MassiveScenarioClassification (am)": 41.09,
+ "MassiveScenarioClassification (hy)": 49.28,
+ "MassiveScenarioClassification (zh-TW)": 83.56,
+ "MassiveScenarioClassification (az)": 76.8,
+ "MassiveScenarioClassification (tl)": 79.26,
+ "MassiveScenarioClassification (te)": 68.13,
+ "MassiveScenarioClassification (hi)": 83.21,
+ "MassiveScenarioClassification (ja)": 87.44,
+ "MassiveScenarioClassification (fa)": 82.51,
+ "MassiveScenarioClassification (ml)": 71.49,
+ "MassiveScenarioClassification (sl)": 79.98,
+ "MassiveScenarioClassification (ar)": 77.71,
+ "MassiveScenarioClassification (ro)": 81.88,
+ "MassiveScenarioClassification (lv)": 76.39,
+ "MassiveScenarioClassification (bn)": 80.4,
+ "MassiveScenarioClassification (ko)": 86.28,
+ "MassiveScenarioClassification (tr)": 83.32,
+ "MassiveScenarioClassification (el)": 81.23,
+ "MassiveScenarioClassification (fr)": 86.64,
+ "MassiveScenarioClassification (da)": 84.33,
+ "MassiveScenarioClassification (my)": 60.44,
+ "MassiveScenarioClassification (mn)": 59.65,
+ "MassiveScenarioClassification (nl)": 86.68,
+ "NoRecClassification": 65.99,
+ "NordicLangClassification": 73.23,
+ "PAC": 66.32,
+ "PolEmo2.0-OUT": 54.49,
+ "RuReviewsClassification": 74.85,
+ "RuSciBenchGRNTIClassification": 71.01,
+ "RuSciBenchOECDClassification": 57.68,
+ "ToxicConversationsClassification": 85.74
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "BlurbsClusteringP2P": 47.0,
+ "BlurbsClusteringS2S": 21.74,
+ "GeoreviewClusteringP2P": 78.32,
+ "MLSUMClusteringP2P (de)": 49.66,
+ "MLSUMClusteringP2P (fr)": 49.71,
+ "MLSUMClusteringP2P (ru)": 58.58,
+ "MLSUMClusteringP2P (es)": 50.43,
+ "MLSUMClusteringS2S (de)": 47.75,
+ "MLSUMClusteringS2S (fr)": 48.91,
+ "MLSUMClusteringS2S (ru)": 55.79,
+ "MLSUMClusteringS2S (es)": 48.93,
+ "MasakhaNEWSClusteringP2P (amh)": 51.99,
+ "MasakhaNEWSClusteringP2P (eng)": 63.97,
+ "MasakhaNEWSClusteringP2P (fra)": 63.67,
+ "MasakhaNEWSClusteringP2P (hau)": 57.93,
+ "MasakhaNEWSClusteringP2P (ibo)": 66.36,
+ "MasakhaNEWSClusteringP2P (lin)": 84.66,
+ "MasakhaNEWSClusteringP2P (lug)": 50.98,
+ "MasakhaNEWSClusteringP2P (orm)": 56.51,
+ "MasakhaNEWSClusteringP2P (pcm)": 89.16,
+ "MasakhaNEWSClusteringP2P (run)": 63.84,
+ "MasakhaNEWSClusteringP2P (sna)": 80.08,
+ "MasakhaNEWSClusteringP2P (som)": 43.45,
+ "MasakhaNEWSClusteringP2P (swa)": 45.64,
+ "MasakhaNEWSClusteringP2P (tir)": 58.86,
+ "MasakhaNEWSClusteringP2P (xho)": 54.15,
+ "MasakhaNEWSClusteringP2P (yor)": 68.84,
+ "MasakhaNEWSClusteringS2S (amh)": 48.74,
+ "MasakhaNEWSClusteringS2S (eng)": 57.22,
+ "MasakhaNEWSClusteringS2S (fra)": 60.93,
+ "MasakhaNEWSClusteringS2S (hau)": 29.97,
+ "MasakhaNEWSClusteringS2S (ibo)": 55.42,
+ "MasakhaNEWSClusteringS2S (lin)": 69.5,
+ "MasakhaNEWSClusteringS2S (lug)": 49.28,
+ "MasakhaNEWSClusteringS2S (orm)": 35.88,
+ "MasakhaNEWSClusteringS2S (pcm)": 79.67,
+ "MasakhaNEWSClusteringS2S (run)": 59.68,
+ "MasakhaNEWSClusteringS2S (sna)": 63.13,
+ "MasakhaNEWSClusteringS2S (som)": 36.68,
+ "MasakhaNEWSClusteringS2S (swa)": 31.39,
+ "MasakhaNEWSClusteringS2S (tir)": 45.25,
+ "MasakhaNEWSClusteringS2S (xho)": 28.47,
+ "MasakhaNEWSClusteringS2S (yor)": 47.31,
+ "RuSciBenchGRNTIClusteringP2P": 65.19,
+ "RuSciBenchOECDClusteringP2P": 55.6,
+ "TenKGnadClusteringP2P": 52.6,
+ "TenKGnadClusteringS2S": 38.04
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "FalseFriendsGermanEnglish": 55.91,
+ "OpusparcusPC (de)": 98.51,
+ "OpusparcusPC (en)": 99.12,
+ "OpusparcusPC (fi)": 94.12,
+ "OpusparcusPC (fr)": 95.95,
+ "OpusparcusPC (ru)": 92.5,
+ "OpusparcusPC (sv)": 95.66,
+ "PawsXPairClassification (de)": 77.16,
+ "PawsXPairClassification (en)": 82.37,
+ "PawsXPairClassification (es)": 79.4,
+ "PawsXPairClassification (fr)": 80.69,
+ "PawsXPairClassification (ja)": 68.72,
+ "PawsXPairClassification (ko)": 68.65,
+ "PawsXPairClassification (zh)": 76.53,
+ "SprintDuplicateQuestions": 92.82,
+ "TERRa": 67.45,
+ "TwitterURLCorpus": 86.59
+ },
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "FalseFriendsGermanEnglish": 55.91,
+ "OpusparcusPC (de)": 98.51,
+ "OpusparcusPC (en)": 99.12,
+ "OpusparcusPC (fi)": 94.12,
+ "OpusparcusPC (fr)": 95.95,
+ "OpusparcusPC (ru)": 92.5,
+ "OpusparcusPC (sv)": 95.7,
+ "PawsXPairClassification (de)": 77.71,
+ "PawsXPairClassification (en)": 82.39,
+ "PawsXPairClassification (es)": 79.44,
+ "PawsXPairClassification (fr)": 80.8,
+ "PawsXPairClassification (ja)": 68.82,
+ "PawsXPairClassification (ko)": 68.7,
+ "PawsXPairClassification (zh)": 76.81,
+ "SprintDuplicateQuestions": 93.14,
+ "TERRa": 67.61,
+ "TwitterURLCorpus": 86.61
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "AlloprofReranking": 81.1,
+ "RuBQReranking": 74.13,
+ "T2Reranking": 67.8
+ },
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "MIRACLReranking (ru)": 54.95,
+ "MIRACLReranking (ar)": 68.47,
+ "MIRACLReranking (bn)": 63.87,
+ "MIRACLReranking (de)": 50.18,
+ "MIRACLReranking (en)": 62.37,
+ "MIRACLReranking (es)": 60.78,
+ "MIRACLReranking (fa)": 51.21,
+ "MIRACLReranking (fi)": 68.98,
+ "MIRACLReranking (fr)": 50.82,
+ "MIRACLReranking (hi)": 59.2,
+ "MIRACLReranking (id)": 52.97,
+ "MIRACLReranking (ja)": 60.39,
+ "MIRACLReranking (ko)": 50.56,
+ "MIRACLReranking (sw)": 53.39,
+ "MIRACLReranking (te)": 67.83,
+ "MIRACLReranking (th)": 69.34,
+ "MIRACLReranking (yo)": 64.19,
+ "MIRACLReranking (zh)": 52.11
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "gte-Qwen2-7B-instruct",
+ "AILACasedocs": 29.74,
+ "AILAStatutes": 33.76,
+ "ARCChallenge": 20.46,
+ "AlloprofRetrieval": 59.15,
+ "AlphaNLI": 43.93,
+ "AppsRetrieval": 28.39,
+ "ArguAna": 54.56,
"BrightRetrieval (earth_science)": 40.66,
"BrightRetrieval (sustainable_living)": 20.82,
"BrightRetrieval (theoremqa_theorems)": 34.22,
@@ -213,7 +1192,95 @@
"BrightRetrieval (biology)": 32.09,
"BrightRetrieval (theoremqa_questions)": 29.9,
"BrightRetrieval (robotics)": 12.82,
- "BrightRetrieval (psychology)": 26.58
+ "BrightRetrieval (psychology)": 26.58,
+ "CodeFeedbackMT": 57.66,
+ "CodeFeedbackST": 84.15,
+ "CodeSearchNetCCRetrieval (python)": 75.41,
+ "CodeSearchNetCCRetrieval (javascript)": 69.2,
+ "CodeSearchNetCCRetrieval (go)": 62.59,
+ "CodeSearchNetCCRetrieval (ruby)": 65.18,
+ "CodeSearchNetCCRetrieval (java)": 67.15,
+ "CodeSearchNetCCRetrieval (php)": 57.93,
+ "CodeSearchNetRetrieval (python)": 91.93,
+ "CodeSearchNetRetrieval (javascript)": 79.82,
+ "CodeSearchNetRetrieval (go)": 94.26,
+ "CodeSearchNetRetrieval (ruby)": 84.76,
+ "CodeSearchNetRetrieval (java)": 86.49,
+ "CodeSearchNetRetrieval (php)": 84.52,
+ "CodeTransOceanContest": 81.83,
+ "CodeTransOceanDL": 32.17,
+ "CosQA": 32.24,
+ "CovidRetrieval": 81.04,
+ "GerDaLIR": 7.75,
+ "GerDaLIRSmall": 16.94,
+ "GermanQuAD-Retrieval": 94.54,
+ "HellaSwag": 37.61,
+ "LEMBNarrativeQARetrieval": 45.46,
+ "LEMBQMSumRetrieval": 31.27,
+ "LEMBSummScreenFDRetrieval": 76.08,
+ "LEMBWikimQARetrieval": 61.15,
+ "LeCaRDv2": 69.72,
+ "LegalBenchConsumerContractsQA": 75.83,
+ "LegalBenchCorporateLobbying": 95.21,
+ "LegalQuAD": 46.1,
+ "LegalSummarization": 66.58,
+ "PIQA": 44.05,
+ "Quail": 26.57,
+ "RARbCode": 90.49,
+ "RARbMath": 85.27,
+ "RiaNewsRetrieval": 76.26,
+ "RuBQRetrieval": 72.72,
+ "SCIDOCS": 23.48,
+ "SIQA": 8.36,
+ "SpartQA": 18.78,
+ "StackOverflowQA": 84.35,
+ "SyntheticText2SQL": 53.22,
+ "TRECCOVID": 80.37,
+ "TempReasonL1": 2.18,
+ "TempReasonL2Fact": 64.12,
+ "TempReasonL2Pure": 6.32,
+ "TempReasonL3Fact": 47.39,
+ "TempReasonL3Pure": 9.79,
+ "WinoGrande": 66.81,
+ "XMarket (de)": 29.61,
+ "XMarket (en)": 39.89,
+ "XMarket (es)": 32.7,
+ "XPQARetrieval (ara-ara)": 50.05,
+ "XPQARetrieval (eng-ara)": 40.41,
+ "XPQARetrieval (ara-eng)": 48.41,
+ "XPQARetrieval (deu-deu)": 78.07,
+ "XPQARetrieval (eng-deu)": 59.84,
+ "XPQARetrieval (deu-eng)": 74.41,
+ "XPQARetrieval (spa-spa)": 63.68,
+ "XPQARetrieval (eng-spa)": 52.02,
+ "XPQARetrieval (spa-eng)": 61.5,
+ "XPQARetrieval (fra-fra)": 70.76,
+ "XPQARetrieval (eng-fra)": 57.21,
+ "XPQARetrieval (fra-eng)": 67.89,
+ "XPQARetrieval (hin-hin)": 72.32,
+ "XPQARetrieval (eng-hin)": 42.7,
+ "XPQARetrieval (hin-eng)": 67.74,
+ "XPQARetrieval (ita-ita)": 72.61,
+ "XPQARetrieval (eng-ita)": 51.55,
+ "XPQARetrieval (ita-eng)": 70.43,
+ "XPQARetrieval (jpn-jpn)": 74.54,
+ "XPQARetrieval (eng-jpn)": 51.62,
+ "XPQARetrieval (jpn-eng)": 71.94,
+ "XPQARetrieval (kor-kor)": 39.61,
+ "XPQARetrieval (eng-kor)": 38.93,
+ "XPQARetrieval (kor-eng)": 37.75,
+ "XPQARetrieval (pol-pol)": 47.81,
+ "XPQARetrieval (eng-pol)": 37.95,
+ "XPQARetrieval (pol-eng)": 44.67,
+ "XPQARetrieval (por-por)": 48.96,
+ "XPQARetrieval (eng-por)": 36.09,
+ "XPQARetrieval (por-eng)": 48.82,
+ "XPQARetrieval (tam-tam)": 36.09,
+ "XPQARetrieval (eng-tam)": 20.8,
+ "XPQARetrieval (tam-eng)": 26.95,
+ "XPQARetrieval (cmn-cmn)": 65.85,
+ "XPQARetrieval (eng-cmn)": 36.18,
+ "XPQARetrieval (cmn-eng)": 62.85
}
],
"recall_at_1": [
@@ -231,80 +1298,849 @@
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "GermanSTSBenchmark": 84.61,
+ "RUParaPhraserSTS": 77.45,
+ "RuSTSBenchmarkSTS": 83.82,
+ "SICK-R": 79.16,
+ "STS12": 79.53,
+ "STS13": 88.97,
+ "STS14": 83.87,
+ "STS15": 88.48,
+ "STS17 (en-en)": 88.75,
+ "STS17 (ko-ko)": 83.86,
+ "STS17 (it-en)": 87.98,
+ "STS17 (en-tr)": 77.15,
+ "STS17 (en-ar)": 84.05,
+ "STS17 (es-es)": 89.01,
+ "STS17 (ar-ar)": 85.92,
+ "STS17 (es-en)": 86.84,
+ "STS17 (en-de)": 87.22,
+ "STS17 (nl-en)": 86.65,
+ "STS17 (fr-en)": 87.14,
+ "STS22 (ru)": 63.82,
+ "STS22 (pl)": 40.87,
+ "STS22 (es-en)": 78.38,
+ "STS22 (en)": 67.16,
+ "STS22 (zh-en)": 72.08,
+ "STS22 (tr)": 67.44,
+ "STS22 (ar)": 56.69,
+ "STS22 (es-it)": 74.84,
+ "STS22 (es)": 67.24,
+ "STS22 (de-fr)": 64.76,
+ "STS22 (de-pl)": 54.6,
+ "STS22 (it)": 78.4,
+ "STS22 (pl-en)": 74.58,
+ "STS22 (fr)": 82.49,
+ "STS22 (de)": 58.05,
+ "STS22 (de-en)": 57.62,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh)": 65.77,
+ "STSB": 81.05,
+ "STSBenchmark": 86.81,
+ "STSBenchmarkMultilingualSTS (fr)": 85.44,
+ "STSBenchmarkMultilingualSTS (pt)": 85.11,
+ "STSBenchmarkMultilingualSTS (en)": 86.85,
+ "STSBenchmarkMultilingualSTS (de)": 85.05,
+ "STSBenchmarkMultilingualSTS (it)": 84.73,
+ "STSBenchmarkMultilingualSTS (es)": 85.74,
+ "STSBenchmarkMultilingualSTS (zh)": 82.91,
+ "STSBenchmarkMultilingualSTS (ru)": 83.86,
+ "STSBenchmarkMultilingualSTS (pl)": 83.95,
+ "STSBenchmarkMultilingualSTS (nl)": 84.53
+ },
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "GermanSTSBenchmark": 84.61,
+ "RUParaPhraserSTS": 77.45,
+ "RuSTSBenchmarkSTS": 83.82,
+ "SICK-R": 79.16,
+ "STS12": 79.53,
+ "STS13": 88.97,
+ "STS14": 83.87,
+ "STS15": 88.48,
+ "STS17 (en-en)": 88.75,
+ "STS17 (ko-ko)": 83.86,
+ "STS17 (it-en)": 87.98,
+ "STS17 (en-tr)": 77.15,
+ "STS17 (en-ar)": 84.05,
+ "STS17 (es-es)": 89.01,
+ "STS17 (ar-ar)": 85.92,
+ "STS17 (es-en)": 86.84,
+ "STS17 (en-de)": 87.22,
+ "STS17 (nl-en)": 86.65,
+ "STS17 (fr-en)": 87.14,
+ "STS22 (ru)": 63.82,
+ "STS22 (pl)": 40.87,
+ "STS22 (es-en)": 78.38,
+ "STS22 (en)": 67.16,
+ "STS22 (zh-en)": 72.08,
+ "STS22 (tr)": 67.44,
+ "STS22 (ar)": 56.69,
+ "STS22 (es-it)": 74.84,
+ "STS22 (es)": 67.24,
+ "STS22 (de-fr)": 64.76,
+ "STS22 (de-pl)": 54.6,
+ "STS22 (it)": 78.4,
+ "STS22 (pl-en)": 74.58,
+ "STS22 (fr)": 82.49,
+ "STS22 (de)": 58.05,
+ "STS22 (de-en)": 57.62,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh)": 65.77,
+ "STSB": 81.05,
+ "STSBenchmark": 86.81,
+ "STSBenchmarkMultilingualSTS (fr)": 85.44,
+ "STSBenchmarkMultilingualSTS (pt)": 85.11,
+ "STSBenchmarkMultilingualSTS (en)": 86.85,
+ "STSBenchmarkMultilingualSTS (de)": 85.05,
+ "STSBenchmarkMultilingualSTS (it)": 84.73,
+ "STSBenchmarkMultilingualSTS (es)": 85.74,
+ "STSBenchmarkMultilingualSTS (zh)": 82.91,
+ "STSBenchmarkMultilingualSTS (ru)": 83.86,
+ "STSBenchmarkMultilingualSTS (pl)": 83.95,
+ "STSBenchmarkMultilingualSTS (nl)": 84.53
+ }
+ ]
},
"Summarization": {
"cosine_spearman": []
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "CEDRClassification": 52.78,
+ "SensitiveTopicsClassification": 35.76
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "gte-Qwen2-7B-instruct",
+ "Core17InstructionRetrieval": 6.78,
+ "News21InstructionRetrieval": 4.11,
+ "Robust04InstructionRetrieval": 3.93
+ }
+ ]
}
},
"BAAI__bge-base-en-v1.5": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "BornholmBitextMining": 27.6,
+ "Tatoeba (kzj-eng)": 3.89,
+ "Tatoeba (ile-eng)": 21.25,
+ "Tatoeba (ara-eng)": 0.3,
+ "Tatoeba (cha-eng)": 15.41,
+ "Tatoeba (tzl-eng)": 16.83,
+ "Tatoeba (hin-eng)": 0.1,
+ "Tatoeba (zsm-eng)": 8.0,
+ "Tatoeba (yid-eng)": 0.2,
+ "Tatoeba (vie-eng)": 5.01,
+ "Tatoeba (tat-eng)": 0.94,
+ "Tatoeba (nno-eng)": 7.28,
+ "Tatoeba (mal-eng)": 0.39,
+ "Tatoeba (nld-eng)": 12.18,
+ "Tatoeba (tgl-eng)": 3.79,
+ "Tatoeba (cym-eng)": 6.51,
+ "Tatoeba (eus-eng)": 7.21,
+ "Tatoeba (rus-eng)": 0.25,
+ "Tatoeba (orv-eng)": 0.13,
+ "Tatoeba (tha-eng)": 1.1,
+ "Tatoeba (ceb-eng)": 5.13,
+ "Tatoeba (ast-eng)": 17.75,
+ "Tatoeba (uig-eng)": 0.4,
+ "Tatoeba (jav-eng)": 4.87,
+ "Tatoeba (arq-eng)": 0.3,
+ "Tatoeba (hsb-eng)": 4.34,
+ "Tatoeba (srp-eng)": 3.12,
+ "Tatoeba (bul-eng)": 0.82,
+ "Tatoeba (tuk-eng)": 4.34,
+ "Tatoeba (sqi-eng)": 5.87,
+ "Tatoeba (ces-eng)": 4.76,
+ "Tatoeba (ell-eng)": 0.5,
+ "Tatoeba (kaz-eng)": 0.43,
+ "Tatoeba (spa-eng)": 18.16,
+ "Tatoeba (lit-eng)": 2.35,
+ "Tatoeba (ita-eng)": 18.82,
+ "Tatoeba (gsw-eng)": 14.92,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (cmn-eng)": 2.99,
+ "Tatoeba (awa-eng)": 0.51,
+ "Tatoeba (pms-eng)": 11.58,
+ "Tatoeba (mar-eng)": 0.09,
+ "Tatoeba (ang-eng)": 14.83,
+ "Tatoeba (ukr-eng)": 0.83,
+ "Tatoeba (jpn-eng)": 1.59,
+ "Tatoeba (arz-eng)": 0.0,
+ "Tatoeba (nob-eng)": 10.7,
+ "Tatoeba (mhr-eng)": 0.03,
+ "Tatoeba (gle-eng)": 3.39,
+ "Tatoeba (hrv-eng)": 6.18,
+ "Tatoeba (swe-eng)": 8.66,
+ "Tatoeba (pol-eng)": 5.87,
+ "Tatoeba (swh-eng)": 7.85,
+ "Tatoeba (dan-eng)": 10.08,
+ "Tatoeba (tam-eng)": 0.55,
+ "Tatoeba (ina-eng)": 29.74,
+ "Tatoeba (lvs-eng)": 4.24,
+ "Tatoeba (hun-eng)": 4.9,
+ "Tatoeba (fra-eng)": 20.65,
+ "Tatoeba (aze-eng)": 3.18,
+ "Tatoeba (cat-eng)": 14.93,
+ "Tatoeba (ido-eng)": 16.24,
+ "Tatoeba (kur-eng)": 6.67,
+ "Tatoeba (por-eng)": 16.47,
+ "Tatoeba (cbk-eng)": 13.72,
+ "Tatoeba (glg-eng)": 17.84,
+ "Tatoeba (dsb-eng)": 4.43,
+ "Tatoeba (mon-eng)": 1.42,
+ "Tatoeba (fin-eng)": 3.14,
+ "Tatoeba (cor-eng)": 3.17,
+ "Tatoeba (afr-eng)": 7.96,
+ "Tatoeba (ind-eng)": 7.52,
+ "Tatoeba (kor-eng)": 1.07,
+ "Tatoeba (xho-eng)": 3.86,
+ "Tatoeba (dtp-eng)": 3.32,
+ "Tatoeba (lat-eng)": 10.58,
+ "Tatoeba (kat-eng)": 0.55,
+ "Tatoeba (fao-eng)": 6.58,
+ "Tatoeba (swg-eng)": 11.45,
+ "Tatoeba (kab-eng)": 0.99,
+ "Tatoeba (csb-eng)": 6.79,
+ "Tatoeba (slk-eng)": 6.0,
+ "Tatoeba (fry-eng)": 14.25,
+ "Tatoeba (pam-eng)": 4.95,
+ "Tatoeba (yue-eng)": 1.54,
+ "Tatoeba (mkd-eng)": 0.21,
+ "Tatoeba (lfn-eng)": 12.2,
+ "Tatoeba (nov-eng)": 27.29,
+ "Tatoeba (tel-eng)": 0.24,
+ "Tatoeba (max-eng)": 7.97,
+ "Tatoeba (tur-eng)": 4.29,
+ "Tatoeba (hye-eng)": 0.3,
+ "Tatoeba (uzb-eng)": 3.6,
+ "Tatoeba (gla-eng)": 3.16,
+ "Tatoeba (khm-eng)": 0.42,
+ "Tatoeba (ron-eng)": 9.92,
+ "Tatoeba (isl-eng)": 3.36,
+ "Tatoeba (ben-eng)": 0.13,
+ "Tatoeba (amh-eng)": 0.6,
+ "Tatoeba (epo-eng)": 10.42,
+ "Tatoeba (deu-eng)": 15.53,
+ "Tatoeba (oci-eng)": 11.01,
+ "Tatoeba (wuu-eng)": 2.05,
+ "Tatoeba (ber-eng)": 5.5,
+ "Tatoeba (nds-eng)": 12.51,
+ "Tatoeba (bre-eng)": 3.44,
+ "Tatoeba (war-eng)": 5.61,
+ "Tatoeba (bos-eng)": 8.02,
+ "Tatoeba (pes-eng)": 0.1,
+ "Tatoeba (est-eng)": 3.75,
+ "Tatoeba (heb-eng)": 0.48,
+ "Tatoeba (slv-eng)": 5.44,
+ "Tatoeba (bel-eng)": 0.94
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "AllegroReviews": 25.03,
+ "AmazonCounterfactualClassification (en-ext)": 74.66,
+ "AmazonCounterfactualClassification (en)": 75.01,
+ "AmazonCounterfactualClassification (de)": 55.79,
+ "AmazonCounterfactualClassification (ja)": 58.61,
+ "AmazonReviewsClassification (en)": 50.73,
+ "AmazonReviewsClassification (de)": 26.04,
+ "AmazonReviewsClassification (es)": 33.95,
+ "AmazonReviewsClassification (fr)": 29.29,
+ "AmazonReviewsClassification (ja)": 23.63,
+ "AmazonReviewsClassification (zh)": 23.65,
+ "AngryTweetsClassification": 43.69,
+ "CBD": 51.55,
+ "DanishPoliticalCommentsClassification": 29.44,
+ "GeoreviewClassification": 27.74,
+ "HeadlineClassification": 30.07,
+ "InappropriatenessClassification": 51.63,
+ "KinopoiskClassification": 35.58,
+ "LccSentimentClassification": 40.33,
+ "MTOPDomainClassification (en)": 93.18,
+ "MTOPDomainClassification (de)": 69.48,
+ "MTOPDomainClassification (es)": 76.31,
+ "MTOPDomainClassification (fr)": 75.85,
+ "MTOPDomainClassification (hi)": 36.44,
+ "MTOPDomainClassification (th)": 16.13,
+ "MTOPIntentClassification (en)": 67.14,
+ "MTOPIntentClassification (de)": 44.82,
+ "MTOPIntentClassification (es)": 42.67,
+ "MTOPIntentClassification (fr)": 39.54,
+ "MTOPIntentClassification (hi)": 15.37,
+ "MTOPIntentClassification (th)": 5.24,
+ "MasakhaNEWSClassification (amh)": 34.04,
+ "MasakhaNEWSClassification (eng)": 79.02,
+ "MasakhaNEWSClassification (fra)": 76.07,
+ "MasakhaNEWSClassification (hau)": 63.2,
+ "MasakhaNEWSClassification (ibo)": 59.92,
+ "MasakhaNEWSClassification (lin)": 72.51,
+ "MasakhaNEWSClassification (lug)": 55.92,
+ "MasakhaNEWSClassification (orm)": 57.11,
+ "MasakhaNEWSClassification (pcm)": 92.1,
+ "MasakhaNEWSClassification (run)": 63.51,
+ "MasakhaNEWSClassification (sna)": 73.63,
+ "MasakhaNEWSClassification (som)": 49.15,
+ "MasakhaNEWSClassification (swa)": 54.35,
+ "MasakhaNEWSClassification (tir)": 25.88,
+ "MasakhaNEWSClassification (xho)": 61.14,
+ "MasakhaNEWSClassification (yor)": 66.28,
+ "MassiveIntentClassification (ru)": 31.7,
+ "MassiveIntentClassification (fi)": 41.0,
+ "MassiveIntentClassification (lv)": 39.23,
+ "MassiveIntentClassification (sw)": 38.11,
+ "MassiveIntentClassification (ta)": 11.24,
+ "MassiveIntentClassification (az)": 38.58,
+ "MassiveIntentClassification (bn)": 15.79,
+ "MassiveIntentClassification (ml)": 3.24,
+ "MassiveIntentClassification (hu)": 36.2,
+ "MassiveIntentClassification (id)": 40.1,
+ "MassiveIntentClassification (th)": 12.65,
+ "MassiveIntentClassification (fr)": 43.91,
+ "MassiveIntentClassification (zh-CN)": 23.75,
+ "MassiveIntentClassification (ms)": 37.3,
+ "MassiveIntentClassification (sl)": 38.57,
+ "MassiveIntentClassification (ur)": 16.7,
+ "MassiveIntentClassification (ro)": 39.93,
+ "MassiveIntentClassification (de)": 41.73,
+ "MassiveIntentClassification (fa)": 24.44,
+ "MassiveIntentClassification (te)": 2.72,
+ "MassiveIntentClassification (pl)": 38.51,
+ "MassiveIntentClassification (nb)": 38.88,
+ "MassiveIntentClassification (es)": 43.08,
+ "MassiveIntentClassification (ja)": 29.99,
+ "MassiveIntentClassification (zh-TW)": 22.34,
+ "MassiveIntentClassification (nl)": 38.75,
+ "MassiveIntentClassification (it)": 44.42,
+ "MassiveIntentClassification (pt)": 43.45,
+ "MassiveIntentClassification (ka)": 11.76,
+ "MassiveIntentClassification (ar)": 20.63,
+ "MassiveIntentClassification (tr)": 39.25,
+ "MassiveIntentClassification (cy)": 33.54,
+ "MassiveIntentClassification (en)": 72.64,
+ "MassiveIntentClassification (mn)": 20.53,
+ "MassiveIntentClassification (hi)": 13.89,
+ "MassiveIntentClassification (ko)": 20.17,
+ "MassiveIntentClassification (af)": 36.89,
+ "MassiveIntentClassification (km)": 4.75,
+ "MassiveIntentClassification (vi)": 37.62,
+ "MassiveIntentClassification (my)": 3.8,
+ "MassiveIntentClassification (am)": 2.75,
+ "MassiveIntentClassification (tl)": 41.78,
+ "MassiveIntentClassification (sv)": 38.39,
+ "MassiveIntentClassification (kn)": 3.27,
+ "MassiveIntentClassification (jv)": 35.15,
+ "MassiveIntentClassification (hy)": 11.24,
+ "MassiveIntentClassification (da)": 41.04,
+ "MassiveIntentClassification (sq)": 38.12,
+ "MassiveIntentClassification (is)": 34.25,
+ "MassiveIntentClassification (el)": 27.29,
+ "MassiveIntentClassification (he)": 23.25,
+ "MassiveScenarioClassification (am)": 8.29,
+ "MassiveScenarioClassification (fa)": 30.38,
+ "MassiveScenarioClassification (nl)": 47.81,
+ "MassiveScenarioClassification (ar)": 30.58,
+ "MassiveScenarioClassification (ka)": 18.04,
+ "MassiveScenarioClassification (nb)": 48.41,
+ "MassiveScenarioClassification (af)": 45.94,
+ "MassiveScenarioClassification (km)": 9.02,
+ "MassiveScenarioClassification (is)": 45.11,
+ "MassiveScenarioClassification (ta)": 17.82,
+ "MassiveScenarioClassification (he)": 25.38,
+ "MassiveScenarioClassification (zh-TW)": 31.33,
+ "MassiveScenarioClassification (cy)": 39.07,
+ "MassiveScenarioClassification (ru)": 36.99,
+ "MassiveScenarioClassification (tl)": 51.55,
+ "MassiveScenarioClassification (bn)": 21.49,
+ "MassiveScenarioClassification (sq)": 47.4,
+ "MassiveScenarioClassification (lv)": 43.41,
+ "MassiveScenarioClassification (hi)": 19.69,
+ "MassiveScenarioClassification (zh-CN)": 33.62,
+ "MassiveScenarioClassification (jv)": 44.53,
+ "MassiveScenarioClassification (sv)": 47.55,
+ "MassiveScenarioClassification (pt)": 53.56,
+ "MassiveScenarioClassification (ro)": 49.47,
+ "MassiveScenarioClassification (az)": 44.58,
+ "MassiveScenarioClassification (ko)": 26.1,
+ "MassiveScenarioClassification (it)": 56.3,
+ "MassiveScenarioClassification (kn)": 8.44,
+ "MassiveScenarioClassification (el)": 37.53,
+ "MassiveScenarioClassification (fi)": 44.54,
+ "MassiveScenarioClassification (sl)": 42.65,
+ "MassiveScenarioClassification (tr)": 45.61,
+ "MassiveScenarioClassification (ml)": 6.64,
+ "MassiveScenarioClassification (da)": 49.22,
+ "MassiveScenarioClassification (sw)": 44.79,
+ "MassiveScenarioClassification (fr)": 53.77,
+ "MassiveScenarioClassification (en)": 76.51,
+ "MassiveScenarioClassification (id)": 47.3,
+ "MassiveScenarioClassification (my)": 10.25,
+ "MassiveScenarioClassification (ja)": 36.32,
+ "MassiveScenarioClassification (es)": 53.4,
+ "MassiveScenarioClassification (mn)": 26.68,
+ "MassiveScenarioClassification (th)": 23.0,
+ "MassiveScenarioClassification (ur)": 25.4,
+ "MassiveScenarioClassification (de)": 54.33,
+ "MassiveScenarioClassification (pl)": 47.15,
+ "MassiveScenarioClassification (te)": 6.6,
+ "MassiveScenarioClassification (vi)": 41.87,
+ "MassiveScenarioClassification (ms)": 46.35,
+ "MassiveScenarioClassification (hu)": 43.08,
+ "MassiveScenarioClassification (hy)": 17.38,
+ "NoRecClassification": 38.83,
+ "NordicLangClassification": 53.43,
+ "PAC": 59.61,
+ "PolEmo2.0-IN": 44.25,
+ "PolEmo2.0-OUT": 30.79,
+ "RuReviewsClassification": 43.47,
+ "RuSciBenchGRNTIClassification": 17.34,
+ "RuSciBenchOECDClassification": 13.16,
+ "ToxicConversationsClassification": 67.04
+ }
+ ]
},
"Clustering": {
"v_measure": [
{
"Model": "bge-base-en-v1.5",
+ "AlloProfClusteringP2P": 59.78,
+ "AlloProfClusteringS2S": 38.3,
"BiorxivClusteringP2P": 39.44,
"BiorxivClusteringS2S": 36.62,
+ "BlurbsClusteringP2P": 25.15,
+ "BlurbsClusteringS2S": 11.38,
+ "GeoreviewClusteringP2P": 23.09,
+ "HALClusteringS2S": 23.18,
+ "MLSUMClusteringP2P (de)": 39.14,
+ "MLSUMClusteringP2P (fr)": 41.97,
+ "MLSUMClusteringP2P (ru)": 21.47,
+ "MLSUMClusteringP2P (es)": 40.97,
+ "MLSUMClusteringS2S (de)": 38.56,
+ "MLSUMClusteringS2S (fr)": 41.47,
+ "MLSUMClusteringS2S (ru)": 20.52,
+ "MLSUMClusteringS2S (es)": 40.77,
+ "MasakhaNEWSClusteringP2P (amh)": 40.48,
+ "MasakhaNEWSClusteringP2P (eng)": 49.92,
+ "MasakhaNEWSClusteringP2P (fra)": 47.41,
+ "MasakhaNEWSClusteringP2P (hau)": 41.76,
+ "MasakhaNEWSClusteringP2P (ibo)": 38.01,
+ "MasakhaNEWSClusteringP2P (lin)": 66.31,
+ "MasakhaNEWSClusteringP2P (lug)": 48.4,
+ "MasakhaNEWSClusteringP2P (orm)": 24.88,
+ "MasakhaNEWSClusteringP2P (pcm)": 84.63,
+ "MasakhaNEWSClusteringP2P (run)": 51.28,
+ "MasakhaNEWSClusteringP2P (sna)": 47.27,
+ "MasakhaNEWSClusteringP2P (som)": 33.96,
+ "MasakhaNEWSClusteringP2P (swa)": 22.85,
+ "MasakhaNEWSClusteringP2P (tir)": 47.31,
+ "MasakhaNEWSClusteringP2P (xho)": 26.52,
+ "MasakhaNEWSClusteringP2P (yor)": 37.97,
+ "MasakhaNEWSClusteringS2S (amh)": 44.05,
+ "MasakhaNEWSClusteringS2S (eng)": 37.48,
+ "MasakhaNEWSClusteringS2S (fra)": 44.15,
+ "MasakhaNEWSClusteringS2S (hau)": 19.01,
+ "MasakhaNEWSClusteringS2S (ibo)": 41.69,
+ "MasakhaNEWSClusteringS2S (lin)": 54.94,
+ "MasakhaNEWSClusteringS2S (lug)": 46.07,
+ "MasakhaNEWSClusteringS2S (orm)": 25.37,
+ "MasakhaNEWSClusteringS2S (pcm)": 83.13,
+ "MasakhaNEWSClusteringS2S (run)": 54.53,
+ "MasakhaNEWSClusteringS2S (sna)": 47.65,
+ "MasakhaNEWSClusteringS2S (som)": 27.86,
+ "MasakhaNEWSClusteringS2S (swa)": 20.2,
+ "MasakhaNEWSClusteringS2S (tir)": 42.87,
+ "MasakhaNEWSClusteringS2S (xho)": 26.32,
+ "MasakhaNEWSClusteringS2S (yor)": 32.17,
"MedrxivClusteringP2P": 33.21,
"MedrxivClusteringS2S": 31.68,
"RedditClustering": 56.61,
"RedditClusteringP2P": 62.66,
+ "RuSciBenchGRNTIClusteringP2P": 15.54,
+ "RuSciBenchOECDClusteringP2P": 14.16,
"StackExchangeClustering": 66.11,
"StackExchangeClusteringP2P": 35.24,
+ "TenKGnadClusteringP2P": 42.11,
+ "TenKGnadClusteringS2S": 22.07,
"TwentyNewsgroupsClustering": 50.75
}
]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "CDSC-E": 46.84,
+ "FalseFriendsGermanEnglish": 47.76,
+ "OpusparcusPC (de)": 90.85,
+ "OpusparcusPC (en)": 98.58,
+ "OpusparcusPC (fi)": 85.23,
+ "OpusparcusPC (fr)": 85.92,
+ "OpusparcusPC (ru)": 80.09,
+ "OpusparcusPC (sv)": 82.52,
+ "PSC": 92.98,
+ "PawsXPairClassification (de)": 52.1,
+ "PawsXPairClassification (en)": 59.05,
+ "PawsXPairClassification (es)": 53.77,
+ "PawsXPairClassification (fr)": 55.17,
+ "PawsXPairClassification (ja)": 48.3,
+ "PawsXPairClassification (ko)": 51.05,
+ "PawsXPairClassification (zh)": 52.85,
+ "SICK-E-PL": 43.02,
+ "SprintDuplicateQuestions": 96.33,
+ "TERRa": 47.12,
+ "TwitterURLCorpus": 85.65
+ },
+ {
+ "Model": "bge-base-en-v1.5",
+ "CDSC-E": 46.88,
+ "FalseFriendsGermanEnglish": 47.77,
+ "OpusparcusPC (de)": 90.92,
+ "OpusparcusPC (en)": 98.58,
+ "OpusparcusPC (fi)": 85.23,
+ "OpusparcusPC (fr)": 85.92,
+ "OpusparcusPC (ru)": 80.09,
+ "OpusparcusPC (sv)": 82.52,
+ "PSC": 92.98,
+ "PawsXPairClassification (de)": 52.1,
+ "PawsXPairClassification (en)": 59.09,
+ "PawsXPairClassification (es)": 53.77,
+ "PawsXPairClassification (fr)": 55.17,
+ "PawsXPairClassification (ja)": 48.46,
+ "PawsXPairClassification (ko)": 51.06,
+ "PawsXPairClassification (zh)": 53.0,
+ "SICK-E-PL": 43.03,
+ "SprintDuplicateQuestions": 96.37,
+ "TERRa": 47.12,
+ "TwitterURLCorpus": 85.65
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "AlloprofReranking": 62.19,
+ "RuBQReranking": 44.52,
+ "SyntecReranking": 73.0,
+ "T2Reranking": 63.09
+ },
+ {
+ "Model": "bge-base-en-v1.5",
+ "MIRACLReranking (ar)": 16.03,
+ "MIRACLReranking (bn)": 19.79,
+ "MIRACLReranking (de)": 25.83,
+ "MIRACLReranking (en)": 56.99,
+ "MIRACLReranking (es)": 38.38,
+ "MIRACLReranking (fa)": 15.77,
+ "MIRACLReranking (fi)": 48.23,
+ "MIRACLReranking (fr)": 27.9,
+ "MIRACLReranking (hi)": 10.78,
+ "MIRACLReranking (id)": 30.65,
+ "MIRACLReranking (ja)": 17.32,
+ "MIRACLReranking (ko)": 19.91,
+ "MIRACLReranking (ru)": 22.42,
+ "MIRACLReranking (sw)": 38.72,
+ "MIRACLReranking (te)": 1.55,
+ "MIRACLReranking (th)": 5.4,
+ "MIRACLReranking (yo)": 58.13,
+ "MIRACLReranking (zh)": 13.84
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "bge-base-en-v1.5",
+ "AILACasedocs": 27.36,
+ "AILAStatutes": 23.35,
"ARCChallenge": 9.66,
+ "AlloprofRetrieval": 31.28,
"AlphaNLI": 10.99,
+ "AppsRetrieval": 6.45,
+ "ArguAna": 63.75,
+ "BSARDRetrieval": 11.67,
+ "CmedqaRetrieval": 2.42,
+ "CodeFeedbackMT": 33.65,
+ "CodeFeedbackST": 70.0,
+ "CodeSearchNetCCRetrieval (python)": 59.77,
+ "CodeSearchNetCCRetrieval (javascript)": 54.23,
+ "CodeSearchNetCCRetrieval (go)": 34.6,
+ "CodeSearchNetCCRetrieval (ruby)": 56.37,
+ "CodeSearchNetCCRetrieval (java)": 56.21,
+ "CodeSearchNetCCRetrieval (php)": 44.15,
+ "CodeSearchNetRetrieval (python)": 89.09,
+ "CodeSearchNetRetrieval (javascript)": 75.78,
+ "CodeSearchNetRetrieval (go)": 95.42,
+ "CodeSearchNetRetrieval (ruby)": 81.33,
+ "CodeSearchNetRetrieval (java)": 81.05,
+ "CodeSearchNetRetrieval (php)": 87.99,
+ "CodeTransOceanContest": 45.65,
+ "CodeTransOceanDL": 23.49,
+ "CosQA": 33.71,
+ "CovidRetrieval": 23.21,
+ "GerDaLIR": 1.25,
+ "GerDaLIRSmall": 3.32,
+ "GermanQuAD-Retrieval": 79.77,
"HellaSwag": 26.64,
+ "LEMBNarrativeQARetrieval": 25.63,
+ "LEMBQMSumRetrieval": 22.43,
+ "LEMBSummScreenFDRetrieval": 60.34,
+ "LEMBWikimQARetrieval": 51.67,
+ "LeCaRDv2": 23.34,
+ "LegalBenchConsumerContractsQA": 73.48,
+ "LegalBenchCorporateLobbying": 91.67,
+ "LegalQuAD": 16.01,
+ "LegalSummarization": 63.41,
+ "MIRACLRetrieval (ar)": 1.0,
+ "MIRACLRetrieval (bn)": 1.53,
+ "MIRACLRetrieval (de)": 15.18,
+ "MIRACLRetrieval (en)": 48.63,
+ "MIRACLRetrieval (es)": 25.17,
+ "MIRACLRetrieval (fa)": 0.94,
+ "MIRACLRetrieval (fi)": 31.93,
+ "MIRACLRetrieval (fr)": 17.69,
+ "MIRACLRetrieval (hi)": 0.63,
+ "MIRACLRetrieval (id)": 21.02,
+ "MIRACLRetrieval (ja)": 3.85,
+ "MIRACLRetrieval (ko)": 5.89,
+ "MIRACLRetrieval (ru)": 6.02,
+ "MIRACLRetrieval (sw)": 31.61,
+ "MIRACLRetrieval (te)": 0.11,
+ "MIRACLRetrieval (th)": 0.52,
+ "MIRACLRetrieval (yo)": 54.14,
+ "MIRACLRetrieval (zh)": 0.98,
+ "MintakaRetrieval (ar)": 4.82,
+ "MintakaRetrieval (de)": 17.38,
+ "MintakaRetrieval (es)": 15.89,
+ "MintakaRetrieval (fr)": 19.27,
+ "MintakaRetrieval (hi)": 3.67,
+ "MintakaRetrieval (it)": 14.21,
+ "MintakaRetrieval (ja)": 8.79,
+ "MintakaRetrieval (pt)": 14.08,
+ "NFCorpus": 37.37,
"PIQA": 25.69,
"Quail": 1.42,
"RARbCode": 46.47,
"RARbMath": 46.86,
+ "RiaNewsRetrieval": 19.6,
+ "RuBQRetrieval": 13.27,
+ "SCIDOCS": 21.73,
"SIQA": 0.94,
+ "SciFact": 74.35,
+ "SciFact-PL": 40.81,
"SpartQA": 3.37,
+ "StackOverflowQA": 80.23,
+ "SyntecRetrieval": 63.7,
+ "SyntheticText2SQL": 49.98,
+ "TRECCOVID": 78.03,
+ "TRECCOVID-PL": 37.34,
"TempReasonL1": 1.07,
"TempReasonL2Fact": 17.23,
"TempReasonL2Pure": 1.29,
"TempReasonL3Fact": 13.36,
"TempReasonL3Pure": 5.2,
- "WinoGrande": 13.76
+ "WinoGrande": 13.76,
+ "XMarket (de)": 15.71,
+ "XMarket (en)": 33.61,
+ "XMarket (es)": 17.5,
+ "XPQARetrieval (ara-ara)": 10.64,
+ "XPQARetrieval (eng-ara)": 3.81,
+ "XPQARetrieval (ara-eng)": 8.07,
+ "XPQARetrieval (deu-deu)": 58.05,
+ "XPQARetrieval (eng-deu)": 9.9,
+ "XPQARetrieval (deu-eng)": 30.84,
+ "XPQARetrieval (spa-spa)": 44.76,
+ "XPQARetrieval (eng-spa)": 8.58,
+ "XPQARetrieval (spa-eng)": 25.23,
+ "XPQARetrieval (fra-fra)": 51.81,
+ "XPQARetrieval (eng-fra)": 14.4,
+ "XPQARetrieval (fra-eng)": 32.94,
+ "XPQARetrieval (hin-hin)": 32.67,
+ "XPQARetrieval (eng-hin)": 5.81,
+ "XPQARetrieval (hin-eng)": 6.86,
+ "XPQARetrieval (ita-ita)": 63.61,
+ "XPQARetrieval (eng-ita)": 9.4,
+ "XPQARetrieval (ita-eng)": 28.35,
+ "XPQARetrieval (jpn-jpn)": 42.12,
+ "XPQARetrieval (eng-jpn)": 6.0,
+ "XPQARetrieval (jpn-eng)": 16.81,
+ "XPQARetrieval (kor-kor)": 15.84,
+ "XPQARetrieval (eng-kor)": 8.51,
+ "XPQARetrieval (kor-eng)": 8.13,
+ "XPQARetrieval (pol-pol)": 30.99,
+ "XPQARetrieval (eng-pol)": 11.94,
+ "XPQARetrieval (pol-eng)": 18.29,
+ "XPQARetrieval (por-por)": 37.14,
+ "XPQARetrieval (eng-por)": 8.38,
+ "XPQARetrieval (por-eng)": 23.01,
+ "XPQARetrieval (tam-tam)": 13.05,
+ "XPQARetrieval (eng-tam)": 4.12,
+ "XPQARetrieval (tam-eng)": 3.44,
+ "XPQARetrieval (cmn-cmn)": 25.73,
+ "XPQARetrieval (eng-cmn)": 7.49,
+ "XPQARetrieval (cmn-eng)": 14.89
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "CDSC-R": 82.49,
+ "GermanSTSBenchmark": 62.87,
+ "RUParaPhraserSTS": 48.73,
+ "RuSTSBenchmarkSTS": 59.7,
+ "SICK-R": 80.3,
+ "SICK-R-PL": 52.21,
+ "SICKFr": 65.48,
+ "STS12": 78.03,
+ "STS13": 84.18,
+ "STS14": 82.27,
+ "STS15": 87.96,
+ "STS17 (en-tr)": 9.04,
+ "STS17 (it-en)": 33.78,
+ "STS17 (es-es)": 79.63,
+ "STS17 (ar-ar)": 53.71,
+ "STS17 (fr-en)": 40.83,
+ "STS17 (en-en)": 86.41,
+ "STS17 (ko-ko)": 51.96,
+ "STS17 (en-de)": 37.37,
+ "STS17 (es-en)": 34.94,
+ "STS17 (nl-en)": 33.35,
+ "STS17 (en-ar)": 4.5,
+ "STS22 (de-fr)": 41.15,
+ "STS22 (fr)": 75.72,
+ "STS22 (pl)": 35.91,
+ "STS22 (tr)": 41.34,
+ "STS22 (es)": 51.51,
+ "STS22 (it)": 61.44,
+ "STS22 (pl-en)": 38.27,
+ "STS22 (zh-en)": 44.76,
+ "STS22 (ar)": 24.97,
+ "STS22 (zh)": 50.2,
+ "STS22 (en)": 66.54,
+ "STS22 (es-it)": 50.38,
+ "STS22 (ru)": 16.18,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (es-en)": 59.23,
+ "STS22 (de)": 32.68,
+ "STS22 (de-en)": 46.58,
+ "STS22 (de-pl)": 31.16,
+ "STSB": 41.69,
+ "STSBenchmark": 86.42,
+ "STSBenchmarkMultilingualSTS (en)": 86.42,
+ "STSBenchmarkMultilingualSTS (pt)": 65.71,
+ "STSBenchmarkMultilingualSTS (pl)": 59.86,
+ "STSBenchmarkMultilingualSTS (zh)": 41.25,
+ "STSBenchmarkMultilingualSTS (de)": 62.63,
+ "STSBenchmarkMultilingualSTS (es)": 68.01,
+ "STSBenchmarkMultilingualSTS (fr)": 66.28,
+ "STSBenchmarkMultilingualSTS (it)": 66.54,
+ "STSBenchmarkMultilingualSTS (nl)": 60.19,
+ "STSBenchmarkMultilingualSTS (ru)": 59.85
+ },
+ {
+ "Model": "bge-base-en-v1.5",
+ "CDSC-R": 82.49,
+ "GermanSTSBenchmark": 62.87,
+ "RUParaPhraserSTS": 48.73,
+ "RuSTSBenchmarkSTS": 59.7,
+ "SICK-R": 80.3,
+ "SICK-R-PL": 52.21,
+ "SICKFr": 65.48,
+ "STS12": 78.03,
+ "STS13": 84.18,
+ "STS14": 82.27,
+ "STS15": 87.96,
+ "STS17 (en-tr)": 9.04,
+ "STS17 (it-en)": 33.78,
+ "STS17 (es-es)": 79.63,
+ "STS17 (ar-ar)": 53.71,
+ "STS17 (fr-en)": 40.83,
+ "STS17 (en-en)": 86.41,
+ "STS17 (ko-ko)": 51.96,
+ "STS17 (en-de)": 37.37,
+ "STS17 (es-en)": 34.94,
+ "STS17 (nl-en)": 33.35,
+ "STS17 (en-ar)": 4.5,
+ "STS22 (de-fr)": 41.15,
+ "STS22 (fr)": 75.72,
+ "STS22 (pl)": 36.01,
+ "STS22 (tr)": 41.34,
+ "STS22 (es)": 51.51,
+ "STS22 (it)": 61.44,
+ "STS22 (pl-en)": 38.27,
+ "STS22 (zh-en)": 44.76,
+ "STS22 (ar)": 24.95,
+ "STS22 (zh)": 50.2,
+ "STS22 (en)": 66.54,
+ "STS22 (es-it)": 50.38,
+ "STS22 (ru)": 16.18,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (es-en)": 59.23,
+ "STS22 (de)": 32.67,
+ "STS22 (de-en)": 46.58,
+ "STS22 (de-pl)": 31.16,
+ "STSB": 41.69,
+ "STSBenchmark": 86.42,
+ "STSBenchmarkMultilingualSTS (en)": 86.42,
+ "STSBenchmarkMultilingualSTS (pt)": 65.71,
+ "STSBenchmarkMultilingualSTS (pl)": 59.86,
+ "STSBenchmarkMultilingualSTS (zh)": 41.24,
+ "STSBenchmarkMultilingualSTS (de)": 62.63,
+ "STSBenchmarkMultilingualSTS (es)": 68.01,
+ "STSBenchmarkMultilingualSTS (fr)": 66.28,
+ "STSBenchmarkMultilingualSTS (it)": 66.54,
+ "STSBenchmarkMultilingualSTS (nl)": 60.19,
+ "STSBenchmarkMultilingualSTS (ru)": 59.85
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "SummEvalFr": 30.72
+ },
+ {
+ "Model": "bge-base-en-v1.5",
+ "SummEvalFr": 30.72
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "CEDRClassification": 33.62,
+ "SensitiveTopicsClassification": 18.05
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "bge-base-en-v1.5",
+ "Core17InstructionRetrieval": -3.42,
+ "News21InstructionRetrieval": -1.0,
+ "Robust04InstructionRetrieval": -7.53
+ }
+ ]
}
},
"BAAI__bge-base-en-v1.5-instruct": {
@@ -456,28 +2292,441 @@
},
"BAAI__bge-large-en-v1.5": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "BornholmBitextMining": 34.09,
+ "Tatoeba (nld-eng)": 14.89,
+ "Tatoeba (isl-eng)": 4.57,
+ "Tatoeba (ces-eng)": 5.41,
+ "Tatoeba (ina-eng)": 38.55,
+ "Tatoeba (hye-eng)": 0.3,
+ "Tatoeba (heb-eng)": 0.7,
+ "Tatoeba (spa-eng)": 25.08,
+ "Tatoeba (slv-eng)": 5.77,
+ "Tatoeba (tat-eng)": 0.9,
+ "Tatoeba (kur-eng)": 7.31,
+ "Tatoeba (cbk-eng)": 18.32,
+ "Tatoeba (nob-eng)": 11.34,
+ "Tatoeba (ben-eng)": 0.01,
+ "Tatoeba (srp-eng)": 3.31,
+ "Tatoeba (tel-eng)": 0.78,
+ "Tatoeba (yue-eng)": 1.55,
+ "Tatoeba (hun-eng)": 5.15,
+ "Tatoeba (swe-eng)": 10.6,
+ "Tatoeba (bre-eng)": 4.2,
+ "Tatoeba (csb-eng)": 6.17,
+ "Tatoeba (epo-eng)": 11.93,
+ "Tatoeba (cat-eng)": 17.77,
+ "Tatoeba (jav-eng)": 5.46,
+ "Tatoeba (ast-eng)": 21.06,
+ "Tatoeba (ind-eng)": 7.67,
+ "Tatoeba (kaz-eng)": 0.61,
+ "Tatoeba (fry-eng)": 17.97,
+ "Tatoeba (ber-eng)": 5.06,
+ "Tatoeba (nds-eng)": 15.16,
+ "Tatoeba (est-eng)": 2.76,
+ "Tatoeba (zsm-eng)": 8.01,
+ "Tatoeba (mar-eng)": 0.09,
+ "Tatoeba (tgl-eng)": 5.95,
+ "Tatoeba (cmn-eng)": 2.95,
+ "Tatoeba (bos-eng)": 8.9,
+ "Tatoeba (sqi-eng)": 5.58,
+ "Tatoeba (cha-eng)": 14.46,
+ "Tatoeba (ita-eng)": 25.37,
+ "Tatoeba (kab-eng)": 0.97,
+ "Tatoeba (gsw-eng)": 15.41,
+ "Tatoeba (slk-eng)": 5.94,
+ "Tatoeba (lvs-eng)": 4.67,
+ "Tatoeba (nno-eng)": 6.71,
+ "Tatoeba (lat-eng)": 11.54,
+ "Tatoeba (nov-eng)": 30.37,
+ "Tatoeba (eus-eng)": 7.22,
+ "Tatoeba (lit-eng)": 2.78,
+ "Tatoeba (hrv-eng)": 7.32,
+ "Tatoeba (pol-eng)": 6.98,
+ "Tatoeba (ceb-eng)": 5.83,
+ "Tatoeba (ile-eng)": 27.22,
+ "Tatoeba (lfn-eng)": 16.0,
+ "Tatoeba (tuk-eng)": 6.19,
+ "Tatoeba (ell-eng)": 0.4,
+ "Tatoeba (afr-eng)": 9.78,
+ "Tatoeba (ang-eng)": 16.0,
+ "Tatoeba (tam-eng)": 0.38,
+ "Tatoeba (cym-eng)": 6.96,
+ "Tatoeba (rus-eng)": 0.4,
+ "Tatoeba (tha-eng)": 0.9,
+ "Tatoeba (vie-eng)": 4.96,
+ "Tatoeba (dsb-eng)": 5.92,
+ "Tatoeba (pam-eng)": 5.33,
+ "Tatoeba (uzb-eng)": 2.15,
+ "Tatoeba (yid-eng)": 0.0,
+ "Tatoeba (swg-eng)": 9.77,
+ "Tatoeba (awa-eng)": 0.2,
+ "Tatoeba (dtp-eng)": 3.45,
+ "Tatoeba (mon-eng)": 1.34,
+ "Tatoeba (cor-eng)": 3.37,
+ "Tatoeba (bel-eng)": 1.42,
+ "Tatoeba (ukr-eng)": 1.03,
+ "Tatoeba (max-eng)": 11.79,
+ "Tatoeba (por-eng)": 23.03,
+ "Tatoeba (uig-eng)": 0.53,
+ "Tatoeba (ido-eng)": 20.0,
+ "Tatoeba (hsb-eng)": 5.16,
+ "Tatoeba (kat-eng)": 0.59,
+ "Tatoeba (khm-eng)": 0.42,
+ "Tatoeba (orv-eng)": 0.24,
+ "Tatoeba (mal-eng)": 0.16,
+ "Tatoeba (swh-eng)": 7.79,
+ "Tatoeba (gla-eng)": 1.93,
+ "Tatoeba (gle-eng)": 3.14,
+ "Tatoeba (pes-eng)": 0.3,
+ "Tatoeba (wuu-eng)": 2.44,
+ "Tatoeba (dan-eng)": 12.75,
+ "Tatoeba (tzl-eng)": 18.68,
+ "Tatoeba (fin-eng)": 3.73,
+ "Tatoeba (war-eng)": 6.91,
+ "Tatoeba (ron-eng)": 11.35,
+ "Tatoeba (mhr-eng)": 0.07,
+ "Tatoeba (tur-eng)": 4.66,
+ "Tatoeba (kzj-eng)": 3.56,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (pms-eng)": 13.5,
+ "Tatoeba (bul-eng)": 0.93,
+ "Tatoeba (arz-eng)": 0.0,
+ "Tatoeba (aze-eng)": 3.79,
+ "Tatoeba (kor-eng)": 1.39,
+ "Tatoeba (ara-eng)": 0.58,
+ "Tatoeba (deu-eng)": 23.9,
+ "Tatoeba (fra-eng)": 34.94,
+ "Tatoeba (amh-eng)": 0.45,
+ "Tatoeba (mkd-eng)": 0.21,
+ "Tatoeba (glg-eng)": 22.82,
+ "Tatoeba (hin-eng)": 0.07,
+ "Tatoeba (jpn-eng)": 0.92,
+ "Tatoeba (xho-eng)": 4.22,
+ "Tatoeba (fao-eng)": 9.22,
+ "Tatoeba (oci-eng)": 13.59,
+ "Tatoeba (arq-eng)": 0.37
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "AllegroReviews": 24.37,
+ "AmazonCounterfactualClassification (en-ext)": 75.04,
+ "AmazonCounterfactualClassification (en)": 73.94,
+ "AmazonCounterfactualClassification (de)": 55.25,
+ "AmazonCounterfactualClassification (ja)": 56.98,
+ "AmazonReviewsClassification (en)": 50.23,
+ "AmazonReviewsClassification (de)": 27.53,
+ "AmazonReviewsClassification (es)": 34.65,
+ "AmazonReviewsClassification (fr)": 31.55,
+ "AmazonReviewsClassification (ja)": 22.85,
+ "AmazonReviewsClassification (zh)": 22.27,
+ "AngryTweetsClassification": 44.97,
+ "CBD": 51.28,
+ "DanishPoliticalCommentsClassification": 29.79,
+ "GeoreviewClassification": 28.64,
+ "HeadlineClassification": 33.56,
+ "InappropriatenessClassification": 51.81,
+ "KinopoiskClassification": 35.68,
+ "LccSentimentClassification": 38.27,
+ "MTOPDomainClassification (en)": 94.0,
+ "MTOPDomainClassification (de)": 75.43,
+ "MTOPDomainClassification (es)": 78.81,
+ "MTOPDomainClassification (fr)": 80.27,
+ "MTOPDomainClassification (hi)": 32.73,
+ "MTOPDomainClassification (th)": 15.91,
+ "MTOPIntentClassification (en)": 69.96,
+ "MTOPIntentClassification (de)": 44.3,
+ "MTOPIntentClassification (es)": 47.38,
+ "MTOPIntentClassification (fr)": 41.36,
+ "MTOPIntentClassification (hi)": 13.53,
+ "MTOPIntentClassification (th)": 5.02,
+ "MasakhaNEWSClassification (amh)": 32.02,
+ "MasakhaNEWSClassification (eng)": 79.86,
+ "MasakhaNEWSClassification (fra)": 76.97,
+ "MasakhaNEWSClassification (hau)": 65.24,
+ "MasakhaNEWSClassification (ibo)": 62.03,
+ "MasakhaNEWSClassification (lin)": 75.43,
+ "MasakhaNEWSClassification (lug)": 57.31,
+ "MasakhaNEWSClassification (orm)": 59.08,
+ "MasakhaNEWSClassification (pcm)": 93.38,
+ "MasakhaNEWSClassification (run)": 65.19,
+ "MasakhaNEWSClassification (sna)": 75.07,
+ "MasakhaNEWSClassification (som)": 50.58,
+ "MasakhaNEWSClassification (swa)": 58.21,
+ "MasakhaNEWSClassification (tir)": 23.49,
+ "MasakhaNEWSClassification (xho)": 63.8,
+ "MasakhaNEWSClassification (yor)": 67.81,
+ "MassiveIntentClassification (am)": 3.01,
+ "MassiveIntentClassification (nb)": 39.67,
+ "MassiveIntentClassification (kn)": 3.14,
+ "MassiveIntentClassification (sq)": 36.62,
+ "MassiveIntentClassification (pl)": 38.6,
+ "MassiveIntentClassification (fa)": 26.89,
+ "MassiveIntentClassification (pt)": 46.21,
+ "MassiveIntentClassification (az)": 37.33,
+ "MassiveIntentClassification (en)": 74.34,
+ "MassiveIntentClassification (ta)": 7.96,
+ "MassiveIntentClassification (af)": 37.67,
+ "MassiveIntentClassification (bn)": 10.88,
+ "MassiveIntentClassification (he)": 23.98,
+ "MassiveIntentClassification (da)": 40.99,
+ "MassiveIntentClassification (fi)": 38.04,
+ "MassiveIntentClassification (ur)": 15.81,
+ "MassiveIntentClassification (is)": 32.31,
+ "MassiveIntentClassification (fr)": 47.83,
+ "MassiveIntentClassification (ko)": 14.8,
+ "MassiveIntentClassification (sv)": 39.28,
+ "MassiveIntentClassification (sw)": 36.96,
+ "MassiveIntentClassification (tr)": 37.77,
+ "MassiveIntentClassification (mn)": 22.38,
+ "MassiveIntentClassification (cy)": 31.92,
+ "MassiveIntentClassification (el)": 32.84,
+ "MassiveIntentClassification (te)": 2.54,
+ "MassiveIntentClassification (my)": 4.0,
+ "MassiveIntentClassification (ja)": 29.62,
+ "MassiveIntentClassification (ml)": 3.18,
+ "MassiveIntentClassification (sl)": 35.56,
+ "MassiveIntentClassification (id)": 38.11,
+ "MassiveIntentClassification (ro)": 39.82,
+ "MassiveIntentClassification (zh-TW)": 17.03,
+ "MassiveIntentClassification (tl)": 39.53,
+ "MassiveIntentClassification (ar)": 14.05,
+ "MassiveIntentClassification (ka)": 8.42,
+ "MassiveIntentClassification (th)": 10.87,
+ "MassiveIntentClassification (hi)": 13.46,
+ "MassiveIntentClassification (hu)": 34.38,
+ "MassiveIntentClassification (nl)": 41.43,
+ "MassiveIntentClassification (it)": 46.19,
+ "MassiveIntentClassification (lv)": 35.81,
+ "MassiveIntentClassification (vi)": 35.68,
+ "MassiveIntentClassification (km)": 4.44,
+ "MassiveIntentClassification (jv)": 33.82,
+ "MassiveIntentClassification (de)": 43.23,
+ "MassiveIntentClassification (zh-CN)": 18.44,
+ "MassiveIntentClassification (ms)": 36.34,
+ "MassiveIntentClassification (es)": 45.64,
+ "MassiveIntentClassification (hy)": 6.94,
+ "MassiveIntentClassification (ru)": 32.31,
+ "MassiveScenarioClassification (ru)": 38.22,
+ "MassiveScenarioClassification (ka)": 13.35,
+ "MassiveScenarioClassification (nb)": 50.68,
+ "MassiveScenarioClassification (is)": 43.01,
+ "MassiveScenarioClassification (am)": 7.36,
+ "MassiveScenarioClassification (it)": 55.15,
+ "MassiveScenarioClassification (ml)": 7.08,
+ "MassiveScenarioClassification (pt)": 56.07,
+ "MassiveScenarioClassification (sq)": 46.71,
+ "MassiveScenarioClassification (bn)": 16.2,
+ "MassiveScenarioClassification (ur)": 22.4,
+ "MassiveScenarioClassification (jv)": 42.62,
+ "MassiveScenarioClassification (he)": 27.41,
+ "MassiveScenarioClassification (ko)": 19.02,
+ "MassiveScenarioClassification (hi)": 17.95,
+ "MassiveScenarioClassification (tr)": 45.72,
+ "MassiveScenarioClassification (sl)": 41.89,
+ "MassiveScenarioClassification (my)": 10.1,
+ "MassiveScenarioClassification (da)": 51.76,
+ "MassiveScenarioClassification (sw)": 43.94,
+ "MassiveScenarioClassification (ar)": 22.25,
+ "MassiveScenarioClassification (zh-CN)": 29.79,
+ "MassiveScenarioClassification (cy)": 37.85,
+ "MassiveScenarioClassification (az)": 44.88,
+ "MassiveScenarioClassification (th)": 19.92,
+ "MassiveScenarioClassification (de)": 59.02,
+ "MassiveScenarioClassification (fa)": 30.39,
+ "MassiveScenarioClassification (kn)": 7.92,
+ "MassiveScenarioClassification (ms)": 47.21,
+ "MassiveScenarioClassification (vi)": 41.41,
+ "MassiveScenarioClassification (ro)": 50.86,
+ "MassiveScenarioClassification (km)": 9.31,
+ "MassiveScenarioClassification (hu)": 41.37,
+ "MassiveScenarioClassification (sv)": 49.22,
+ "MassiveScenarioClassification (te)": 6.68,
+ "MassiveScenarioClassification (fi)": 42.97,
+ "MassiveScenarioClassification (en)": 77.39,
+ "MassiveScenarioClassification (mn)": 27.9,
+ "MassiveScenarioClassification (nl)": 52.0,
+ "MassiveScenarioClassification (pl)": 46.56,
+ "MassiveScenarioClassification (fr)": 57.13,
+ "MassiveScenarioClassification (hy)": 12.46,
+ "MassiveScenarioClassification (af)": 48.03,
+ "MassiveScenarioClassification (ja)": 36.14,
+ "MassiveScenarioClassification (tl)": 50.66,
+ "MassiveScenarioClassification (ta)": 12.75,
+ "MassiveScenarioClassification (lv)": 40.06,
+ "MassiveScenarioClassification (es)": 54.89,
+ "MassiveScenarioClassification (el)": 41.93,
+ "MassiveScenarioClassification (zh-TW)": 26.79,
+ "MassiveScenarioClassification (id)": 46.94,
+ "NoRecClassification": 39.55,
+ "NordicLangClassification": 55.07,
+ "PAC": 60.88,
+ "PolEmo2.0-IN": 43.92,
+ "PolEmo2.0-OUT": 24.13,
+ "RuReviewsClassification": 44.62,
+ "RuSciBenchGRNTIClassification": 22.05,
+ "RuSciBenchOECDClassification": 16.53,
+ "ToxicConversationsClassification": 66.48
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "AlloProfClusteringP2P": 60.31,
+ "AlloProfClusteringS2S": 41.15,
+ "BlurbsClusteringP2P": 29.05,
+ "BlurbsClusteringS2S": 12.3,
+ "GeoreviewClusteringP2P": 21.87,
+ "HALClusteringS2S": 25.36,
+ "MLSUMClusteringP2P (de)": 42.55,
+ "MLSUMClusteringP2P (fr)": 42.64,
+ "MLSUMClusteringP2P (ru)": 24.24,
+ "MLSUMClusteringP2P (es)": 42.98,
+ "MLSUMClusteringS2S (de)": 41.87,
+ "MLSUMClusteringS2S (fr)": 42.61,
+ "MLSUMClusteringS2S (ru)": 21.48,
+ "MLSUMClusteringS2S (es)": 42.34,
+ "MasakhaNEWSClusteringP2P (amh)": 40.67,
+ "MasakhaNEWSClusteringP2P (eng)": 60.51,
+ "MasakhaNEWSClusteringP2P (fra)": 46.71,
+ "MasakhaNEWSClusteringP2P (hau)": 47.0,
+ "MasakhaNEWSClusteringP2P (ibo)": 43.33,
+ "MasakhaNEWSClusteringP2P (lin)": 69.98,
+ "MasakhaNEWSClusteringP2P (lug)": 54.39,
+ "MasakhaNEWSClusteringP2P (orm)": 32.23,
+ "MasakhaNEWSClusteringP2P (pcm)": 79.75,
+ "MasakhaNEWSClusteringP2P (run)": 57.72,
+ "MasakhaNEWSClusteringP2P (sna)": 60.44,
+ "MasakhaNEWSClusteringP2P (som)": 35.77,
+ "MasakhaNEWSClusteringP2P (swa)": 26.07,
+ "MasakhaNEWSClusteringP2P (tir)": 45.23,
+ "MasakhaNEWSClusteringP2P (xho)": 36.73,
+ "MasakhaNEWSClusteringP2P (yor)": 36.21,
+ "MasakhaNEWSClusteringS2S (amh)": 42.03,
+ "MasakhaNEWSClusteringS2S (eng)": 40.9,
+ "MasakhaNEWSClusteringS2S (fra)": 48.92,
+ "MasakhaNEWSClusteringS2S (hau)": 17.37,
+ "MasakhaNEWSClusteringS2S (ibo)": 40.42,
+ "MasakhaNEWSClusteringS2S (lin)": 55.71,
+ "MasakhaNEWSClusteringS2S (lug)": 43.59,
+ "MasakhaNEWSClusteringS2S (orm)": 26.56,
+ "MasakhaNEWSClusteringS2S (pcm)": 73.17,
+ "MasakhaNEWSClusteringS2S (run)": 54.44,
+ "MasakhaNEWSClusteringS2S (sna)": 42.07,
+ "MasakhaNEWSClusteringS2S (som)": 34.27,
+ "MasakhaNEWSClusteringS2S (swa)": 22.01,
+ "MasakhaNEWSClusteringS2S (tir)": 43.34,
+ "MasakhaNEWSClusteringS2S (xho)": 22.12,
+ "MasakhaNEWSClusteringS2S (yor)": 32.6,
+ "RuSciBenchGRNTIClusteringP2P": 19.01,
+ "RuSciBenchOECDClusteringP2P": 15.98,
+ "TenKGnadClusteringP2P": 44.52,
+ "TenKGnadClusteringS2S": 24.68
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "CDSC-E": 47.58,
+ "FalseFriendsGermanEnglish": 47.77,
+ "OpusparcusPC (de)": 90.71,
+ "OpusparcusPC (en)": 98.69,
+ "OpusparcusPC (fi)": 84.99,
+ "OpusparcusPC (fr)": 87.64,
+ "OpusparcusPC (ru)": 79.78,
+ "OpusparcusPC (sv)": 82.63,
+ "PSC": 93.8,
+ "PawsXPairClassification (de)": 52.43,
+ "PawsXPairClassification (en)": 61.79,
+ "PawsXPairClassification (es)": 53.65,
+ "PawsXPairClassification (fr)": 54.89,
+ "PawsXPairClassification (ja)": 47.84,
+ "PawsXPairClassification (ko)": 49.99,
+ "PawsXPairClassification (zh)": 52.14,
+ "SICK-E-PL": 45.73,
+ "SprintDuplicateQuestions": 96.73,
+ "TERRa": 47.52,
+ "TwitterURLCorpus": 85.6
+ },
+ {
+ "Model": "bge-large-en-v1.5",
+ "CDSC-E": 47.66,
+ "FalseFriendsGermanEnglish": 47.77,
+ "OpusparcusPC (de)": 90.71,
+ "OpusparcusPC (en)": 98.69,
+ "OpusparcusPC (fi)": 84.99,
+ "OpusparcusPC (fr)": 87.64,
+ "OpusparcusPC (ru)": 79.78,
+ "OpusparcusPC (sv)": 82.63,
+ "PSC": 93.8,
+ "PawsXPairClassification (de)": 52.43,
+ "PawsXPairClassification (en)": 61.81,
+ "PawsXPairClassification (es)": 53.65,
+ "PawsXPairClassification (fr)": 54.89,
+ "PawsXPairClassification (ja)": 48.03,
+ "PawsXPairClassification (ko)": 50.03,
+ "PawsXPairClassification (zh)": 52.2,
+ "SICK-E-PL": 45.75,
+ "SprintDuplicateQuestions": 96.75,
+ "TERRa": 47.55,
+ "TwitterURLCorpus": 85.6
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "AlloprofReranking": 67.41,
+ "RuBQReranking": 47.66,
+ "SyntecReranking": 72.67,
+ "T2Reranking": 63.07
+ },
+ {
+ "Model": "bge-large-en-v1.5",
+ "MIRACLReranking (ru)": 25.96,
+ "MIRACLReranking (ar)": 25.08,
+ "MIRACLReranking (bn)": 15.35,
+ "MIRACLReranking (de)": 30.9,
+ "MIRACLReranking (en)": 57.39,
+ "MIRACLReranking (es)": 42.47,
+ "MIRACLReranking (fa)": 19.12,
+ "MIRACLReranking (fi)": 41.3,
+ "MIRACLReranking (fr)": 31.82,
+ "MIRACLReranking (hi)": 11.98,
+ "MIRACLReranking (id)": 28.43,
+ "MIRACLReranking (ja)": 20.48,
+ "MIRACLReranking (ko)": 23.85,
+ "MIRACLReranking (sw)": 37.71,
+ "MIRACLReranking (te)": 4.89,
+ "MIRACLReranking (th)": 9.36,
+ "MIRACLReranking (yo)": 55.88,
+ "MIRACLReranking (zh)": 15.02
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "bge-large-en-v1.5",
- "AILACasedocs": 25.15,
- "AILAStatutes": 20.74,
+ "AILACasedocs": 25.98,
+ "AILAStatutes": 23.06,
"ARCChallenge": 9.99,
+ "AlloprofRetrieval": 38.74,
"AlphaNLI": 13.13,
+ "AppsRetrieval": 7.57,
+ "ArguAna": 64.52,
+ "BSARDRetrieval": 10.92,
"BrightRetrieval (stackoverflow)": 9.51,
"BrightRetrieval (earth_science)": 24.15,
"BrightRetrieval (aops)": 6.08,
@@ -490,25 +2739,126 @@
"BrightRetrieval (theoremqa_questions)": 12.56,
"BrightRetrieval (leetcode)": 26.68,
"BrightRetrieval (economics)": 16.59,
- "GerDaLIRSmall": 3.96,
+ "CmedqaRetrieval": 2.07,
+ "CodeFeedbackMT": 36.89,
+ "CodeFeedbackST": 71.77,
+ "CodeSearchNetCCRetrieval (python)": 60.47,
+ "CodeSearchNetCCRetrieval (javascript)": 53.23,
+ "CodeSearchNetCCRetrieval (go)": 41.05,
+ "CodeSearchNetCCRetrieval (ruby)": 58.4,
+ "CodeSearchNetCCRetrieval (java)": 57.06,
+ "CodeSearchNetCCRetrieval (php)": 46.01,
+ "CodeSearchNetRetrieval (python)": 90.8,
+ "CodeSearchNetRetrieval (javascript)": 76.92,
+ "CodeSearchNetRetrieval (go)": 94.95,
+ "CodeSearchNetRetrieval (ruby)": 82.41,
+ "CodeSearchNetRetrieval (java)": 85.4,
+ "CodeSearchNetRetrieval (php)": 87.83,
+ "CodeTransOceanContest": 53.13,
+ "CodeTransOceanDL": 21.04,
+ "CosQA": 34.36,
+ "CovidRetrieval": 17.18,
+ "GerDaLIR": 1.61,
+ "GerDaLIRSmall": 4.16,
+ "GermanQuAD-Retrieval": 83.41,
"HellaSwag": 28.5,
- "LeCaRDv2": 22.68,
- "LegalBenchConsumerContractsQA": 73.52,
- "LegalBenchCorporateLobbying": 91.51,
- "LegalQuAD": 16.22,
- "LegalSummarization": 59.99,
+ "LEMBNarrativeQARetrieval": 27.92,
+ "LEMBQMSumRetrieval": 23.29,
+ "LEMBSummScreenFDRetrieval": 68.73,
+ "LEMBWikimQARetrieval": 56.42,
+ "LeCaRDv2": 21.67,
+ "LegalBenchConsumerContractsQA": 73.33,
+ "LegalBenchCorporateLobbying": 90.77,
+ "LegalQuAD": 16.56,
+ "LegalSummarization": 61.12,
+ "MIRACLRetrieval (ru)": 10.73,
+ "MIRACLRetrieval (ar)": 3.6,
+ "MIRACLRetrieval (bn)": 4.92,
+ "MIRACLRetrieval (de)": 17.65,
+ "MIRACLRetrieval (en)": 49.54,
+ "MIRACLRetrieval (es)": 28.5,
+ "MIRACLRetrieval (fa)": 3.6,
+ "MIRACLRetrieval (fi)": 24.56,
+ "MIRACLRetrieval (fr)": 19.15,
+ "MIRACLRetrieval (hi)": 0.62,
+ "MIRACLRetrieval (id)": 17.48,
+ "MIRACLRetrieval (ja)": 4.12,
+ "MIRACLRetrieval (ko)": 9.89,
+ "MIRACLRetrieval (sw)": 29.89,
+ "MIRACLRetrieval (te)": 0.11,
+ "MIRACLRetrieval (th)": 0.59,
+ "MIRACLRetrieval (yo)": 54.39,
+ "MIRACLRetrieval (zh)": 0.87,
+ "MintakaRetrieval (ar)": 3.36,
+ "MintakaRetrieval (de)": 18.33,
+ "MintakaRetrieval (es)": 16.42,
+ "MintakaRetrieval (fr)": 20.44,
+ "MintakaRetrieval (hi)": 3.1,
+ "MintakaRetrieval (it)": 15.85,
+ "MintakaRetrieval (ja)": 8.48,
+ "MintakaRetrieval (pt)": 15.72,
+ "NFCorpus": 38.06,
"PIQA": 27.99,
"Quail": 1.83,
"RARbCode": 48.12,
"RARbMath": 57.36,
+ "RiaNewsRetrieval": 29.09,
+ "RuBQRetrieval": 21.05,
+ "SCIDOCS": 22.63,
"SIQA": 1.04,
+ "SciFact": 74.64,
+ "SciFact-PL": 39.46,
"SpartQA": 2.99,
+ "StackOverflowQA": 83.07,
+ "SyntecRetrieval": 62.29,
+ "SyntheticText2SQL": 50.64,
+ "TRECCOVID": 74.7,
+ "TRECCOVID-PL": 31.13,
"TempReasonL1": 1.46,
"TempReasonL2Fact": 24.25,
"TempReasonL2Pure": 2.35,
"TempReasonL3Fact": 20.64,
"TempReasonL3Pure": 6.67,
- "WinoGrande": 19.18
+ "WinoGrande": 19.18,
+ "XMarket (de)": 14.52,
+ "XMarket (en)": 33.37,
+ "XMarket (es)": 16.0,
+ "XPQARetrieval (ara-ara)": 11.06,
+ "XPQARetrieval (eng-ara)": 2.97,
+ "XPQARetrieval (ara-eng)": 8.68,
+ "XPQARetrieval (deu-deu)": 59.66,
+ "XPQARetrieval (eng-deu)": 12.85,
+ "XPQARetrieval (deu-eng)": 32.48,
+ "XPQARetrieval (spa-spa)": 47.89,
+ "XPQARetrieval (eng-spa)": 9.27,
+ "XPQARetrieval (spa-eng)": 29.33,
+ "XPQARetrieval (fra-fra)": 55.47,
+ "XPQARetrieval (eng-fra)": 15.52,
+ "XPQARetrieval (fra-eng)": 35.39,
+ "XPQARetrieval (hin-hin)": 25.9,
+ "XPQARetrieval (eng-hin)": 6.93,
+ "XPQARetrieval (hin-eng)": 7.93,
+ "XPQARetrieval (ita-ita)": 60.43,
+ "XPQARetrieval (eng-ita)": 9.66,
+ "XPQARetrieval (ita-eng)": 29.93,
+ "XPQARetrieval (jpn-jpn)": 43.18,
+ "XPQARetrieval (eng-jpn)": 5.22,
+ "XPQARetrieval (jpn-eng)": 16.47,
+ "XPQARetrieval (kor-kor)": 17.32,
+ "XPQARetrieval (eng-kor)": 8.82,
+ "XPQARetrieval (kor-eng)": 8.32,
+ "XPQARetrieval (pol-pol)": 33.06,
+ "XPQARetrieval (eng-pol)": 11.27,
+ "XPQARetrieval (pol-eng)": 20.21,
+ "XPQARetrieval (por-por)": 38.57,
+ "XPQARetrieval (eng-por)": 8.14,
+ "XPQARetrieval (por-eng)": 25.24,
+ "XPQARetrieval (tam-tam)": 8.49,
+ "XPQARetrieval (eng-tam)": 4.38,
+ "XPQARetrieval (tam-eng)": 3.46,
+ "XPQARetrieval (cmn-cmn)": 24.99,
+ "XPQARetrieval (eng-cmn)": 8.52,
+ "XPQARetrieval (cmn-eng)": 16.81
}
],
"recall_at_1": [
@@ -526,16 +2876,149 @@
]
},
"STS": {
- "cosine_spearman": []
- },
- "Summarization": {
- "cosine_spearman": []
- },
+ "cosine_spearman": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "CDSC-R": 82.28,
+ "GermanSTSBenchmark": 63.74,
+ "RUParaPhraserSTS": 49.11,
+ "RuSTSBenchmarkSTS": 60.33,
+ "SICK-R": 81.68,
+ "SICK-R-PL": 52.73,
+ "SICKFr": 69.33,
+ "STS12": 79.05,
+ "STS13": 86.37,
+ "STS14": 82.78,
+ "STS15": 88.03,
+ "STS17 (ko-ko)": 38.5,
+ "STS17 (en-ar)": 6.25,
+ "STS17 (es-en)": 44.24,
+ "STS17 (ar-ar)": 43.61,
+ "STS17 (en-tr)": 10.12,
+ "STS17 (es-es)": 79.62,
+ "STS17 (en-de)": 45.46,
+ "STS17 (fr-en)": 48.28,
+ "STS17 (en-en)": 87.49,
+ "STS17 (it-en)": 44.48,
+ "STS17 (nl-en)": 40.92,
+ "STS22 (ru)": 25.18,
+ "STS22 (pl-en)": 54.46,
+ "STS22 (de-en)": 50.66,
+ "STS22 (de-fr)": 40.97,
+ "STS22 (zh)": 51.6,
+ "STS22 (pl)": 33.72,
+ "STS22 (zh-en)": 49.02,
+ "STS22 (en)": 67.52,
+ "STS22 (tr)": 44.96,
+ "STS22 (es-en)": 55.47,
+ "STS22 (es)": 57.92,
+ "STS22 (ar)": 22.55,
+ "STS22 (fr-pl)": 28.17,
+ "STS22 (de)": 40.26,
+ "STS22 (it)": 65.13,
+ "STS22 (fr)": 79.43,
+ "STS22 (de-pl)": 23.31,
+ "STS22 (es-it)": 57.7,
+ "STSB": 36.44,
+ "STSBenchmark": 87.52,
+ "STSBenchmarkMultilingualSTS (it)": 69.38,
+ "STSBenchmarkMultilingualSTS (zh)": 37.32,
+ "STSBenchmarkMultilingualSTS (es)": 71.47,
+ "STSBenchmarkMultilingualSTS (pl)": 60.99,
+ "STSBenchmarkMultilingualSTS (de)": 64.59,
+ "STSBenchmarkMultilingualSTS (nl)": 65.61,
+ "STSBenchmarkMultilingualSTS (fr)": 70.05,
+ "STSBenchmarkMultilingualSTS (pt)": 68.57,
+ "STSBenchmarkMultilingualSTS (en)": 87.52,
+ "STSBenchmarkMultilingualSTS (ru)": 60.06
+ },
+ {
+ "Model": "bge-large-en-v1.5",
+ "CDSC-R": 82.28,
+ "GermanSTSBenchmark": 63.74,
+ "RUParaPhraserSTS": 49.11,
+ "RuSTSBenchmarkSTS": 60.33,
+ "SICK-R": 81.68,
+ "SICK-R-PL": 52.73,
+ "SICKFr": 69.33,
+ "STS12": 79.05,
+ "STS13": 86.37,
+ "STS14": 82.78,
+ "STS15": 88.03,
+ "STS17 (ko-ko)": 38.5,
+ "STS17 (en-ar)": 6.25,
+ "STS17 (es-en)": 44.24,
+ "STS17 (ar-ar)": 43.61,
+ "STS17 (en-tr)": 10.12,
+ "STS17 (es-es)": 79.62,
+ "STS17 (en-de)": 45.46,
+ "STS17 (fr-en)": 48.28,
+ "STS17 (en-en)": 87.49,
+ "STS17 (it-en)": 44.48,
+ "STS17 (nl-en)": 40.92,
+ "STS22 (ru)": 25.18,
+ "STS22 (pl-en)": 54.46,
+ "STS22 (de-en)": 50.66,
+ "STS22 (de-fr)": 40.97,
+ "STS22 (zh)": 51.6,
+ "STS22 (pl)": 33.72,
+ "STS22 (zh-en)": 49.02,
+ "STS22 (en)": 67.52,
+ "STS22 (tr)": 44.96,
+ "STS22 (es-en)": 55.47,
+ "STS22 (es)": 57.92,
+ "STS22 (ar)": 22.53,
+ "STS22 (fr-pl)": 28.17,
+ "STS22 (de)": 40.27,
+ "STS22 (it)": 65.13,
+ "STS22 (fr)": 79.43,
+ "STS22 (de-pl)": 23.31,
+ "STS22 (es-it)": 57.7,
+ "STSB": 36.44,
+ "STSBenchmark": 87.52,
+ "STSBenchmarkMultilingualSTS (it)": 69.38,
+ "STSBenchmarkMultilingualSTS (zh)": 37.32,
+ "STSBenchmarkMultilingualSTS (es)": 71.47,
+ "STSBenchmarkMultilingualSTS (pl)": 60.99,
+ "STSBenchmarkMultilingualSTS (de)": 64.59,
+ "STSBenchmarkMultilingualSTS (nl)": 65.61,
+ "STSBenchmarkMultilingualSTS (fr)": 70.05,
+ "STSBenchmarkMultilingualSTS (pt)": 68.57,
+ "STSBenchmarkMultilingualSTS (en)": 87.52,
+ "STSBenchmarkMultilingualSTS (ru)": 60.07
+ }
+ ]
+ },
+ "Summarization": {
+ "cosine_spearman": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "SummEvalFr": 29.5
+ },
+ {
+ "Model": "bge-large-en-v1.5",
+ "SummEvalFr": 29.5
+ }
+ ]
+ },
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "CEDRClassification": 36.15,
+ "SensitiveTopicsClassification": 17.97
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "bge-large-en-v1.5",
+ "Core17InstructionRetrieval": -1.93,
+ "News21InstructionRetrieval": 0.33,
+ "Robust04InstructionRetrieval": -5.26
+ }
+ ]
}
},
"BAAI__bge-large-en-v1.5-instruct": {
@@ -786,7 +3269,119 @@
"f1": [
{
"Model": "bge-m3",
- "Tatoeba (rus-eng)": 93.42
+ "BornholmBitextMining": 44.11,
+ "Tatoeba (swh-eng)": 73.89,
+ "Tatoeba (arz-eng)": 68.59,
+ "Tatoeba (tam-eng)": 88.38,
+ "Tatoeba (gsw-eng)": 50.07,
+ "Tatoeba (fra-eng)": 94.3,
+ "Tatoeba (hsb-eng)": 61.14,
+ "Tatoeba (hrv-eng)": 95.35,
+ "Tatoeba (cha-eng)": 32.57,
+ "Tatoeba (ido-eng)": 71.22,
+ "Tatoeba (spa-eng)": 96.82,
+ "Tatoeba (tzl-eng)": 49.63,
+ "Tatoeba (cor-eng)": 5.9,
+ "Tatoeba (hun-eng)": 93.05,
+ "Tatoeba (max-eng)": 59.87,
+ "Tatoeba (uig-eng)": 79.95,
+ "Tatoeba (tel-eng)": 92.81,
+ "Tatoeba (kzj-eng)": 10.46,
+ "Tatoeba (fin-eng)": 95.7,
+ "Tatoeba (kat-eng)": 90.73,
+ "Tatoeba (awa-eng)": 74.37,
+ "Tatoeba (mhr-eng)": 11.36,
+ "Tatoeba (kab-eng)": 2.45,
+ "Tatoeba (eus-eng)": 73.6,
+ "Tatoeba (ceb-eng)": 32.14,
+ "Tatoeba (bre-eng)": 11.93,
+ "Tatoeba (ast-eng)": 76.64,
+ "Tatoeba (tur-eng)": 94.63,
+ "Tatoeba (ukr-eng)": 92.32,
+ "Tatoeba (kaz-eng)": 79.14,
+ "Tatoeba (csb-eng)": 42.61,
+ "Tatoeba (vie-eng)": 96.3,
+ "Tatoeba (isl-eng)": 93.35,
+ "Tatoeba (orv-eng)": 44.76,
+ "Tatoeba (bos-eng)": 92.98,
+ "Tatoeba (epo-eng)": 93.85,
+ "Tatoeba (ile-eng)": 77.32,
+ "Tatoeba (yid-eng)": 48.99,
+ "Tatoeba (por-eng)": 94.31,
+ "Tatoeba (khm-eng)": 76.02,
+ "Tatoeba (ina-eng)": 90.4,
+ "Tatoeba (deu-eng)": 99.1,
+ "Tatoeba (dtp-eng)": 10.16,
+ "Tatoeba (pms-eng)": 53.64,
+ "Tatoeba (ber-eng)": 7.78,
+ "Tatoeba (arq-eng)": 30.88,
+ "Tatoeba (swe-eng)": 93.63,
+ "Tatoeba (ind-eng)": 93.36,
+ "Tatoeba (urd-eng)": 90.48,
+ "Tatoeba (tha-eng)": 96.59,
+ "Tatoeba (rus-eng)": 93.27,
+ "Tatoeba (gla-eng)": 33.5,
+ "Tatoeba (pol-eng)": 96.6,
+ "Tatoeba (fao-eng)": 74.92,
+ "Tatoeba (ara-eng)": 87.76,
+ "Tatoeba (afr-eng)": 91.88,
+ "Tatoeba (cbk-eng)": 68.84,
+ "Tatoeba (mar-eng)": 88.81,
+ "Tatoeba (zsm-eng)": 95.25,
+ "Tatoeba (srp-eng)": 92.43,
+ "Tatoeba (jav-eng)": 60.86,
+ "Tatoeba (hye-eng)": 89.15,
+ "Tatoeba (lvs-eng)": 90.55,
+ "Tatoeba (tuk-eng)": 25.36,
+ "Tatoeba (ces-eng)": 94.75,
+ "Tatoeba (mkd-eng)": 89.26,
+ "Tatoeba (lfn-eng)": 65.76,
+ "Tatoeba (kor-eng)": 89.84,
+ "Tatoeba (ita-eng)": 92.57,
+ "Tatoeba (tgl-eng)": 79.91,
+ "Tatoeba (mon-eng)": 89.6,
+ "Tatoeba (ron-eng)": 95.62,
+ "Tatoeba (lit-eng)": 90.7,
+ "Tatoeba (uzb-eng)": 59.22,
+ "Tatoeba (cmn-eng)": 95.27,
+ "Tatoeba (swg-eng)": 47.69,
+ "Tatoeba (tat-eng)": 66.01,
+ "Tatoeba (ben-eng)": 85.97,
+ "Tatoeba (nno-eng)": 91.8,
+ "Tatoeba (dsb-eng)": 52.29,
+ "Tatoeba (wuu-eng)": 83.41,
+ "Tatoeba (glg-eng)": 91.11,
+ "Tatoeba (slk-eng)": 93.55,
+ "Tatoeba (pes-eng)": 92.13,
+ "Tatoeba (nds-eng)": 62.6,
+ "Tatoeba (nld-eng)": 95.78,
+ "Tatoeba (kur-eng)": 63.79,
+ "Tatoeba (nov-eng)": 75.48,
+ "Tatoeba (nob-eng)": 97.37,
+ "Tatoeba (heb-eng)": 87.5,
+ "Tatoeba (sqi-eng)": 93.65,
+ "Tatoeba (lat-eng)": 56.52,
+ "Tatoeba (cym-eng)": 61.64,
+ "Tatoeba (aze-eng)": 89.22,
+ "Tatoeba (xho-eng)": 64.92,
+ "Tatoeba (ell-eng)": 95.15,
+ "Tatoeba (cat-eng)": 93.49,
+ "Tatoeba (dan-eng)": 94.32,
+ "Tatoeba (pam-eng)": 10.89,
+ "Tatoeba (slv-eng)": 89.43,
+ "Tatoeba (bul-eng)": 91.63,
+ "Tatoeba (amh-eng)": 82.54,
+ "Tatoeba (jpn-eng)": 93.73,
+ "Tatoeba (yue-eng)": 85.48,
+ "Tatoeba (bel-eng)": 92.27,
+ "Tatoeba (oci-eng)": 55.5,
+ "Tatoeba (mal-eng)": 97.19,
+ "Tatoeba (ang-eng)": 46.95,
+ "Tatoeba (gle-eng)": 58.09,
+ "Tatoeba (est-eng)": 90.72,
+ "Tatoeba (hin-eng)": 95.45,
+ "Tatoeba (fry-eng)": 73.31,
+ "Tatoeba (war-eng)": 33.49
}
]
},
@@ -794,15 +3389,93 @@
"accuracy": [
{
"Model": "bge-m3",
+ "AmazonCounterfactualClassification (en-ext)": 76.23,
+ "AmazonCounterfactualClassification (en)": 75.63,
+ "AmazonCounterfactualClassification (de)": 69.11,
+ "AmazonCounterfactualClassification (ja)": 77.67,
+ "AmazonReviewsClassification (de)": 46.37,
"GeoreviewClassification": 48.27,
"HeadlineClassification": 70.32,
"InappropriatenessClassification": 59.87,
"KinopoiskClassification": 58.23,
- "MassiveIntentClassification (ru)": 68.75,
+ "MTOPDomainClassification (de)": 91.22,
+ "MTOPIntentClassification (de)": 68.08,
+ "MasakhaNEWSClassification (amh)": 84.76,
+ "MasakhaNEWSClassification (eng)": 78.73,
+ "MasakhaNEWSClassification (fra)": 76.4,
+ "MasakhaNEWSClassification (hau)": 78.26,
+ "MasakhaNEWSClassification (ibo)": 64.26,
+ "MasakhaNEWSClassification (lin)": 72.46,
+ "MasakhaNEWSClassification (lug)": 65.25,
+ "MasakhaNEWSClassification (orm)": 74.58,
+ "MasakhaNEWSClassification (pcm)": 91.11,
+ "MasakhaNEWSClassification (run)": 76.74,
+ "MasakhaNEWSClassification (sna)": 84.74,
+ "MasakhaNEWSClassification (som)": 64.76,
+ "MasakhaNEWSClassification (swa)": 73.8,
+ "MasakhaNEWSClassification (tir)": 68.16,
+ "MasakhaNEWSClassification (xho)": 77.81,
+ "MasakhaNEWSClassification (yor)": 79.03,
+ "MassiveIntentClassification (sl)": 65.53,
+ "MassiveIntentClassification (ko)": 66.53,
+ "MassiveIntentClassification (tl)": 59.53,
+ "MassiveIntentClassification (sv)": 69.15,
+ "MassiveIntentClassification (pl)": 67.95,
+ "MassiveIntentClassification (ru)": 68.76,
+ "MassiveIntentClassification (en)": 71.08,
+ "MassiveIntentClassification (af)": 63.33,
+ "MassiveIntentClassification (az)": 64.24,
+ "MassiveIntentClassification (fr)": 67.45,
+ "MassiveIntentClassification (bn)": 62.95,
+ "MassiveIntentClassification (vi)": 67.59,
+ "MassiveIntentClassification (am)": 55.09,
+ "MassiveIntentClassification (fi)": 67.26,
+ "MassiveIntentClassification (tr)": 66.96,
+ "MassiveIntentClassification (el)": 66.22,
+ "MassiveIntentClassification (jv)": 52.32,
+ "MassiveIntentClassification (nl)": 68.26,
+ "MassiveIntentClassification (fa)": 69.44,
+ "MassiveIntentClassification (sw)": 56.82,
+ "MassiveIntentClassification (da)": 67.26,
+ "MassiveIntentClassification (ar)": 55.75,
+ "MassiveIntentClassification (nb)": 67.07,
+ "MassiveIntentClassification (hi)": 65.09,
+ "MassiveIntentClassification (hy)": 63.92,
+ "MassiveIntentClassification (ja)": 69.24,
+ "MassiveIntentClassification (mn)": 62.61,
+ "MassiveIntentClassification (zh-CN)": 69.02,
+ "MassiveIntentClassification (th)": 64.56,
+ "MassiveIntentClassification (cy)": 53.17,
+ "MassiveIntentClassification (te)": 62.85,
+ "MassiveIntentClassification (ka)": 55.21,
+ "MassiveIntentClassification (ro)": 65.09,
+ "MassiveIntentClassification (id)": 66.9,
+ "MassiveIntentClassification (de)": 65.74,
+ "MassiveIntentClassification (lv)": 64.9,
+ "MassiveIntentClassification (zh-TW)": 64.16,
+ "MassiveIntentClassification (sq)": 64.8,
+ "MassiveIntentClassification (my)": 60.0,
+ "MassiveIntentClassification (it)": 67.05,
+ "MassiveIntentClassification (he)": 65.16,
+ "MassiveIntentClassification (km)": 51.06,
+ "MassiveIntentClassification (ms)": 65.35,
+ "MassiveIntentClassification (hu)": 65.03,
+ "MassiveIntentClassification (is)": 61.64,
+ "MassiveIntentClassification (kn)": 61.66,
+ "MassiveIntentClassification (ml)": 64.67,
+ "MassiveIntentClassification (ta)": 60.4,
+ "MassiveIntentClassification (ur)": 63.17,
+ "MassiveIntentClassification (pt)": 67.63,
+ "MassiveIntentClassification (es)": 67.04,
"MassiveScenarioClassification (ru)": 73.42,
+ "MassiveScenarioClassification (de)": 73.54,
+ "NordicLangClassification": 38.93,
+ "PAC": 69.13,
+ "PolEmo2.0-OUT": 49.47,
"RuReviewsClassification": 66.91,
"RuSciBenchGRNTIClassification": 55.81,
- "RuSciBenchOECDClassification": 42.57
+ "RuSciBenchOECDClassification": 42.57,
+ "ToxicConversationsClassification": 68.72
}
]
},
@@ -810,7 +3483,25 @@
"v_measure": [
{
"Model": "bge-m3",
+ "BlurbsClusteringP2P": 38.69,
+ "BlurbsClusteringS2S": 16.28,
"GeoreviewClusteringP2P": 63.09,
+ "MasakhaNEWSClusteringS2S (amh)": 46.92,
+ "MasakhaNEWSClusteringS2S (eng)": 26.25,
+ "MasakhaNEWSClusteringS2S (fra)": 42.4,
+ "MasakhaNEWSClusteringS2S (hau)": 37.07,
+ "MasakhaNEWSClusteringS2S (ibo)": 38.82,
+ "MasakhaNEWSClusteringS2S (lin)": 52.16,
+ "MasakhaNEWSClusteringS2S (lug)": 48.08,
+ "MasakhaNEWSClusteringS2S (orm)": 28.09,
+ "MasakhaNEWSClusteringS2S (pcm)": 62.49,
+ "MasakhaNEWSClusteringS2S (run)": 47.41,
+ "MasakhaNEWSClusteringS2S (sna)": 48.44,
+ "MasakhaNEWSClusteringS2S (som)": 37.44,
+ "MasakhaNEWSClusteringS2S (swa)": 22.99,
+ "MasakhaNEWSClusteringS2S (tir)": 54.6,
+ "MasakhaNEWSClusteringS2S (xho)": 30.48,
+ "MasakhaNEWSClusteringS2S (yor)": 26.7,
"RuSciBenchGRNTIClusteringP2P": 50.83,
"RuSciBenchOECDClusteringP2P": 43.21
}
@@ -820,13 +3511,41 @@
"max_ap": [
{
"Model": "bge-m3",
+ "OpusparcusPC (de)": 96.6,
+ "OpusparcusPC (en)": 98.77,
+ "OpusparcusPC (fi)": 94.65,
+ "OpusparcusPC (fr)": 92.88,
"OpusparcusPC (ru)": 89.64,
- "TERRa": 60.6
+ "OpusparcusPC (sv)": 95.36,
+ "PawsXPairClassification (de)": 56.79,
+ "PawsXPairClassification (en)": 61.09,
+ "PawsXPairClassification (es)": 57.3,
+ "PawsXPairClassification (fr)": 59.57,
+ "PawsXPairClassification (ja)": 51.7,
+ "PawsXPairClassification (ko)": 52.33,
+ "PawsXPairClassification (zh)": 57.05,
+ "SprintDuplicateQuestions": 97.33,
+ "TERRa": 60.6,
+ "TwitterURLCorpus": 85.89
},
{
"Model": "bge-m3",
- "OpusparcusPC (ru)": 89.64,
- "TERRa": 60.6
+ "OpusparcusPC (de)": 96.6,
+ "OpusparcusPC (en)": 98.77,
+ "OpusparcusPC (fi)": 94.65,
+ "OpusparcusPC (fr)": 92.88,
+ "OpusparcusPC (ru)": 89.65,
+ "OpusparcusPC (sv)": 95.36,
+ "PawsXPairClassification (de)": 57.12,
+ "PawsXPairClassification (en)": 61.13,
+ "PawsXPairClassification (es)": 57.35,
+ "PawsXPairClassification (fr)": 59.6,
+ "PawsXPairClassification (ja)": 51.79,
+ "PawsXPairClassification (ko)": 52.33,
+ "PawsXPairClassification (zh)": 57.11,
+ "SprintDuplicateQuestions": 97.33,
+ "TERRa": 60.6,
+ "TwitterURLCorpus": 85.89
}
]
},
@@ -834,11 +3553,13 @@
"map": [
{
"Model": "bge-m3",
- "MIRACLReranking (ru)": 65.38
+ "AlloprofReranking": 73.87,
+ "RuBQReranking": 74.03,
+ "T2Reranking": 66.83
},
{
"Model": "bge-m3",
- "RuBQReranking": 74.03
+ "MIRACLReranking (ru)": 65.38
}
]
},
@@ -846,13 +3567,17 @@
"ndcg_at_10": [
{
"Model": "bge-m3",
+ "AILAStatutes": 29.04,
"ARCChallenge": 9.02,
"AlphaNLI": 24.73,
+ "ArguAna": 54.04,
+ "CovidRetrieval": 77.51,
"HellaSwag": 25.67,
"LEMBNarrativeQARetrieval": 45.76,
"LEMBQMSumRetrieval": 35.54,
"LEMBSummScreenFDRetrieval": 94.09,
"LEMBWikimQARetrieval": 77.73,
+ "LegalBenchCorporateLobbying": 90.34,
"MIRACLRetrieval (ru)": 70.16,
"PIQA": 22.93,
"Quail": 7.51,
@@ -860,8 +3585,11 @@
"RARbMath": 69.19,
"RiaNewsRetrieval": 82.99,
"RuBQRetrieval": 71.22,
+ "SCIDOCS": 16.31,
"SIQA": 4.89,
"SpartQA": 7.49,
+ "StackOverflowQA": 80.6,
+ "TRECCOVID": 54.72,
"TempReasonL1": 0.99,
"TempReasonL2Fact": 33.23,
"TempReasonL2Pure": 0.68,
@@ -875,10 +3603,51 @@
"cosine_spearman": [
{
"Model": "bge-m3",
+ "GermanSTSBenchmark": 80.79,
"RUParaPhraserSTS": 74.9,
"RuSTSBenchmarkSTS": 79.87,
+ "SICK-R": 79.72,
+ "STS12": 78.73,
+ "STS13": 79.6,
+ "STS14": 79.0,
+ "STS15": 87.81,
+ "STS17 (en-en)": 87.13,
+ "STS17 (es-es)": 87.34,
+ "STS17 (en-de)": 82.24,
+ "STS17 (en-tr)": 72.52,
+ "STS17 (en-ar)": 69.41,
+ "STS17 (ar-ar)": 80.74,
+ "STS17 (es-en)": 75.57,
+ "STS17 (nl-en)": 80.82,
+ "STS17 (fr-en)": 79.8,
+ "STS17 (it-en)": 79.12,
+ "STS17 (ko-ko)": 81.43,
"STS22 (ru)": 66.26,
+ "STSB": 80.6,
+ "STSBenchmark": 84.87,
"STSBenchmarkMultilingualSTS (ru)": 79.27
+ },
+ {
+ "Model": "bge-m3",
+ "GermanSTSBenchmark": 80.79,
+ "SICK-R": 79.72,
+ "STS12": 78.73,
+ "STS13": 79.6,
+ "STS14": 79.0,
+ "STS15": 87.81,
+ "STS17 (en-en)": 87.13,
+ "STS17 (es-es)": 87.34,
+ "STS17 (en-de)": 82.24,
+ "STS17 (en-tr)": 72.52,
+ "STS17 (en-ar)": 69.41,
+ "STS17 (ar-ar)": 80.74,
+ "STS17 (es-en)": 75.57,
+ "STS17 (nl-en)": 80.82,
+ "STS17 (fr-en)": 79.8,
+ "STS17 (it-en)": 79.12,
+ "STS17 (ko-ko)": 81.43,
+ "STSB": 80.6,
+ "STSBenchmark": 84.87
}
]
},
@@ -895,7 +3664,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "bge-m3",
+ "Core17InstructionRetrieval": -1.25,
+ "News21InstructionRetrieval": -1.39,
+ "Robust04InstructionRetrieval": -6.69
+ }
+ ]
}
},
"BAAI__bge-m3-instruct": {
@@ -951,53 +3727,708 @@
},
"BAAI__bge-small-en-v1.5": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "BornholmBitextMining": 33.92,
+ "Tatoeba (arq-eng)": 0.39,
+ "Tatoeba (lfn-eng)": 9.26,
+ "Tatoeba (rus-eng)": 0.21,
+ "Tatoeba (ceb-eng)": 3.58,
+ "Tatoeba (cym-eng)": 6.99,
+ "Tatoeba (ile-eng)": 19.35,
+ "Tatoeba (nds-eng)": 11.51,
+ "Tatoeba (wuu-eng)": 1.9,
+ "Tatoeba (amh-eng)": 0.91,
+ "Tatoeba (max-eng)": 10.36,
+ "Tatoeba (jpn-eng)": 0.94,
+ "Tatoeba (tzl-eng)": 15.44,
+ "Tatoeba (xho-eng)": 3.87,
+ "Tatoeba (tur-eng)": 4.16,
+ "Tatoeba (urd-eng)": 0.03,
+ "Tatoeba (hrv-eng)": 5.55,
+ "Tatoeba (khm-eng)": 0.15,
+ "Tatoeba (gla-eng)": 2.71,
+ "Tatoeba (fao-eng)": 7.03,
+ "Tatoeba (kor-eng)": 0.85,
+ "Tatoeba (por-eng)": 10.93,
+ "Tatoeba (tat-eng)": 0.77,
+ "Tatoeba (pam-eng)": 4.81,
+ "Tatoeba (ces-eng)": 3.41,
+ "Tatoeba (uzb-eng)": 3.77,
+ "Tatoeba (yue-eng)": 1.18,
+ "Tatoeba (orv-eng)": 0.09,
+ "Tatoeba (aze-eng)": 3.68,
+ "Tatoeba (ang-eng)": 17.22,
+ "Tatoeba (est-eng)": 2.11,
+ "Tatoeba (kat-eng)": 0.58,
+ "Tatoeba (lvs-eng)": 3.5,
+ "Tatoeba (nob-eng)": 8.64,
+ "Tatoeba (epo-eng)": 9.42,
+ "Tatoeba (cor-eng)": 2.88,
+ "Tatoeba (yid-eng)": 0.21,
+ "Tatoeba (dan-eng)": 9.8,
+ "Tatoeba (hsb-eng)": 4.79,
+ "Tatoeba (bos-eng)": 7.18,
+ "Tatoeba (ara-eng)": 0.17,
+ "Tatoeba (swg-eng)": 9.47,
+ "Tatoeba (kab-eng)": 1.03,
+ "Tatoeba (eus-eng)": 7.21,
+ "Tatoeba (ido-eng)": 12.06,
+ "Tatoeba (glg-eng)": 12.43,
+ "Tatoeba (awa-eng)": 0.52,
+ "Tatoeba (hun-eng)": 4.22,
+ "Tatoeba (deu-eng)": 10.09,
+ "Tatoeba (nno-eng)": 6.76,
+ "Tatoeba (fra-eng)": 16.67,
+ "Tatoeba (tam-eng)": 0.08,
+ "Tatoeba (zsm-eng)": 6.99,
+ "Tatoeba (cmn-eng)": 2.47,
+ "Tatoeba (swh-eng)": 5.7,
+ "Tatoeba (ukr-eng)": 0.67,
+ "Tatoeba (gsw-eng)": 13.64,
+ "Tatoeba (ina-eng)": 24.66,
+ "Tatoeba (csb-eng)": 3.42,
+ "Tatoeba (oci-eng)": 8.07,
+ "Tatoeba (hin-eng)": 0.0,
+ "Tatoeba (ast-eng)": 13.66,
+ "Tatoeba (gle-eng)": 3.12,
+ "Tatoeba (bel-eng)": 1.22,
+ "Tatoeba (nld-eng)": 11.36,
+ "Tatoeba (mhr-eng)": 0.0,
+ "Tatoeba (isl-eng)": 3.78,
+ "Tatoeba (ron-eng)": 7.24,
+ "Tatoeba (mkd-eng)": 0.2,
+ "Tatoeba (fin-eng)": 3.49,
+ "Tatoeba (ind-eng)": 6.35,
+ "Tatoeba (cat-eng)": 12.93,
+ "Tatoeba (sqi-eng)": 5.09,
+ "Tatoeba (pms-eng)": 9.63,
+ "Tatoeba (tgl-eng)": 3.69,
+ "Tatoeba (lat-eng)": 7.06,
+ "Tatoeba (mar-eng)": 0.04,
+ "Tatoeba (srp-eng)": 2.25,
+ "Tatoeba (ben-eng)": 0.02,
+ "Tatoeba (afr-eng)": 6.77,
+ "Tatoeba (pol-eng)": 5.51,
+ "Tatoeba (mon-eng)": 1.6,
+ "Tatoeba (swe-eng)": 7.68,
+ "Tatoeba (bul-eng)": 0.56,
+ "Tatoeba (nov-eng)": 25.06,
+ "Tatoeba (uig-eng)": 0.59,
+ "Tatoeba (ell-eng)": 0.17,
+ "Tatoeba (hye-eng)": 0.41,
+ "Tatoeba (mal-eng)": 0.05,
+ "Tatoeba (dsb-eng)": 4.79,
+ "Tatoeba (slk-eng)": 3.97,
+ "Tatoeba (fry-eng)": 16.8,
+ "Tatoeba (dtp-eng)": 3.03,
+ "Tatoeba (kur-eng)": 7.53,
+ "Tatoeba (vie-eng)": 4.4,
+ "Tatoeba (war-eng)": 5.06,
+ "Tatoeba (kzj-eng)": 3.64,
+ "Tatoeba (ber-eng)": 4.47,
+ "Tatoeba (cbk-eng)": 10.19,
+ "Tatoeba (jav-eng)": 4.77,
+ "Tatoeba (slv-eng)": 3.89,
+ "Tatoeba (ita-eng)": 14.34,
+ "Tatoeba (tuk-eng)": 3.64,
+ "Tatoeba (arz-eng)": 0.06,
+ "Tatoeba (spa-eng)": 14.72,
+ "Tatoeba (bre-eng)": 4.08,
+ "Tatoeba (cha-eng)": 14.39,
+ "Tatoeba (tha-eng)": 1.28,
+ "Tatoeba (lit-eng)": 2.3,
+ "Tatoeba (kaz-eng)": 0.55,
+ "Tatoeba (heb-eng)": 0.47,
+ "Tatoeba (tel-eng)": 0.2,
+ "Tatoeba (pes-eng)": 0.63
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "AllegroReviews": 25.64,
+ "AmazonCounterfactualClassification (en-ext)": 71.81,
+ "AmazonCounterfactualClassification (en)": 71.84,
+ "AmazonCounterfactualClassification (de)": 58.53,
+ "AmazonCounterfactualClassification (ja)": 58.03,
+ "AmazonReviewsClassification (en)": 49.19,
+ "AmazonReviewsClassification (de)": 26.82,
+ "AmazonReviewsClassification (es)": 32.81,
+ "AmazonReviewsClassification (fr)": 28.3,
+ "AmazonReviewsClassification (ja)": 23.8,
+ "AmazonReviewsClassification (zh)": 23.54,
+ "AngryTweetsClassification": 43.31,
+ "CBD": 51.9,
+ "DanishPoliticalCommentsClassification": 28.74,
+ "GeoreviewClassification": 27.23,
+ "HeadlineClassification": 29.75,
+ "InappropriatenessClassification": 51.22,
+ "KinopoiskClassification": 35.31,
+ "LccSentimentClassification": 38.53,
+ "MTOPDomainClassification (en)": 92.31,
+ "MTOPDomainClassification (de)": 69.72,
+ "MTOPDomainClassification (es)": 74.81,
+ "MTOPDomainClassification (fr)": 75.49,
+ "MTOPDomainClassification (hi)": 26.01,
+ "MTOPDomainClassification (th)": 16.95,
+ "MTOPIntentClassification (en)": 63.87,
+ "MTOPIntentClassification (de)": 40.14,
+ "MTOPIntentClassification (es)": 40.36,
+ "MTOPIntentClassification (fr)": 37.32,
+ "MTOPIntentClassification (hi)": 9.4,
+ "MTOPIntentClassification (th)": 5.26,
+ "MasakhaNEWSClassification (amh)": 35.05,
+ "MasakhaNEWSClassification (eng)": 80.13,
+ "MasakhaNEWSClassification (fra)": 76.35,
+ "MasakhaNEWSClassification (hau)": 60.16,
+ "MasakhaNEWSClassification (ibo)": 56.97,
+ "MasakhaNEWSClassification (lin)": 69.37,
+ "MasakhaNEWSClassification (lug)": 57.49,
+ "MasakhaNEWSClassification (orm)": 60.52,
+ "MasakhaNEWSClassification (pcm)": 93.93,
+ "MasakhaNEWSClassification (run)": 63.23,
+ "MasakhaNEWSClassification (sna)": 73.77,
+ "MasakhaNEWSClassification (som)": 52.99,
+ "MasakhaNEWSClassification (swa)": 55.53,
+ "MasakhaNEWSClassification (tir)": 27.46,
+ "MasakhaNEWSClassification (xho)": 63.64,
+ "MasakhaNEWSClassification (yor)": 63.63,
+ "MassiveIntentClassification (de)": 40.71,
+ "MassiveIntentClassification (it)": 42.67,
+ "MassiveIntentClassification (kn)": 3.33,
+ "MassiveIntentClassification (am)": 2.47,
+ "MassiveIntentClassification (af)": 36.68,
+ "MassiveIntentClassification (tl)": 38.83,
+ "MassiveIntentClassification (ro)": 40.92,
+ "MassiveIntentClassification (zh-CN)": 21.95,
+ "MassiveIntentClassification (is)": 34.1,
+ "MassiveIntentClassification (ml)": 2.65,
+ "MassiveIntentClassification (ru)": 24.69,
+ "MassiveIntentClassification (pt)": 44.25,
+ "MassiveIntentClassification (mn)": 21.87,
+ "MassiveIntentClassification (my)": 4.7,
+ "MassiveIntentClassification (bn)": 10.08,
+ "MassiveIntentClassification (te)": 2.3,
+ "MassiveIntentClassification (nl)": 38.9,
+ "MassiveIntentClassification (fi)": 40.13,
+ "MassiveIntentClassification (sw)": 35.33,
+ "MassiveIntentClassification (vi)": 36.18,
+ "MassiveIntentClassification (lv)": 38.48,
+ "MassiveIntentClassification (fa)": 21.05,
+ "MassiveIntentClassification (sl)": 38.52,
+ "MassiveIntentClassification (ja)": 27.8,
+ "MassiveIntentClassification (ms)": 36.06,
+ "MassiveIntentClassification (hy)": 9.7,
+ "MassiveIntentClassification (da)": 43.48,
+ "MassiveIntentClassification (km)": 4.7,
+ "MassiveIntentClassification (el)": 23.78,
+ "MassiveIntentClassification (jv)": 34.78,
+ "MassiveIntentClassification (ur)": 9.98,
+ "MassiveIntentClassification (tr)": 38.43,
+ "MassiveIntentClassification (he)": 18.24,
+ "MassiveIntentClassification (nb)": 40.05,
+ "MassiveIntentClassification (en)": 70.76,
+ "MassiveIntentClassification (fr)": 41.51,
+ "MassiveIntentClassification (sv)": 40.35,
+ "MassiveIntentClassification (ar)": 18.67,
+ "MassiveIntentClassification (th)": 12.11,
+ "MassiveIntentClassification (ko)": 17.23,
+ "MassiveIntentClassification (az)": 37.24,
+ "MassiveIntentClassification (pl)": 39.01,
+ "MassiveIntentClassification (es)": 40.74,
+ "MassiveIntentClassification (hi)": 8.72,
+ "MassiveIntentClassification (hu)": 38.68,
+ "MassiveIntentClassification (ka)": 11.32,
+ "MassiveIntentClassification (ta)": 10.09,
+ "MassiveIntentClassification (cy)": 34.79,
+ "MassiveIntentClassification (id)": 40.08,
+ "MassiveIntentClassification (sq)": 40.64,
+ "MassiveIntentClassification (zh-TW)": 20.78,
+ "MassiveScenarioClassification (fi)": 44.7,
+ "MassiveScenarioClassification (sl)": 44.36,
+ "MassiveScenarioClassification (zh-CN)": 32.17,
+ "MassiveScenarioClassification (th)": 22.44,
+ "MassiveScenarioClassification (jv)": 43.42,
+ "MassiveScenarioClassification (ms)": 42.98,
+ "MassiveScenarioClassification (ka)": 17.01,
+ "MassiveScenarioClassification (fa)": 28.07,
+ "MassiveScenarioClassification (id)": 44.04,
+ "MassiveScenarioClassification (ko)": 23.89,
+ "MassiveScenarioClassification (pl)": 46.1,
+ "MassiveScenarioClassification (az)": 43.56,
+ "MassiveScenarioClassification (af)": 45.56,
+ "MassiveScenarioClassification (he)": 22.47,
+ "MassiveScenarioClassification (da)": 51.6,
+ "MassiveScenarioClassification (ru)": 29.84,
+ "MassiveScenarioClassification (mn)": 28.31,
+ "MassiveScenarioClassification (nl)": 48.31,
+ "MassiveScenarioClassification (ml)": 7.97,
+ "MassiveScenarioClassification (lv)": 42.93,
+ "MassiveScenarioClassification (zh-TW)": 31.33,
+ "MassiveScenarioClassification (sq)": 48.7,
+ "MassiveScenarioClassification (kn)": 8.03,
+ "MassiveScenarioClassification (te)": 7.81,
+ "MassiveScenarioClassification (tr)": 43.77,
+ "MassiveScenarioClassification (ur)": 17.86,
+ "MassiveScenarioClassification (hy)": 16.49,
+ "MassiveScenarioClassification (nb)": 47.72,
+ "MassiveScenarioClassification (ja)": 36.78,
+ "MassiveScenarioClassification (sw)": 44.31,
+ "MassiveScenarioClassification (tl)": 48.6,
+ "MassiveScenarioClassification (ro)": 51.26,
+ "MassiveScenarioClassification (en)": 75.21,
+ "MassiveScenarioClassification (cy)": 40.22,
+ "MassiveScenarioClassification (am)": 7.53,
+ "MassiveScenarioClassification (el)": 33.87,
+ "MassiveScenarioClassification (fr)": 54.25,
+ "MassiveScenarioClassification (hi)": 14.9,
+ "MassiveScenarioClassification (it)": 51.93,
+ "MassiveScenarioClassification (km)": 9.89,
+ "MassiveScenarioClassification (pt)": 53.39,
+ "MassiveScenarioClassification (de)": 49.62,
+ "MassiveScenarioClassification (ar)": 26.89,
+ "MassiveScenarioClassification (my)": 10.37,
+ "MassiveScenarioClassification (sv)": 46.86,
+ "MassiveScenarioClassification (ta)": 17.11,
+ "MassiveScenarioClassification (vi)": 42.1,
+ "MassiveScenarioClassification (hu)": 44.92,
+ "MassiveScenarioClassification (bn)": 15.29,
+ "MassiveScenarioClassification (is)": 43.11,
+ "MassiveScenarioClassification (es)": 51.67,
+ "NoRecClassification": 37.22,
+ "NordicLangClassification": 52.02,
+ "PAC": 57.42,
+ "PolEmo2.0-IN": 42.92,
+ "PolEmo2.0-OUT": 24.05,
+ "RuReviewsClassification": 43.95,
+ "RuSciBenchGRNTIClassification": 13.96,
+ "RuSciBenchOECDClassification": 10.76,
+ "ToxicConversationsClassification": 65.48
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "AlloProfClusteringP2P": 58.48,
+ "AlloProfClusteringS2S": 33.09,
+ "BlurbsClusteringP2P": 23.89,
+ "BlurbsClusteringS2S": 10.61,
+ "GeoreviewClusteringP2P": 21.15,
+ "HALClusteringS2S": 21.68,
+ "MLSUMClusteringP2P (de)": 37.74,
+ "MLSUMClusteringP2P (fr)": 40.52,
+ "MLSUMClusteringP2P (ru)": 21.86,
+ "MLSUMClusteringP2P (es)": 40.18,
+ "MLSUMClusteringS2S (de)": 37.93,
+ "MLSUMClusteringS2S (fr)": 39.47,
+ "MLSUMClusteringS2S (ru)": 21.02,
+ "MLSUMClusteringS2S (es)": 39.84,
+ "MasakhaNEWSClusteringP2P (amh)": 41.69,
+ "MasakhaNEWSClusteringP2P (eng)": 63.38,
+ "MasakhaNEWSClusteringP2P (fra)": 58.71,
+ "MasakhaNEWSClusteringP2P (hau)": 39.11,
+ "MasakhaNEWSClusteringP2P (ibo)": 52.02,
+ "MasakhaNEWSClusteringP2P (lin)": 54.46,
+ "MasakhaNEWSClusteringP2P (lug)": 54.67,
+ "MasakhaNEWSClusteringP2P (orm)": 33.71,
+ "MasakhaNEWSClusteringP2P (pcm)": 80.96,
+ "MasakhaNEWSClusteringP2P (run)": 58.78,
+ "MasakhaNEWSClusteringP2P (sna)": 55.31,
+ "MasakhaNEWSClusteringP2P (som)": 32.43,
+ "MasakhaNEWSClusteringP2P (swa)": 26.27,
+ "MasakhaNEWSClusteringP2P (tir)": 42.83,
+ "MasakhaNEWSClusteringP2P (xho)": 31.5,
+ "MasakhaNEWSClusteringP2P (yor)": 35.94,
+ "MasakhaNEWSClusteringS2S (amh)": 41.91,
+ "MasakhaNEWSClusteringS2S (eng)": 32.88,
+ "MasakhaNEWSClusteringS2S (fra)": 48.14,
+ "MasakhaNEWSClusteringS2S (hau)": 13.35,
+ "MasakhaNEWSClusteringS2S (ibo)": 40.28,
+ "MasakhaNEWSClusteringS2S (lin)": 60.73,
+ "MasakhaNEWSClusteringS2S (lug)": 50.01,
+ "MasakhaNEWSClusteringS2S (orm)": 21.4,
+ "MasakhaNEWSClusteringS2S (pcm)": 67.86,
+ "MasakhaNEWSClusteringS2S (run)": 56.95,
+ "MasakhaNEWSClusteringS2S (sna)": 48.25,
+ "MasakhaNEWSClusteringS2S (som)": 26.53,
+ "MasakhaNEWSClusteringS2S (swa)": 19.1,
+ "MasakhaNEWSClusteringS2S (tir)": 42.42,
+ "MasakhaNEWSClusteringS2S (xho)": 26.22,
+ "MasakhaNEWSClusteringS2S (yor)": 32.34,
+ "RuSciBenchGRNTIClusteringP2P": 14.22,
+ "RuSciBenchOECDClusteringP2P": 12.15,
+ "TenKGnadClusteringP2P": 41.47,
+ "TenKGnadClusteringS2S": 20.34
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "CDSC-E": 52.32,
+ "FalseFriendsGermanEnglish": 48.35,
+ "OpusparcusPC (de)": 89.96,
+ "OpusparcusPC (en)": 98.67,
+ "OpusparcusPC (fi)": 85.45,
+ "OpusparcusPC (fr)": 87.13,
+ "OpusparcusPC (ru)": 78.74,
+ "OpusparcusPC (sv)": 83.59,
+ "PSC": 91.06,
+ "PawsXPairClassification (de)": 51.35,
+ "PawsXPairClassification (en)": 56.61,
+ "PawsXPairClassification (es)": 52.88,
+ "PawsXPairClassification (fr)": 53.94,
+ "PawsXPairClassification (ja)": 49.31,
+ "PawsXPairClassification (ko)": 51.04,
+ "PawsXPairClassification (zh)": 53.16,
+ "SICK-E-PL": 47.15,
+ "SprintDuplicateQuestions": 96.67,
+ "TERRa": 44.15,
+ "TwitterURLCorpus": 84.84
+ },
+ {
+ "Model": "bge-small-en-v1.5",
+ "CDSC-E": 52.37,
+ "FalseFriendsGermanEnglish": 48.44,
+ "OpusparcusPC (de)": 89.96,
+ "OpusparcusPC (en)": 98.67,
+ "OpusparcusPC (fi)": 85.45,
+ "OpusparcusPC (fr)": 87.13,
+ "OpusparcusPC (ru)": 78.74,
+ "OpusparcusPC (sv)": 83.59,
+ "PSC": 91.06,
+ "PawsXPairClassification (de)": 51.35,
+ "PawsXPairClassification (en)": 56.77,
+ "PawsXPairClassification (es)": 52.91,
+ "PawsXPairClassification (fr)": 53.94,
+ "PawsXPairClassification (ja)": 49.56,
+ "PawsXPairClassification (ko)": 51.04,
+ "PawsXPairClassification (zh)": 53.27,
+ "SICK-E-PL": 47.23,
+ "SprintDuplicateQuestions": 96.67,
+ "TERRa": 44.15,
+ "TwitterURLCorpus": 84.84
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "AlloprofReranking": 61.74,
+ "RuBQReranking": 38.37,
+ "SyntecReranking": 69.4,
+ "T2Reranking": 63.0
+ },
+ {
+ "Model": "bge-small-en-v1.5",
+ "MIRACLReranking (ar)": 8.5,
+ "MIRACLReranking (bn)": 9.78,
+ "MIRACLReranking (de)": 26.04,
+ "MIRACLReranking (en)": 54.81,
+ "MIRACLReranking (es)": 36.01,
+ "MIRACLReranking (fa)": 8.72,
+ "MIRACLReranking (fi)": 44.66,
+ "MIRACLReranking (fr)": 27.89,
+ "MIRACLReranking (hi)": 8.33,
+ "MIRACLReranking (id)": 28.47,
+ "MIRACLReranking (ja)": 16.44,
+ "MIRACLReranking (ko)": 13.52,
+ "MIRACLReranking (ru)": 15.81,
+ "MIRACLReranking (sw)": 37.39,
+ "MIRACLReranking (te)": 2.58,
+ "MIRACLReranking (th)": 5.14,
+ "MIRACLReranking (yo)": 57.13,
+ "MIRACLReranking (zh)": 14.4
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "bge-small-en-v1.5",
+ "AILACasedocs": 23.5,
+ "AILAStatutes": 23.0,
"ARCChallenge": 8.95,
+ "AlloprofRetrieval": 30.77,
"AlphaNLI": 11.64,
+ "AppsRetrieval": 5.64,
+ "ArguAna": 60.35,
+ "BSARDRetrieval": 12.89,
+ "CmedqaRetrieval": 2.13,
+ "CodeFeedbackMT": 35.09,
+ "CodeFeedbackST": 67.79,
+ "CodeSearchNetCCRetrieval (python)": 56.45,
+ "CodeSearchNetCCRetrieval (javascript)": 50.45,
+ "CodeSearchNetCCRetrieval (go)": 28.97,
+ "CodeSearchNetCCRetrieval (ruby)": 55.13,
+ "CodeSearchNetCCRetrieval (java)": 53.71,
+ "CodeSearchNetCCRetrieval (php)": 42.44,
+ "CodeSearchNetRetrieval (python)": 88.79,
+ "CodeSearchNetRetrieval (javascript)": 73.73,
+ "CodeSearchNetRetrieval (go)": 94.51,
+ "CodeSearchNetRetrieval (ruby)": 79.53,
+ "CodeSearchNetRetrieval (java)": 83.33,
+ "CodeSearchNetRetrieval (php)": 87.08,
+ "CodeTransOceanContest": 48.22,
+ "CodeTransOceanDL": 25.67,
+ "CosQA": 32.04,
+ "CovidRetrieval": 14.97,
+ "GerDaLIR": 0.87,
+ "GerDaLIRSmall": 2.33,
+ "GermanQuAD-Retrieval": 75.24,
"HellaSwag": 25.44,
+ "LEMBNarrativeQARetrieval": 22.18,
+ "LEMBQMSumRetrieval": 21.29,
+ "LEMBSummScreenFDRetrieval": 57.75,
+ "LEMBWikimQARetrieval": 43.52,
+ "LeCaRDv2": 20.06,
+ "LegalBenchConsumerContractsQA": 73.37,
+ "LegalBenchCorporateLobbying": 90.32,
+ "LegalQuAD": 11.74,
+ "LegalSummarization": 61.49,
+ "MIRACLRetrieval (ar)": 0.2,
+ "MIRACLRetrieval (bn)": 0.13,
+ "MIRACLRetrieval (de)": 14.63,
+ "MIRACLRetrieval (en)": 46.02,
+ "MIRACLRetrieval (es)": 22.74,
+ "MIRACLRetrieval (fa)": 0.1,
+ "MIRACLRetrieval (fi)": 30.02,
+ "MIRACLRetrieval (fr)": 17.19,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 17.85,
+ "MIRACLRetrieval (ja)": 3.71,
+ "MIRACLRetrieval (ko)": 3.69,
+ "MIRACLRetrieval (ru)": 2.42,
+ "MIRACLRetrieval (sw)": 30.78,
+ "MIRACLRetrieval (te)": 0.06,
+ "MIRACLRetrieval (th)": 0.47,
+ "MIRACLRetrieval (yo)": 54.53,
+ "MIRACLRetrieval (zh)": 0.82,
+ "MintakaRetrieval (ar)": 1.54,
+ "MintakaRetrieval (de)": 18.36,
+ "MintakaRetrieval (es)": 17.71,
+ "MintakaRetrieval (fr)": 18.69,
+ "MintakaRetrieval (hi)": 2.74,
+ "MintakaRetrieval (it)": 16.13,
+ "MintakaRetrieval (ja)": 8.54,
+ "MintakaRetrieval (pt)": 17.06,
+ "NFCorpus": 34.26,
"PIQA": 23.92,
"Quail": 1.75,
"RARbCode": 42.36,
"RARbMath": 44.98,
+ "RiaNewsRetrieval": 10.58,
+ "RuBQRetrieval": 8.17,
+ "SCIDOCS": 20.52,
"SIQA": 0.77,
+ "SciFact": 71.27,
+ "SciFact-PL": 30.48,
"SpartQA": 3.55,
+ "StackOverflowQA": 78.05,
+ "SyntecRetrieval": 60.74,
+ "SyntheticText2SQL": 45.12,
+ "TRECCOVID": 75.53,
+ "TRECCOVID-PL": 24.85,
"TempReasonL1": 1.41,
"TempReasonL2Fact": 17.56,
"TempReasonL2Pure": 1.05,
"TempReasonL3Fact": 13.88,
"TempReasonL3Pure": 4.76,
- "WinoGrande": 10.28
+ "WinoGrande": 10.28,
+ "XMarket (de)": 13.22,
+ "XMarket (en)": 31.95,
+ "XMarket (es)": 14.97,
+ "XPQARetrieval (ara-ara)": 7.5,
+ "XPQARetrieval (eng-ara)": 3.82,
+ "XPQARetrieval (ara-eng)": 8.35,
+ "XPQARetrieval (deu-deu)": 56.72,
+ "XPQARetrieval (eng-deu)": 11.29,
+ "XPQARetrieval (deu-eng)": 27.53,
+ "XPQARetrieval (spa-spa)": 41.62,
+ "XPQARetrieval (eng-spa)": 10.11,
+ "XPQARetrieval (spa-eng)": 22.43,
+ "XPQARetrieval (fra-fra)": 51.78,
+ "XPQARetrieval (eng-fra)": 12.23,
+ "XPQARetrieval (fra-eng)": 29.32,
+ "XPQARetrieval (hin-hin)": 19.32,
+ "XPQARetrieval (eng-hin)": 5.74,
+ "XPQARetrieval (hin-eng)": 5.66,
+ "XPQARetrieval (ita-ita)": 60.16,
+ "XPQARetrieval (eng-ita)": 9.89,
+ "XPQARetrieval (ita-eng)": 25.8,
+ "XPQARetrieval (jpn-jpn)": 39.11,
+ "XPQARetrieval (eng-jpn)": 5.05,
+ "XPQARetrieval (jpn-eng)": 14.37,
+ "XPQARetrieval (kor-kor)": 12.77,
+ "XPQARetrieval (eng-kor)": 7.18,
+ "XPQARetrieval (kor-eng)": 6.38,
+ "XPQARetrieval (pol-pol)": 30.33,
+ "XPQARetrieval (eng-pol)": 9.87,
+ "XPQARetrieval (pol-eng)": 17.83,
+ "XPQARetrieval (por-por)": 37.41,
+ "XPQARetrieval (eng-por)": 8.41,
+ "XPQARetrieval (por-eng)": 22.67,
+ "XPQARetrieval (tam-tam)": 10.25,
+ "XPQARetrieval (eng-tam)": 4.19,
+ "XPQARetrieval (tam-eng)": 3.42,
+ "XPQARetrieval (cmn-cmn)": 24.2,
+ "XPQARetrieval (eng-cmn)": 7.81,
+ "XPQARetrieval (cmn-eng)": 15.38
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "CDSC-R": 84.98,
+ "GermanSTSBenchmark": 64.25,
+ "RUParaPhraserSTS": 44.65,
+ "RuSTSBenchmarkSTS": 57.44,
+ "SICK-R": 79.41,
+ "SICK-R-PL": 54.12,
+ "SICKFr": 62.43,
+ "STS12": 77.44,
+ "STS13": 82.98,
+ "STS14": 81.84,
+ "STS15": 87.27,
+ "STS17 (en-de)": 28.84,
+ "STS17 (fr-en)": 35.02,
+ "STS17 (en-ar)": 6.64,
+ "STS17 (en-tr)": 13.56,
+ "STS17 (it-en)": 28.17,
+ "STS17 (es-en)": 28.15,
+ "STS17 (ar-ar)": 49.71,
+ "STS17 (nl-en)": 31.57,
+ "STS17 (ko-ko)": 45.13,
+ "STS17 (es-es)": 77.73,
+ "STS17 (en-en)": 87.14,
+ "STS22 (es-en)": 58.66,
+ "STS22 (de-pl)": 18.17,
+ "STS22 (zh-en)": 48.61,
+ "STS22 (pl)": 31.95,
+ "STS22 (de)": 32.8,
+ "STS22 (ru)": 17.88,
+ "STS22 (de-en)": 44.93,
+ "STS22 (zh)": 47.48,
+ "STS22 (tr)": 44.15,
+ "STS22 (it)": 63.98,
+ "STS22 (de-fr)": 44.7,
+ "STS22 (fr-pl)": 61.98,
+ "STS22 (es)": 55.47,
+ "STS22 (en)": 65.79,
+ "STS22 (ar)": 16.42,
+ "STS22 (fr)": 73.95,
+ "STS22 (pl-en)": 37.46,
+ "STS22 (es-it)": 50.23,
+ "STSB": 40.8,
+ "STSBenchmark": 85.86,
+ "STSBenchmarkMultilingualSTS (it)": 65.48,
+ "STSBenchmarkMultilingualSTS (zh)": 41.47,
+ "STSBenchmarkMultilingualSTS (en)": 85.86,
+ "STSBenchmarkMultilingualSTS (de)": 64.6,
+ "STSBenchmarkMultilingualSTS (ru)": 57.13,
+ "STSBenchmarkMultilingualSTS (pt)": 65.68,
+ "STSBenchmarkMultilingualSTS (es)": 66.5,
+ "STSBenchmarkMultilingualSTS (nl)": 61.82,
+ "STSBenchmarkMultilingualSTS (fr)": 63.67,
+ "STSBenchmarkMultilingualSTS (pl)": 60.25
+ },
+ {
+ "Model": "bge-small-en-v1.5",
+ "CDSC-R": 84.98,
+ "GermanSTSBenchmark": 64.25,
+ "RUParaPhraserSTS": 44.65,
+ "RuSTSBenchmarkSTS": 57.44,
+ "SICK-R": 79.41,
+ "SICK-R-PL": 54.12,
+ "SICKFr": 62.43,
+ "STS12": 77.44,
+ "STS13": 82.98,
+ "STS14": 81.84,
+ "STS15": 87.27,
+ "STS17 (en-de)": 28.84,
+ "STS17 (fr-en)": 35.02,
+ "STS17 (en-ar)": 6.64,
+ "STS17 (en-tr)": 13.56,
+ "STS17 (it-en)": 28.17,
+ "STS17 (es-en)": 28.15,
+ "STS17 (ar-ar)": 49.71,
+ "STS17 (nl-en)": 31.57,
+ "STS17 (ko-ko)": 45.13,
+ "STS17 (es-es)": 77.73,
+ "STS17 (en-en)": 87.14,
+ "STS22 (es-en)": 58.66,
+ "STS22 (de-pl)": 18.17,
+ "STS22 (zh-en)": 48.61,
+ "STS22 (pl)": 31.94,
+ "STS22 (de)": 32.79,
+ "STS22 (ru)": 17.88,
+ "STS22 (de-en)": 44.93,
+ "STS22 (zh)": 47.48,
+ "STS22 (tr)": 44.15,
+ "STS22 (it)": 63.98,
+ "STS22 (de-fr)": 44.7,
+ "STS22 (fr-pl)": 61.98,
+ "STS22 (es)": 55.47,
+ "STS22 (en)": 65.79,
+ "STS22 (ar)": 16.42,
+ "STS22 (fr)": 73.95,
+ "STS22 (pl-en)": 37.46,
+ "STS22 (es-it)": 50.23,
+ "STSB": 40.8,
+ "STSBenchmark": 85.86,
+ "STSBenchmarkMultilingualSTS (it)": 65.48,
+ "STSBenchmarkMultilingualSTS (zh)": 41.46,
+ "STSBenchmarkMultilingualSTS (en)": 85.86,
+ "STSBenchmarkMultilingualSTS (de)": 64.6,
+ "STSBenchmarkMultilingualSTS (ru)": 57.13,
+ "STSBenchmarkMultilingualSTS (pt)": 65.68,
+ "STSBenchmarkMultilingualSTS (es)": 66.5,
+ "STSBenchmarkMultilingualSTS (nl)": 61.82,
+ "STSBenchmarkMultilingualSTS (fr)": 63.67,
+ "STSBenchmarkMultilingualSTS (pl)": 60.25
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "SummEvalFr": 29.59
+ },
+ {
+ "Model": "bge-small-en-v1.5",
+ "SummEvalFr": 29.59
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "CEDRClassification": 36.1,
+ "SensitiveTopicsClassification": 18.09
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "bge-small-en-v1.5",
+ "Core17InstructionRetrieval": 1.51,
+ "News21InstructionRetrieval": -1.32,
+ "Robust04InstructionRetrieval": -5.77
+ }
+ ]
}
},
"BAAI__bge-small-en-v1.5-instruct": {
@@ -1149,28 +4580,435 @@
},
"Cohere__Cohere-embed-english-v3.0": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "BornholmBitextMining": 40.85,
+ "Tatoeba (wuu-eng)": 2.65,
+ "Tatoeba (amh-eng)": 0.01,
+ "Tatoeba (ell-eng)": 0.74,
+ "Tatoeba (jpn-eng)": 2.11,
+ "Tatoeba (cym-eng)": 7.26,
+ "Tatoeba (fin-eng)": 6.23,
+ "Tatoeba (bel-eng)": 3.28,
+ "Tatoeba (aze-eng)": 5.38,
+ "Tatoeba (cat-eng)": 27.67,
+ "Tatoeba (eus-eng)": 8.12,
+ "Tatoeba (yid-eng)": 0.35,
+ "Tatoeba (ast-eng)": 34.32,
+ "Tatoeba (ron-eng)": 18.57,
+ "Tatoeba (gle-eng)": 3.57,
+ "Tatoeba (vie-eng)": 6.03,
+ "Tatoeba (cbk-eng)": 28.59,
+ "Tatoeba (cor-eng)": 3.45,
+ "Tatoeba (epo-eng)": 17.08,
+ "Tatoeba (uzb-eng)": 4.59,
+ "Tatoeba (tam-eng)": 0.35,
+ "Tatoeba (ceb-eng)": 7.2,
+ "Tatoeba (max-eng)": 14.66,
+ "Tatoeba (sqi-eng)": 10.45,
+ "Tatoeba (ile-eng)": 35.53,
+ "Tatoeba (hrv-eng)": 14.32,
+ "Tatoeba (yue-eng)": 2.33,
+ "Tatoeba (por-eng)": 44.85,
+ "Tatoeba (swh-eng)": 7.11,
+ "Tatoeba (cmn-eng)": 3.42,
+ "Tatoeba (ber-eng)": 5.21,
+ "Tatoeba (tzl-eng)": 21.77,
+ "Tatoeba (fra-eng)": 44.84,
+ "Tatoeba (mkd-eng)": 1.39,
+ "Tatoeba (dsb-eng)": 10.77,
+ "Tatoeba (hin-eng)": 0.14,
+ "Tatoeba (kab-eng)": 1.43,
+ "Tatoeba (ind-eng)": 8.79,
+ "Tatoeba (tel-eng)": 0.22,
+ "Tatoeba (ang-eng)": 28.1,
+ "Tatoeba (lfn-eng)": 23.83,
+ "Tatoeba (tur-eng)": 5.85,
+ "Tatoeba (ara-eng)": 0.7,
+ "Tatoeba (nld-eng)": 28.42,
+ "Tatoeba (dtp-eng)": 3.79,
+ "Tatoeba (pam-eng)": 5.71,
+ "Tatoeba (ces-eng)": 7.45,
+ "Tatoeba (kat-eng)": 1.1,
+ "Tatoeba (tuk-eng)": 3.14,
+ "Tatoeba (bul-eng)": 3.45,
+ "Tatoeba (lit-eng)": 4.03,
+ "Tatoeba (fry-eng)": 25.55,
+ "Tatoeba (lat-eng)": 13.73,
+ "Tatoeba (war-eng)": 6.91,
+ "Tatoeba (swg-eng)": 17.59,
+ "Tatoeba (tgl-eng)": 8.22,
+ "Tatoeba (xho-eng)": 2.92,
+ "Tatoeba (arq-eng)": 0.63,
+ "Tatoeba (urd-eng)": 0.03,
+ "Tatoeba (zsm-eng)": 10.32,
+ "Tatoeba (kaz-eng)": 1.39,
+ "Tatoeba (pol-eng)": 10.17,
+ "Tatoeba (bre-eng)": 4.21,
+ "Tatoeba (ido-eng)": 23.31,
+ "Tatoeba (mon-eng)": 2.28,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (swe-eng)": 22.17,
+ "Tatoeba (gla-eng)": 3.13,
+ "Tatoeba (hun-eng)": 7.57,
+ "Tatoeba (dan-eng)": 27.16,
+ "Tatoeba (khm-eng)": 0.28,
+ "Tatoeba (mhr-eng)": 0.81,
+ "Tatoeba (ukr-eng)": 2.56,
+ "Tatoeba (uig-eng)": 0.6,
+ "Tatoeba (slk-eng)": 10.11,
+ "Tatoeba (ben-eng)": 0.05,
+ "Tatoeba (kzj-eng)": 5.02,
+ "Tatoeba (rus-eng)": 3.58,
+ "Tatoeba (isl-eng)": 8.1,
+ "Tatoeba (nov-eng)": 37.09,
+ "Tatoeba (srp-eng)": 7.44,
+ "Tatoeba (kor-eng)": 1.5,
+ "Tatoeba (hye-eng)": 0.67,
+ "Tatoeba (afr-eng)": 13.68,
+ "Tatoeba (lvs-eng)": 6.8,
+ "Tatoeba (deu-eng)": 54.19,
+ "Tatoeba (orv-eng)": 0.7,
+ "Tatoeba (oci-eng)": 16.78,
+ "Tatoeba (csb-eng)": 14.03,
+ "Tatoeba (kur-eng)": 9.75,
+ "Tatoeba (cha-eng)": 19.06,
+ "Tatoeba (nds-eng)": 23.19,
+ "Tatoeba (mar-eng)": 0.35,
+ "Tatoeba (slv-eng)": 11.7,
+ "Tatoeba (glg-eng)": 38.0,
+ "Tatoeba (hsb-eng)": 10.78,
+ "Tatoeba (fao-eng)": 17.84,
+ "Tatoeba (tat-eng)": 0.87,
+ "Tatoeba (bos-eng)": 15.81,
+ "Tatoeba (pms-eng)": 15.39,
+ "Tatoeba (est-eng)": 4.89,
+ "Tatoeba (awa-eng)": 0.34,
+ "Tatoeba (ina-eng)": 47.01,
+ "Tatoeba (tha-eng)": 1.23,
+ "Tatoeba (heb-eng)": 0.94,
+ "Tatoeba (ita-eng)": 30.21,
+ "Tatoeba (jav-eng)": 5.61,
+ "Tatoeba (pes-eng)": 0.71,
+ "Tatoeba (spa-eng)": 46.47,
+ "Tatoeba (gsw-eng)": 20.78,
+ "Tatoeba (nno-eng)": 17.71,
+ "Tatoeba (arz-eng)": 0.17,
+ "Tatoeba (nob-eng)": 24.8
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "AllegroReviews": 28.84,
+ "AmazonCounterfactualClassification (en-ext)": 78.05,
+ "AmazonCounterfactualClassification (en)": 80.64,
+ "AmazonCounterfactualClassification (de)": 56.48,
+ "AmazonCounterfactualClassification (ja)": 54.93,
+ "AmazonReviewsClassification (en)": 53.73,
+ "AmazonReviewsClassification (de)": 37.55,
+ "AmazonReviewsClassification (es)": 41.49,
+ "AmazonReviewsClassification (fr)": 39.11,
+ "AmazonReviewsClassification (ja)": 23.23,
+ "AmazonReviewsClassification (zh)": 23.82,
+ "AngryTweetsClassification": 48.63,
+ "CBD": 52.93,
+ "DanishPoliticalCommentsClassification": 30.06,
+ "GeoreviewClassification": 32.95,
+ "HeadlineClassification": 45.37,
+ "InappropriatenessClassification": 54.71,
+ "KinopoiskClassification": 36.31,
+ "LccSentimentClassification": 45.4,
+ "MTOPDomainClassification (en)": 94.71,
+ "MTOPDomainClassification (de)": 81.83,
+ "MTOPDomainClassification (es)": 84.21,
+ "MTOPDomainClassification (fr)": 82.72,
+ "MTOPDomainClassification (hi)": 38.3,
+ "MTOPDomainClassification (th)": 15.41,
+ "MTOPIntentClassification (en)": 68.08,
+ "MTOPIntentClassification (de)": 50.58,
+ "MTOPIntentClassification (es)": 50.62,
+ "MTOPIntentClassification (fr)": 43.71,
+ "MTOPIntentClassification (hi)": 17.12,
+ "MTOPIntentClassification (th)": 4.9,
+ "MasakhaNEWSClassification (amh)": 34.57,
+ "MasakhaNEWSClassification (eng)": 79.54,
+ "MasakhaNEWSClassification (fra)": 79.41,
+ "MasakhaNEWSClassification (hau)": 67.38,
+ "MasakhaNEWSClassification (ibo)": 63.87,
+ "MasakhaNEWSClassification (lin)": 76.29,
+ "MasakhaNEWSClassification (lug)": 66.14,
+ "MasakhaNEWSClassification (orm)": 64.4,
+ "MasakhaNEWSClassification (pcm)": 91.44,
+ "MasakhaNEWSClassification (run)": 69.29,
+ "MasakhaNEWSClassification (sna)": 79.35,
+ "MasakhaNEWSClassification (som)": 52.76,
+ "MasakhaNEWSClassification (swa)": 61.41,
+ "MasakhaNEWSClassification (tir)": 23.31,
+ "MasakhaNEWSClassification (xho)": 66.77,
+ "MasakhaNEWSClassification (yor)": 73.43,
+ "MassiveIntentClassification (el)": 33.62,
+ "MassiveIntentClassification (sl)": 40.83,
+ "MassiveIntentClassification (fa)": 32.68,
+ "MassiveIntentClassification (ta)": 13.08,
+ "MassiveIntentClassification (ml)": 1.96,
+ "MassiveIntentClassification (ar)": 26.91,
+ "MassiveIntentClassification (sq)": 44.17,
+ "MassiveIntentClassification (zh-TW)": 21.64,
+ "MassiveIntentClassification (fi)": 41.16,
+ "MassiveIntentClassification (nl)": 43.97,
+ "MassiveIntentClassification (hu)": 40.04,
+ "MassiveIntentClassification (tr)": 42.88,
+ "MassiveIntentClassification (ka)": 12.09,
+ "MassiveIntentClassification (pl)": 41.81,
+ "MassiveIntentClassification (vi)": 36.32,
+ "MassiveIntentClassification (hy)": 13.01,
+ "MassiveIntentClassification (sw)": 40.11,
+ "MassiveIntentClassification (fr)": 52.0,
+ "MassiveIntentClassification (id)": 44.09,
+ "MassiveIntentClassification (en)": 71.8,
+ "MassiveIntentClassification (ru)": 41.81,
+ "MassiveIntentClassification (cy)": 39.46,
+ "MassiveIntentClassification (az)": 40.37,
+ "MassiveIntentClassification (ms)": 41.36,
+ "MassiveIntentClassification (de)": 50.54,
+ "MassiveIntentClassification (ur)": 21.32,
+ "MassiveIntentClassification (kn)": 2.52,
+ "MassiveIntentClassification (it)": 49.63,
+ "MassiveIntentClassification (da)": 46.16,
+ "MassiveIntentClassification (am)": 2.64,
+ "MassiveIntentClassification (hi)": 19.98,
+ "MassiveIntentClassification (tl)": 44.05,
+ "MassiveIntentClassification (es)": 51.43,
+ "MassiveIntentClassification (mn)": 27.68,
+ "MassiveIntentClassification (km)": 3.45,
+ "MassiveIntentClassification (ro)": 45.61,
+ "MassiveIntentClassification (he)": 27.04,
+ "MassiveIntentClassification (af)": 42.61,
+ "MassiveIntentClassification (ja)": 33.04,
+ "MassiveIntentClassification (te)": 2.36,
+ "MassiveIntentClassification (bn)": 24.44,
+ "MassiveIntentClassification (zh-CN)": 22.45,
+ "MassiveIntentClassification (ko)": 23.43,
+ "MassiveIntentClassification (nb)": 43.9,
+ "MassiveIntentClassification (jv)": 39.44,
+ "MassiveIntentClassification (sv)": 43.98,
+ "MassiveIntentClassification (is)": 36.44,
+ "MassiveIntentClassification (th)": 11.25,
+ "MassiveIntentClassification (lv)": 41.86,
+ "MassiveIntentClassification (pt)": 51.93,
+ "MassiveIntentClassification (my)": 2.83,
+ "MassiveScenarioClassification (is)": 45.12,
+ "MassiveScenarioClassification (da)": 55.72,
+ "MassiveScenarioClassification (tl)": 53.33,
+ "MassiveScenarioClassification (zh-CN)": 31.89,
+ "MassiveScenarioClassification (el)": 44.14,
+ "MassiveScenarioClassification (sw)": 46.93,
+ "MassiveScenarioClassification (bn)": 32.0,
+ "MassiveScenarioClassification (fi)": 47.83,
+ "MassiveScenarioClassification (cy)": 46.83,
+ "MassiveScenarioClassification (pt)": 59.42,
+ "MassiveScenarioClassification (vi)": 42.58,
+ "MassiveScenarioClassification (th)": 20.07,
+ "MassiveScenarioClassification (kn)": 6.86,
+ "MassiveScenarioClassification (my)": 5.23,
+ "MassiveScenarioClassification (sl)": 48.23,
+ "MassiveScenarioClassification (fa)": 37.23,
+ "MassiveScenarioClassification (mn)": 32.36,
+ "MassiveScenarioClassification (ml)": 5.92,
+ "MassiveScenarioClassification (hi)": 24.26,
+ "MassiveScenarioClassification (az)": 46.84,
+ "MassiveScenarioClassification (ms)": 49.86,
+ "MassiveScenarioClassification (ka)": 19.95,
+ "MassiveScenarioClassification (hu)": 48.55,
+ "MassiveScenarioClassification (tr)": 49.89,
+ "MassiveScenarioClassification (de)": 62.39,
+ "MassiveScenarioClassification (ko)": 30.41,
+ "MassiveScenarioClassification (am)": 7.6,
+ "MassiveScenarioClassification (it)": 57.91,
+ "MassiveScenarioClassification (ro)": 54.28,
+ "MassiveScenarioClassification (jv)": 47.24,
+ "MassiveScenarioClassification (km)": 5.83,
+ "MassiveScenarioClassification (sq)": 51.95,
+ "MassiveScenarioClassification (pl)": 50.47,
+ "MassiveScenarioClassification (nl)": 54.86,
+ "MassiveScenarioClassification (ru)": 46.46,
+ "MassiveScenarioClassification (es)": 60.4,
+ "MassiveScenarioClassification (hy)": 19.38,
+ "MassiveScenarioClassification (af)": 52.75,
+ "MassiveScenarioClassification (nb)": 53.54,
+ "MassiveScenarioClassification (ur)": 29.99,
+ "MassiveScenarioClassification (en)": 76.58,
+ "MassiveScenarioClassification (he)": 30.33,
+ "MassiveScenarioClassification (fr)": 59.83,
+ "MassiveScenarioClassification (id)": 50.43,
+ "MassiveScenarioClassification (lv)": 46.74,
+ "MassiveScenarioClassification (sv)": 53.84,
+ "MassiveScenarioClassification (ta)": 17.77,
+ "MassiveScenarioClassification (ar)": 33.7,
+ "MassiveScenarioClassification (zh-TW)": 30.9,
+ "MassiveScenarioClassification (te)": 7.52,
+ "MassiveScenarioClassification (ja)": 41.94,
+ "NoRecClassification": 41.54,
+ "NordicLangClassification": 58.2,
+ "PAC": 69.12,
+ "PolEmo2.0-IN": 50.18,
+ "PolEmo2.0-OUT": 26.82,
+ "RuReviewsClassification": 48.71,
+ "RuSciBenchGRNTIClassification": 24.06,
+ "RuSciBenchOECDClassification": 19.18
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "AlloProfClusteringP2P": 61.15,
+ "AlloProfClusteringS2S": 35.69,
+ "BlurbsClusteringP2P": 33.8,
+ "BlurbsClusteringS2S": 13.31,
+ "GeoreviewClusteringP2P": 27.27,
+ "HALClusteringS2S": 23.61,
+ "MLSUMClusteringP2P (de)": 45.78,
+ "MLSUMClusteringP2P (fr)": 44.54,
+ "MLSUMClusteringP2P (ru)": 27.93,
+ "MLSUMClusteringP2P (es)": 44.53,
+ "MLSUMClusteringS2S (de)": 44.11,
+ "MLSUMClusteringS2S (fr)": 43.62,
+ "MLSUMClusteringS2S (ru)": 26.85,
+ "MLSUMClusteringS2S (es)": 44.08,
+ "MasakhaNEWSClusteringP2P (amh)": 42.0,
+ "MasakhaNEWSClusteringP2P (eng)": 59.86,
+ "MasakhaNEWSClusteringP2P (fra)": 68.68,
+ "MasakhaNEWSClusteringP2P (hau)": 41.5,
+ "MasakhaNEWSClusteringP2P (ibo)": 43.18,
+ "MasakhaNEWSClusteringP2P (lin)": 71.26,
+ "MasakhaNEWSClusteringP2P (lug)": 58.01,
+ "MasakhaNEWSClusteringP2P (orm)": 27.48,
+ "MasakhaNEWSClusteringP2P (pcm)": 86.01,
+ "MasakhaNEWSClusteringP2P (run)": 53.68,
+ "MasakhaNEWSClusteringP2P (sna)": 67.47,
+ "MasakhaNEWSClusteringP2P (som)": 34.74,
+ "MasakhaNEWSClusteringP2P (swa)": 31.4,
+ "MasakhaNEWSClusteringP2P (tir)": 44.31,
+ "MasakhaNEWSClusteringP2P (xho)": 37.29,
+ "MasakhaNEWSClusteringP2P (yor)": 40.79,
+ "MasakhaNEWSClusteringS2S (amh)": 44.55,
+ "MasakhaNEWSClusteringS2S (eng)": 44.41,
+ "MasakhaNEWSClusteringS2S (fra)": 39.8,
+ "MasakhaNEWSClusteringS2S (hau)": 20.74,
+ "MasakhaNEWSClusteringS2S (ibo)": 46.43,
+ "MasakhaNEWSClusteringS2S (lin)": 52.66,
+ "MasakhaNEWSClusteringS2S (lug)": 42.28,
+ "MasakhaNEWSClusteringS2S (orm)": 28.05,
+ "MasakhaNEWSClusteringS2S (pcm)": 77.26,
+ "MasakhaNEWSClusteringS2S (run)": 48.73,
+ "MasakhaNEWSClusteringS2S (sna)": 40.39,
+ "MasakhaNEWSClusteringS2S (som)": 33.5,
+ "MasakhaNEWSClusteringS2S (swa)": 18.32,
+ "MasakhaNEWSClusteringS2S (tir)": 43.27,
+ "MasakhaNEWSClusteringS2S (xho)": 27.07,
+ "MasakhaNEWSClusteringS2S (yor)": 34.08,
+ "RuSciBenchGRNTIClusteringP2P": 23.4,
+ "RuSciBenchOECDClusteringP2P": 20.64,
+ "TenKGnadClusteringP2P": 47.9,
+ "TenKGnadClusteringS2S": 26.9
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "CDSC-E": 62.91,
+ "FalseFriendsGermanEnglish": 47.58,
+ "OpusparcusPC (de)": 91.52,
+ "OpusparcusPC (en)": 98.38,
+ "OpusparcusPC (fi)": 87.04,
+ "OpusparcusPC (fr)": 88.39,
+ "OpusparcusPC (ru)": 80.87,
+ "OpusparcusPC (sv)": 84.44,
+ "PSC": 96.09,
+ "PawsXPairClassification (de)": 51.6,
+ "PawsXPairClassification (en)": 63.44,
+ "PawsXPairClassification (es)": 53.64,
+ "PawsXPairClassification (fr)": 56.8,
+ "PawsXPairClassification (ja)": 49.85,
+ "PawsXPairClassification (ko)": 50.82,
+ "PawsXPairClassification (zh)": 52.47,
+ "SICK-E-PL": 61.81,
+ "TERRa": 47.68
+ },
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "CDSC-E": 62.91,
+ "FalseFriendsGermanEnglish": 47.59,
+ "OpusparcusPC (de)": 91.52,
+ "OpusparcusPC (en)": 98.39,
+ "OpusparcusPC (fi)": 87.11,
+ "OpusparcusPC (fr)": 88.44,
+ "OpusparcusPC (ru)": 80.88,
+ "OpusparcusPC (sv)": 84.45,
+ "PSC": 96.09,
+ "PawsXPairClassification (de)": 52.18,
+ "PawsXPairClassification (en)": 63.59,
+ "PawsXPairClassification (es)": 53.68,
+ "PawsXPairClassification (fr)": 56.82,
+ "PawsXPairClassification (ja)": 49.94,
+ "PawsXPairClassification (ko)": 51.02,
+ "PawsXPairClassification (zh)": 52.49,
+ "SICK-E-PL": 61.81,
+ "TERRa": 47.68
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "AlloprofReranking": 68.26,
+ "RuBQReranking": 51.31,
+ "SyntecReranking": 76.13,
+ "T2Reranking": 62.84
+ },
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "MIRACLReranking (ar)": 29.78,
+ "MIRACLReranking (bn)": 29.69,
+ "MIRACLReranking (de)": 36.5,
+ "MIRACLReranking (en)": 62.59,
+ "MIRACLReranking (es)": 49.73,
+ "MIRACLReranking (fa)": 18.93,
+ "MIRACLReranking (fi)": 50.03,
+ "MIRACLReranking (fr)": 44.25,
+ "MIRACLReranking (hi)": 22.73,
+ "MIRACLReranking (id)": 33.97,
+ "MIRACLReranking (ja)": 22.4,
+ "MIRACLReranking (ko)": 20.14,
+ "MIRACLReranking (ru)": 28.25,
+ "MIRACLReranking (sw)": 34.97,
+ "MIRACLReranking (te)": 2.74,
+ "MIRACLReranking (th)": 4.46,
+ "MIRACLReranking (yo)": 56.78,
+ "MIRACLReranking (zh)": 15.67
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "Cohere-embed-english-v3.0",
- "AILACasedocs": 31.54,
- "AILAStatutes": 27.15,
- "ARCChallenge": 9.89,
- "AlphaNLI": 15.1,
+ "AILACasedocs": 32.35,
+ "AILAStatutes": 26.38,
+ "ARCChallenge": 9.92,
+ "AlloprofRetrieval": 38.83,
+ "AlphaNLI": 15.09,
+ "AppsRetrieval": 13.72,
+ "BSARDRetrieval": 13.19,
"BrightRetrieval (psychology)": 21.82,
"BrightRetrieval (economics)": 20.18,
"BrightRetrieval (robotics)": 16.21,
@@ -1183,25 +5021,104 @@
"BrightRetrieval (theoremqa_questions)": 15.07,
"BrightRetrieval (leetcode)": 26.78,
"BrightRetrieval (earth_science)": 27.45,
- "GerDaLIRSmall": 6.05,
- "HellaSwag": 26.35,
- "LeCaRDv2": 21.02,
- "LegalBenchConsumerContractsQA": 77.12,
- "LegalBenchCorporateLobbying": 93.68,
- "LegalQuAD": 26.08,
- "LegalSummarization": 61.7,
- "PIQA": 28.49,
- "Quail": 4.1,
- "RARbCode": 57.19,
- "RARbMath": 72.26,
- "SIQA": 4.26,
- "SpartQA": 3.75,
- "TempReasonL1": 1.5,
- "TempReasonL2Fact": 35.91,
+ "CmedqaRetrieval": 2.95,
+ "CodeFeedbackMT": 47.02,
+ "CodeFeedbackST": 74.82,
+ "CodeSearchNetCCRetrieval (python)": 60.38,
+ "CodeSearchNetCCRetrieval (javascript)": 57.62,
+ "CodeSearchNetCCRetrieval (go)": 36.29,
+ "CodeSearchNetCCRetrieval (ruby)": 59.15,
+ "CodeSearchNetCCRetrieval (java)": 56.76,
+ "CodeSearchNetCCRetrieval (php)": 46.64,
+ "CodeSearchNetRetrieval (python)": 87.89,
+ "CodeSearchNetRetrieval (javascript)": 72.14,
+ "CodeSearchNetRetrieval (go)": 93.29,
+ "CodeSearchNetRetrieval (ruby)": 80.01,
+ "CodeSearchNetRetrieval (java)": 80.68,
+ "CodeSearchNetRetrieval (php)": 84.07,
+ "CodeTransOceanContest": 65.28,
+ "CodeTransOceanDL": 31.38,
+ "CosQA": 30.65,
+ "CovidRetrieval": 27.82,
+ "GerDaLIR": 2.38,
+ "GerDaLIRSmall": 6.22,
+ "GermanQuAD-Retrieval": 88.23,
+ "HellaSwag": 26.38,
+ "LEMBNarrativeQARetrieval": 25.03,
+ "LEMBQMSumRetrieval": 23.82,
+ "LEMBSummScreenFDRetrieval": 75.77,
+ "LEMBWikimQARetrieval": 59.03,
+ "LeCaRDv2": 21.03,
+ "LegalBenchConsumerContractsQA": 77.42,
+ "LegalBenchCorporateLobbying": 93.64,
+ "LegalQuAD": 25.77,
+ "LegalSummarization": 61.92,
+ "MintakaRetrieval (ar)": 3.12,
+ "MintakaRetrieval (de)": 30.88,
+ "MintakaRetrieval (es)": 29.12,
+ "MintakaRetrieval (fr)": 30.19,
+ "MintakaRetrieval (hi)": 6.22,
+ "MintakaRetrieval (it)": 26.8,
+ "MintakaRetrieval (ja)": 9.35,
+ "MintakaRetrieval (pt)": 28.46,
+ "PIQA": 28.55,
+ "Quail": 4.09,
+ "RARbCode": 57.27,
+ "RARbMath": 72.25,
+ "RiaNewsRetrieval": 21.74,
+ "RuBQRetrieval": 26.66,
+ "SIQA": 4.27,
+ "SciFact-PL": 39.36,
+ "SpartQA": 3.74,
+ "StackOverflowQA": 89.35,
+ "SyntecRetrieval": 72.17,
+ "SyntheticText2SQL": 57.2,
+ "TRECCOVID-PL": 47.27,
+ "TempReasonL1": 1.51,
+ "TempReasonL2Fact": 35.94,
"TempReasonL2Pure": 1.89,
- "TempReasonL3Fact": 27.51,
+ "TempReasonL3Fact": 27.52,
"TempReasonL3Pure": 8.53,
- "WinoGrande": 58.01
+ "WinoGrande": 57.94,
+ "XMarket (de)": 14.36,
+ "XMarket (en)": 32.99,
+ "XMarket (es)": 15.81,
+ "XPQARetrieval (ara-ara)": 15.56,
+ "XPQARetrieval (eng-ara)": 5.23,
+ "XPQARetrieval (ara-eng)": 10.23,
+ "XPQARetrieval (deu-deu)": 61.94,
+ "XPQARetrieval (eng-deu)": 24.72,
+ "XPQARetrieval (deu-eng)": 38.58,
+ "XPQARetrieval (spa-spa)": 51.1,
+ "XPQARetrieval (eng-spa)": 21.07,
+ "XPQARetrieval (spa-eng)": 32.96,
+ "XPQARetrieval (fra-fra)": 57.61,
+ "XPQARetrieval (eng-fra)": 26.21,
+ "XPQARetrieval (fra-eng)": 40.77,
+ "XPQARetrieval (hin-hin)": 36.4,
+ "XPQARetrieval (eng-hin)": 10.72,
+ "XPQARetrieval (hin-eng)": 10.19,
+ "XPQARetrieval (ita-ita)": 63.39,
+ "XPQARetrieval (eng-ita)": 17.67,
+ "XPQARetrieval (ita-eng)": 36.49,
+ "XPQARetrieval (jpn-jpn)": 47.23,
+ "XPQARetrieval (eng-jpn)": 9.74,
+ "XPQARetrieval (jpn-eng)": 20.05,
+ "XPQARetrieval (kor-kor)": 15.81,
+ "XPQARetrieval (eng-kor)": 10.11,
+ "XPQARetrieval (kor-eng)": 8.63,
+ "XPQARetrieval (pol-pol)": 35.5,
+ "XPQARetrieval (eng-pol)": 15.66,
+ "XPQARetrieval (pol-eng)": 22.57,
+ "XPQARetrieval (por-por)": 40.45,
+ "XPQARetrieval (eng-por)": 15.6,
+ "XPQARetrieval (por-eng)": 29.83,
+ "XPQARetrieval (tam-tam)": 6.63,
+ "XPQARetrieval (eng-tam)": 6.34,
+ "XPQARetrieval (tam-eng)": 5.56,
+ "XPQARetrieval (cmn-cmn)": 23.53,
+ "XPQARetrieval (eng-cmn)": 10.92,
+ "XPQARetrieval (cmn-eng)": 15.4
}
],
"recall_at_1": [
@@ -1219,21 +5136,113 @@
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "CDSC-R": 79.92,
+ "GermanSTSBenchmark": 67.07,
+ "RUParaPhraserSTS": 48.32,
+ "RuSTSBenchmarkSTS": 58.74,
+ "SICK-R-PL": 58.33,
+ "SICKFr": 70.9,
+ "STS22 (de-en)": 53.28,
+ "STS22 (zh)": 49.65,
+ "STS22 (pl-en)": 68.66,
+ "STS22 (de-pl)": 40.65,
+ "STS22 (es)": 60.81,
+ "STS22 (fr)": 81.09,
+ "STS22 (es-en)": 73.36,
+ "STS22 (en)": 68.15,
+ "STS22 (de-fr)": 67.87,
+ "STS22 (tr)": 53.62,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh-en)": 47.47,
+ "STS22 (ar)": 38.69,
+ "STS22 (pl)": 36.17,
+ "STS22 (de)": 43.62,
+ "STS22 (ru)": 36.52,
+ "STS22 (it)": 73.12,
+ "STS22 (es-it)": 68.51,
+ "STSB": 38.46,
+ "STSBenchmarkMultilingualSTS (pl)": 58.93,
+ "STSBenchmarkMultilingualSTS (nl)": 66.15,
+ "STSBenchmarkMultilingualSTS (it)": 67.54,
+ "STSBenchmarkMultilingualSTS (en)": 86.52,
+ "STSBenchmarkMultilingualSTS (fr)": 71.11,
+ "STSBenchmarkMultilingualSTS (pt)": 68.8,
+ "STSBenchmarkMultilingualSTS (de)": 67.95,
+ "STSBenchmarkMultilingualSTS (zh)": 37.44,
+ "STSBenchmarkMultilingualSTS (ru)": 58.77,
+ "STSBenchmarkMultilingualSTS (es)": 72.96
+ },
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "CDSC-R": 79.92,
+ "GermanSTSBenchmark": 67.07,
+ "RUParaPhraserSTS": 48.32,
+ "RuSTSBenchmarkSTS": 58.74,
+ "SICK-R-PL": 58.33,
+ "SICKFr": 70.9,
+ "STS22 (de-en)": 53.28,
+ "STS22 (zh)": 49.65,
+ "STS22 (pl-en)": 68.66,
+ "STS22 (de-pl)": 40.65,
+ "STS22 (es)": 60.81,
+ "STS22 (fr)": 81.09,
+ "STS22 (es-en)": 73.36,
+ "STS22 (en)": 68.15,
+ "STS22 (de-fr)": 67.87,
+ "STS22 (tr)": 53.62,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh-en)": 47.47,
+ "STS22 (ar)": 38.69,
+ "STS22 (pl)": 36.17,
+ "STS22 (de)": 43.62,
+ "STS22 (ru)": 36.52,
+ "STS22 (it)": 73.12,
+ "STS22 (es-it)": 68.51,
+ "STSB": 38.46,
+ "STSBenchmarkMultilingualSTS (pl)": 58.93,
+ "STSBenchmarkMultilingualSTS (nl)": 66.15,
+ "STSBenchmarkMultilingualSTS (it)": 67.54,
+ "STSBenchmarkMultilingualSTS (en)": 86.52,
+ "STSBenchmarkMultilingualSTS (fr)": 71.11,
+ "STSBenchmarkMultilingualSTS (pt)": 68.8,
+ "STSBenchmarkMultilingualSTS (de)": 67.95,
+ "STSBenchmarkMultilingualSTS (zh)": 37.44,
+ "STSBenchmarkMultilingualSTS (ru)": 58.77,
+ "STSBenchmarkMultilingualSTS (es)": 72.96
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "SummEvalFr": 30.15
+ },
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "SummEvalFr": 30.15
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "Cohere-embed-english-v3.0",
+ "CEDRClassification": 35.56,
+ "SensitiveTopicsClassification": 18.73
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": [
{
"Model": "Cohere-embed-english-v3.0",
- "Core17InstructionRetrieval": 2.8,
- "News21InstructionRetrieval": 0.2,
- "Robust04InstructionRetrieval": -3.63
+ "Core17InstructionRetrieval": 2.68,
+ "News21InstructionRetrieval": 0.3,
+ "Robust04InstructionRetrieval": -3.49
}
]
}
@@ -1291,18 +5300,286 @@
},
"Cohere__Cohere-embed-multilingual-light-v3.0": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "BornholmBitextMining": 36.64,
+ "Tatoeba (slv-eng)": 62.38,
+ "Tatoeba (kaz-eng)": 61.91,
+ "Tatoeba (bos-eng)": 73.84,
+ "Tatoeba (hin-eng)": 88.19,
+ "Tatoeba (wuu-eng)": 55.0,
+ "Tatoeba (epo-eng)": 79.45,
+ "Tatoeba (deu-eng)": 94.81,
+ "Tatoeba (xho-eng)": 40.42,
+ "Tatoeba (cym-eng)": 48.44,
+ "Tatoeba (kzj-eng)": 2.77,
+ "Tatoeba (nld-eng)": 86.46,
+ "Tatoeba (rus-eng)": 86.86,
+ "Tatoeba (hsb-eng)": 20.25,
+ "Tatoeba (por-eng)": 79.97,
+ "Tatoeba (hrv-eng)": 78.21,
+ "Tatoeba (ben-eng)": 75.07,
+ "Tatoeba (ido-eng)": 49.3,
+ "Tatoeba (ceb-eng)": 24.63,
+ "Tatoeba (eus-eng)": 37.3,
+ "Tatoeba (ron-eng)": 76.3,
+ "Tatoeba (arq-eng)": 16.72,
+ "Tatoeba (war-eng)": 24.29,
+ "Tatoeba (tzl-eng)": 17.23,
+ "Tatoeba (max-eng)": 40.8,
+ "Tatoeba (tur-eng)": 81.36,
+ "Tatoeba (hye-eng)": 70.67,
+ "Tatoeba (swg-eng)": 35.43,
+ "Tatoeba (uzb-eng)": 42.95,
+ "Tatoeba (vie-eng)": 84.12,
+ "Tatoeba (pam-eng)": 3.37,
+ "Tatoeba (lit-eng)": 46.27,
+ "Tatoeba (ast-eng)": 45.84,
+ "Tatoeba (kur-eng)": 22.44,
+ "Tatoeba (lfn-eng)": 36.33,
+ "Tatoeba (pes-eng)": 73.91,
+ "Tatoeba (ind-eng)": 81.83,
+ "Tatoeba (ita-eng)": 80.7,
+ "Tatoeba (nno-eng)": 61.07,
+ "Tatoeba (yid-eng)": 47.46,
+ "Tatoeba (fin-eng)": 60.74,
+ "Tatoeba (gle-eng)": 41.46,
+ "Tatoeba (isl-eng)": 52.69,
+ "Tatoeba (kab-eng)": 5.98,
+ "Tatoeba (urd-eng)": 78.17,
+ "Tatoeba (nov-eng)": 45.93,
+ "Tatoeba (tgl-eng)": 66.58,
+ "Tatoeba (est-eng)": 47.9,
+ "Tatoeba (khm-eng)": 26.01,
+ "Tatoeba (tel-eng)": 78.42,
+ "Tatoeba (lvs-eng)": 46.31,
+ "Tatoeba (awa-eng)": 59.5,
+ "Tatoeba (cat-eng)": 62.02,
+ "Tatoeba (cbk-eng)": 36.88,
+ "Tatoeba (ile-eng)": 54.61,
+ "Tatoeba (orv-eng)": 8.05,
+ "Tatoeba (srp-eng)": 74.4,
+ "Tatoeba (mon-eng)": 60.97,
+ "Tatoeba (pol-eng)": 78.49,
+ "Tatoeba (nds-eng)": 37.97,
+ "Tatoeba (uig-eng)": 44.38,
+ "Tatoeba (pms-eng)": 22.31,
+ "Tatoeba (cmn-eng)": 82.2,
+ "Tatoeba (ces-eng)": 66.62,
+ "Tatoeba (gsw-eng)": 28.46,
+ "Tatoeba (swe-eng)": 81.61,
+ "Tatoeba (mkd-eng)": 55.13,
+ "Tatoeba (amh-eng)": 54.67,
+ "Tatoeba (nob-eng)": 84.58,
+ "Tatoeba (spa-eng)": 85.6,
+ "Tatoeba (csb-eng)": 7.02,
+ "Tatoeba (yue-eng)": 57.38,
+ "Tatoeba (fry-eng)": 34.59,
+ "Tatoeba (ang-eng)": 30.55,
+ "Tatoeba (zsm-eng)": 85.68,
+ "Tatoeba (ara-eng)": 71.5,
+ "Tatoeba (bel-eng)": 73.25,
+ "Tatoeba (heb-eng)": 61.63,
+ "Tatoeba (swh-eng)": 49.84,
+ "Tatoeba (bre-eng)": 3.93,
+ "Tatoeba (mal-eng)": 93.51,
+ "Tatoeba (arz-eng)": 43.94,
+ "Tatoeba (hun-eng)": 60.87,
+ "Tatoeba (tha-eng)": 84.66,
+ "Tatoeba (bul-eng)": 79.47,
+ "Tatoeba (tuk-eng)": 7.74,
+ "Tatoeba (kat-eng)": 59.68,
+ "Tatoeba (fao-eng)": 40.75,
+ "Tatoeba (ina-eng)": 69.4,
+ "Tatoeba (slk-eng)": 65.58,
+ "Tatoeba (jpn-eng)": 68.54,
+ "Tatoeba (dtp-eng)": 3.32,
+ "Tatoeba (kor-eng)": 62.94,
+ "Tatoeba (ell-eng)": 80.02,
+ "Tatoeba (glg-eng)": 33.35,
+ "Tatoeba (tam-eng)": 76.32,
+ "Tatoeba (mar-eng)": 81.79,
+ "Tatoeba (oci-eng)": 12.17,
+ "Tatoeba (dsb-eng)": 15.5,
+ "Tatoeba (jav-eng)": 36.99,
+ "Tatoeba (lat-eng)": 24.35,
+ "Tatoeba (sqi-eng)": 79.78,
+ "Tatoeba (tat-eng)": 47.6,
+ "Tatoeba (ukr-eng)": 75.97,
+ "Tatoeba (aze-eng)": 71.26,
+ "Tatoeba (cha-eng)": 13.4,
+ "Tatoeba (cor-eng)": 3.63,
+ "Tatoeba (dan-eng)": 83.41,
+ "Tatoeba (afr-eng)": 73.58,
+ "Tatoeba (ber-eng)": 7.63,
+ "Tatoeba (gla-eng)": 24.94,
+ "Tatoeba (mhr-eng)": 2.33,
+ "Tatoeba (fra-eng)": 83.62
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "Cohere-embed-multilingual-light-v3.0",
- "AmazonReviewsClassification (fr)": 38.6,
- "MTOPDomainClassification (fr)": 80.79,
- "MTOPIntentClassification (fr)": 50.01,
- "MasakhaNEWSClassification (fra)": 82.58,
- "MassiveIntentClassification (fr)": 56.31,
- "MassiveScenarioClassification (fr)": 59.5
+ "AllegroReviews": 37.55,
+ "AmazonCounterfactualClassification (en-ext)": 70.76,
+ "AmazonCounterfactualClassification (en)": 69.96,
+ "AmazonCounterfactualClassification (de)": 68.67,
+ "AmazonCounterfactualClassification (ja)": 59.69,
+ "AmazonReviewsClassification (fr)": 41.14,
+ "AmazonReviewsClassification (en)": 46.54,
+ "AmazonReviewsClassification (de)": 42.42,
+ "AmazonReviewsClassification (es)": 41.96,
+ "AmazonReviewsClassification (ja)": 38.41,
+ "AmazonReviewsClassification (zh)": 38.37,
+ "AngryTweetsClassification": 54.8,
+ "CBD": 61.72,
+ "DanishPoliticalCommentsClassification": 36.95,
+ "GeoreviewClassification": 44.47,
+ "HeadlineClassification": 76.3,
+ "InappropriatenessClassification": 59.73,
+ "KinopoiskClassification": 50.88,
+ "LccSentimentClassification": 56.53,
+ "MTOPDomainClassification (fr)": 86.06,
+ "MTOPDomainClassification (en)": 91.87,
+ "MTOPDomainClassification (de)": 88.77,
+ "MTOPDomainClassification (es)": 90.4,
+ "MTOPDomainClassification (hi)": 88.21,
+ "MTOPDomainClassification (th)": 85.55,
+ "MTOPIntentClassification (fr)": 53.11,
+ "MTOPIntentClassification (en)": 62.23,
+ "MTOPIntentClassification (de)": 61.57,
+ "MTOPIntentClassification (es)": 62.33,
+ "MTOPIntentClassification (hi)": 59.26,
+ "MTOPIntentClassification (th)": 59.12,
+ "MasakhaNEWSClassification (fra)": 79.31,
+ "MasakhaNEWSClassification (amh)": 87.29,
+ "MasakhaNEWSClassification (eng)": 80.14,
+ "MasakhaNEWSClassification (hau)": 75.75,
+ "MasakhaNEWSClassification (ibo)": 68.08,
+ "MasakhaNEWSClassification (lin)": 75.09,
+ "MasakhaNEWSClassification (lug)": 71.12,
+ "MasakhaNEWSClassification (orm)": 71.05,
+ "MasakhaNEWSClassification (pcm)": 93.31,
+ "MasakhaNEWSClassification (run)": 80.0,
+ "MasakhaNEWSClassification (sna)": 86.31,
+ "MasakhaNEWSClassification (som)": 62.31,
+ "MasakhaNEWSClassification (swa)": 74.03,
+ "MasakhaNEWSClassification (tir)": 69.78,
+ "MasakhaNEWSClassification (xho)": 76.77,
+ "MasakhaNEWSClassification (yor)": 76.96,
+ "MassiveIntentClassification (fr)": 61.85,
+ "MassiveIntentClassification (da)": 59.39,
+ "MassiveIntentClassification (lv)": 51.66,
+ "MassiveIntentClassification (tl)": 55.25,
+ "MassiveIntentClassification (ru)": 61.88,
+ "MassiveIntentClassification (pl)": 60.69,
+ "MassiveIntentClassification (ur)": 57.27,
+ "MassiveIntentClassification (tr)": 59.69,
+ "MassiveIntentClassification (hi)": 60.13,
+ "MassiveIntentClassification (az)": 56.0,
+ "MassiveIntentClassification (ko)": 59.68,
+ "MassiveIntentClassification (kn)": 55.04,
+ "MassiveIntentClassification (ro)": 57.71,
+ "MassiveIntentClassification (zh-TW)": 58.92,
+ "MassiveIntentClassification (fi)": 56.94,
+ "MassiveIntentClassification (mn)": 54.36,
+ "MassiveIntentClassification (nb)": 58.27,
+ "MassiveIntentClassification (id)": 60.96,
+ "MassiveIntentClassification (ar)": 51.3,
+ "MassiveIntentClassification (af)": 55.18,
+ "MassiveIntentClassification (jv)": 49.66,
+ "MassiveIntentClassification (hu)": 56.36,
+ "MassiveIntentClassification (cy)": 45.42,
+ "MassiveIntentClassification (bn)": 57.56,
+ "MassiveIntentClassification (sv)": 60.87,
+ "MassiveIntentClassification (en)": 67.36,
+ "MassiveIntentClassification (it)": 61.58,
+ "MassiveIntentClassification (ja)": 64.44,
+ "MassiveIntentClassification (ta)": 55.05,
+ "MassiveIntentClassification (pt)": 63.52,
+ "MassiveIntentClassification (my)": 53.29,
+ "MassiveIntentClassification (he)": 54.14,
+ "MassiveIntentClassification (is)": 49.53,
+ "MassiveIntentClassification (ml)": 59.72,
+ "MassiveIntentClassification (sw)": 52.52,
+ "MassiveIntentClassification (te)": 55.63,
+ "MassiveIntentClassification (vi)": 58.43,
+ "MassiveIntentClassification (ka)": 46.25,
+ "MassiveIntentClassification (am)": 49.31,
+ "MassiveIntentClassification (de)": 58.69,
+ "MassiveIntentClassification (ms)": 56.8,
+ "MassiveIntentClassification (km)": 40.79,
+ "MassiveIntentClassification (zh-CN)": 64.86,
+ "MassiveIntentClassification (es)": 62.17,
+ "MassiveIntentClassification (fa)": 61.88,
+ "MassiveIntentClassification (hy)": 54.73,
+ "MassiveIntentClassification (sq)": 54.53,
+ "MassiveIntentClassification (el)": 57.33,
+ "MassiveIntentClassification (nl)": 62.38,
+ "MassiveIntentClassification (th)": 59.84,
+ "MassiveIntentClassification (sl)": 53.55,
+ "MassiveScenarioClassification (fr)": 67.5,
+ "MassiveScenarioClassification (vi)": 64.61,
+ "MassiveScenarioClassification (tr)": 64.68,
+ "MassiveScenarioClassification (ta)": 59.22,
+ "MassiveScenarioClassification (fa)": 67.76,
+ "MassiveScenarioClassification (el)": 65.25,
+ "MassiveScenarioClassification (fi)": 63.44,
+ "MassiveScenarioClassification (hu)": 64.85,
+ "MassiveScenarioClassification (is)": 58.15,
+ "MassiveScenarioClassification (lv)": 57.28,
+ "MassiveScenarioClassification (mn)": 58.41,
+ "MassiveScenarioClassification (zh-CN)": 71.66,
+ "MassiveScenarioClassification (pl)": 66.65,
+ "MassiveScenarioClassification (id)": 66.54,
+ "MassiveScenarioClassification (ja)": 70.82,
+ "MassiveScenarioClassification (hy)": 57.86,
+ "MassiveScenarioClassification (sl)": 59.98,
+ "MassiveScenarioClassification (sq)": 61.63,
+ "MassiveScenarioClassification (bn)": 63.41,
+ "MassiveScenarioClassification (pt)": 65.3,
+ "MassiveScenarioClassification (hi)": 66.29,
+ "MassiveScenarioClassification (jv)": 57.1,
+ "MassiveScenarioClassification (am)": 56.32,
+ "MassiveScenarioClassification (sw)": 59.54,
+ "MassiveScenarioClassification (nb)": 65.64,
+ "MassiveScenarioClassification (az)": 59.42,
+ "MassiveScenarioClassification (es)": 67.2,
+ "MassiveScenarioClassification (en)": 72.63,
+ "MassiveScenarioClassification (ko)": 68.03,
+ "MassiveScenarioClassification (th)": 67.91,
+ "MassiveScenarioClassification (zh-TW)": 66.67,
+ "MassiveScenarioClassification (nl)": 69.05,
+ "MassiveScenarioClassification (my)": 58.03,
+ "MassiveScenarioClassification (de)": 68.12,
+ "MassiveScenarioClassification (ms)": 64.24,
+ "MassiveScenarioClassification (ka)": 51.97,
+ "MassiveScenarioClassification (ur)": 63.09,
+ "MassiveScenarioClassification (ar)": 58.24,
+ "MassiveScenarioClassification (da)": 67.38,
+ "MassiveScenarioClassification (af)": 63.79,
+ "MassiveScenarioClassification (kn)": 59.3,
+ "MassiveScenarioClassification (km)": 47.04,
+ "MassiveScenarioClassification (cy)": 53.0,
+ "MassiveScenarioClassification (sv)": 68.99,
+ "MassiveScenarioClassification (ml)": 66.25,
+ "MassiveScenarioClassification (ru)": 67.1,
+ "MassiveScenarioClassification (te)": 61.28,
+ "MassiveScenarioClassification (he)": 62.46,
+ "MassiveScenarioClassification (it)": 66.58,
+ "MassiveScenarioClassification (ro)": 64.57,
+ "MassiveScenarioClassification (tl)": 61.11,
+ "NoRecClassification": 49.0,
+ "NordicLangClassification": 65.97,
+ "PAC": 67.11,
+ "PolEmo2.0-IN": 69.0,
+ "PolEmo2.0-OUT": 43.36,
+ "RuReviewsClassification": 61.96,
+ "RuSciBenchGRNTIClassification": 58.37,
+ "RuSciBenchOECDClassification": 45.1
}
]
},
@@ -1310,13 +5587,58 @@
"v_measure": [
{
"Model": "Cohere-embed-multilingual-light-v3.0",
- "AlloProfClusteringP2P": 61.96,
- "AlloProfClusteringS2S": 31.36,
- "HALClusteringS2S": 17.31,
+ "AlloProfClusteringP2P": 63.22,
+ "AlloProfClusteringS2S": 40.34,
+ "BlurbsClusteringP2P": 38.9,
+ "BlurbsClusteringS2S": 16.12,
+ "GeoreviewClusteringP2P": 61.06,
+ "HALClusteringS2S": 24.67,
"MLSUMClusteringP2P": 42.8,
+ "MLSUMClusteringP2P (de)": 42.98,
+ "MLSUMClusteringP2P (fr)": 43.54,
+ "MLSUMClusteringP2P (ru)": 44.81,
+ "MLSUMClusteringP2P (es)": 46.01,
"MLSUMClusteringS2S": 32.72,
- "MasakhaNEWSClusteringP2P (fra)": 56.81,
- "MasakhaNEWSClusteringS2S (fra)": 29.41
+ "MLSUMClusteringS2S (de)": 42.84,
+ "MLSUMClusteringS2S (fr)": 43.6,
+ "MLSUMClusteringS2S (ru)": 43.16,
+ "MLSUMClusteringS2S (es)": 44.91,
+ "MasakhaNEWSClusteringP2P (fra)": 62.82,
+ "MasakhaNEWSClusteringP2P (amh)": 68.51,
+ "MasakhaNEWSClusteringP2P (eng)": 64.65,
+ "MasakhaNEWSClusteringP2P (hau)": 62.53,
+ "MasakhaNEWSClusteringP2P (ibo)": 54.8,
+ "MasakhaNEWSClusteringP2P (lin)": 62.84,
+ "MasakhaNEWSClusteringP2P (lug)": 71.91,
+ "MasakhaNEWSClusteringP2P (orm)": 35.6,
+ "MasakhaNEWSClusteringP2P (pcm)": 83.55,
+ "MasakhaNEWSClusteringP2P (run)": 57.36,
+ "MasakhaNEWSClusteringP2P (sna)": 60.32,
+ "MasakhaNEWSClusteringP2P (som)": 43.44,
+ "MasakhaNEWSClusteringP2P (swa)": 31.63,
+ "MasakhaNEWSClusteringP2P (tir)": 67.09,
+ "MasakhaNEWSClusteringP2P (xho)": 41.42,
+ "MasakhaNEWSClusteringP2P (yor)": 44.26,
+ "MasakhaNEWSClusteringS2S (fra)": 48.97,
+ "MasakhaNEWSClusteringS2S (amh)": 54.37,
+ "MasakhaNEWSClusteringS2S (eng)": 56.49,
+ "MasakhaNEWSClusteringS2S (hau)": 20.89,
+ "MasakhaNEWSClusteringS2S (ibo)": 32.92,
+ "MasakhaNEWSClusteringS2S (lin)": 62.98,
+ "MasakhaNEWSClusteringS2S (lug)": 43.72,
+ "MasakhaNEWSClusteringS2S (orm)": 21.5,
+ "MasakhaNEWSClusteringS2S (pcm)": 71.32,
+ "MasakhaNEWSClusteringS2S (run)": 47.4,
+ "MasakhaNEWSClusteringS2S (sna)": 50.36,
+ "MasakhaNEWSClusteringS2S (som)": 27.7,
+ "MasakhaNEWSClusteringS2S (swa)": 18.25,
+ "MasakhaNEWSClusteringS2S (tir)": 57.3,
+ "MasakhaNEWSClusteringS2S (xho)": 35.72,
+ "MasakhaNEWSClusteringS2S (yor)": 29.02,
+ "RuSciBenchGRNTIClusteringP2P": 53.84,
+ "RuSciBenchOECDClusteringP2P": 46.46,
+ "TenKGnadClusteringP2P": 47.32,
+ "TenKGnadClusteringS2S": 29.27
}
]
},
@@ -1324,13 +5646,50 @@
"max_ap": [
{
"Model": "Cohere-embed-multilingual-light-v3.0",
+ "CDSC-E": 70.98,
+ "FalseFriendsGermanEnglish": 49.82,
+ "OpusparcusPC (de)": 95.03,
+ "OpusparcusPC (en)": 98.3,
+ "OpusparcusPC (fi)": 88.58,
"OpusparcusPC (fr)": 90.92,
- "PawsXPairClassification (fr)": 57.32
+ "OpusparcusPC (ru)": 85.41,
+ "OpusparcusPC (sv)": 91.14,
+ "PSC": 99.33,
+ "PawsXPairClassification (de)": 54.78,
+ "PawsXPairClassification (en)": 57.12,
+ "PawsXPairClassification (es)": 55.06,
+ "PawsXPairClassification (fr)": 57.33,
+ "PawsXPairClassification (ja)": 49.82,
+ "PawsXPairClassification (ko)": 50.79,
+ "PawsXPairClassification (zh)": 55.2,
+ "SICK-E-PL": 66.56,
+ "TERRa": 56.66
+ },
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "CDSC-E": 71.05,
+ "FalseFriendsGermanEnglish": 49.82,
+ "OpusparcusPC (fr)": 90.92,
+ "OpusparcusPC (de)": 95.03,
+ "OpusparcusPC (en)": 98.3,
+ "OpusparcusPC (fi)": 88.58,
+ "OpusparcusPC (ru)": 85.41,
+ "OpusparcusPC (sv)": 91.14,
+ "PSC": 99.33,
+ "PawsXPairClassification (fr)": 57.35,
+ "PawsXPairClassification (de)": 54.88,
+ "PawsXPairClassification (en)": 57.24,
+ "PawsXPairClassification (es)": 55.09,
+ "PawsXPairClassification (ja)": 49.85,
+ "PawsXPairClassification (ko)": 50.79,
+ "PawsXPairClassification (zh)": 55.22,
+ "SICK-E-PL": 66.57,
+ "TERRa": 56.67
},
{
"Model": "Cohere-embed-multilingual-light-v3.0",
"OpusparcusPC (fr)": 90.92,
- "PawsXPairClassification (fr)": 57.35
+ "PawsXPairClassification (fr)": 57.32
}
]
},
@@ -1338,8 +5697,31 @@
"map": [
{
"Model": "Cohere-embed-multilingual-light-v3.0",
- "AlloprofReranking": 51.6,
- "SyntecReranking": 88.03
+ "AlloprofReranking": 72.49,
+ "RuBQReranking": 71.12,
+ "SyntecReranking": 85.96,
+ "T2Reranking": 67.46
+ },
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "MIRACLReranking (ar)": 74.34,
+ "MIRACLReranking (bn)": 71.23,
+ "MIRACLReranking (de)": 48.25,
+ "MIRACLReranking (en)": 60.8,
+ "MIRACLReranking (es)": 62.1,
+ "MIRACLReranking (fa)": 54.52,
+ "MIRACLReranking (fi)": 76.11,
+ "MIRACLReranking (fr)": 51.29,
+ "MIRACLReranking (hi)": 60.07,
+ "MIRACLReranking (id)": 57.15,
+ "MIRACLReranking (ja)": 63.91,
+ "MIRACLReranking (ko)": 54.58,
+ "MIRACLReranking (ru)": 61.22,
+ "MIRACLReranking (sw)": 63.28,
+ "MIRACLReranking (te)": 78.62,
+ "MIRACLReranking (th)": 73.62,
+ "MIRACLReranking (yo)": 67.25,
+ "MIRACLReranking (zh)": 46.58
}
]
},
@@ -1347,16 +5729,190 @@
"ndcg_at_10": [
{
"Model": "Cohere-embed-multilingual-light-v3.0",
- "AlloprofRetrieval": 35.39,
- "BSARDRetrieval": 0.0,
- "MintakaRetrieval (fr)": 23.0,
- "SyntecRetrieval": 76.88,
- "XPQARetrieval (fr)": 45.23
+ "AILACasedocs": 26.92,
+ "AILAStatutes": 29.12,
+ "ARCChallenge": 7.73,
+ "AlloprofRetrieval": 46.43,
+ "AlphaNLI": 21.33,
+ "AppsRetrieval": 8.27,
+ "BSARDRetrieval": 17.34,
+ "CmedqaRetrieval": 26.17,
+ "CodeFeedbackMT": 39.34,
+ "CodeFeedbackST": 70.71,
+ "CodeSearchNetCCRetrieval (python)": 61.34,
+ "CodeSearchNetCCRetrieval (javascript)": 60.8,
+ "CodeSearchNetCCRetrieval (go)": 45.26,
+ "CodeSearchNetCCRetrieval (ruby)": 58.99,
+ "CodeSearchNetCCRetrieval (java)": 55.42,
+ "CodeSearchNetCCRetrieval (php)": 46.96,
+ "CodeSearchNetRetrieval (python)": 86.75,
+ "CodeSearchNetRetrieval (javascript)": 73.5,
+ "CodeSearchNetRetrieval (go)": 92.13,
+ "CodeSearchNetRetrieval (ruby)": 77.16,
+ "CodeSearchNetRetrieval (java)": 71.04,
+ "CodeSearchNetRetrieval (php)": 81.35,
+ "CodeTransOceanContest": 59.35,
+ "CodeTransOceanDL": 29.06,
+ "CosQA": 27.99,
+ "CovidRetrieval": 72.97,
+ "GerDaLIR": 7.02,
+ "GerDaLIRSmall": 15.97,
+ "GermanQuAD-Retrieval": 92.25,
+ "LEMBNarrativeQARetrieval": 21.36,
+ "LEMBQMSumRetrieval": 22.08,
+ "LEMBSummScreenFDRetrieval": 66.32,
+ "LEMBWikimQARetrieval": 57.84,
+ "LeCaRDv2": 59.75,
+ "LegalBenchConsumerContractsQA": 71.43,
+ "LegalBenchCorporateLobbying": 92.64,
+ "LegalQuAD": 44.21,
+ "LegalSummarization": 61.37,
+ "MintakaRetrieval (fr)": 26.68,
+ "MintakaRetrieval (ar)": 19.86,
+ "MintakaRetrieval (de)": 25.36,
+ "MintakaRetrieval (es)": 26.03,
+ "MintakaRetrieval (hi)": 20.16,
+ "MintakaRetrieval (it)": 26.62,
+ "MintakaRetrieval (ja)": 21.28,
+ "MintakaRetrieval (pt)": 26.98,
+ "PIQA": 24.67,
+ "Quail": 3.05,
+ "RARbCode": 41.03,
+ "RiaNewsRetrieval": 74.28,
+ "RuBQRetrieval": 67.24,
+ "SciFact-PL": 62.27,
+ "SpartQA": 5.4,
+ "StackOverflowQA": 81.47,
+ "SyntecRetrieval": 82.68,
+ "SyntheticText2SQL": 53.26,
+ "TRECCOVID-PL": 74.68,
+ "TempReasonL1": 0.94,
+ "TempReasonL2Fact": 27.02,
+ "TempReasonL2Pure": 0.75,
+ "TempReasonL3Fact": 22.9,
+ "TempReasonL3Pure": 6.32,
+ "WinoGrande": 61.87,
+ "XMarket (de)": 20.96,
+ "XMarket (en)": 26.68,
+ "XMarket (es)": 19.19,
+ "XPQARetrieval (fr)": 45.23,
+ "XPQARetrieval (ara-ara)": 42.71,
+ "XPQARetrieval (eng-ara)": 24.05,
+ "XPQARetrieval (ara-eng)": 34.64,
+ "XPQARetrieval (deu-deu)": 71.54,
+ "XPQARetrieval (eng-deu)": 35.15,
+ "XPQARetrieval (deu-eng)": 59.76,
+ "XPQARetrieval (spa-spa)": 57.85,
+ "XPQARetrieval (eng-spa)": 32.72,
+ "XPQARetrieval (spa-eng)": 49.1,
+ "XPQARetrieval (fra-fra)": 64.68,
+ "XPQARetrieval (eng-fra)": 36.9,
+ "XPQARetrieval (fra-eng)": 52.26,
+ "XPQARetrieval (hin-hin)": 73.61,
+ "XPQARetrieval (eng-hin)": 31.79,
+ "XPQARetrieval (hin-eng)": 65.19,
+ "XPQARetrieval (ita-ita)": 70.98,
+ "XPQARetrieval (eng-ita)": 30.47,
+ "XPQARetrieval (ita-eng)": 53.46,
+ "XPQARetrieval (jpn-jpn)": 70.31,
+ "XPQARetrieval (eng-jpn)": 33.82,
+ "XPQARetrieval (jpn-eng)": 57.97,
+ "XPQARetrieval (kor-kor)": 33.69,
+ "XPQARetrieval (eng-kor)": 28.25,
+ "XPQARetrieval (kor-eng)": 25.06,
+ "XPQARetrieval (pol-pol)": 45.37,
+ "XPQARetrieval (eng-pol)": 23.99,
+ "XPQARetrieval (pol-eng)": 36.88,
+ "XPQARetrieval (por-por)": 43.62,
+ "XPQARetrieval (eng-por)": 25.14,
+ "XPQARetrieval (por-eng)": 35.36,
+ "XPQARetrieval (tam-tam)": 38.79,
+ "XPQARetrieval (eng-tam)": 17.54,
+ "XPQARetrieval (tam-eng)": 30.35,
+ "XPQARetrieval (cmn-cmn)": 63.71,
+ "XPQARetrieval (eng-cmn)": 22.11,
+ "XPQARetrieval (cmn-eng)": 47.18
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "CDSC-R": 90.9,
+ "GermanSTSBenchmark": 76.78,
+ "RUParaPhraserSTS": 69.33,
+ "RuSTSBenchmarkSTS": 77.15,
+ "SICK-R-PL": 68.14,
+ "SICKFr": 75.51,
+ "STS22 (it)": 78.41,
+ "STS22 (fr)": 82.8,
+ "STS22 (pl-en)": 79.82,
+ "STS22 (de-en)": 53.01,
+ "STS22 (ar)": 58.03,
+ "STS22 (de-fr)": 65.06,
+ "STS22 (en)": 67.5,
+ "STS22 (de)": 60.0,
+ "STS22 (zh)": 67.02,
+ "STS22 (es)": 67.44,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-it)": 73.91,
+ "STS22 (de-pl)": 46.31,
+ "STS22 (ru)": 63.39,
+ "STS22 (pl)": 39.33,
+ "STS22 (tr)": 66.08,
+ "STS22 (zh-en)": 65.99,
+ "STS22 (es-en)": 77.32,
+ "STSB": 76.03,
+ "STSBenchmarkMultilingualSTS (zh)": 76.59,
+ "STSBenchmarkMultilingualSTS (ru)": 77.14,
+ "STSBenchmarkMultilingualSTS (fr)": 76.48,
+ "STSBenchmarkMultilingualSTS (es)": 78.79,
+ "STSBenchmarkMultilingualSTS (pl)": 71.3,
+ "STSBenchmarkMultilingualSTS (pt)": 73.29,
+ "STSBenchmarkMultilingualSTS (nl)": 74.54,
+ "STSBenchmarkMultilingualSTS (en)": 83.42,
+ "STSBenchmarkMultilingualSTS (it)": 75.75,
+ "STSBenchmarkMultilingualSTS (de)": 77.52
+ },
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "CDSC-R": 90.9,
+ "GermanSTSBenchmark": 76.78,
+ "RUParaPhraserSTS": 69.33,
+ "RuSTSBenchmarkSTS": 77.15,
+ "SICK-R-PL": 68.14,
+ "SICKFr": 75.51,
+ "STS22 (it)": 78.41,
+ "STS22 (fr)": 82.8,
+ "STS22 (pl-en)": 79.82,
+ "STS22 (de-en)": 53.01,
+ "STS22 (ar)": 58.03,
+ "STS22 (de-fr)": 65.06,
+ "STS22 (en)": 67.5,
+ "STS22 (de)": 60.0,
+ "STS22 (zh)": 67.02,
+ "STS22 (es)": 67.44,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-it)": 73.91,
+ "STS22 (de-pl)": 46.31,
+ "STS22 (ru)": 63.39,
+ "STS22 (pl)": 39.33,
+ "STS22 (tr)": 66.08,
+ "STS22 (zh-en)": 65.99,
+ "STS22 (es-en)": 77.32,
+ "STSB": 76.03,
+ "STSBenchmarkMultilingualSTS (zh)": 76.59,
+ "STSBenchmarkMultilingualSTS (ru)": 77.14,
+ "STSBenchmarkMultilingualSTS (fr)": 76.48,
+ "STSBenchmarkMultilingualSTS (es)": 78.79,
+ "STSBenchmarkMultilingualSTS (pl)": 71.3,
+ "STSBenchmarkMultilingualSTS (pt)": 73.29,
+ "STSBenchmarkMultilingualSTS (nl)": 74.54,
+ "STSBenchmarkMultilingualSTS (en)": 83.42,
+ "STSBenchmarkMultilingualSTS (it)": 75.75,
+ "STSBenchmarkMultilingualSTS (de)": 77.52
+ },
{
"Model": "Cohere-embed-multilingual-light-v3.0",
"SICKFr": 75.5,
@@ -1370,30 +5926,319 @@
{
"Model": "Cohere-embed-multilingual-light-v3.0",
"SummEvalFr": 31.4
+ },
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "SummEvalFr": 31.41
+ },
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "SummEvalFr": 31.41
}
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "CEDRClassification": 38.5,
+ "SensitiveTopicsClassification": 27.29
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "Cohere-embed-multilingual-light-v3.0",
+ "Core17InstructionRetrieval": 1.58,
+ "News21InstructionRetrieval": -0.83,
+ "Robust04InstructionRetrieval": -8.11
+ }
+ ]
}
},
"Cohere__Cohere-embed-multilingual-v3.0": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "BornholmBitextMining": 35.6,
+ "Tatoeba (tur-eng)": 95.33,
+ "Tatoeba (wuu-eng)": 81.85,
+ "Tatoeba (mkd-eng)": 86.69,
+ "Tatoeba (xho-eng)": 71.03,
+ "Tatoeba (ina-eng)": 88.51,
+ "Tatoeba (hrv-eng)": 95.08,
+ "Tatoeba (cha-eng)": 23.45,
+ "Tatoeba (dtp-eng)": 5.68,
+ "Tatoeba (ceb-eng)": 50.4,
+ "Tatoeba (gle-eng)": 68.24,
+ "Tatoeba (dan-eng)": 94.99,
+ "Tatoeba (swh-eng)": 68.22,
+ "Tatoeba (nov-eng)": 66.11,
+ "Tatoeba (fao-eng)": 71.4,
+ "Tatoeba (slk-eng)": 92.69,
+ "Tatoeba (por-eng)": 92.88,
+ "Tatoeba (vie-eng)": 96.15,
+ "Tatoeba (lit-eng)": 86.7,
+ "Tatoeba (pam-eng)": 7.22,
+ "Tatoeba (uzb-eng)": 66.08,
+ "Tatoeba (bel-eng)": 91.35,
+ "Tatoeba (pes-eng)": 90.23,
+ "Tatoeba (sqi-eng)": 93.85,
+ "Tatoeba (cmn-eng)": 94.85,
+ "Tatoeba (bul-eng)": 92.37,
+ "Tatoeba (war-eng)": 51.66,
+ "Tatoeba (bre-eng)": 10.28,
+ "Tatoeba (pms-eng)": 49.51,
+ "Tatoeba (tat-eng)": 67.82,
+ "Tatoeba (csb-eng)": 34.22,
+ "Tatoeba (epo-eng)": 94.75,
+ "Tatoeba (orv-eng)": 33.34,
+ "Tatoeba (mar-eng)": 87.77,
+ "Tatoeba (cym-eng)": 75.97,
+ "Tatoeba (ast-eng)": 72.4,
+ "Tatoeba (khm-eng)": 49.34,
+ "Tatoeba (jpn-eng)": 92.6,
+ "Tatoeba (nno-eng)": 89.31,
+ "Tatoeba (ben-eng)": 82.93,
+ "Tatoeba (ukr-eng)": 92.97,
+ "Tatoeba (heb-eng)": 87.09,
+ "Tatoeba (nds-eng)": 62.46,
+ "Tatoeba (gla-eng)": 50.55,
+ "Tatoeba (mal-eng)": 96.7,
+ "Tatoeba (jav-eng)": 68.65,
+ "Tatoeba (kaz-eng)": 78.64,
+ "Tatoeba (kab-eng)": 25.52,
+ "Tatoeba (ang-eng)": 40.19,
+ "Tatoeba (arq-eng)": 31.67,
+ "Tatoeba (est-eng)": 83.62,
+ "Tatoeba (tel-eng)": 88.06,
+ "Tatoeba (tzl-eng)": 42.22,
+ "Tatoeba (ara-eng)": 87.68,
+ "Tatoeba (aze-eng)": 86.04,
+ "Tatoeba (ber-eng)": 26.19,
+ "Tatoeba (uig-eng)": 66.63,
+ "Tatoeba (ido-eng)": 79.36,
+ "Tatoeba (yid-eng)": 70.75,
+ "Tatoeba (mhr-eng)": 6.95,
+ "Tatoeba (srp-eng)": 92.97,
+ "Tatoeba (nob-eng)": 96.72,
+ "Tatoeba (ell-eng)": 93.2,
+ "Tatoeba (dsb-eng)": 42.47,
+ "Tatoeba (slv-eng)": 88.65,
+ "Tatoeba (amh-eng)": 76.53,
+ "Tatoeba (lfn-eng)": 58.58,
+ "Tatoeba (lat-eng)": 51.06,
+ "Tatoeba (tha-eng)": 95.74,
+ "Tatoeba (hye-eng)": 88.23,
+ "Tatoeba (arz-eng)": 72.54,
+ "Tatoeba (cor-eng)": 5.99,
+ "Tatoeba (urd-eng)": 87.2,
+ "Tatoeba (glg-eng)": 75.7,
+ "Tatoeba (cat-eng)": 88.53,
+ "Tatoeba (ita-eng)": 92.48,
+ "Tatoeba (spa-eng)": 96.52,
+ "Tatoeba (awa-eng)": 70.39,
+ "Tatoeba (isl-eng)": 90.92,
+ "Tatoeba (ron-eng)": 94.45,
+ "Tatoeba (oci-eng)": 37.4,
+ "Tatoeba (max-eng)": 54.89,
+ "Tatoeba (mon-eng)": 84.82,
+ "Tatoeba (swg-eng)": 56.43,
+ "Tatoeba (kur-eng)": 56.24,
+ "Tatoeba (hin-eng)": 93.47,
+ "Tatoeba (nld-eng)": 96.38,
+ "Tatoeba (ces-eng)": 94.36,
+ "Tatoeba (cbk-eng)": 63.39,
+ "Tatoeba (tam-eng)": 85.65,
+ "Tatoeba (fra-eng)": 93.82,
+ "Tatoeba (tgl-eng)": 87.63,
+ "Tatoeba (tuk-eng)": 25.92,
+ "Tatoeba (hun-eng)": 92.96,
+ "Tatoeba (deu-eng)": 99.2,
+ "Tatoeba (yue-eng)": 85.87,
+ "Tatoeba (eus-eng)": 70.8,
+ "Tatoeba (ind-eng)": 89.99,
+ "Tatoeba (zsm-eng)": 94.77,
+ "Tatoeba (kor-eng)": 88.98,
+ "Tatoeba (kat-eng)": 81.09,
+ "Tatoeba (gsw-eng)": 47.07,
+ "Tatoeba (ile-eng)": 76.22,
+ "Tatoeba (rus-eng)": 92.32,
+ "Tatoeba (lvs-eng)": 88.62,
+ "Tatoeba (afr-eng)": 88.23,
+ "Tatoeba (pol-eng)": 96.92,
+ "Tatoeba (kzj-eng)": 6.13,
+ "Tatoeba (hsb-eng)": 53.8,
+ "Tatoeba (bos-eng)": 92.28,
+ "Tatoeba (swe-eng)": 93.88,
+ "Tatoeba (fry-eng)": 57.25,
+ "Tatoeba (fin-eng)": 94.15
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "Cohere-embed-multilingual-v3.0",
- "AmazonReviewsClassification (fr)": 41.89,
- "MTOPDomainClassification (fr)": 86.23,
- "MTOPIntentClassification (fr)": 61.07,
- "MasakhaNEWSClassification (fra)": 83.06,
- "MassiveIntentClassification (fr)": 62.94,
- "MassiveScenarioClassification (fr)": 67.29
+ "AllegroReviews": 47.3,
+ "AmazonCounterfactualClassification (en-ext)": 77.41,
+ "AmazonCounterfactualClassification (en)": 77.67,
+ "AmazonCounterfactualClassification (de)": 68.58,
+ "AmazonCounterfactualClassification (ja)": 77.99,
+ "AmazonReviewsClassification (fr)": 45.61,
+ "AmazonReviewsClassification (en)": 51.97,
+ "AmazonReviewsClassification (de)": 48.05,
+ "AmazonReviewsClassification (es)": 45.77,
+ "AmazonReviewsClassification (ja)": 42.46,
+ "AmazonReviewsClassification (zh)": 40.71,
+ "AngryTweetsClassification": 58.92,
+ "CBD": 71.67,
+ "DanishPoliticalCommentsClassification": 42.69,
+ "GeoreviewClassification": 50.88,
+ "HeadlineClassification": 79.5,
+ "InappropriatenessClassification": 62.46,
+ "KinopoiskClassification": 61.84,
+ "LccSentimentClassification": 60.47,
+ "MTOPDomainClassification (fr)": 90.32,
+ "MTOPDomainClassification (en)": 94.44,
+ "MTOPDomainClassification (de)": 92.86,
+ "MTOPDomainClassification (es)": 92.76,
+ "MTOPDomainClassification (hi)": 90.81,
+ "MTOPDomainClassification (th)": 88.56,
+ "MTOPIntentClassification (fr)": 63.64,
+ "MTOPIntentClassification (en)": 69.95,
+ "MTOPIntentClassification (de)": 69.81,
+ "MTOPIntentClassification (es)": 70.37,
+ "MTOPIntentClassification (hi)": 65.94,
+ "MTOPIntentClassification (th)": 68.74,
+ "MasakhaNEWSClassification (fra)": 79.5,
+ "MasakhaNEWSClassification (amh)": 87.39,
+ "MasakhaNEWSClassification (eng)": 82.01,
+ "MasakhaNEWSClassification (hau)": 81.81,
+ "MasakhaNEWSClassification (ibo)": 74.62,
+ "MasakhaNEWSClassification (lin)": 80.17,
+ "MasakhaNEWSClassification (lug)": 77.58,
+ "MasakhaNEWSClassification (orm)": 81.14,
+ "MasakhaNEWSClassification (pcm)": 93.34,
+ "MasakhaNEWSClassification (run)": 82.86,
+ "MasakhaNEWSClassification (sna)": 90.0,
+ "MasakhaNEWSClassification (som)": 65.99,
+ "MasakhaNEWSClassification (swa)": 76.39,
+ "MasakhaNEWSClassification (tir)": 74.63,
+ "MasakhaNEWSClassification (xho)": 85.12,
+ "MasakhaNEWSClassification (yor)": 82.21,
+ "MassiveIntentClassification (fr)": 67.37,
+ "MassiveIntentClassification (zh-TW)": 63.14,
+ "MassiveIntentClassification (sw)": 55.88,
+ "MassiveIntentClassification (ur)": 61.8,
+ "MassiveIntentClassification (ja)": 69.2,
+ "MassiveIntentClassification (sq)": 63.61,
+ "MassiveIntentClassification (ar)": 57.88,
+ "MassiveIntentClassification (tl)": 62.23,
+ "MassiveIntentClassification (ru)": 69.08,
+ "MassiveIntentClassification (tr)": 67.23,
+ "MassiveIntentClassification (kn)": 59.99,
+ "MassiveIntentClassification (de)": 66.25,
+ "MassiveIntentClassification (it)": 67.58,
+ "MassiveIntentClassification (zh-CN)": 68.14,
+ "MassiveIntentClassification (id)": 67.09,
+ "MassiveIntentClassification (ms)": 64.21,
+ "MassiveIntentClassification (hy)": 59.61,
+ "MassiveIntentClassification (el)": 66.86,
+ "MassiveIntentClassification (sv)": 69.6,
+ "MassiveIntentClassification (fi)": 66.69,
+ "MassiveIntentClassification (ml)": 63.87,
+ "MassiveIntentClassification (da)": 66.49,
+ "MassiveIntentClassification (lv)": 63.81,
+ "MassiveIntentClassification (he)": 64.47,
+ "MassiveIntentClassification (nl)": 68.13,
+ "MassiveIntentClassification (th)": 64.94,
+ "MassiveIntentClassification (ko)": 65.89,
+ "MassiveIntentClassification (jv)": 55.1,
+ "MassiveIntentClassification (mn)": 58.64,
+ "MassiveIntentClassification (pt)": 68.7,
+ "MassiveIntentClassification (te)": 61.75,
+ "MassiveIntentClassification (cy)": 53.01,
+ "MassiveIntentClassification (bn)": 62.12,
+ "MassiveIntentClassification (am)": 52.95,
+ "MassiveIntentClassification (es)": 67.59,
+ "MassiveIntentClassification (ka)": 49.77,
+ "MassiveIntentClassification (km)": 43.05,
+ "MassiveIntentClassification (hi)": 66.02,
+ "MassiveIntentClassification (ro)": 64.5,
+ "MassiveIntentClassification (ta)": 60.78,
+ "MassiveIntentClassification (en)": 72.11,
+ "MassiveIntentClassification (fa)": 68.69,
+ "MassiveIntentClassification (hu)": 66.59,
+ "MassiveIntentClassification (my)": 58.0,
+ "MassiveIntentClassification (az)": 61.82,
+ "MassiveIntentClassification (is)": 60.44,
+ "MassiveIntentClassification (af)": 59.89,
+ "MassiveIntentClassification (pl)": 68.48,
+ "MassiveIntentClassification (vi)": 65.76,
+ "MassiveIntentClassification (nb)": 67.14,
+ "MassiveIntentClassification (sl)": 64.79,
+ "MassiveScenarioClassification (fr)": 73.11,
+ "MassiveScenarioClassification (jv)": 62.3,
+ "MassiveScenarioClassification (sl)": 71.41,
+ "MassiveScenarioClassification (hi)": 71.49,
+ "MassiveScenarioClassification (hy)": 64.16,
+ "MassiveScenarioClassification (da)": 74.32,
+ "MassiveScenarioClassification (he)": 69.79,
+ "MassiveScenarioClassification (fi)": 71.3,
+ "MassiveScenarioClassification (my)": 61.86,
+ "MassiveScenarioClassification (pt)": 71.8,
+ "MassiveScenarioClassification (ta)": 66.38,
+ "MassiveScenarioClassification (lv)": 69.35,
+ "MassiveScenarioClassification (tl)": 67.43,
+ "MassiveScenarioClassification (id)": 73.29,
+ "MassiveScenarioClassification (fa)": 73.25,
+ "MassiveScenarioClassification (it)": 72.5,
+ "MassiveScenarioClassification (el)": 72.69,
+ "MassiveScenarioClassification (zh-TW)": 69.52,
+ "MassiveScenarioClassification (bn)": 68.62,
+ "MassiveScenarioClassification (ja)": 75.32,
+ "MassiveScenarioClassification (de)": 74.29,
+ "MassiveScenarioClassification (mn)": 63.42,
+ "MassiveScenarioClassification (nl)": 74.14,
+ "MassiveScenarioClassification (tr)": 71.79,
+ "MassiveScenarioClassification (is)": 67.99,
+ "MassiveScenarioClassification (sq)": 70.47,
+ "MassiveScenarioClassification (nb)": 73.82,
+ "MassiveScenarioClassification (ru)": 74.26,
+ "MassiveScenarioClassification (cy)": 60.29,
+ "MassiveScenarioClassification (sw)": 63.81,
+ "MassiveScenarioClassification (th)": 71.08,
+ "MassiveScenarioClassification (af)": 67.77,
+ "MassiveScenarioClassification (ms)": 69.28,
+ "MassiveScenarioClassification (ur)": 68.42,
+ "MassiveScenarioClassification (az)": 65.33,
+ "MassiveScenarioClassification (vi)": 72.03,
+ "MassiveScenarioClassification (ro)": 70.12,
+ "MassiveScenarioClassification (es)": 72.32,
+ "MassiveScenarioClassification (hu)": 72.82,
+ "MassiveScenarioClassification (ka)": 57.6,
+ "MassiveScenarioClassification (pl)": 72.81,
+ "MassiveScenarioClassification (sv)": 75.69,
+ "MassiveScenarioClassification (am)": 59.54,
+ "MassiveScenarioClassification (te)": 67.72,
+ "MassiveScenarioClassification (ml)": 69.96,
+ "MassiveScenarioClassification (ar)": 64.82,
+ "MassiveScenarioClassification (zh-CN)": 74.61,
+ "MassiveScenarioClassification (km)": 49.84,
+ "MassiveScenarioClassification (kn)": 66.78,
+ "MassiveScenarioClassification (ko)": 73.42,
+ "MassiveScenarioClassification (en)": 76.37,
+ "NoRecClassification": 60.27,
+ "NordicLangClassification": 78.74,
+ "PAC": 68.29,
+ "PolEmo2.0-IN": 82.31,
+ "PolEmo2.0-OUT": 63.0,
+ "RuReviewsClassification": 66.83,
+ "RuSciBenchGRNTIClassification": 62.17,
+ "RuSciBenchOECDClassification": 48.22
}
]
},
@@ -1401,13 +6246,58 @@
"v_measure": [
{
"Model": "Cohere-embed-multilingual-v3.0",
- "AlloProfClusteringP2P": 63.53,
- "AlloProfClusteringS2S": 36.18,
- "HALClusteringS2S": 19.9,
+ "AlloProfClusteringP2P": 62.87,
+ "AlloProfClusteringS2S": 46.5,
+ "BlurbsClusteringP2P": 42.32,
+ "BlurbsClusteringS2S": 19.26,
+ "GeoreviewClusteringP2P": 64.4,
+ "HALClusteringS2S": 27.61,
"MLSUMClusteringP2P": 45.08,
+ "MLSUMClusteringP2P (de)": 44.04,
+ "MLSUMClusteringP2P (fr)": 45.43,
+ "MLSUMClusteringP2P (ru)": 47.6,
+ "MLSUMClusteringP2P (es)": 48.14,
"MLSUMClusteringS2S": 34.75,
- "MasakhaNEWSClusteringP2P (fra)": 53.18,
- "MasakhaNEWSClusteringS2S (fra)": 32.31
+ "MLSUMClusteringS2S (de)": 43.06,
+ "MLSUMClusteringS2S (fr)": 45.69,
+ "MLSUMClusteringS2S (ru)": 45.76,
+ "MLSUMClusteringS2S (es)": 47.72,
+ "MasakhaNEWSClusteringP2P (fra)": 64.89,
+ "MasakhaNEWSClusteringP2P (amh)": 67.44,
+ "MasakhaNEWSClusteringP2P (eng)": 71.02,
+ "MasakhaNEWSClusteringP2P (hau)": 79.53,
+ "MasakhaNEWSClusteringP2P (ibo)": 68.03,
+ "MasakhaNEWSClusteringP2P (lin)": 60.21,
+ "MasakhaNEWSClusteringP2P (lug)": 67.34,
+ "MasakhaNEWSClusteringP2P (orm)": 63.94,
+ "MasakhaNEWSClusteringP2P (pcm)": 82.02,
+ "MasakhaNEWSClusteringP2P (run)": 65.24,
+ "MasakhaNEWSClusteringP2P (sna)": 61.69,
+ "MasakhaNEWSClusteringP2P (som)": 39.54,
+ "MasakhaNEWSClusteringP2P (swa)": 36.93,
+ "MasakhaNEWSClusteringP2P (tir)": 61.32,
+ "MasakhaNEWSClusteringP2P (xho)": 45.56,
+ "MasakhaNEWSClusteringP2P (yor)": 48.43,
+ "MasakhaNEWSClusteringS2S (fra)": 55.29,
+ "MasakhaNEWSClusteringS2S (amh)": 50.53,
+ "MasakhaNEWSClusteringS2S (eng)": 57.31,
+ "MasakhaNEWSClusteringS2S (hau)": 41.86,
+ "MasakhaNEWSClusteringS2S (ibo)": 34.99,
+ "MasakhaNEWSClusteringS2S (lin)": 43.77,
+ "MasakhaNEWSClusteringS2S (lug)": 43.26,
+ "MasakhaNEWSClusteringS2S (orm)": 29.75,
+ "MasakhaNEWSClusteringS2S (pcm)": 61.13,
+ "MasakhaNEWSClusteringS2S (run)": 53.39,
+ "MasakhaNEWSClusteringS2S (sna)": 69.47,
+ "MasakhaNEWSClusteringS2S (som)": 35.33,
+ "MasakhaNEWSClusteringS2S (swa)": 12.76,
+ "MasakhaNEWSClusteringS2S (tir)": 54.47,
+ "MasakhaNEWSClusteringS2S (xho)": 31.21,
+ "MasakhaNEWSClusteringS2S (yor)": 36.85,
+ "RuSciBenchGRNTIClusteringP2P": 56.95,
+ "RuSciBenchOECDClusteringP2P": 48.45,
+ "TenKGnadClusteringP2P": 48.44,
+ "TenKGnadClusteringS2S": 37.86
}
]
},
@@ -1415,8 +6305,45 @@
"max_ap": [
{
"Model": "Cohere-embed-multilingual-v3.0",
+ "CDSC-E": 73.47,
+ "FalseFriendsGermanEnglish": 54.25,
+ "OpusparcusPC (de)": 97.44,
+ "OpusparcusPC (en)": 98.67,
+ "OpusparcusPC (fi)": 94.57,
+ "OpusparcusPC (fr)": 94.07,
+ "OpusparcusPC (ru)": 90.55,
+ "OpusparcusPC (sv)": 95.4,
+ "PSC": 99.51,
+ "PawsXPairClassification (de)": 59.47,
+ "PawsXPairClassification (en)": 64.94,
+ "PawsXPairClassification (es)": 59.28,
+ "PawsXPairClassification (fr)": 61.24,
+ "PawsXPairClassification (ja)": 51.82,
+ "PawsXPairClassification (ko)": 53.09,
+ "PawsXPairClassification (zh)": 58.59,
+ "SICK-E-PL": 79.27,
+ "TERRa": 58.5
+ },
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "CDSC-E": 73.47,
+ "FalseFriendsGermanEnglish": 54.29,
"OpusparcusPC (fr)": 94.08,
- "PawsXPairClassification (fr)": 61.26
+ "OpusparcusPC (de)": 97.47,
+ "OpusparcusPC (en)": 98.7,
+ "OpusparcusPC (fi)": 94.57,
+ "OpusparcusPC (ru)": 90.55,
+ "OpusparcusPC (sv)": 95.4,
+ "PSC": 99.51,
+ "PawsXPairClassification (fr)": 61.24,
+ "PawsXPairClassification (de)": 59.52,
+ "PawsXPairClassification (en)": 65.06,
+ "PawsXPairClassification (es)": 59.28,
+ "PawsXPairClassification (ja)": 52.08,
+ "PawsXPairClassification (ko)": 53.4,
+ "PawsXPairClassification (zh)": 58.59,
+ "SICK-E-PL": 79.27,
+ "TERRa": 58.61
},
{
"Model": "Cohere-embed-multilingual-v3.0",
@@ -1429,8 +6356,31 @@
"map": [
{
"Model": "Cohere-embed-multilingual-v3.0",
- "AlloprofReranking": 51.01,
- "SyntecReranking": 85.72
+ "AlloprofReranking": 75.41,
+ "RuBQReranking": 75.26,
+ "SyntecReranking": 91.2,
+ "T2Reranking": 67.8
+ },
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "MIRACLReranking (ar)": 77.86,
+ "MIRACLReranking (bn)": 75.74,
+ "MIRACLReranking (de)": 55.61,
+ "MIRACLReranking (en)": 63.53,
+ "MIRACLReranking (es)": 64.33,
+ "MIRACLReranking (fa)": 60.26,
+ "MIRACLReranking (fi)": 79.69,
+ "MIRACLReranking (fr)": 57.2,
+ "MIRACLReranking (hi)": 66.53,
+ "MIRACLReranking (id)": 58.68,
+ "MIRACLReranking (ja)": 66.93,
+ "MIRACLReranking (ko)": 55.89,
+ "MIRACLReranking (ru)": 64.88,
+ "MIRACLReranking (sw)": 66.32,
+ "MIRACLReranking (te)": 80.68,
+ "MIRACLReranking (th)": 77.09,
+ "MIRACLReranking (yo)": 67.58,
+ "MIRACLReranking (zh)": 53.72
}
]
},
@@ -1438,16 +6388,193 @@
"ndcg_at_10": [
{
"Model": "Cohere-embed-multilingual-v3.0",
- "AlloprofRetrieval": 38.36,
- "BSARDRetrieval": 0.14,
- "MintakaRetrieval (fr)": 25.44,
- "SyntecRetrieval": 79.27,
- "XPQARetrieval (fr)": 58.87
+ "AILACasedocs": 28.31,
+ "AILAStatutes": 29.69,
+ "ARCChallenge": 11.6,
+ "AlloprofRetrieval": 51.51,
+ "AlphaNLI": 18.47,
+ "AppsRetrieval": 31.91,
+ "BSARDRetrieval": 22.91,
+ "CmedqaRetrieval": 30.2,
+ "CodeFeedbackMT": 42.9,
+ "CodeFeedbackST": 74.19,
+ "CodeSearchNetCCRetrieval (python)": 66.39,
+ "CodeSearchNetCCRetrieval (javascript)": 60.54,
+ "CodeSearchNetCCRetrieval (go)": 46.68,
+ "CodeSearchNetCCRetrieval (ruby)": 61.72,
+ "CodeSearchNetCCRetrieval (java)": 60.47,
+ "CodeSearchNetCCRetrieval (php)": 49.65,
+ "CodeSearchNetRetrieval (python)": 89.87,
+ "CodeSearchNetRetrieval (javascript)": 76.56,
+ "CodeSearchNetRetrieval (go)": 92.42,
+ "CodeSearchNetRetrieval (ruby)": 81.6,
+ "CodeSearchNetRetrieval (java)": 80.11,
+ "CodeSearchNetRetrieval (php)": 84.47,
+ "CodeTransOceanContest": 70.25,
+ "CodeTransOceanDL": 30.14,
+ "CosQA": 32.58,
+ "CovidRetrieval": 77.12,
+ "GerDaLIR": 8.62,
+ "GerDaLIRSmall": 19.6,
+ "GermanQuAD-Retrieval": 94.31,
+ "HellaSwag": 30.57,
+ "LEMBNarrativeQARetrieval": 21.94,
+ "LEMBQMSumRetrieval": 23.05,
+ "LEMBSummScreenFDRetrieval": 70.03,
+ "LEMBWikimQARetrieval": 56.59,
+ "LeCaRDv2": 58.31,
+ "LegalBenchConsumerContractsQA": 75.9,
+ "LegalBenchCorporateLobbying": 93.76,
+ "LegalQuAD": 46.85,
+ "LegalSummarization": 64.22,
+ "MintakaRetrieval (fr)": 34.56,
+ "MintakaRetrieval (ar)": 25.53,
+ "MintakaRetrieval (de)": 34.09,
+ "MintakaRetrieval (es)": 33.82,
+ "MintakaRetrieval (hi)": 27.88,
+ "MintakaRetrieval (it)": 35.76,
+ "MintakaRetrieval (ja)": 26.37,
+ "MintakaRetrieval (pt)": 36.2,
+ "PIQA": 30.62,
+ "Quail": 5.42,
+ "RARbCode": 60.08,
+ "RARbMath": 71.31,
+ "RiaNewsRetrieval": 82.5,
+ "RuBQRetrieval": 73.04,
+ "SIQA": 5.58,
+ "SciFact-PL": 65.68,
+ "SpartQA": 4.66,
+ "StackOverflowQA": 89.42,
+ "SyntecRetrieval": 88.59,
+ "SyntheticText2SQL": 59.79,
+ "TRECCOVID-PL": 80.26,
+ "TempReasonL1": 0.91,
+ "TempReasonL2Fact": 34.23,
+ "TempReasonL2Pure": 1.92,
+ "TempReasonL3Fact": 29.08,
+ "TempReasonL3Pure": 7.81,
+ "WinoGrande": 58.44,
+ "XMarket (de)": 20.17,
+ "XMarket (en)": 26.07,
+ "XMarket (es)": 19.47,
+ "XPQARetrieval (fr)": 58.87,
+ "XPQARetrieval (ara-ara)": 47.98,
+ "XPQARetrieval (eng-ara)": 33.72,
+ "XPQARetrieval (ara-eng)": 44.1,
+ "XPQARetrieval (deu-deu)": 79.11,
+ "XPQARetrieval (eng-deu)": 51.12,
+ "XPQARetrieval (deu-eng)": 74.31,
+ "XPQARetrieval (spa-spa)": 64.45,
+ "XPQARetrieval (eng-spa)": 42.89,
+ "XPQARetrieval (spa-eng)": 59.59,
+ "XPQARetrieval (fra-fra)": 69.72,
+ "XPQARetrieval (eng-fra)": 46.91,
+ "XPQARetrieval (fra-eng)": 65.47,
+ "XPQARetrieval (hin-hin)": 74.06,
+ "XPQARetrieval (eng-hin)": 37.98,
+ "XPQARetrieval (hin-eng)": 71.01,
+ "XPQARetrieval (ita-ita)": 77.46,
+ "XPQARetrieval (eng-ita)": 44.84,
+ "XPQARetrieval (ita-eng)": 69.11,
+ "XPQARetrieval (jpn-jpn)": 75.36,
+ "XPQARetrieval (eng-jpn)": 43.83,
+ "XPQARetrieval (jpn-eng)": 70.16,
+ "XPQARetrieval (kor-kor)": 38.21,
+ "XPQARetrieval (eng-kor)": 37.74,
+ "XPQARetrieval (kor-eng)": 36.38,
+ "XPQARetrieval (pol-pol)": 51.53,
+ "XPQARetrieval (eng-pol)": 34.7,
+ "XPQARetrieval (pol-eng)": 47.96,
+ "XPQARetrieval (por-por)": 48.46,
+ "XPQARetrieval (eng-por)": 28.12,
+ "XPQARetrieval (por-eng)": 47.01,
+ "XPQARetrieval (tam-tam)": 45.46,
+ "XPQARetrieval (eng-tam)": 22.35,
+ "XPQARetrieval (tam-eng)": 39.33,
+ "XPQARetrieval (cmn-cmn)": 65.78,
+ "XPQARetrieval (eng-cmn)": 34.24,
+ "XPQARetrieval (cmn-eng)": 57.79
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "CDSC-R": 90.43,
+ "GermanSTSBenchmark": 81.1,
+ "RUParaPhraserSTS": 71.11,
+ "RuSTSBenchmarkSTS": 81.91,
+ "SICK-R-PL": 76.21,
+ "SICKFr": 79.24,
+ "STS22 (zh-en)": 74.01,
+ "STS22 (ar)": 60.53,
+ "STS22 (es-it)": 76.51,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (fr)": 82.76,
+ "STS22 (de)": 61.71,
+ "STS22 (es)": 68.78,
+ "STS22 (ru)": 65.39,
+ "STS22 (en)": 69.63,
+ "STS22 (pl-en)": 78.24,
+ "STS22 (zh)": 68.83,
+ "STS22 (de-en)": 61.8,
+ "STS22 (tr)": 66.67,
+ "STS22 (pl)": 41.69,
+ "STS22 (de-fr)": 65.96,
+ "STS22 (it)": 79.78,
+ "STS22 (de-pl)": 55.89,
+ "STS22 (es-en)": 79.94,
+ "STSB": 80.81,
+ "STSBenchmarkMultilingualSTS (en)": 86.7,
+ "STSBenchmarkMultilingualSTS (it)": 80.41,
+ "STSBenchmarkMultilingualSTS (de)": 82.3,
+ "STSBenchmarkMultilingualSTS (nl)": 80.29,
+ "STSBenchmarkMultilingualSTS (pt)": 76.18,
+ "STSBenchmarkMultilingualSTS (zh)": 80.14,
+ "STSBenchmarkMultilingualSTS (es)": 82.82,
+ "STSBenchmarkMultilingualSTS (fr)": 81.86,
+ "STSBenchmarkMultilingualSTS (ru)": 81.61,
+ "STSBenchmarkMultilingualSTS (pl)": 79.44
+ },
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "CDSC-R": 90.43,
+ "GermanSTSBenchmark": 81.1,
+ "RUParaPhraserSTS": 71.11,
+ "RuSTSBenchmarkSTS": 81.91,
+ "SICK-R-PL": 76.21,
+ "SICKFr": 79.24,
+ "STS22 (zh-en)": 74.01,
+ "STS22 (ar)": 60.53,
+ "STS22 (es-it)": 76.51,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (fr)": 82.76,
+ "STS22 (de)": 61.71,
+ "STS22 (es)": 68.78,
+ "STS22 (ru)": 65.39,
+ "STS22 (en)": 69.63,
+ "STS22 (pl-en)": 78.24,
+ "STS22 (zh)": 68.83,
+ "STS22 (de-en)": 61.8,
+ "STS22 (tr)": 66.67,
+ "STS22 (pl)": 41.69,
+ "STS22 (de-fr)": 65.96,
+ "STS22 (it)": 79.78,
+ "STS22 (de-pl)": 55.89,
+ "STS22 (es-en)": 79.94,
+ "STSB": 80.81,
+ "STSBenchmarkMultilingualSTS (en)": 86.7,
+ "STSBenchmarkMultilingualSTS (it)": 80.41,
+ "STSBenchmarkMultilingualSTS (de)": 82.3,
+ "STSBenchmarkMultilingualSTS (nl)": 80.29,
+ "STSBenchmarkMultilingualSTS (pt)": 76.18,
+ "STSBenchmarkMultilingualSTS (zh)": 80.14,
+ "STSBenchmarkMultilingualSTS (es)": 82.82,
+ "STSBenchmarkMultilingualSTS (fr)": 81.86,
+ "STSBenchmarkMultilingualSTS (ru)": 81.61,
+ "STSBenchmarkMultilingualSTS (pl)": 79.44
+ },
{
"Model": "Cohere-embed-multilingual-v3.0",
"SICKFr": 79.23,
@@ -1461,14 +6588,35 @@
{
"Model": "Cohere-embed-multilingual-v3.0",
"SummEvalFr": 31.26
+ },
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "SummEvalFr": 31.24
+ },
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "SummEvalFr": 31.24
}
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "CEDRClassification": 45.67,
+ "SensitiveTopicsClassification": 30.83
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "Cohere-embed-multilingual-v3.0",
+ "Core17InstructionRetrieval": 0.29,
+ "News21InstructionRetrieval": 0.83,
+ "Robust04InstructionRetrieval": -6.8
+ }
+ ]
}
},
"DeepPavlov__distilrubert-small-cased-conversational": {
@@ -1476,6 +6624,7 @@
"f1": [
{
"Model": "distilrubert-small-cased-conversational",
+ "BornholmBitextMining": 13.33,
"Tatoeba (rus-eng)": 24.16
}
]
@@ -1484,15 +6633,169 @@
"accuracy": [
{
"Model": "distilrubert-small-cased-conversational",
+ "AllegroReviews": 23.31,
+ "AmazonCounterfactualClassification (en-ext)": 69.65,
+ "AmazonCounterfactualClassification (en)": 72.24,
+ "AmazonCounterfactualClassification (de)": 63.37,
+ "AmazonCounterfactualClassification (ja)": 56.24,
+ "AmazonPolarityClassification": 57.61,
+ "AmazonReviewsClassification (en)": 28.83,
+ "AmazonReviewsClassification (de)": 25.01,
+ "AmazonReviewsClassification (es)": 24.62,
+ "AmazonReviewsClassification (fr)": 25.9,
+ "AmazonReviewsClassification (ja)": 21.21,
+ "AmazonReviewsClassification (zh)": 21.67,
+ "AngryTweetsClassification": 42.73,
+ "Banking77Classification": 64.28,
+ "CBD": 52.53,
+ "DanishPoliticalCommentsClassification": 27.46,
+ "EmotionClassification": 22.08,
"GeoreviewClassification": 38.95,
"HeadlineClassification": 75.59,
+ "ImdbClassification": 54.55,
"InappropriatenessClassification": 60.68,
"KinopoiskClassification": 49.67,
+ "LccSentimentClassification": 42.87,
+ "MTOPDomainClassification (en)": 71.22,
+ "MTOPDomainClassification (de)": 54.5,
+ "MTOPDomainClassification (es)": 62.17,
+ "MTOPDomainClassification (fr)": 54.21,
+ "MTOPDomainClassification (hi)": 23.0,
+ "MTOPDomainClassification (th)": 15.66,
+ "MTOPIntentClassification (en)": 53.1,
+ "MTOPIntentClassification (de)": 46.41,
+ "MTOPIntentClassification (es)": 46.94,
+ "MTOPIntentClassification (fr)": 39.64,
+ "MTOPIntentClassification (hi)": 5.12,
+ "MTOPIntentClassification (th)": 4.9,
+ "MasakhaNEWSClassification (amh)": 27.79,
+ "MasakhaNEWSClassification (eng)": 58.73,
+ "MasakhaNEWSClassification (fra)": 40.14,
+ "MasakhaNEWSClassification (hau)": 46.64,
+ "MasakhaNEWSClassification (ibo)": 34.13,
+ "MasakhaNEWSClassification (lin)": 46.11,
+ "MasakhaNEWSClassification (lug)": 43.72,
+ "MasakhaNEWSClassification (orm)": 42.98,
+ "MasakhaNEWSClassification (pcm)": 72.98,
+ "MasakhaNEWSClassification (run)": 39.6,
+ "MasakhaNEWSClassification (sna)": 56.26,
+ "MasakhaNEWSClassification (som)": 28.37,
+ "MasakhaNEWSClassification (swa)": 34.12,
+ "MasakhaNEWSClassification (tir)": 24.49,
+ "MasakhaNEWSClassification (xho)": 52.05,
+ "MasakhaNEWSClassification (yor)": 40.02,
"MassiveIntentClassification (ru)": 63.12,
+ "MassiveIntentClassification (tr)": 35.68,
+ "MassiveIntentClassification (fi)": 38.38,
+ "MassiveIntentClassification (hi)": 3.66,
+ "MassiveIntentClassification (ar)": 10.51,
+ "MassiveIntentClassification (id)": 42.01,
+ "MassiveIntentClassification (de)": 38.92,
+ "MassiveIntentClassification (sq)": 39.72,
+ "MassiveIntentClassification (tl)": 38.01,
+ "MassiveIntentClassification (pt)": 41.53,
+ "MassiveIntentClassification (te)": 2.92,
+ "MassiveIntentClassification (ro)": 35.01,
+ "MassiveIntentClassification (am)": 3.09,
+ "MassiveIntentClassification (hy)": 3.44,
+ "MassiveIntentClassification (th)": 4.5,
+ "MassiveIntentClassification (hu)": 35.67,
+ "MassiveIntentClassification (sw)": 37.86,
+ "MassiveIntentClassification (nl)": 38.91,
+ "MassiveIntentClassification (nb)": 39.67,
+ "MassiveIntentClassification (kn)": 3.64,
+ "MassiveIntentClassification (fa)": 9.91,
+ "MassiveIntentClassification (mn)": 36.35,
+ "MassiveIntentClassification (sl)": 41.18,
+ "MassiveIntentClassification (ta)": 3.02,
+ "MassiveIntentClassification (ja)": 5.08,
+ "MassiveIntentClassification (sv)": 40.52,
+ "MassiveIntentClassification (bn)": 3.2,
+ "MassiveIntentClassification (jv)": 37.07,
+ "MassiveIntentClassification (km)": 4.63,
+ "MassiveIntentClassification (af)": 39.21,
+ "MassiveIntentClassification (he)": 17.66,
+ "MassiveIntentClassification (my)": 3.94,
+ "MassiveIntentClassification (zh-TW)": 7.38,
+ "MassiveIntentClassification (da)": 41.31,
+ "MassiveIntentClassification (fr)": 38.49,
+ "MassiveIntentClassification (lv)": 37.51,
+ "MassiveIntentClassification (is)": 35.77,
+ "MassiveIntentClassification (es)": 38.33,
+ "MassiveIntentClassification (ur)": 9.69,
+ "MassiveIntentClassification (ml)": 3.03,
+ "MassiveIntentClassification (cy)": 37.7,
+ "MassiveIntentClassification (zh-CN)": 6.54,
+ "MassiveIntentClassification (en)": 51.68,
+ "MassiveIntentClassification (el)": 22.94,
+ "MassiveIntentClassification (vi)": 28.04,
+ "MassiveIntentClassification (ka)": 2.84,
+ "MassiveIntentClassification (ko)": 2.9,
+ "MassiveIntentClassification (it)": 43.34,
+ "MassiveIntentClassification (az)": 35.64,
+ "MassiveIntentClassification (pl)": 38.29,
+ "MassiveIntentClassification (ms)": 40.91,
"MassiveScenarioClassification (ru)": 68.08,
+ "MassiveScenarioClassification (ml)": 7.2,
+ "MassiveScenarioClassification (he)": 23.39,
+ "MassiveScenarioClassification (da)": 39.88,
+ "MassiveScenarioClassification (lv)": 35.41,
+ "MassiveScenarioClassification (my)": 9.7,
+ "MassiveScenarioClassification (ja)": 9.41,
+ "MassiveScenarioClassification (az)": 33.91,
+ "MassiveScenarioClassification (de)": 38.2,
+ "MassiveScenarioClassification (es)": 38.77,
+ "MassiveScenarioClassification (tr)": 32.96,
+ "MassiveScenarioClassification (zh-CN)": 10.71,
+ "MassiveScenarioClassification (sw)": 39.73,
+ "MassiveScenarioClassification (fr)": 38.54,
+ "MassiveScenarioClassification (hy)": 8.77,
+ "MassiveScenarioClassification (is)": 34.28,
+ "MassiveScenarioClassification (pl)": 36.87,
+ "MassiveScenarioClassification (zh-TW)": 12.68,
+ "MassiveScenarioClassification (af)": 39.15,
+ "MassiveScenarioClassification (nl)": 39.05,
+ "MassiveScenarioClassification (am)": 7.69,
+ "MassiveScenarioClassification (id)": 40.03,
+ "MassiveScenarioClassification (sq)": 38.93,
+ "MassiveScenarioClassification (cy)": 39.15,
+ "MassiveScenarioClassification (ro)": 36.54,
+ "MassiveScenarioClassification (km)": 9.91,
+ "MassiveScenarioClassification (bn)": 7.78,
+ "MassiveScenarioClassification (jv)": 37.71,
+ "MassiveScenarioClassification (it)": 41.1,
+ "MassiveScenarioClassification (mn)": 36.03,
+ "MassiveScenarioClassification (ko)": 7.46,
+ "MassiveScenarioClassification (th)": 9.65,
+ "MassiveScenarioClassification (sl)": 41.38,
+ "MassiveScenarioClassification (ur)": 14.69,
+ "MassiveScenarioClassification (sv)": 39.18,
+ "MassiveScenarioClassification (en)": 55.21,
+ "MassiveScenarioClassification (tl)": 35.29,
+ "MassiveScenarioClassification (ms)": 42.59,
+ "MassiveScenarioClassification (fa)": 13.46,
+ "MassiveScenarioClassification (nb)": 38.43,
+ "MassiveScenarioClassification (el)": 24.37,
+ "MassiveScenarioClassification (te)": 7.29,
+ "MassiveScenarioClassification (ka)": 7.29,
+ "MassiveScenarioClassification (kn)": 8.07,
+ "MassiveScenarioClassification (vi)": 30.81,
+ "MassiveScenarioClassification (hu)": 34.26,
+ "MassiveScenarioClassification (pt)": 40.5,
+ "MassiveScenarioClassification (ar)": 15.72,
+ "MassiveScenarioClassification (hi)": 8.73,
+ "MassiveScenarioClassification (ta)": 6.82,
+ "MassiveScenarioClassification (fi)": 37.41,
+ "NoRecClassification": 39.2,
+ "NordicLangClassification": 59.34,
+ "PAC": 57.98,
+ "PolEmo2.0-IN": 40.42,
+ "PolEmo2.0-OUT": 30.89,
"RuReviewsClassification": 54.05,
"RuSciBenchGRNTIClassification": 48.53,
- "RuSciBenchOECDClassification": 37.65
+ "RuSciBenchOECDClassification": 37.65,
+ "ToxicConversationsClassification": 58.89,
+ "TweetSentimentExtractionClassification": 43.44
}
]
},
@@ -1500,11 +6803,67 @@
"v_measure": [
{
"Model": "distilrubert-small-cased-conversational",
+ "AlloProfClusteringP2P": 31.59,
+ "AlloProfClusteringS2S": 21.2,
+ "ArxivClusteringP2P": 18.41,
+ "ArxivClusteringS2S": 12.81,
+ "BiorxivClusteringP2P": 13.07,
+ "BiorxivClusteringS2S": 7.72,
+ "BlurbsClusteringP2P": 8.59,
+ "BlurbsClusteringS2S": 7.36,
"GeoreviewClusteringP2P": 43.26,
+ "HALClusteringS2S": 3.35,
"MLSUMClusteringP2P (ru)": 50.08,
+ "MLSUMClusteringP2P (de)": 7.49,
+ "MLSUMClusteringP2P (fr)": 20.49,
+ "MLSUMClusteringP2P (es)": 27.16,
"MLSUMClusteringS2S (ru)": 51.12,
+ "MLSUMClusteringS2S (de)": 7.63,
+ "MLSUMClusteringS2S (fr)": 20.18,
+ "MLSUMClusteringS2S (es)": 26.82,
+ "MasakhaNEWSClusteringP2P (amh)": 40.28,
+ "MasakhaNEWSClusteringP2P (eng)": 7.58,
+ "MasakhaNEWSClusteringP2P (fra)": 22.87,
+ "MasakhaNEWSClusteringP2P (hau)": 4.18,
+ "MasakhaNEWSClusteringP2P (ibo)": 20.88,
+ "MasakhaNEWSClusteringP2P (lin)": 42.94,
+ "MasakhaNEWSClusteringP2P (lug)": 44.9,
+ "MasakhaNEWSClusteringP2P (orm)": 27.54,
+ "MasakhaNEWSClusteringP2P (pcm)": 22.4,
+ "MasakhaNEWSClusteringP2P (run)": 42.28,
+ "MasakhaNEWSClusteringP2P (sna)": 42.22,
+ "MasakhaNEWSClusteringP2P (som)": 26.63,
+ "MasakhaNEWSClusteringP2P (swa)": 5.87,
+ "MasakhaNEWSClusteringP2P (tir)": 42.99,
+ "MasakhaNEWSClusteringP2P (xho)": 21.02,
+ "MasakhaNEWSClusteringP2P (yor)": 21.54,
+ "MasakhaNEWSClusteringS2S (amh)": 40.06,
+ "MasakhaNEWSClusteringS2S (eng)": 8.71,
+ "MasakhaNEWSClusteringS2S (fra)": 22.43,
+ "MasakhaNEWSClusteringS2S (hau)": 6.42,
+ "MasakhaNEWSClusteringS2S (ibo)": 22.5,
+ "MasakhaNEWSClusteringS2S (lin)": 52.19,
+ "MasakhaNEWSClusteringS2S (lug)": 43.89,
+ "MasakhaNEWSClusteringS2S (orm)": 23.6,
+ "MasakhaNEWSClusteringS2S (pcm)": 42.75,
+ "MasakhaNEWSClusteringS2S (run)": 44.59,
+ "MasakhaNEWSClusteringS2S (sna)": 42.25,
+ "MasakhaNEWSClusteringS2S (som)": 24.67,
+ "MasakhaNEWSClusteringS2S (swa)": 14.31,
+ "MasakhaNEWSClusteringS2S (tir)": 44.02,
+ "MasakhaNEWSClusteringS2S (xho)": 25.97,
+ "MasakhaNEWSClusteringS2S (yor)": 21.79,
+ "MedrxivClusteringP2P": 18.17,
+ "MedrxivClusteringS2S": 15.25,
+ "RedditClustering": 11.95,
+ "RedditClusteringP2P": 22.76,
"RuSciBenchGRNTIClusteringP2P": 37.84,
- "RuSciBenchOECDClusteringP2P": 34.12
+ "RuSciBenchOECDClusteringP2P": 34.12,
+ "StackExchangeClustering": 21.41,
+ "StackExchangeClusteringP2P": 22.98,
+ "TenKGnadClusteringP2P": 7.42,
+ "TenKGnadClusteringS2S": 4.29,
+ "TwentyNewsgroupsClustering": 11.65
}
]
},
@@ -1512,13 +6871,51 @@
"max_ap": [
{
"Model": "distilrubert-small-cased-conversational",
+ "CDSC-E": 46.48,
+ "FalseFriendsGermanEnglish": 48.72,
"OpusparcusPC (ru)": 84.35,
- "TERRa": 52.48
+ "OpusparcusPC (de)": 88.05,
+ "OpusparcusPC (en)": 93.94,
+ "OpusparcusPC (fi)": 82.18,
+ "OpusparcusPC (fr)": 85.23,
+ "OpusparcusPC (sv)": 81.83,
+ "PSC": 62.14,
+ "PawsXPairClassification (de)": 48.55,
+ "PawsXPairClassification (en)": 43.39,
+ "PawsXPairClassification (es)": 47.18,
+ "PawsXPairClassification (fr)": 49.14,
+ "PawsXPairClassification (ja)": 45.87,
+ "PawsXPairClassification (ko)": 46.69,
+ "PawsXPairClassification (zh)": 49.73,
+ "SICK-E-PL": 45.62,
+ "SprintDuplicateQuestions": 33.2,
+ "TERRa": 52.48,
+ "TwitterSemEval2015": 50.27,
+ "TwitterURLCorpus": 63.78
},
{
"Model": "distilrubert-small-cased-conversational",
+ "CDSC-E": 46.84,
+ "FalseFriendsGermanEnglish": 49.57,
"OpusparcusPC (ru)": 84.35,
- "TERRa": 53.02
+ "OpusparcusPC (de)": 88.57,
+ "OpusparcusPC (en)": 93.94,
+ "OpusparcusPC (fi)": 82.18,
+ "OpusparcusPC (fr)": 85.23,
+ "OpusparcusPC (sv)": 81.84,
+ "PSC": 62.23,
+ "PawsXPairClassification (de)": 48.55,
+ "PawsXPairClassification (en)": 47.47,
+ "PawsXPairClassification (es)": 47.18,
+ "PawsXPairClassification (fr)": 49.14,
+ "PawsXPairClassification (ja)": 47.68,
+ "PawsXPairClassification (ko)": 46.69,
+ "PawsXPairClassification (zh)": 50.2,
+ "SICK-E-PL": 45.78,
+ "SprintDuplicateQuestions": 33.2,
+ "TERRa": 53.02,
+ "TwitterSemEval2015": 50.38,
+ "TwitterURLCorpus": 63.78
}
]
},
@@ -1526,11 +6923,35 @@
"map": [
{
"Model": "distilrubert-small-cased-conversational",
- "MIRACLReranking (ru)": 13.09
+ "AlloprofReranking": 32.82,
+ "AskUbuntuDupQuestions": 45.48,
+ "MindSmallReranking": 24.95,
+ "RuBQReranking": 42.58,
+ "SciDocsRR": 47.81,
+ "StackOverflowDupQuestions": 33.41,
+ "SyntecReranking": 36.86,
+ "T2Reranking": 52.07
},
{
"Model": "distilrubert-small-cased-conversational",
- "RuBQReranking": 42.58
+ "MIRACLReranking (ru)": 13.09,
+ "MIRACLReranking (ar)": 3.73,
+ "MIRACLReranking (bn)": 2.86,
+ "MIRACLReranking (de)": 4.48,
+ "MIRACLReranking (en)": 10.73,
+ "MIRACLReranking (es)": 7.05,
+ "MIRACLReranking (fa)": 3.92,
+ "MIRACLReranking (fi)": 12.52,
+ "MIRACLReranking (fr)": 5.53,
+ "MIRACLReranking (hi)": 4.02,
+ "MIRACLReranking (id)": 8.43,
+ "MIRACLReranking (ja)": 2.56,
+ "MIRACLReranking (ko)": 3.66,
+ "MIRACLReranking (sw)": 11.13,
+ "MIRACLReranking (te)": 2.57,
+ "MIRACLReranking (th)": 2.15,
+ "MIRACLReranking (yo)": 9.3,
+ "MIRACLReranking (zh)": 2.83
}
]
},
@@ -1538,9 +6959,143 @@
"ndcg_at_10": [
{
"Model": "distilrubert-small-cased-conversational",
+ "AILACasedocs": 9.74,
+ "AILAStatutes": 14.09,
+ "ARCChallenge": 2.13,
+ "AlloprofRetrieval": 0.35,
+ "AlphaNLI": 2.5,
+ "AppsRetrieval": 0.07,
+ "ArguAna": 15.13,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 0.3,
+ "CmedqaRetrieval": 0.12,
+ "CodeFeedbackMT": 11.84,
+ "CodeFeedbackST": 7.42,
+ "CodeSearchNetCCRetrieval (python)": 13.81,
+ "CodeSearchNetCCRetrieval (javascript)": 22.77,
+ "CodeSearchNetCCRetrieval (go)": 12.62,
+ "CodeSearchNetCCRetrieval (ruby)": 26.22,
+ "CodeSearchNetCCRetrieval (java)": 16.29,
+ "CodeSearchNetCCRetrieval (php)": 14.17,
+ "CodeSearchNetRetrieval (python)": 19.08,
+ "CodeSearchNetRetrieval (javascript)": 10.38,
+ "CodeSearchNetRetrieval (go)": 7.54,
+ "CodeSearchNetRetrieval (ruby)": 12.38,
+ "CodeSearchNetRetrieval (java)": 11.9,
+ "CodeSearchNetRetrieval (php)": 11.26,
+ "CodeTransOceanContest": 15.26,
+ "CodeTransOceanDL": 31.1,
+ "CosQA": 2.93,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 1.58,
+ "FEVER": 1.69,
+ "FiQA2018": 1.66,
+ "GerDaLIR": 0.22,
+ "GerDaLIRSmall": 0.51,
+ "GermanQuAD-Retrieval": 5.35,
+ "HellaSwag": 5.59,
+ "HotpotQA": 5.82,
+ "LEMBNarrativeQARetrieval": 1.9,
+ "LEMBQMSumRetrieval": 6.27,
+ "LEMBSummScreenFDRetrieval": 5.86,
+ "LEMBWikimQARetrieval": 16.75,
+ "LeCaRDv2": 7.95,
+ "LegalBenchConsumerContractsQA": 7.61,
+ "LegalBenchCorporateLobbying": 40.71,
+ "LegalQuAD": 3.7,
+ "LegalSummarization": 34.95,
"MIRACLRetrieval (ru)": 2.39,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.28,
+ "MIRACLRetrieval (en)": 1.23,
+ "MIRACLRetrieval (es)": 0.08,
+ "MIRACLRetrieval (fa)": 0.03,
+ "MIRACLRetrieval (fi)": 1.02,
+ "MIRACLRetrieval (fr)": 0.16,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.28,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.22,
+ "MIRACLRetrieval (sw)": 1.95,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 4.42,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 0.63,
+ "MintakaRetrieval (ar)": 0.51,
+ "MintakaRetrieval (de)": 0.75,
+ "MintakaRetrieval (es)": 0.69,
+ "MintakaRetrieval (fr)": 1.06,
+ "MintakaRetrieval (hi)": 0.42,
+ "MintakaRetrieval (it)": 1.21,
+ "MintakaRetrieval (ja)": 0.21,
+ "MintakaRetrieval (pt)": 0.86,
+ "NFCorpus": 3.51,
+ "NQ": 0.1,
+ "PIQA": 5.05,
+ "Quail": 0.08,
+ "QuoraRetrieval": 65.11,
+ "RARbCode": 0.0,
+ "RARbMath": 6.96,
"RiaNewsRetrieval": 4.14,
- "RuBQRetrieval": 10.6
+ "RuBQRetrieval": 10.6,
+ "SCIDOCS": 0.48,
+ "SIQA": 0.1,
+ "SciFact": 6.63,
+ "SciFact-PL": 0.71,
+ "SpartQA": 5.81,
+ "StackOverflowQA": 13.99,
+ "SyntecRetrieval": 10.17,
+ "SyntheticText2SQL": 3.4,
+ "TRECCOVID": 15.18,
+ "TRECCOVID-PL": 4.67,
+ "TempReasonL1": 0.88,
+ "TempReasonL2Fact": 1.91,
+ "TempReasonL2Pure": 0.09,
+ "TempReasonL3Fact": 2.41,
+ "TempReasonL3Pure": 2.14,
+ "Touche2020": 1.9,
+ "WinoGrande": 0.62,
+ "XMarket (de)": 3.29,
+ "XMarket (en)": 3.24,
+ "XMarket (es)": 2.58,
+ "XPQARetrieval (ara-ara)": 1.83,
+ "XPQARetrieval (eng-ara)": 0.67,
+ "XPQARetrieval (ara-eng)": 1.23,
+ "XPQARetrieval (deu-deu)": 19.74,
+ "XPQARetrieval (eng-deu)": 0.54,
+ "XPQARetrieval (deu-eng)": 1.64,
+ "XPQARetrieval (spa-spa)": 13.1,
+ "XPQARetrieval (eng-spa)": 0.9,
+ "XPQARetrieval (spa-eng)": 1.78,
+ "XPQARetrieval (fra-fra)": 19.06,
+ "XPQARetrieval (eng-fra)": 1.01,
+ "XPQARetrieval (fra-eng)": 2.85,
+ "XPQARetrieval (hin-hin)": 4.81,
+ "XPQARetrieval (eng-hin)": 1.63,
+ "XPQARetrieval (hin-eng)": 0.93,
+ "XPQARetrieval (ita-ita)": 30.64,
+ "XPQARetrieval (eng-ita)": 2.4,
+ "XPQARetrieval (ita-eng)": 4.45,
+ "XPQARetrieval (jpn-jpn)": 1.8,
+ "XPQARetrieval (eng-jpn)": 0.53,
+ "XPQARetrieval (jpn-eng)": 0.82,
+ "XPQARetrieval (kor-kor)": 1.95,
+ "XPQARetrieval (eng-kor)": 1.11,
+ "XPQARetrieval (kor-eng)": 1.03,
+ "XPQARetrieval (pol-pol)": 10.77,
+ "XPQARetrieval (eng-pol)": 0.84,
+ "XPQARetrieval (pol-eng)": 2.65,
+ "XPQARetrieval (por-por)": 11.69,
+ "XPQARetrieval (eng-por)": 1.09,
+ "XPQARetrieval (por-eng)": 1.8,
+ "XPQARetrieval (tam-tam)": 0.81,
+ "XPQARetrieval (eng-tam)": 1.15,
+ "XPQARetrieval (tam-eng)": 0.54,
+ "XPQARetrieval (cmn-cmn)": 4.74,
+ "XPQARetrieval (eng-cmn)": 0.63,
+ "XPQARetrieval (cmn-eng)": 0.75
}
]
},
@@ -1548,15 +7103,129 @@
"cosine_spearman": [
{
"Model": "distilrubert-small-cased-conversational",
+ "BIOSSES": 47.99,
+ "CDSC-R": 66.98,
+ "GermanSTSBenchmark": 40.07,
+ "SICK-R": 56.7,
+ "SICK-R-PL": 43.36,
+ "SICKFr": 54.27,
+ "STS12": 40.83,
+ "STS13": 52.22,
+ "STS14": 46.76,
+ "STS15": 60.64,
+ "STS16": 64.37,
+ "STS17 (ko-ko)": 7.82,
+ "STS17 (fr-en)": 26.82,
+ "STS17 (it-en)": 22.21,
+ "STS17 (ar-ar)": 27.9,
+ "STS17 (es-es)": 47.95,
+ "STS17 (en-tr)": 19.37,
+ "STS17 (en-en)": 60.3,
+ "STS17 (nl-en)": 18.33,
+ "STS17 (es-en)": 16.25,
+ "STS17 (en-de)": 16.62,
+ "STS17 (en-ar)": 11.54,
+ "STS22 (pl-en)": 14.03,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (ar)": 26.69,
+ "STS22 (fr)": 50.98,
+ "STS22 (en)": 44.27,
+ "STS22 (pl)": 16.55,
+ "STS22 (de)": 23.32,
+ "STS22 (de-en)": 27.38,
+ "STS22 (it)": 38.44,
+ "STS22 (es)": 37.91,
+ "STS22 (zh-en)": 7.34,
+ "STS22 (de-pl)": -29.47,
+ "STS22 (tr)": 10.5,
+ "STS22 (es-en)": 21.73,
+ "STS22 (es-it)": 20.72,
+ "STS22 (de-fr)": 1.78,
+ "STS22 (zh)": 18.15,
+ "STSB": 17.17,
+ "STSBenchmark": 55.97,
+ "STSBenchmarkMultilingualSTS (en)": 55.97,
+ "STSBenchmarkMultilingualSTS (es)": 49.87,
+ "STSBenchmarkMultilingualSTS (nl)": 46.12,
+ "STSBenchmarkMultilingualSTS (pl)": 46.61,
+ "STSBenchmarkMultilingualSTS (zh)": 15.41,
+ "STSBenchmarkMultilingualSTS (fr)": 51.0,
+ "STSBenchmarkMultilingualSTS (it)": 48.59,
+ "STSBenchmarkMultilingualSTS (pt)": 45.29,
+ "STSBenchmarkMultilingualSTS (de)": 42.19
+ },
+ {
+ "Model": "distilrubert-small-cased-conversational",
+ "BIOSSES": 47.99,
+ "CDSC-R": 66.98,
+ "GermanSTSBenchmark": 40.07,
"RUParaPhraserSTS": 55.01,
"RuSTSBenchmarkSTS": 61.72,
+ "SICK-R": 56.7,
+ "SICK-R-PL": 43.36,
+ "SICKFr": 54.27,
+ "STS12": 40.83,
+ "STS13": 52.22,
+ "STS14": 46.76,
+ "STS15": 60.64,
+ "STS16": 64.37,
+ "STS17 (ko-ko)": 7.35,
+ "STS17 (fr-en)": 26.82,
+ "STS17 (it-en)": 22.21,
+ "STS17 (ar-ar)": 27.13,
+ "STS17 (es-es)": 47.95,
+ "STS17 (en-tr)": 19.37,
+ "STS17 (en-en)": 60.3,
+ "STS17 (nl-en)": 18.33,
+ "STS17 (es-en)": 16.25,
+ "STS17 (en-de)": 16.62,
+ "STS17 (en-ar)": 11.54,
"STS22 (ru)": 51.87,
- "STSBenchmarkMultilingualSTS (ru)": 61.6
+ "STS22 (pl-en)": 14.03,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (ar)": 26.71,
+ "STS22 (fr)": 50.98,
+ "STS22 (en)": 44.27,
+ "STS22 (pl)": 16.75,
+ "STS22 (de)": 23.32,
+ "STS22 (de-en)": 27.38,
+ "STS22 (it)": 38.44,
+ "STS22 (es)": 37.91,
+ "STS22 (zh-en)": 7.34,
+ "STS22 (de-pl)": -29.47,
+ "STS22 (tr)": 10.5,
+ "STS22 (es-en)": 21.73,
+ "STS22 (es-it)": 20.72,
+ "STS22 (de-fr)": 1.78,
+ "STS22 (zh)": 18.15,
+ "STSB": 17.21,
+ "STSBenchmark": 55.97,
+ "STSBenchmarkMultilingualSTS (ru)": 61.6,
+ "STSBenchmarkMultilingualSTS (en)": 55.97,
+ "STSBenchmarkMultilingualSTS (es)": 49.87,
+ "STSBenchmarkMultilingualSTS (nl)": 46.12,
+ "STSBenchmarkMultilingualSTS (pl)": 46.61,
+ "STSBenchmarkMultilingualSTS (zh)": 15.38,
+ "STSBenchmarkMultilingualSTS (fr)": 51.0,
+ "STSBenchmarkMultilingualSTS (it)": 48.59,
+ "STSBenchmarkMultilingualSTS (pt)": 45.29,
+ "STSBenchmarkMultilingualSTS (de)": 42.19
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "distilrubert-small-cased-conversational",
+ "SummEval": 29.45,
+ "SummEvalFr": 28.13
+ },
+ {
+ "Model": "distilrubert-small-cased-conversational",
+ "SummEval": 29.45,
+ "SummEvalFr": 28.13
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -1568,7 +7237,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "distilrubert-small-cased-conversational",
+ "Core17InstructionRetrieval": 0.22,
+ "News21InstructionRetrieval": -1.44,
+ "Robust04InstructionRetrieval": -0.6
+ }
+ ]
}
},
"DeepPavlov__rubert-base-cased": {
@@ -1576,6 +7252,7 @@
"f1": [
{
"Model": "rubert-base-cased",
+ "BornholmBitextMining": 12.61,
"Tatoeba (rus-eng)": 16.76
}
]
@@ -1584,15 +7261,169 @@
"accuracy": [
{
"Model": "rubert-base-cased",
+ "AllegroReviews": 23.05,
+ "AmazonCounterfactualClassification (en-ext)": 64.54,
+ "AmazonCounterfactualClassification (en)": 65.67,
+ "AmazonCounterfactualClassification (de)": 63.45,
+ "AmazonCounterfactualClassification (ja)": 53.38,
+ "AmazonPolarityClassification": 56.54,
+ "AmazonReviewsClassification (en)": 26.34,
+ "AmazonReviewsClassification (de)": 25.25,
+ "AmazonReviewsClassification (es)": 25.49,
+ "AmazonReviewsClassification (fr)": 25.07,
+ "AmazonReviewsClassification (ja)": 22.65,
+ "AmazonReviewsClassification (zh)": 24.87,
+ "AngryTweetsClassification": 43.11,
+ "Banking77Classification": 51.54,
+ "CBD": 52.84,
+ "DanishPoliticalCommentsClassification": 26.26,
+ "EmotionClassification": 20.01,
"GeoreviewClassification": 37.22,
"HeadlineClassification": 75.23,
+ "ImdbClassification": 54.6,
"InappropriatenessClassification": 57.34,
"KinopoiskClassification": 49.91,
+ "LccSentimentClassification": 38.0,
+ "MTOPDomainClassification (en)": 62.95,
+ "MTOPDomainClassification (de)": 53.03,
+ "MTOPDomainClassification (es)": 59.51,
+ "MTOPDomainClassification (fr)": 53.35,
+ "MTOPDomainClassification (hi)": 20.73,
+ "MTOPDomainClassification (th)": 15.17,
+ "MTOPIntentClassification (en)": 45.33,
+ "MTOPIntentClassification (de)": 42.59,
+ "MTOPIntentClassification (es)": 42.4,
+ "MTOPIntentClassification (fr)": 38.32,
+ "MTOPIntentClassification (hi)": 7.91,
+ "MTOPIntentClassification (th)": 4.64,
+ "MasakhaNEWSClassification (amh)": 31.36,
+ "MasakhaNEWSClassification (eng)": 68.88,
+ "MasakhaNEWSClassification (fra)": 57.51,
+ "MasakhaNEWSClassification (hau)": 47.08,
+ "MasakhaNEWSClassification (ibo)": 37.38,
+ "MasakhaNEWSClassification (lin)": 45.66,
+ "MasakhaNEWSClassification (lug)": 41.66,
+ "MasakhaNEWSClassification (orm)": 43.35,
+ "MasakhaNEWSClassification (pcm)": 78.75,
+ "MasakhaNEWSClassification (run)": 43.01,
+ "MasakhaNEWSClassification (sna)": 60.11,
+ "MasakhaNEWSClassification (som)": 35.82,
+ "MasakhaNEWSClassification (swa)": 38.66,
+ "MasakhaNEWSClassification (tir)": 24.45,
+ "MasakhaNEWSClassification (xho)": 54.61,
+ "MasakhaNEWSClassification (yor)": 41.27,
"MassiveIntentClassification (ru)": 53.02,
+ "MassiveIntentClassification (ro)": 32.09,
+ "MassiveIntentClassification (hi)": 5.77,
+ "MassiveIntentClassification (ta)": 3.32,
+ "MassiveIntentClassification (nb)": 33.94,
+ "MassiveIntentClassification (lv)": 37.55,
+ "MassiveIntentClassification (ur)": 21.03,
+ "MassiveIntentClassification (es)": 34.14,
+ "MassiveIntentClassification (tl)": 31.63,
+ "MassiveIntentClassification (km)": 4.03,
+ "MassiveIntentClassification (is)": 32.66,
+ "MassiveIntentClassification (pt)": 36.75,
+ "MassiveIntentClassification (ar)": 21.45,
+ "MassiveIntentClassification (he)": 23.67,
+ "MassiveIntentClassification (kn)": 3.82,
+ "MassiveIntentClassification (af)": 31.89,
+ "MassiveIntentClassification (hu)": 31.87,
+ "MassiveIntentClassification (jv)": 31.14,
+ "MassiveIntentClassification (bn)": 2.93,
+ "MassiveIntentClassification (fa)": 29.0,
+ "MassiveIntentClassification (fi)": 33.55,
+ "MassiveIntentClassification (ko)": 9.3,
+ "MassiveIntentClassification (my)": 3.78,
+ "MassiveIntentClassification (th)": 4.77,
+ "MassiveIntentClassification (ja)": 30.31,
+ "MassiveIntentClassification (pl)": 36.98,
+ "MassiveIntentClassification (sq)": 35.35,
+ "MassiveIntentClassification (da)": 35.55,
+ "MassiveIntentClassification (sl)": 36.68,
+ "MassiveIntentClassification (en)": 40.37,
+ "MassiveIntentClassification (az)": 35.33,
+ "MassiveIntentClassification (hy)": 7.1,
+ "MassiveIntentClassification (sv)": 34.93,
+ "MassiveIntentClassification (te)": 3.63,
+ "MassiveIntentClassification (el)": 30.25,
+ "MassiveIntentClassification (nl)": 33.82,
+ "MassiveIntentClassification (tr)": 33.28,
+ "MassiveIntentClassification (sw)": 32.75,
+ "MassiveIntentClassification (ka)": 8.83,
+ "MassiveIntentClassification (id)": 35.82,
+ "MassiveIntentClassification (de)": 34.34,
+ "MassiveIntentClassification (vi)": 26.95,
+ "MassiveIntentClassification (mn)": 35.6,
+ "MassiveIntentClassification (cy)": 31.99,
+ "MassiveIntentClassification (am)": 2.9,
+ "MassiveIntentClassification (fr)": 36.45,
+ "MassiveIntentClassification (ml)": 2.63,
+ "MassiveIntentClassification (it)": 39.35,
+ "MassiveIntentClassification (zh-CN)": 46.24,
+ "MassiveIntentClassification (zh-TW)": 41.71,
+ "MassiveIntentClassification (ms)": 33.19,
"MassiveScenarioClassification (ru)": 56.79,
+ "MassiveScenarioClassification (fr)": 36.73,
+ "MassiveScenarioClassification (fi)": 32.83,
+ "MassiveScenarioClassification (el)": 31.53,
+ "MassiveScenarioClassification (ml)": 6.86,
+ "MassiveScenarioClassification (ko)": 13.25,
+ "MassiveScenarioClassification (sq)": 33.63,
+ "MassiveScenarioClassification (sw)": 34.01,
+ "MassiveScenarioClassification (hu)": 32.14,
+ "MassiveScenarioClassification (tr)": 32.45,
+ "MassiveScenarioClassification (tl)": 31.62,
+ "MassiveScenarioClassification (ka)": 15.21,
+ "MassiveScenarioClassification (th)": 9.65,
+ "MassiveScenarioClassification (af)": 33.29,
+ "MassiveScenarioClassification (ar)": 22.05,
+ "MassiveScenarioClassification (pl)": 35.54,
+ "MassiveScenarioClassification (sl)": 37.54,
+ "MassiveScenarioClassification (km)": 9.39,
+ "MassiveScenarioClassification (kn)": 7.06,
+ "MassiveScenarioClassification (bn)": 6.99,
+ "MassiveScenarioClassification (ms)": 36.12,
+ "MassiveScenarioClassification (ta)": 7.3,
+ "MassiveScenarioClassification (ur)": 24.5,
+ "MassiveScenarioClassification (hy)": 13.37,
+ "MassiveScenarioClassification (ja)": 31.19,
+ "MassiveScenarioClassification (cy)": 33.38,
+ "MassiveScenarioClassification (fa)": 29.86,
+ "MassiveScenarioClassification (nb)": 31.87,
+ "MassiveScenarioClassification (vi)": 30.1,
+ "MassiveScenarioClassification (es)": 35.76,
+ "MassiveScenarioClassification (is)": 32.45,
+ "MassiveScenarioClassification (zh-CN)": 49.06,
+ "MassiveScenarioClassification (it)": 37.46,
+ "MassiveScenarioClassification (te)": 7.78,
+ "MassiveScenarioClassification (hi)": 9.65,
+ "MassiveScenarioClassification (he)": 24.11,
+ "MassiveScenarioClassification (am)": 8.15,
+ "MassiveScenarioClassification (sv)": 33.72,
+ "MassiveScenarioClassification (jv)": 32.34,
+ "MassiveScenarioClassification (da)": 34.44,
+ "MassiveScenarioClassification (mn)": 35.48,
+ "MassiveScenarioClassification (ro)": 34.4,
+ "MassiveScenarioClassification (id)": 35.64,
+ "MassiveScenarioClassification (en)": 44.48,
+ "MassiveScenarioClassification (pt)": 36.31,
+ "MassiveScenarioClassification (de)": 33.5,
+ "MassiveScenarioClassification (nl)": 34.67,
+ "MassiveScenarioClassification (lv)": 36.42,
+ "MassiveScenarioClassification (az)": 34.58,
+ "MassiveScenarioClassification (my)": 9.47,
+ "MassiveScenarioClassification (zh-TW)": 42.15,
+ "NoRecClassification": 38.16,
+ "NordicLangClassification": 60.75,
+ "PAC": 61.78,
+ "PolEmo2.0-IN": 42.33,
+ "PolEmo2.0-OUT": 34.19,
"RuReviewsClassification": 50.74,
"RuSciBenchGRNTIClassification": 48.03,
- "RuSciBenchOECDClassification": 36.13
+ "RuSciBenchOECDClassification": 36.13,
+ "ToxicConversationsClassification": 55.36,
+ "TweetSentimentExtractionClassification": 40.79
}
]
},
@@ -1600,11 +7431,67 @@
"v_measure": [
{
"Model": "rubert-base-cased",
+ "AlloProfClusteringP2P": 41.54,
+ "AlloProfClusteringS2S": 24.5,
+ "ArxivClusteringP2P": 19.12,
+ "ArxivClusteringS2S": 15.72,
+ "BiorxivClusteringP2P": 15.01,
+ "BiorxivClusteringS2S": 11.27,
+ "BlurbsClusteringP2P": 10.74,
+ "BlurbsClusteringS2S": 8.66,
"GeoreviewClusteringP2P": 28.77,
+ "HALClusteringS2S": 9.48,
"MLSUMClusteringP2P (ru)": 41.42,
+ "MLSUMClusteringP2P (de)": 22.69,
+ "MLSUMClusteringP2P (fr)": 31.79,
+ "MLSUMClusteringP2P (es)": 34.6,
"MLSUMClusteringS2S (ru)": 40.52,
+ "MLSUMClusteringS2S (de)": 21.14,
+ "MLSUMClusteringS2S (fr)": 30.71,
+ "MLSUMClusteringS2S (es)": 33.77,
+ "MasakhaNEWSClusteringP2P (amh)": 40.38,
+ "MasakhaNEWSClusteringP2P (eng)": 7.24,
+ "MasakhaNEWSClusteringP2P (fra)": 24.89,
+ "MasakhaNEWSClusteringP2P (hau)": 4.33,
+ "MasakhaNEWSClusteringP2P (ibo)": 20.68,
+ "MasakhaNEWSClusteringP2P (lin)": 42.23,
+ "MasakhaNEWSClusteringP2P (lug)": 48.28,
+ "MasakhaNEWSClusteringP2P (orm)": 24.84,
+ "MasakhaNEWSClusteringP2P (pcm)": 29.98,
+ "MasakhaNEWSClusteringP2P (run)": 42.83,
+ "MasakhaNEWSClusteringP2P (sna)": 41.71,
+ "MasakhaNEWSClusteringP2P (som)": 26.34,
+ "MasakhaNEWSClusteringP2P (swa)": 3.39,
+ "MasakhaNEWSClusteringP2P (tir)": 42.65,
+ "MasakhaNEWSClusteringP2P (xho)": 22.04,
+ "MasakhaNEWSClusteringP2P (yor)": 21.69,
+ "MasakhaNEWSClusteringS2S (amh)": 40.03,
+ "MasakhaNEWSClusteringS2S (eng)": 18.53,
+ "MasakhaNEWSClusteringS2S (fra)": 23.47,
+ "MasakhaNEWSClusteringS2S (hau)": 9.65,
+ "MasakhaNEWSClusteringS2S (ibo)": 27.01,
+ "MasakhaNEWSClusteringS2S (lin)": 51.17,
+ "MasakhaNEWSClusteringS2S (lug)": 43.62,
+ "MasakhaNEWSClusteringS2S (orm)": 21.9,
+ "MasakhaNEWSClusteringS2S (pcm)": 39.29,
+ "MasakhaNEWSClusteringS2S (run)": 45.33,
+ "MasakhaNEWSClusteringS2S (sna)": 42.96,
+ "MasakhaNEWSClusteringS2S (som)": 25.29,
+ "MasakhaNEWSClusteringS2S (swa)": 10.43,
+ "MasakhaNEWSClusteringS2S (tir)": 42.77,
+ "MasakhaNEWSClusteringS2S (xho)": 25.79,
+ "MasakhaNEWSClusteringS2S (yor)": 24.62,
+ "MedrxivClusteringP2P": 19.13,
+ "MedrxivClusteringS2S": 16.96,
+ "RedditClustering": 14.16,
+ "RedditClusteringP2P": 18.47,
"RuSciBenchGRNTIClusteringP2P": 28.29,
- "RuSciBenchOECDClusteringP2P": 26.67
+ "RuSciBenchOECDClusteringP2P": 26.67,
+ "StackExchangeClustering": 21.35,
+ "StackExchangeClusteringP2P": 21.64,
+ "TenKGnadClusteringP2P": 16.29,
+ "TenKGnadClusteringS2S": 10.93,
+ "TwentyNewsgroupsClustering": 12.36
}
]
},
@@ -1612,13 +7499,51 @@
"max_ap": [
{
"Model": "rubert-base-cased",
+ "CDSC-E": 35.01,
+ "FalseFriendsGermanEnglish": 48.57,
"OpusparcusPC (ru)": 81.65,
- "TERRa": 52.12
+ "OpusparcusPC (de)": 89.13,
+ "OpusparcusPC (en)": 92.78,
+ "OpusparcusPC (fi)": 79.66,
+ "OpusparcusPC (fr)": 84.68,
+ "OpusparcusPC (sv)": 78.49,
+ "PSC": 64.57,
+ "PawsXPairClassification (de)": 49.6,
+ "PawsXPairClassification (en)": 46.62,
+ "PawsXPairClassification (es)": 49.86,
+ "PawsXPairClassification (fr)": 51.09,
+ "PawsXPairClassification (ja)": 46.04,
+ "PawsXPairClassification (ko)": 47.52,
+ "PawsXPairClassification (zh)": 52.77,
+ "SICK-E-PL": 47.26,
+ "SprintDuplicateQuestions": 21.8,
+ "TERRa": 52.12,
+ "TwitterSemEval2015": 42.31,
+ "TwitterURLCorpus": 55.52
},
{
"Model": "rubert-base-cased",
+ "CDSC-E": 35.32,
+ "FalseFriendsGermanEnglish": 49.53,
"OpusparcusPC (ru)": 81.65,
- "TERRa": 53.17
+ "OpusparcusPC (de)": 89.13,
+ "OpusparcusPC (en)": 92.78,
+ "OpusparcusPC (fi)": 79.66,
+ "OpusparcusPC (fr)": 84.68,
+ "OpusparcusPC (sv)": 78.49,
+ "PSC": 66.18,
+ "PawsXPairClassification (de)": 49.96,
+ "PawsXPairClassification (en)": 47.53,
+ "PawsXPairClassification (es)": 49.86,
+ "PawsXPairClassification (fr)": 51.09,
+ "PawsXPairClassification (ja)": 46.42,
+ "PawsXPairClassification (ko)": 47.71,
+ "PawsXPairClassification (zh)": 52.89,
+ "SICK-E-PL": 47.31,
+ "SprintDuplicateQuestions": 21.8,
+ "TERRa": 53.17,
+ "TwitterSemEval2015": 42.31,
+ "TwitterURLCorpus": 55.52
}
]
},
@@ -1626,11 +7551,35 @@
"map": [
{
"Model": "rubert-base-cased",
- "MIRACLReranking (ru)": 13.27
+ "AlloprofReranking": 29.26,
+ "AskUbuntuDupQuestions": 46.46,
+ "MindSmallReranking": 28.06,
+ "RuBQReranking": 41.65,
+ "SciDocsRR": 45.85,
+ "StackOverflowDupQuestions": 29.76,
+ "SyntecReranking": 35.08,
+ "T2Reranking": 53.26
},
{
"Model": "rubert-base-cased",
- "RuBQReranking": 41.65
+ "MIRACLReranking (ru)": 13.27,
+ "MIRACLReranking (ar)": 2.85,
+ "MIRACLReranking (bn)": 2.97,
+ "MIRACLReranking (de)": 6.21,
+ "MIRACLReranking (en)": 7.53,
+ "MIRACLReranking (es)": 5.16,
+ "MIRACLReranking (fa)": 3.53,
+ "MIRACLReranking (fi)": 6.96,
+ "MIRACLReranking (fr)": 4.97,
+ "MIRACLReranking (hi)": 5.1,
+ "MIRACLReranking (id)": 5.28,
+ "MIRACLReranking (ja)": 4.89,
+ "MIRACLReranking (ko)": 5.86,
+ "MIRACLReranking (sw)": 4.75,
+ "MIRACLReranking (te)": 5.36,
+ "MIRACLReranking (th)": 2.72,
+ "MIRACLReranking (yo)": 4.67,
+ "MIRACLReranking (zh)": 3.95
}
]
},
@@ -1638,9 +7587,143 @@
"ndcg_at_10": [
{
"Model": "rubert-base-cased",
+ "AILACasedocs": 10.35,
+ "AILAStatutes": 11.53,
+ "ARCChallenge": 0.56,
+ "AlloprofRetrieval": 0.24,
+ "AlphaNLI": 0.56,
+ "AppsRetrieval": 0.04,
+ "ArguAna": 13.02,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 0.15,
+ "CmedqaRetrieval": 0.9,
+ "CodeFeedbackMT": 8.43,
+ "CodeFeedbackST": 3.66,
+ "CodeSearchNetCCRetrieval (python)": 3.77,
+ "CodeSearchNetCCRetrieval (javascript)": 8.05,
+ "CodeSearchNetCCRetrieval (go)": 4.52,
+ "CodeSearchNetCCRetrieval (ruby)": 9.34,
+ "CodeSearchNetCCRetrieval (java)": 5.94,
+ "CodeSearchNetCCRetrieval (php)": 4.64,
+ "CodeSearchNetRetrieval (python)": 6.47,
+ "CodeSearchNetRetrieval (javascript)": 2.31,
+ "CodeSearchNetRetrieval (go)": 2.67,
+ "CodeSearchNetRetrieval (ruby)": 5.26,
+ "CodeSearchNetRetrieval (java)": 2.8,
+ "CodeSearchNetRetrieval (php)": 1.9,
+ "CodeTransOceanContest": 5.49,
+ "CodeTransOceanDL": 29.16,
+ "CosQA": 0.14,
+ "CovidRetrieval": 0.07,
+ "DBPedia": 0.83,
+ "FEVER": 0.83,
+ "FiQA2018": 0.16,
+ "GerDaLIR": 0.25,
+ "GerDaLIRSmall": 0.62,
+ "GermanQuAD-Retrieval": 5.16,
+ "HellaSwag": 1.71,
+ "HotpotQA": 0.61,
+ "LEMBNarrativeQARetrieval": 2.92,
+ "LEMBQMSumRetrieval": 5.52,
+ "LEMBSummScreenFDRetrieval": 6.19,
+ "LEMBWikimQARetrieval": 15.54,
+ "LeCaRDv2": 26.33,
+ "LegalBenchConsumerContractsQA": 9.09,
+ "LegalBenchCorporateLobbying": 15.85,
+ "LegalQuAD": 3.65,
+ "LegalSummarization": 16.31,
"MIRACLRetrieval (ru)": 0.88,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.0,
+ "MIRACLRetrieval (en)": 0.04,
+ "MIRACLRetrieval (es)": 0.0,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.05,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.0,
+ "MIRACLRetrieval (ja)": 0.13,
+ "MIRACLRetrieval (ko)": 0.22,
+ "MIRACLRetrieval (sw)": 0.09,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 0.99,
+ "MIRACLRetrieval (zh)": 0.09,
+ "MSMARCO": 0.12,
+ "MintakaRetrieval (ar)": 0.78,
+ "MintakaRetrieval (de)": 0.94,
+ "MintakaRetrieval (es)": 1.16,
+ "MintakaRetrieval (fr)": 1.13,
+ "MintakaRetrieval (hi)": 0.44,
+ "MintakaRetrieval (it)": 0.91,
+ "MintakaRetrieval (ja)": 0.62,
+ "MintakaRetrieval (pt)": 0.76,
+ "NFCorpus": 2.69,
+ "NQ": 0.09,
+ "PIQA": 1.66,
+ "Quail": 0.03,
+ "QuoraRetrieval": 44.56,
+ "RARbCode": 0.03,
+ "RARbMath": 3.54,
"RiaNewsRetrieval": 5.58,
- "RuBQRetrieval": 9.52
+ "RuBQRetrieval": 9.52,
+ "SCIDOCS": 0.35,
+ "SIQA": 0.0,
+ "SciFact": 1.06,
+ "SciFact-PL": 0.42,
+ "SpartQA": 0.03,
+ "StackOverflowQA": 8.75,
+ "SyntecRetrieval": 6.26,
+ "SyntheticText2SQL": 1.79,
+ "TRECCOVID": 4.05,
+ "TRECCOVID-PL": 2.64,
+ "TempReasonL1": 0.24,
+ "TempReasonL2Fact": 1.06,
+ "TempReasonL2Pure": 0.22,
+ "TempReasonL3Fact": 1.01,
+ "TempReasonL3Pure": 2.61,
+ "Touche2020": 0.0,
+ "WinoGrande": 0.03,
+ "XMarket (de)": 0.96,
+ "XMarket (en)": 0.95,
+ "XMarket (es)": 0.76,
+ "XPQARetrieval (ara-ara)": 1.72,
+ "XPQARetrieval (eng-ara)": 0.9,
+ "XPQARetrieval (ara-eng)": 0.72,
+ "XPQARetrieval (deu-deu)": 8.62,
+ "XPQARetrieval (eng-deu)": 0.65,
+ "XPQARetrieval (deu-eng)": 2.13,
+ "XPQARetrieval (spa-spa)": 5.23,
+ "XPQARetrieval (eng-spa)": 0.57,
+ "XPQARetrieval (spa-eng)": 2.27,
+ "XPQARetrieval (fra-fra)": 7.82,
+ "XPQARetrieval (eng-fra)": 0.87,
+ "XPQARetrieval (fra-eng)": 2.62,
+ "XPQARetrieval (hin-hin)": 2.85,
+ "XPQARetrieval (eng-hin)": 2.81,
+ "XPQARetrieval (hin-eng)": 0.85,
+ "XPQARetrieval (ita-ita)": 15.62,
+ "XPQARetrieval (eng-ita)": 1.4,
+ "XPQARetrieval (ita-eng)": 3.26,
+ "XPQARetrieval (jpn-jpn)": 15.82,
+ "XPQARetrieval (eng-jpn)": 0.83,
+ "XPQARetrieval (jpn-eng)": 0.71,
+ "XPQARetrieval (kor-kor)": 1.9,
+ "XPQARetrieval (eng-kor)": 1.08,
+ "XPQARetrieval (kor-eng)": 0.85,
+ "XPQARetrieval (pol-pol)": 7.68,
+ "XPQARetrieval (eng-pol)": 0.82,
+ "XPQARetrieval (pol-eng)": 1.68,
+ "XPQARetrieval (por-por)": 5.3,
+ "XPQARetrieval (eng-por)": 0.57,
+ "XPQARetrieval (por-eng)": 1.63,
+ "XPQARetrieval (tam-tam)": 1.57,
+ "XPQARetrieval (eng-tam)": 1.17,
+ "XPQARetrieval (tam-eng)": 0.8,
+ "XPQARetrieval (cmn-cmn)": 18.63,
+ "XPQARetrieval (eng-cmn)": 1.06,
+ "XPQARetrieval (cmn-eng)": 1.79
}
]
},
@@ -1648,15 +7731,129 @@
"cosine_spearman": [
{
"Model": "rubert-base-cased",
+ "BIOSSES": 30.22,
+ "CDSC-R": 51.41,
+ "GermanSTSBenchmark": 36.37,
+ "SICK-R": 44.22,
+ "SICK-R-PL": 44.3,
+ "SICKFr": 44.03,
+ "STS12": 27.75,
+ "STS13": 39.83,
+ "STS14": 34.89,
+ "STS15": 43.05,
+ "STS16": 45.66,
+ "STS17 (nl-en)": 18.44,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (ar-ar)": 30.14,
+ "STS17 (en-de)": 10.88,
+ "STS17 (es-en)": 3.39,
+ "STS17 (it-en)": 19.23,
+ "STS17 (en-tr)": -7.83,
+ "STS17 (fr-en)": 15.78,
+ "STS17 (en-en)": 56.29,
+ "STS17 (es-es)": 42.21,
+ "STS17 (ko-ko)": 13.94,
+ "STS22 (it)": 30.56,
+ "STS22 (de)": 24.22,
+ "STS22 (en)": 38.33,
+ "STS22 (es)": 36.12,
+ "STS22 (zh)": 32.97,
+ "STS22 (ar)": 22.72,
+ "STS22 (tr)": 12.35,
+ "STS22 (es-en)": 21.75,
+ "STS22 (zh-en)": 11.04,
+ "STS22 (es-it)": 20.11,
+ "STS22 (fr)": 44.7,
+ "STS22 (de-en)": 22.58,
+ "STS22 (de-fr)": 17.12,
+ "STS22 (pl)": 5.9,
+ "STS22 (pl-en)": 30.9,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (de-pl)": -25.81,
+ "STSB": 36.87,
+ "STSBenchmark": 39.84,
+ "STSBenchmarkMultilingualSTS (de)": 40.7,
+ "STSBenchmarkMultilingualSTS (nl)": 33.12,
+ "STSBenchmarkMultilingualSTS (en)": 39.84,
+ "STSBenchmarkMultilingualSTS (es)": 38.62,
+ "STSBenchmarkMultilingualSTS (fr)": 44.46,
+ "STSBenchmarkMultilingualSTS (pl)": 37.96,
+ "STSBenchmarkMultilingualSTS (pt)": 36.13,
+ "STSBenchmarkMultilingualSTS (it)": 35.86,
+ "STSBenchmarkMultilingualSTS (zh)": 36.96
+ },
+ {
+ "Model": "rubert-base-cased",
+ "BIOSSES": 30.22,
+ "CDSC-R": 51.41,
+ "GermanSTSBenchmark": 36.37,
"RUParaPhraserSTS": 49.72,
"RuSTSBenchmarkSTS": 53.95,
+ "SICK-R": 44.22,
+ "SICK-R-PL": 44.3,
+ "SICKFr": 44.02,
+ "STS12": 27.75,
+ "STS13": 39.83,
+ "STS14": 34.89,
+ "STS15": 43.05,
+ "STS16": 45.66,
+ "STS17 (nl-en)": 18.44,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (ar-ar)": 30.13,
+ "STS17 (en-de)": 10.88,
+ "STS17 (es-en)": 3.39,
+ "STS17 (it-en)": 19.23,
+ "STS17 (en-tr)": -7.83,
+ "STS17 (fr-en)": 15.78,
+ "STS17 (en-en)": 56.29,
+ "STS17 (es-es)": 42.21,
+ "STS17 (ko-ko)": 14.2,
"STS22 (ru)": 34.98,
- "STSBenchmarkMultilingualSTS (ru)": 53.76
+ "STS22 (it)": 30.56,
+ "STS22 (de)": 24.23,
+ "STS22 (en)": 38.33,
+ "STS22 (es)": 36.12,
+ "STS22 (zh)": 32.97,
+ "STS22 (ar)": 22.74,
+ "STS22 (tr)": 12.35,
+ "STS22 (es-en)": 21.75,
+ "STS22 (zh-en)": 11.04,
+ "STS22 (es-it)": 20.11,
+ "STS22 (fr)": 44.7,
+ "STS22 (de-en)": 22.58,
+ "STS22 (de-fr)": 17.12,
+ "STS22 (pl)": 5.72,
+ "STS22 (pl-en)": 30.9,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (de-pl)": -25.81,
+ "STSB": 36.87,
+ "STSBenchmark": 39.84,
+ "STSBenchmarkMultilingualSTS (ru)": 53.76,
+ "STSBenchmarkMultilingualSTS (de)": 40.7,
+ "STSBenchmarkMultilingualSTS (nl)": 33.12,
+ "STSBenchmarkMultilingualSTS (en)": 39.84,
+ "STSBenchmarkMultilingualSTS (es)": 38.62,
+ "STSBenchmarkMultilingualSTS (fr)": 44.46,
+ "STSBenchmarkMultilingualSTS (pl)": 37.96,
+ "STSBenchmarkMultilingualSTS (pt)": 36.13,
+ "STSBenchmarkMultilingualSTS (it)": 35.86,
+ "STSBenchmarkMultilingualSTS (zh)": 36.96
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "rubert-base-cased",
+ "SummEval": 29.56,
+ "SummEvalFr": 28.42
+ },
+ {
+ "Model": "rubert-base-cased",
+ "SummEval": 29.56,
+ "SummEvalFr": 28.42
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -1668,7 +7865,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "rubert-base-cased",
+ "Core17InstructionRetrieval": -3.56,
+ "News21InstructionRetrieval": 4.57,
+ "Robust04InstructionRetrieval": 1.4
+ }
+ ]
}
},
"DeepPavlov__rubert-base-cased-sentence": {
@@ -1676,6 +7880,7 @@
"f1": [
{
"Model": "rubert-base-cased-sentence",
+ "BornholmBitextMining": 15.6,
"Tatoeba (rus-eng)": 20.26
}
]
@@ -1684,15 +7889,169 @@
"accuracy": [
{
"Model": "rubert-base-cased-sentence",
+ "AllegroReviews": 23.97,
+ "AmazonCounterfactualClassification (en-ext)": 62.27,
+ "AmazonCounterfactualClassification (en)": 59.58,
+ "AmazonCounterfactualClassification (de)": 55.93,
+ "AmazonCounterfactualClassification (ja)": 53.95,
+ "AmazonPolarityClassification": 60.81,
+ "AmazonReviewsClassification (en)": 28.25,
+ "AmazonReviewsClassification (de)": 27.28,
+ "AmazonReviewsClassification (es)": 28.62,
+ "AmazonReviewsClassification (fr)": 28.85,
+ "AmazonReviewsClassification (ja)": 23.6,
+ "AmazonReviewsClassification (zh)": 26.82,
+ "AngryTweetsClassification": 40.25,
+ "Banking77Classification": 48.98,
+ "CBD": 44.27,
+ "DanishPoliticalCommentsClassification": 27.05,
+ "EmotionClassification": 19.64,
"GeoreviewClassification": 38.05,
"HeadlineClassification": 67.64,
+ "ImdbClassification": 57.73,
"InappropriatenessClassification": 58.27,
"KinopoiskClassification": 45.86,
+ "LccSentimentClassification": 37.0,
+ "MTOPDomainClassification (en)": 59.98,
+ "MTOPDomainClassification (de)": 49.12,
+ "MTOPDomainClassification (es)": 51.0,
+ "MTOPDomainClassification (fr)": 49.23,
+ "MTOPDomainClassification (hi)": 21.92,
+ "MTOPDomainClassification (th)": 16.94,
+ "MTOPIntentClassification (en)": 39.37,
+ "MTOPIntentClassification (de)": 36.57,
+ "MTOPIntentClassification (es)": 34.91,
+ "MTOPIntentClassification (fr)": 32.83,
+ "MTOPIntentClassification (hi)": 7.64,
+ "MTOPIntentClassification (th)": 4.67,
+ "MasakhaNEWSClassification (amh)": 33.46,
+ "MasakhaNEWSClassification (eng)": 65.83,
+ "MasakhaNEWSClassification (fra)": 62.13,
+ "MasakhaNEWSClassification (hau)": 48.6,
+ "MasakhaNEWSClassification (ibo)": 41.97,
+ "MasakhaNEWSClassification (lin)": 57.77,
+ "MasakhaNEWSClassification (lug)": 43.72,
+ "MasakhaNEWSClassification (orm)": 37.51,
+ "MasakhaNEWSClassification (pcm)": 82.13,
+ "MasakhaNEWSClassification (run)": 45.75,
+ "MasakhaNEWSClassification (sna)": 60.89,
+ "MasakhaNEWSClassification (som)": 33.84,
+ "MasakhaNEWSClassification (swa)": 42.46,
+ "MasakhaNEWSClassification (tir)": 26.88,
+ "MasakhaNEWSClassification (xho)": 52.29,
+ "MasakhaNEWSClassification (yor)": 48.93,
"MassiveIntentClassification (ru)": 49.1,
+ "MassiveIntentClassification (af)": 30.31,
+ "MassiveIntentClassification (bn)": 3.39,
+ "MassiveIntentClassification (el)": 31.9,
+ "MassiveIntentClassification (cy)": 30.38,
+ "MassiveIntentClassification (da)": 33.35,
+ "MassiveIntentClassification (ro)": 31.19,
+ "MassiveIntentClassification (mn)": 34.67,
+ "MassiveIntentClassification (hi)": 6.01,
+ "MassiveIntentClassification (hu)": 30.56,
+ "MassiveIntentClassification (id)": 34.3,
+ "MassiveIntentClassification (km)": 4.34,
+ "MassiveIntentClassification (am)": 3.09,
+ "MassiveIntentClassification (sq)": 34.82,
+ "MassiveIntentClassification (fi)": 31.83,
+ "MassiveIntentClassification (th)": 5.14,
+ "MassiveIntentClassification (vi)": 26.94,
+ "MassiveIntentClassification (pl)": 35.34,
+ "MassiveIntentClassification (te)": 3.21,
+ "MassiveIntentClassification (pt)": 34.8,
+ "MassiveIntentClassification (sl)": 33.18,
+ "MassiveIntentClassification (hy)": 7.77,
+ "MassiveIntentClassification (fr)": 33.06,
+ "MassiveIntentClassification (he)": 22.67,
+ "MassiveIntentClassification (ar)": 23.08,
+ "MassiveIntentClassification (ta)": 3.34,
+ "MassiveIntentClassification (zh-CN)": 48.85,
+ "MassiveIntentClassification (ko)": 9.85,
+ "MassiveIntentClassification (is)": 29.98,
+ "MassiveIntentClassification (ms)": 31.07,
+ "MassiveIntentClassification (ml)": 2.89,
+ "MassiveIntentClassification (nb)": 31.28,
+ "MassiveIntentClassification (tl)": 31.0,
+ "MassiveIntentClassification (de)": 32.2,
+ "MassiveIntentClassification (ja)": 32.27,
+ "MassiveIntentClassification (fa)": 28.38,
+ "MassiveIntentClassification (en)": 37.12,
+ "MassiveIntentClassification (zh-TW)": 43.83,
+ "MassiveIntentClassification (nl)": 31.93,
+ "MassiveIntentClassification (tr)": 31.73,
+ "MassiveIntentClassification (es)": 31.66,
+ "MassiveIntentClassification (az)": 35.5,
+ "MassiveIntentClassification (ur)": 21.08,
+ "MassiveIntentClassification (my)": 4.09,
+ "MassiveIntentClassification (lv)": 34.72,
+ "MassiveIntentClassification (ka)": 9.45,
+ "MassiveIntentClassification (it)": 35.87,
+ "MassiveIntentClassification (jv)": 28.62,
+ "MassiveIntentClassification (sw)": 29.21,
+ "MassiveIntentClassification (sv)": 33.01,
+ "MassiveIntentClassification (kn)": 3.79,
"MassiveScenarioClassification (ru)": 51.91,
+ "MassiveScenarioClassification (zh-CN)": 51.12,
+ "MassiveScenarioClassification (id)": 32.58,
+ "MassiveScenarioClassification (de)": 32.16,
+ "MassiveScenarioClassification (tl)": 31.56,
+ "MassiveScenarioClassification (ja)": 33.43,
+ "MassiveScenarioClassification (hy)": 13.76,
+ "MassiveScenarioClassification (th)": 9.85,
+ "MassiveScenarioClassification (my)": 10.63,
+ "MassiveScenarioClassification (fi)": 31.93,
+ "MassiveScenarioClassification (mn)": 33.53,
+ "MassiveScenarioClassification (fa)": 28.15,
+ "MassiveScenarioClassification (nl)": 33.1,
+ "MassiveScenarioClassification (ro)": 34.53,
+ "MassiveScenarioClassification (el)": 33.91,
+ "MassiveScenarioClassification (az)": 34.63,
+ "MassiveScenarioClassification (zh-TW)": 45.25,
+ "MassiveScenarioClassification (ko)": 13.87,
+ "MassiveScenarioClassification (am)": 7.97,
+ "MassiveScenarioClassification (ml)": 7.05,
+ "MassiveScenarioClassification (bn)": 7.62,
+ "MassiveScenarioClassification (it)": 35.72,
+ "MassiveScenarioClassification (hu)": 31.62,
+ "MassiveScenarioClassification (da)": 33.28,
+ "MassiveScenarioClassification (is)": 31.24,
+ "MassiveScenarioClassification (sl)": 34.1,
+ "MassiveScenarioClassification (sv)": 32.09,
+ "MassiveScenarioClassification (ms)": 32.15,
+ "MassiveScenarioClassification (vi)": 30.31,
+ "MassiveScenarioClassification (ar)": 23.6,
+ "MassiveScenarioClassification (fr)": 35.38,
+ "MassiveScenarioClassification (km)": 9.61,
+ "MassiveScenarioClassification (pl)": 35.88,
+ "MassiveScenarioClassification (ur)": 25.08,
+ "MassiveScenarioClassification (sq)": 35.52,
+ "MassiveScenarioClassification (pt)": 35.7,
+ "MassiveScenarioClassification (kn)": 7.82,
+ "MassiveScenarioClassification (lv)": 33.23,
+ "MassiveScenarioClassification (hi)": 9.56,
+ "MassiveScenarioClassification (he)": 23.09,
+ "MassiveScenarioClassification (te)": 7.38,
+ "MassiveScenarioClassification (af)": 30.26,
+ "MassiveScenarioClassification (cy)": 30.19,
+ "MassiveScenarioClassification (ka)": 16.23,
+ "MassiveScenarioClassification (sw)": 31.42,
+ "MassiveScenarioClassification (tr)": 32.19,
+ "MassiveScenarioClassification (es)": 32.66,
+ "MassiveScenarioClassification (nb)": 29.6,
+ "MassiveScenarioClassification (en)": 40.34,
+ "MassiveScenarioClassification (ta)": 6.79,
+ "MassiveScenarioClassification (jv)": 29.67,
+ "NoRecClassification": 39.12,
+ "NordicLangClassification": 51.39,
+ "PAC": 64.36,
+ "PolEmo2.0-IN": 43.75,
+ "PolEmo2.0-OUT": 34.64,
"RuReviewsClassification": 58.34,
"RuSciBenchGRNTIClassification": 52.18,
- "RuSciBenchOECDClassification": 40.11
+ "RuSciBenchOECDClassification": 40.11,
+ "ToxicConversationsClassification": 53.28,
+ "TweetSentimentExtractionClassification": 43.27
}
]
},
@@ -1700,11 +8059,67 @@
"v_measure": [
{
"Model": "rubert-base-cased-sentence",
+ "AlloProfClusteringP2P": 39.0,
+ "AlloProfClusteringS2S": 23.5,
+ "ArxivClusteringP2P": 19.4,
+ "ArxivClusteringS2S": 15.28,
+ "BiorxivClusteringP2P": 14.91,
+ "BiorxivClusteringS2S": 10.62,
+ "BlurbsClusteringP2P": 16.55,
+ "BlurbsClusteringS2S": 8.99,
"GeoreviewClusteringP2P": 41.82,
+ "HALClusteringS2S": 10.36,
"MLSUMClusteringP2P (ru)": 43.71,
+ "MLSUMClusteringP2P (de)": 27.24,
+ "MLSUMClusteringP2P (fr)": 29.91,
+ "MLSUMClusteringP2P (es)": 30.69,
"MLSUMClusteringS2S (ru)": 45.94,
+ "MLSUMClusteringS2S (de)": 27.47,
+ "MLSUMClusteringS2S (fr)": 28.79,
+ "MLSUMClusteringS2S (es)": 30.33,
+ "MasakhaNEWSClusteringP2P (amh)": 40.3,
+ "MasakhaNEWSClusteringP2P (eng)": 37.13,
+ "MasakhaNEWSClusteringP2P (fra)": 42.57,
+ "MasakhaNEWSClusteringP2P (hau)": 13.22,
+ "MasakhaNEWSClusteringP2P (ibo)": 28.87,
+ "MasakhaNEWSClusteringP2P (lin)": 49.89,
+ "MasakhaNEWSClusteringP2P (lug)": 42.71,
+ "MasakhaNEWSClusteringP2P (orm)": 23.69,
+ "MasakhaNEWSClusteringP2P (pcm)": 62.14,
+ "MasakhaNEWSClusteringP2P (run)": 48.63,
+ "MasakhaNEWSClusteringP2P (sna)": 49.34,
+ "MasakhaNEWSClusteringP2P (som)": 25.51,
+ "MasakhaNEWSClusteringP2P (swa)": 16.72,
+ "MasakhaNEWSClusteringP2P (tir)": 43.32,
+ "MasakhaNEWSClusteringP2P (xho)": 29.13,
+ "MasakhaNEWSClusteringP2P (yor)": 26.91,
+ "MasakhaNEWSClusteringS2S (amh)": 43.23,
+ "MasakhaNEWSClusteringS2S (eng)": 17.18,
+ "MasakhaNEWSClusteringS2S (fra)": 21.71,
+ "MasakhaNEWSClusteringS2S (hau)": 8.85,
+ "MasakhaNEWSClusteringS2S (ibo)": 29.68,
+ "MasakhaNEWSClusteringS2S (lin)": 54.18,
+ "MasakhaNEWSClusteringS2S (lug)": 42.77,
+ "MasakhaNEWSClusteringS2S (orm)": 21.24,
+ "MasakhaNEWSClusteringS2S (pcm)": 33.99,
+ "MasakhaNEWSClusteringS2S (run)": 44.17,
+ "MasakhaNEWSClusteringS2S (sna)": 41.15,
+ "MasakhaNEWSClusteringS2S (som)": 24.67,
+ "MasakhaNEWSClusteringS2S (swa)": 6.4,
+ "MasakhaNEWSClusteringS2S (tir)": 43.32,
+ "MasakhaNEWSClusteringS2S (xho)": 29.98,
+ "MasakhaNEWSClusteringS2S (yor)": 26.69,
+ "MedrxivClusteringP2P": 17.86,
+ "MedrxivClusteringS2S": 16.45,
+ "RedditClustering": 14.72,
+ "RedditClusteringP2P": 23.35,
"RuSciBenchGRNTIClusteringP2P": 46.29,
- "RuSciBenchOECDClusteringP2P": 41.28
+ "RuSciBenchOECDClusteringP2P": 41.28,
+ "StackExchangeClustering": 16.48,
+ "StackExchangeClusteringP2P": 26.54,
+ "TenKGnadClusteringP2P": 23.3,
+ "TenKGnadClusteringS2S": 8.08,
+ "TwentyNewsgroupsClustering": 15.85
}
]
},
@@ -1712,13 +8127,51 @@
"max_ap": [
{
"Model": "rubert-base-cased-sentence",
+ "CDSC-E": 61.23,
+ "FalseFriendsGermanEnglish": 48.16,
"OpusparcusPC (ru)": 81.52,
- "TERRa": 59.12
+ "OpusparcusPC (de)": 88.51,
+ "OpusparcusPC (en)": 93.0,
+ "OpusparcusPC (fi)": 81.98,
+ "OpusparcusPC (fr)": 82.89,
+ "OpusparcusPC (sv)": 80.84,
+ "PSC": 59.64,
+ "PawsXPairClassification (de)": 51.99,
+ "PawsXPairClassification (en)": 53.54,
+ "PawsXPairClassification (es)": 51.51,
+ "PawsXPairClassification (fr)": 54.04,
+ "PawsXPairClassification (ja)": 46.79,
+ "PawsXPairClassification (ko)": 47.86,
+ "PawsXPairClassification (zh)": 54.4,
+ "SICK-E-PL": 60.78,
+ "SprintDuplicateQuestions": 47.57,
+ "TERRa": 59.12,
+ "TwitterSemEval2015": 44.14,
+ "TwitterURLCorpus": 70.15
},
{
"Model": "rubert-base-cased-sentence",
+ "CDSC-E": 61.23,
+ "FalseFriendsGermanEnglish": 48.28,
"OpusparcusPC (ru)": 81.76,
- "TERRa": 59.12
+ "OpusparcusPC (de)": 88.78,
+ "OpusparcusPC (en)": 93.0,
+ "OpusparcusPC (fi)": 82.1,
+ "OpusparcusPC (fr)": 83.27,
+ "OpusparcusPC (sv)": 80.84,
+ "PSC": 59.64,
+ "PawsXPairClassification (de)": 52.04,
+ "PawsXPairClassification (en)": 53.68,
+ "PawsXPairClassification (es)": 51.53,
+ "PawsXPairClassification (fr)": 54.1,
+ "PawsXPairClassification (ja)": 46.95,
+ "PawsXPairClassification (ko)": 47.86,
+ "PawsXPairClassification (zh)": 54.44,
+ "SICK-E-PL": 60.78,
+ "SprintDuplicateQuestions": 48.25,
+ "TERRa": 59.12,
+ "TwitterSemEval2015": 44.64,
+ "TwitterURLCorpus": 70.15
}
]
},
@@ -1726,11 +8179,35 @@
"map": [
{
"Model": "rubert-base-cased-sentence",
- "MIRACLReranking (ru)": 13.77
+ "AlloprofReranking": 33.58,
+ "AskUbuntuDupQuestions": 45.69,
+ "MindSmallReranking": 26.15,
+ "RuBQReranking": 39.89,
+ "SciDocsRR": 48.28,
+ "StackOverflowDupQuestions": 29.81,
+ "SyntecReranking": 36.0,
+ "T2Reranking": 53.74
},
{
"Model": "rubert-base-cased-sentence",
- "RuBQReranking": 39.89
+ "MIRACLReranking (ru)": 13.77,
+ "MIRACLReranking (ar)": 3.16,
+ "MIRACLReranking (bn)": 3.09,
+ "MIRACLReranking (de)": 8.84,
+ "MIRACLReranking (en)": 8.53,
+ "MIRACLReranking (es)": 8.02,
+ "MIRACLReranking (fa)": 4.56,
+ "MIRACLReranking (fi)": 10.54,
+ "MIRACLReranking (fr)": 6.26,
+ "MIRACLReranking (hi)": 4.55,
+ "MIRACLReranking (id)": 7.57,
+ "MIRACLReranking (ja)": 4.76,
+ "MIRACLReranking (ko)": 3.8,
+ "MIRACLReranking (sw)": 8.93,
+ "MIRACLReranking (te)": 3.58,
+ "MIRACLReranking (th)": 2.07,
+ "MIRACLReranking (yo)": 12.73,
+ "MIRACLReranking (zh)": 3.48
}
]
},
@@ -1738,9 +8215,143 @@
"ndcg_at_10": [
{
"Model": "rubert-base-cased-sentence",
+ "AILACasedocs": 9.0,
+ "AILAStatutes": 14.13,
+ "ARCChallenge": 0.94,
+ "AlloprofRetrieval": 1.09,
+ "AlphaNLI": 1.74,
+ "AppsRetrieval": 0.29,
+ "ArguAna": 9.39,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 0.22,
+ "CmedqaRetrieval": 1.51,
+ "CodeFeedbackMT": 5.54,
+ "CodeFeedbackST": 5.55,
+ "CodeSearchNetCCRetrieval (python)": 4.46,
+ "CodeSearchNetCCRetrieval (javascript)": 7.57,
+ "CodeSearchNetCCRetrieval (go)": 3.93,
+ "CodeSearchNetCCRetrieval (ruby)": 10.55,
+ "CodeSearchNetCCRetrieval (java)": 5.2,
+ "CodeSearchNetCCRetrieval (php)": 3.92,
+ "CodeSearchNetRetrieval (python)": 11.06,
+ "CodeSearchNetRetrieval (javascript)": 6.01,
+ "CodeSearchNetRetrieval (go)": 7.78,
+ "CodeSearchNetRetrieval (ruby)": 10.14,
+ "CodeSearchNetRetrieval (java)": 6.09,
+ "CodeSearchNetRetrieval (php)": 4.98,
+ "CodeTransOceanContest": 9.55,
+ "CodeTransOceanDL": 29.35,
+ "CosQA": 0.88,
+ "CovidRetrieval": 0.07,
+ "DBPedia": 1.46,
+ "FEVER": 0.64,
+ "FiQA2018": 0.19,
+ "GerDaLIR": 0.14,
+ "GerDaLIRSmall": 0.6,
+ "GermanQuAD-Retrieval": 16.71,
+ "HellaSwag": 2.08,
+ "HotpotQA": 2.39,
+ "LEMBNarrativeQARetrieval": 2.64,
+ "LEMBQMSumRetrieval": 6.92,
+ "LEMBSummScreenFDRetrieval": 13.38,
+ "LEMBWikimQARetrieval": 11.01,
+ "LeCaRDv2": 22.36,
+ "LegalBenchConsumerContractsQA": 18.02,
+ "LegalBenchCorporateLobbying": 35.35,
+ "LegalQuAD": 2.94,
+ "LegalSummarization": 18.23,
"MIRACLRetrieval (ru)": 1.92,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.36,
+ "MIRACLRetrieval (en)": 0.04,
+ "MIRACLRetrieval (es)": 0.15,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.73,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.27,
+ "MIRACLRetrieval (ja)": 0.06,
+ "MIRACLRetrieval (ko)": 0.52,
+ "MIRACLRetrieval (sw)": 1.01,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.08,
+ "MIRACLRetrieval (yo)": 2.03,
+ "MIRACLRetrieval (zh)": 0.07,
+ "MSMARCO": 0.2,
+ "MintakaRetrieval (ar)": 1.02,
+ "MintakaRetrieval (de)": 4.68,
+ "MintakaRetrieval (es)": 2.87,
+ "MintakaRetrieval (fr)": 4.43,
+ "MintakaRetrieval (hi)": 0.37,
+ "MintakaRetrieval (it)": 3.39,
+ "MintakaRetrieval (ja)": 2.12,
+ "MintakaRetrieval (pt)": 4.86,
+ "NFCorpus": 1.62,
+ "NQ": 0.04,
+ "PIQA": 1.94,
+ "Quail": 0.14,
+ "QuoraRetrieval": 48.23,
+ "RARbCode": 0.36,
+ "RARbMath": 5.57,
"RiaNewsRetrieval": 6.72,
- "RuBQRetrieval": 12.63
+ "RuBQRetrieval": 12.63,
+ "SCIDOCS": 1.18,
+ "SIQA": 0.25,
+ "SciFact": 3.4,
+ "SciFact-PL": 1.67,
+ "SpartQA": 1.73,
+ "StackOverflowQA": 6.97,
+ "SyntecRetrieval": 12.78,
+ "SyntheticText2SQL": 9.32,
+ "TRECCOVID": 7.8,
+ "TRECCOVID-PL": 3.96,
+ "TempReasonL1": 0.16,
+ "TempReasonL2Fact": 1.73,
+ "TempReasonL2Pure": 0.12,
+ "TempReasonL3Fact": 1.79,
+ "TempReasonL3Pure": 3.7,
+ "Touche2020": 0.0,
+ "WinoGrande": 5.27,
+ "XMarket (de)": 1.2,
+ "XMarket (en)": 1.36,
+ "XMarket (es)": 1.05,
+ "XPQARetrieval (ara-ara)": 4.3,
+ "XPQARetrieval (eng-ara)": 0.87,
+ "XPQARetrieval (ara-eng)": 1.71,
+ "XPQARetrieval (deu-deu)": 12.4,
+ "XPQARetrieval (eng-deu)": 3.2,
+ "XPQARetrieval (deu-eng)": 5.85,
+ "XPQARetrieval (spa-spa)": 8.73,
+ "XPQARetrieval (eng-spa)": 2.72,
+ "XPQARetrieval (spa-eng)": 6.49,
+ "XPQARetrieval (fra-fra)": 9.02,
+ "XPQARetrieval (eng-fra)": 2.91,
+ "XPQARetrieval (fra-eng)": 4.29,
+ "XPQARetrieval (hin-hin)": 5.13,
+ "XPQARetrieval (eng-hin)": 2.55,
+ "XPQARetrieval (hin-eng)": 1.72,
+ "XPQARetrieval (ita-ita)": 17.6,
+ "XPQARetrieval (eng-ita)": 3.22,
+ "XPQARetrieval (ita-eng)": 6.54,
+ "XPQARetrieval (jpn-jpn)": 15.14,
+ "XPQARetrieval (eng-jpn)": 1.29,
+ "XPQARetrieval (jpn-eng)": 2.03,
+ "XPQARetrieval (kor-kor)": 1.99,
+ "XPQARetrieval (eng-kor)": 1.22,
+ "XPQARetrieval (kor-eng)": 2.0,
+ "XPQARetrieval (pol-pol)": 6.8,
+ "XPQARetrieval (eng-pol)": 2.06,
+ "XPQARetrieval (pol-eng)": 5.41,
+ "XPQARetrieval (por-por)": 9.19,
+ "XPQARetrieval (eng-por)": 1.86,
+ "XPQARetrieval (por-eng)": 5.75,
+ "XPQARetrieval (tam-tam)": 2.66,
+ "XPQARetrieval (eng-tam)": 2.1,
+ "XPQARetrieval (tam-eng)": 1.33,
+ "XPQARetrieval (cmn-cmn)": 15.91,
+ "XPQARetrieval (eng-cmn)": 1.06,
+ "XPQARetrieval (cmn-eng)": 4.43
}
]
},
@@ -1748,15 +8359,129 @@
"cosine_spearman": [
{
"Model": "rubert-base-cased-sentence",
+ "BIOSSES": 49.0,
+ "CDSC-R": 54.78,
+ "GermanSTSBenchmark": 50.75,
+ "SICK-R": 56.98,
+ "SICK-R-PL": 46.8,
+ "SICKFr": 57.97,
+ "STS12": 59.02,
+ "STS13": 46.99,
+ "STS14": 50.88,
+ "STS15": 64.58,
+ "STS16": 53.98,
+ "STS17 (en-ar)": 0.76,
+ "STS17 (en-de)": 24.32,
+ "STS17 (ko-ko)": 15.41,
+ "STS17 (en-en)": 67.13,
+ "STS17 (fr-en)": 34.08,
+ "STS17 (it-en)": 29.34,
+ "STS17 (nl-en)": 37.16,
+ "STS17 (en-tr)": 10.7,
+ "STS17 (es-en)": 30.01,
+ "STS17 (es-es)": 72.47,
+ "STS17 (ar-ar)": 45.68,
+ "STS22 (en)": 52.22,
+ "STS22 (de-en)": 35.87,
+ "STS22 (ar)": 24.57,
+ "STS22 (tr)": 14.64,
+ "STS22 (fr-pl)": 5.63,
+ "STS22 (es-en)": 44.2,
+ "STS22 (de-fr)": 30.19,
+ "STS22 (de)": 25.32,
+ "STS22 (es)": 39.82,
+ "STS22 (es-it)": 27.88,
+ "STS22 (zh)": 40.52,
+ "STS22 (fr)": 58.5,
+ "STS22 (zh-en)": 30.35,
+ "STS22 (pl-en)": 23.13,
+ "STS22 (pl)": 13.46,
+ "STS22 (de-pl)": 34.06,
+ "STS22 (it)": 46.45,
+ "STSB": 62.38,
+ "STSBenchmark": 55.79,
+ "STSBenchmarkMultilingualSTS (pl)": 47.56,
+ "STSBenchmarkMultilingualSTS (zh)": 61.68,
+ "STSBenchmarkMultilingualSTS (en)": 55.79,
+ "STSBenchmarkMultilingualSTS (it)": 52.36,
+ "STSBenchmarkMultilingualSTS (nl)": 49.51,
+ "STSBenchmarkMultilingualSTS (de)": 52.98,
+ "STSBenchmarkMultilingualSTS (es)": 56.64,
+ "STSBenchmarkMultilingualSTS (fr)": 55.41,
+ "STSBenchmarkMultilingualSTS (pt)": 51.92
+ },
+ {
+ "Model": "rubert-base-cased-sentence",
+ "BIOSSES": 49.0,
+ "CDSC-R": 54.78,
+ "GermanSTSBenchmark": 50.75,
"RUParaPhraserSTS": 66.24,
"RuSTSBenchmarkSTS": 66.03,
+ "SICK-R": 56.98,
+ "SICK-R-PL": 46.8,
+ "SICKFr": 57.97,
+ "STS12": 59.02,
+ "STS13": 46.99,
+ "STS14": 50.88,
+ "STS15": 64.58,
+ "STS16": 53.98,
+ "STS17 (en-ar)": 0.76,
+ "STS17 (en-de)": 24.32,
+ "STS17 (ko-ko)": 15.65,
+ "STS17 (en-en)": 67.13,
+ "STS17 (fr-en)": 34.08,
+ "STS17 (it-en)": 29.34,
+ "STS17 (nl-en)": 37.16,
+ "STS17 (en-tr)": 10.7,
+ "STS17 (es-en)": 30.01,
+ "STS17 (es-es)": 72.47,
+ "STS17 (ar-ar)": 45.67,
"STS22 (ru)": 51.27,
- "STSBenchmarkMultilingualSTS (ru)": 66.71
+ "STS22 (en)": 52.22,
+ "STS22 (de-en)": 35.87,
+ "STS22 (ar)": 24.55,
+ "STS22 (tr)": 14.64,
+ "STS22 (fr-pl)": 5.63,
+ "STS22 (es-en)": 44.2,
+ "STS22 (de-fr)": 30.19,
+ "STS22 (de)": 25.32,
+ "STS22 (es)": 39.82,
+ "STS22 (es-it)": 27.88,
+ "STS22 (zh)": 40.52,
+ "STS22 (fr)": 58.5,
+ "STS22 (zh-en)": 30.35,
+ "STS22 (pl-en)": 23.13,
+ "STS22 (pl)": 13.37,
+ "STS22 (de-pl)": 34.06,
+ "STS22 (it)": 46.45,
+ "STSB": 62.38,
+ "STSBenchmark": 55.79,
+ "STSBenchmarkMultilingualSTS (ru)": 66.71,
+ "STSBenchmarkMultilingualSTS (pl)": 47.56,
+ "STSBenchmarkMultilingualSTS (zh)": 61.68,
+ "STSBenchmarkMultilingualSTS (en)": 55.79,
+ "STSBenchmarkMultilingualSTS (it)": 52.36,
+ "STSBenchmarkMultilingualSTS (nl)": 49.51,
+ "STSBenchmarkMultilingualSTS (de)": 52.98,
+ "STSBenchmarkMultilingualSTS (es)": 56.64,
+ "STSBenchmarkMultilingualSTS (fr)": 55.41,
+ "STSBenchmarkMultilingualSTS (pt)": 51.92
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "rubert-base-cased-sentence",
+ "SummEval": 30.57,
+ "SummEvalFr": 29.2
+ },
+ {
+ "Model": "rubert-base-cased-sentence",
+ "SummEval": 30.57,
+ "SummEvalFr": 29.2
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -1768,7 +8493,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "rubert-base-cased-sentence",
+ "Core17InstructionRetrieval": 1.6,
+ "News21InstructionRetrieval": 3.22,
+ "Robust04InstructionRetrieval": -4.02
+ }
+ ]
}
},
"FacebookAI__xlm-roberta-base": {
@@ -2392,13 +9124,13 @@
"CBD": 77.23,
"DanishPoliticalCommentsClassification": 41.28,
"EmotionClassification": 56.01,
- "GeoreviewClassification": 53.47,
- "HeadlineClassification": 85.66,
+ "GeoreviewClassification": 57.7,
+ "HeadlineClassification": 86.08,
"IFlyTek": 52.7,
"ImdbClassification": 95.07,
- "InappropriatenessClassification": 65.29,
+ "InappropriatenessClassification": 67.85,
"JDReview": 87.65,
- "KinopoiskClassification": 64.25,
+ "KinopoiskClassification": 69.15,
"LccSentimentClassification": 70.13,
"MTOPDomainClassification (en)": 95.37,
"MTOPDomainClassification (de)": 93.51,
@@ -2537,9 +9269,9 @@
"PAC": 67.05,
"PolEmo2.0-IN": 83.5,
"PolEmo2.0-OUT": 62.55,
- "RuReviewsClassification": 68.58,
- "RuSciBenchGRNTIClassification": 64.56,
- "RuSciBenchOECDClassification": 51.2,
+ "RuReviewsClassification": 72.05,
+ "RuSciBenchGRNTIClassification": 67.98,
+ "RuSciBenchOECDClassification": 54.45,
"TNews": 51.99,
"ToxicConversationsClassification": 68.81,
"TweetSentimentExtractionClassification": 66.26,
@@ -2551,7 +9283,7 @@
"v_measure": [
{
"Model": "GritLM-7B",
- "GeoreviewClusteringP2P": 74.06,
+ "GeoreviewClusteringP2P": 77.46,
"MasakhaNEWSClusteringP2P (amh)": 45.1,
"MasakhaNEWSClusteringP2P (eng)": 70.5,
"MasakhaNEWSClusteringP2P (fra)": 73.54,
@@ -2584,8 +9316,8 @@
"MasakhaNEWSClusteringS2S (tir)": 45.32,
"MasakhaNEWSClusteringS2S (xho)": 28.94,
"MasakhaNEWSClusteringS2S (yor)": 63.26,
- "RuSciBenchGRNTIClusteringP2P": 60.01,
- "RuSciBenchOECDClusteringP2P": 51.66
+ "RuSciBenchGRNTIClusteringP2P": 61.57,
+ "RuSciBenchOECDClusteringP2P": 53.88
}
]
},
@@ -2594,6 +9326,7 @@
{
"Model": "GritLM-7B",
"CDSC-E": 75.61,
+ "FalseFriendsGermanEnglish": 54.17,
"OpusparcusPC (de)": 97.43,
"OpusparcusPC (en)": 99.14,
"OpusparcusPC (fi)": 92.05,
@@ -2617,6 +9350,7 @@
{
"Model": "GritLM-7B",
"CDSC-E": 75.61,
+ "FalseFriendsGermanEnglish": 54.17,
"OpusparcusPC (de)": 97.43,
"OpusparcusPC (en)": 99.14,
"OpusparcusPC (fi)": 92.05,
@@ -2647,11 +9381,32 @@
"AskUbuntuDupQuestions": 67.37,
"MMarcoReranking": 21.7,
"MindSmallReranking": 31.81,
- "RuBQReranking": 72.43,
+ "RuBQReranking": 75.75,
"SciDocsRR": 86.82,
"StackOverflowDupQuestions": 55.94,
"SyntecReranking": 92.62,
"T2Reranking": 65.64
+ },
+ {
+ "Model": "GritLM-7B",
+ "MIRACLReranking (ru)": 64.23,
+ "MIRACLReranking (ar)": 74.22,
+ "MIRACLReranking (bn)": 70.07,
+ "MIRACLReranking (de)": 54.02,
+ "MIRACLReranking (en)": 63.11,
+ "MIRACLReranking (es)": 61.47,
+ "MIRACLReranking (fa)": 53.69,
+ "MIRACLReranking (fi)": 77.58,
+ "MIRACLReranking (fr)": 53.51,
+ "MIRACLReranking (hi)": 60.69,
+ "MIRACLReranking (id)": 57.2,
+ "MIRACLReranking (ja)": 66.14,
+ "MIRACLReranking (ko)": 55.58,
+ "MIRACLReranking (sw)": 65.0,
+ "MIRACLReranking (te)": 70.57,
+ "MIRACLReranking (th)": 70.61,
+ "MIRACLReranking (yo)": 66.19,
+ "MIRACLReranking (zh)": 50.02
}
]
},
@@ -2703,7 +9458,10 @@
"EcomRetrieval": 54.33,
"FiQA-PL": 37.98,
"FiQA2018": 59.91,
+ "GerDaLIR": 9.32,
"GerDaLIRSmall": 20.61,
+ "GermanDPR": 83.09,
+ "GermanQuAD-Retrieval": 95.32,
"HellaSwag": 39.45,
"LEMBNarrativeQARetrieval": 41.45,
"LEMBQMSumRetrieval": 30.36,
@@ -2714,6 +9472,7 @@
"LegalBenchCorporateLobbying": 95.0,
"LegalQuAD": 44.18,
"LegalSummarization": 70.64,
+ "MIRACLRetrieval (ru)": 68.3,
"MMarcoRetrieval": 76.54,
"MedicalRetrieval": 55.81,
"MintakaRetrieval (ar)": 25.88,
@@ -2730,7 +9489,8 @@
"Quail": 11.67,
"RARbCode": 84.02,
"RARbMath": 82.35,
- "RuBQRetrieval": 70.94,
+ "RiaNewsRetrieval": 82.76,
+ "RuBQRetrieval": 76.31,
"SCIDOCS": 24.41,
"SCIDOCS-PL": 18.34,
"SIQA": 7.23,
@@ -2751,6 +9511,9 @@
"Touche2020": 27.78,
"VideoRetrieval": 53.85,
"WinoGrande": 53.7,
+ "XMarket (de)": 25.01,
+ "XMarket (en)": 26.66,
+ "XMarket (es)": 26.2,
"XPQARetrieval (ara-ara)": 45.21,
"XPQARetrieval (eng-ara)": 27.34,
"XPQARetrieval (ara-eng)": 39.43,
@@ -2812,6 +9575,7 @@
"BIOSSES": 86.32,
"BQ": 49.18,
"CDSC-R": 93.38,
+ "GermanSTSBenchmark": 81.23,
"LCQMC": 75.52,
"PAWSX": 16.4,
"RUParaPhraserSTS": 74.37,
@@ -2835,6 +9599,8 @@
"STS17 (en-en)": 90.14,
"STS17 (es-en)": 87.47,
"STS17 (es-es)": 87.12,
+ "STS22 (ru)": 68.45,
+ "STS22 (en)": 68.63,
"STSB": 78.12,
"STSBenchmark": 85.64,
"STSBenchmarkMultilingualSTS (en)": 85.65,
@@ -2855,6 +9621,7 @@
"BIOSSES": 86.32,
"BQ": 49.18,
"CDSC-R": 93.38,
+ "GermanSTSBenchmark": 81.23,
"LCQMC": 75.52,
"PAWSX": 16.4,
"RUParaPhraserSTS": 74.37,
@@ -2878,6 +9645,7 @@
"STS17 (en-en)": 90.14,
"STS17 (es-en)": 87.47,
"STS17 (es-es)": 87.12,
+ "STS22 (ru)": 68.45,
"STSB": 78.12,
"STSBenchmark": 85.64,
"STSBenchmarkMultilingualSTS (en)": 85.65,
@@ -2890,6 +9658,10 @@
"STSBenchmarkMultilingualSTS (de)": 82.08,
"STSBenchmarkMultilingualSTS (it)": 81.69,
"STSBenchmarkMultilingualSTS (zh)": 79.73
+ },
+ {
+ "Model": "GritLM-7B",
+ "STS22 (en)": 68.63
}
]
},
@@ -2909,8 +9681,8 @@
"accuracy": [
{
"Model": "GritLM-7B",
- "CEDRClassification": 42.68,
- "SensitiveTopicsClassification": 28.52
+ "CEDRClassification": 50.67,
+ "SensitiveTopicsClassification": 33.24
}
]
},
@@ -4234,24 +11006,435 @@
},
"Salesforce__SFR-Embedding-Mistral": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "BornholmBitextMining": 50.24,
+ "Tatoeba (yid-eng)": 32.58,
+ "Tatoeba (heb-eng)": 82.59,
+ "Tatoeba (cat-eng)": 92.62,
+ "Tatoeba (ita-eng)": 92.07,
+ "Tatoeba (isl-eng)": 87.17,
+ "Tatoeba (awa-eng)": 67.75,
+ "Tatoeba (jav-eng)": 38.43,
+ "Tatoeba (lfn-eng)": 74.85,
+ "Tatoeba (spa-eng)": 98.87,
+ "Tatoeba (ast-eng)": 81.86,
+ "Tatoeba (pam-eng)": 15.44,
+ "Tatoeba (epo-eng)": 87.38,
+ "Tatoeba (mon-eng)": 38.56,
+ "Tatoeba (slv-eng)": 87.19,
+ "Tatoeba (ina-eng)": 96.25,
+ "Tatoeba (por-eng)": 94.44,
+ "Tatoeba (dsb-eng)": 66.31,
+ "Tatoeba (ceb-eng)": 43.93,
+ "Tatoeba (mhr-eng)": 16.21,
+ "Tatoeba (cor-eng)": 9.26,
+ "Tatoeba (hun-eng)": 90.48,
+ "Tatoeba (amh-eng)": 21.64,
+ "Tatoeba (ile-eng)": 84.97,
+ "Tatoeba (orv-eng)": 60.95,
+ "Tatoeba (fin-eng)": 90.56,
+ "Tatoeba (eus-eng)": 40.81,
+ "Tatoeba (hye-eng)": 63.0,
+ "Tatoeba (jpn-eng)": 95.32,
+ "Tatoeba (zsm-eng)": 94.56,
+ "Tatoeba (fry-eng)": 71.85,
+ "Tatoeba (hin-eng)": 95.25,
+ "Tatoeba (tat-eng)": 37.32,
+ "Tatoeba (ido-eng)": 77.09,
+ "Tatoeba (csb-eng)": 64.89,
+ "Tatoeba (ara-eng)": 89.16,
+ "Tatoeba (ces-eng)": 95.12,
+ "Tatoeba (urd-eng)": 83.88,
+ "Tatoeba (vie-eng)": 94.85,
+ "Tatoeba (ron-eng)": 93.29,
+ "Tatoeba (kzj-eng)": 12.17,
+ "Tatoeba (pol-eng)": 96.87,
+ "Tatoeba (deu-eng)": 99.47,
+ "Tatoeba (gla-eng)": 63.95,
+ "Tatoeba (uzb-eng)": 53.17,
+ "Tatoeba (cha-eng)": 47.21,
+ "Tatoeba (est-eng)": 64.2,
+ "Tatoeba (tuk-eng)": 42.48,
+ "Tatoeba (max-eng)": 65.53,
+ "Tatoeba (gle-eng)": 74.19,
+ "Tatoeba (mar-eng)": 69.32,
+ "Tatoeba (rus-eng)": 93.6,
+ "Tatoeba (tam-eng)": 72.45,
+ "Tatoeba (kur-eng)": 31.05,
+ "Tatoeba (kab-eng)": 3.37,
+ "Tatoeba (fao-eng)": 71.95,
+ "Tatoeba (cym-eng)": 72.14,
+ "Tatoeba (nno-eng)": 87.26,
+ "Tatoeba (lvs-eng)": 65.97,
+ "Tatoeba (arq-eng)": 50.3,
+ "Tatoeba (nov-eng)": 74.65,
+ "Tatoeba (uig-eng)": 39.11,
+ "Tatoeba (swe-eng)": 92.91,
+ "Tatoeba (wuu-eng)": 90.01,
+ "Tatoeba (nob-eng)": 96.83,
+ "Tatoeba (ukr-eng)": 94.22,
+ "Tatoeba (tha-eng)": 93.89,
+ "Tatoeba (ind-eng)": 93.7,
+ "Tatoeba (yue-eng)": 89.87,
+ "Tatoeba (glg-eng)": 91.35,
+ "Tatoeba (bel-eng)": 87.44,
+ "Tatoeba (xho-eng)": 36.47,
+ "Tatoeba (kor-eng)": 92.25,
+ "Tatoeba (dtp-eng)": 11.84,
+ "Tatoeba (lit-eng)": 69.19,
+ "Tatoeba (swh-eng)": 62.9,
+ "Tatoeba (bos-eng)": 91.92,
+ "Tatoeba (swg-eng)": 68.76,
+ "Tatoeba (mkd-eng)": 86.9,
+ "Tatoeba (lat-eng)": 88.01,
+ "Tatoeba (tel-eng)": 43.17,
+ "Tatoeba (bre-eng)": 14.57,
+ "Tatoeba (mal-eng)": 55.6,
+ "Tatoeba (tur-eng)": 94.33,
+ "Tatoeba (ben-eng)": 81.15,
+ "Tatoeba (pms-eng)": 63.17,
+ "Tatoeba (tzl-eng)": 51.59,
+ "Tatoeba (tgl-eng)": 93.37,
+ "Tatoeba (fra-eng)": 95.61,
+ "Tatoeba (sqi-eng)": 66.66,
+ "Tatoeba (gsw-eng)": 52.27,
+ "Tatoeba (arz-eng)": 70.4,
+ "Tatoeba (hsb-eng)": 75.51,
+ "Tatoeba (oci-eng)": 62.22,
+ "Tatoeba (ber-eng)": 7.85,
+ "Tatoeba (hrv-eng)": 94.38,
+ "Tatoeba (cbk-eng)": 81.96,
+ "Tatoeba (nld-eng)": 96.07,
+ "Tatoeba (dan-eng)": 93.81,
+ "Tatoeba (pes-eng)": 89.78,
+ "Tatoeba (aze-eng)": 76.2,
+ "Tatoeba (srp-eng)": 93.23,
+ "Tatoeba (ang-eng)": 83.46,
+ "Tatoeba (ell-eng)": 91.21,
+ "Tatoeba (khm-eng)": 36.97,
+ "Tatoeba (bul-eng)": 93.77,
+ "Tatoeba (kaz-eng)": 46.98,
+ "Tatoeba (kat-eng)": 60.22,
+ "Tatoeba (war-eng)": 43.02,
+ "Tatoeba (afr-eng)": 87.66,
+ "Tatoeba (nds-eng)": 78.34,
+ "Tatoeba (slk-eng)": 89.48,
+ "Tatoeba (cmn-eng)": 96.22
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "AllegroReviews": 57.4,
+ "AmazonCounterfactualClassification (en-ext)": 74.03,
+ "AmazonCounterfactualClassification (en)": 74.51,
+ "AmazonCounterfactualClassification (de)": 71.56,
+ "AmazonCounterfactualClassification (ja)": 74.03,
+ "AmazonReviewsClassification (en)": 55.43,
+ "AmazonReviewsClassification (de)": 54.46,
+ "AmazonReviewsClassification (es)": 51.63,
+ "AmazonReviewsClassification (fr)": 51.0,
+ "AmazonReviewsClassification (ja)": 50.44,
+ "AmazonReviewsClassification (zh)": 46.61,
+ "AngryTweetsClassification": 63.94,
+ "CBD": 71.93,
+ "DanishPoliticalCommentsClassification": 35.9,
+ "GeoreviewClassification": 57.36,
+ "HeadlineClassification": 87.11,
+ "InappropriatenessClassification": 70.64,
+ "KinopoiskClassification": 68.9,
+ "LccSentimentClassification": 63.87,
+ "MTOPDomainClassification (en)": 95.88,
+ "MTOPDomainClassification (de)": 91.7,
+ "MTOPDomainClassification (es)": 91.75,
+ "MTOPDomainClassification (fr)": 89.97,
+ "MTOPDomainClassification (hi)": 87.66,
+ "MTOPDomainClassification (th)": 84.42,
+ "MTOPIntentClassification (en)": 78.86,
+ "MTOPIntentClassification (de)": 69.41,
+ "MTOPIntentClassification (es)": 73.93,
+ "MTOPIntentClassification (fr)": 70.47,
+ "MTOPIntentClassification (hi)": 60.12,
+ "MTOPIntentClassification (th)": 62.46,
+ "MasakhaNEWSClassification (amh)": 53.54,
+ "MasakhaNEWSClassification (eng)": 87.18,
+ "MasakhaNEWSClassification (fra)": 83.39,
+ "MasakhaNEWSClassification (hau)": 77.22,
+ "MasakhaNEWSClassification (ibo)": 77.1,
+ "MasakhaNEWSClassification (lin)": 83.66,
+ "MasakhaNEWSClassification (lug)": 73.32,
+ "MasakhaNEWSClassification (orm)": 77.63,
+ "MasakhaNEWSClassification (pcm)": 96.46,
+ "MasakhaNEWSClassification (run)": 85.75,
+ "MasakhaNEWSClassification (sna)": 90.24,
+ "MasakhaNEWSClassification (som)": 65.85,
+ "MasakhaNEWSClassification (swa)": 79.89,
+ "MasakhaNEWSClassification (tir)": 33.71,
+ "MasakhaNEWSClassification (xho)": 88.32,
+ "MasakhaNEWSClassification (yor)": 85.47,
+ "MassiveIntentClassification (it)": 71.45,
+ "MassiveIntentClassification (es)": 70.96,
+ "MassiveIntentClassification (sv)": 70.53,
+ "MassiveIntentClassification (zh-CN)": 72.23,
+ "MassiveIntentClassification (nb)": 65.92,
+ "MassiveIntentClassification (nl)": 71.39,
+ "MassiveIntentClassification (da)": 68.55,
+ "MassiveIntentClassification (ur)": 57.45,
+ "MassiveIntentClassification (tl)": 62.7,
+ "MassiveIntentClassification (bn)": 58.97,
+ "MassiveIntentClassification (sq)": 49.97,
+ "MassiveIntentClassification (sl)": 63.79,
+ "MassiveIntentClassification (ru)": 73.86,
+ "MassiveIntentClassification (ms)": 66.75,
+ "MassiveIntentClassification (jv)": 51.45,
+ "MassiveIntentClassification (lv)": 51.32,
+ "MassiveIntentClassification (id)": 68.38,
+ "MassiveIntentClassification (hy)": 47.97,
+ "MassiveIntentClassification (sw)": 52.32,
+ "MassiveIntentClassification (cy)": 48.02,
+ "MassiveIntentClassification (hi)": 64.37,
+ "MassiveIntentClassification (af)": 62.53,
+ "MassiveIntentClassification (el)": 64.8,
+ "MassiveIntentClassification (is)": 53.62,
+ "MassiveIntentClassification (ko)": 69.6,
+ "MassiveIntentClassification (ka)": 46.01,
+ "MassiveIntentClassification (mn)": 41.87,
+ "MassiveIntentClassification (th)": 61.74,
+ "MassiveIntentClassification (ar)": 58.41,
+ "MassiveIntentClassification (hu)": 65.21,
+ "MassiveIntentClassification (ro)": 64.58,
+ "MassiveIntentClassification (fa)": 69.15,
+ "MassiveIntentClassification (vi)": 67.01,
+ "MassiveIntentClassification (zh-TW)": 65.92,
+ "MassiveIntentClassification (ml)": 42.18,
+ "MassiveIntentClassification (pl)": 71.17,
+ "MassiveIntentClassification (he)": 62.79,
+ "MassiveIntentClassification (km)": 40.3,
+ "MassiveIntentClassification (tr)": 66.87,
+ "MassiveIntentClassification (az)": 58.73,
+ "MassiveIntentClassification (ja)": 71.81,
+ "MassiveIntentClassification (my)": 39.62,
+ "MassiveIntentClassification (fi)": 64.47,
+ "MassiveIntentClassification (pt)": 71.78,
+ "MassiveIntentClassification (ta)": 45.59,
+ "MassiveIntentClassification (te)": 46.17,
+ "MassiveIntentClassification (am)": 35.64,
+ "MassiveIntentClassification (fr)": 71.71,
+ "MassiveIntentClassification (de)": 70.56,
+ "MassiveIntentClassification (en)": 77.05,
+ "MassiveIntentClassification (kn)": 46.58,
+ "MassiveScenarioClassification (my)": 46.43,
+ "MassiveScenarioClassification (mn)": 48.85,
+ "MassiveScenarioClassification (fa)": 74.87,
+ "MassiveScenarioClassification (ro)": 69.59,
+ "MassiveScenarioClassification (sl)": 71.38,
+ "MassiveScenarioClassification (lv)": 59.14,
+ "MassiveScenarioClassification (ms)": 73.14,
+ "MassiveScenarioClassification (zh-CN)": 76.77,
+ "MassiveScenarioClassification (ka)": 54.83,
+ "MassiveScenarioClassification (vi)": 72.77,
+ "MassiveScenarioClassification (nl)": 76.71,
+ "MassiveScenarioClassification (hu)": 71.21,
+ "MassiveScenarioClassification (de)": 77.63,
+ "MassiveScenarioClassification (te)": 54.0,
+ "MassiveScenarioClassification (en)": 79.73,
+ "MassiveScenarioClassification (ta)": 53.35,
+ "MassiveScenarioClassification (tr)": 70.87,
+ "MassiveScenarioClassification (nb)": 73.02,
+ "MassiveScenarioClassification (zh-TW)": 72.02,
+ "MassiveScenarioClassification (az)": 64.38,
+ "MassiveScenarioClassification (es)": 75.4,
+ "MassiveScenarioClassification (kn)": 56.38,
+ "MassiveScenarioClassification (pt)": 74.69,
+ "MassiveScenarioClassification (af)": 71.47,
+ "MassiveScenarioClassification (bn)": 64.78,
+ "MassiveScenarioClassification (hi)": 70.04,
+ "MassiveScenarioClassification (tl)": 71.28,
+ "MassiveScenarioClassification (el)": 70.39,
+ "MassiveScenarioClassification (id)": 74.24,
+ "MassiveScenarioClassification (th)": 69.72,
+ "MassiveScenarioClassification (pl)": 75.43,
+ "MassiveScenarioClassification (sq)": 59.53,
+ "MassiveScenarioClassification (hy)": 54.93,
+ "MassiveScenarioClassification (ur)": 64.5,
+ "MassiveScenarioClassification (ml)": 49.94,
+ "MassiveScenarioClassification (sv)": 77.09,
+ "MassiveScenarioClassification (da)": 74.31,
+ "MassiveScenarioClassification (am)": 42.84,
+ "MassiveScenarioClassification (jv)": 61.77,
+ "MassiveScenarioClassification (ja)": 76.23,
+ "MassiveScenarioClassification (km)": 48.96,
+ "MassiveScenarioClassification (cy)": 59.47,
+ "MassiveScenarioClassification (fi)": 68.89,
+ "MassiveScenarioClassification (ko)": 74.8,
+ "MassiveScenarioClassification (ar)": 67.42,
+ "MassiveScenarioClassification (is)": 64.42,
+ "MassiveScenarioClassification (sw)": 62.53,
+ "MassiveScenarioClassification (ru)": 77.65,
+ "MassiveScenarioClassification (it)": 75.4,
+ "MassiveScenarioClassification (he)": 66.27,
+ "MassiveScenarioClassification (fr)": 76.04,
+ "NoRecClassification": 55.04,
+ "NordicLangClassification": 68.8,
+ "PAC": 62.48,
+ "PolEmo2.0-IN": 84.46,
+ "PolEmo2.0-OUT": 54.86,
+ "RuReviewsClassification": 71.01,
+ "RuSciBenchGRNTIClassification": 68.08,
+ "RuSciBenchOECDClassification": 54.1
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "AlloProfClusteringP2P": 70.89,
+ "AlloProfClusteringS2S": 61.38,
+ "BlurbsClusteringP2P": 47.73,
+ "BlurbsClusteringS2S": 21.8,
+ "GeoreviewClusteringP2P": 77.21,
+ "HALClusteringS2S": 27.76,
+ "MLSUMClusteringP2P (de)": 53.27,
+ "MLSUMClusteringP2P (fr)": 48.16,
+ "MLSUMClusteringP2P (ru)": 56.1,
+ "MLSUMClusteringP2P (es)": 50.1,
+ "MLSUMClusteringS2S (de)": 52.44,
+ "MLSUMClusteringS2S (fr)": 47.44,
+ "MLSUMClusteringS2S (ru)": 54.12,
+ "MLSUMClusteringS2S (es)": 49.02,
+ "MasakhaNEWSClusteringP2P (amh)": 48.0,
+ "MasakhaNEWSClusteringP2P (eng)": 72.93,
+ "MasakhaNEWSClusteringP2P (fra)": 72.3,
+ "MasakhaNEWSClusteringP2P (hau)": 68.64,
+ "MasakhaNEWSClusteringP2P (ibo)": 67.08,
+ "MasakhaNEWSClusteringP2P (lin)": 79.52,
+ "MasakhaNEWSClusteringP2P (lug)": 53.77,
+ "MasakhaNEWSClusteringP2P (orm)": 55.13,
+ "MasakhaNEWSClusteringP2P (pcm)": 92.6,
+ "MasakhaNEWSClusteringP2P (run)": 61.05,
+ "MasakhaNEWSClusteringP2P (sna)": 79.0,
+ "MasakhaNEWSClusteringP2P (som)": 42.12,
+ "MasakhaNEWSClusteringP2P (swa)": 42.16,
+ "MasakhaNEWSClusteringP2P (tir)": 47.59,
+ "MasakhaNEWSClusteringP2P (xho)": 58.32,
+ "MasakhaNEWSClusteringP2P (yor)": 77.24,
+ "MasakhaNEWSClusteringS2S (amh)": 47.22,
+ "MasakhaNEWSClusteringS2S (eng)": 66.21,
+ "MasakhaNEWSClusteringS2S (fra)": 66.88,
+ "MasakhaNEWSClusteringS2S (hau)": 39.76,
+ "MasakhaNEWSClusteringS2S (ibo)": 62.6,
+ "MasakhaNEWSClusteringS2S (lin)": 80.5,
+ "MasakhaNEWSClusteringS2S (lug)": 51.36,
+ "MasakhaNEWSClusteringS2S (orm)": 31.55,
+ "MasakhaNEWSClusteringS2S (pcm)": 83.95,
+ "MasakhaNEWSClusteringS2S (run)": 59.32,
+ "MasakhaNEWSClusteringS2S (sna)": 53.11,
+ "MasakhaNEWSClusteringS2S (som)": 36.74,
+ "MasakhaNEWSClusteringS2S (swa)": 35.93,
+ "MasakhaNEWSClusteringS2S (tir)": 44.42,
+ "MasakhaNEWSClusteringS2S (xho)": 33.32,
+ "MasakhaNEWSClusteringS2S (yor)": 61.95,
+ "RuSciBenchGRNTIClusteringP2P": 63.05,
+ "RuSciBenchOECDClusteringP2P": 54.49,
+ "TenKGnadClusteringP2P": 55.31,
+ "TenKGnadClusteringS2S": 39.78
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "CDSC-E": 75.1,
+ "FalseFriendsGermanEnglish": 53.11,
+ "OpusparcusPC (de)": 97.46,
+ "OpusparcusPC (en)": 99.09,
+ "OpusparcusPC (fi)": 92.3,
+ "OpusparcusPC (fr)": 94.94,
+ "OpusparcusPC (ru)": 90.82,
+ "OpusparcusPC (sv)": 95.01,
+ "PSC": 99.63,
+ "PawsXPairClassification (de)": 59.59,
+ "PawsXPairClassification (en)": 66.91,
+ "PawsXPairClassification (es)": 60.97,
+ "PawsXPairClassification (fr)": 62.57,
+ "PawsXPairClassification (ja)": 52.68,
+ "PawsXPairClassification (ko)": 53.28,
+ "PawsXPairClassification (zh)": 59.15,
+ "SICK-E-PL": 77.4,
+ "TERRa": 60.65
+ },
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "CDSC-E": 75.1,
+ "FalseFriendsGermanEnglish": 53.11,
+ "OpusparcusPC (de)": 97.46,
+ "OpusparcusPC (en)": 99.09,
+ "OpusparcusPC (fi)": 92.3,
+ "OpusparcusPC (fr)": 94.94,
+ "OpusparcusPC (ru)": 90.82,
+ "OpusparcusPC (sv)": 95.01,
+ "PSC": 99.67,
+ "PawsXPairClassification (de)": 60.1,
+ "PawsXPairClassification (en)": 67.08,
+ "PawsXPairClassification (es)": 61.15,
+ "PawsXPairClassification (fr)": 62.69,
+ "PawsXPairClassification (ja)": 52.81,
+ "PawsXPairClassification (ko)": 53.32,
+ "PawsXPairClassification (zh)": 59.63,
+ "SICK-E-PL": 77.4,
+ "TERRa": 60.65
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "AlloprofReranking": 79.32,
+ "RuBQReranking": 77.24,
+ "SyntecReranking": 90.75,
+ "T2Reranking": 66.97
+ },
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "MIRACLReranking (ar)": 75.78,
+ "MIRACLReranking (bn)": 69.72,
+ "MIRACLReranking (de)": 54.39,
+ "MIRACLReranking (en)": 62.66,
+ "MIRACLReranking (es)": 61.76,
+ "MIRACLReranking (fa)": 55.27,
+ "MIRACLReranking (fi)": 78.11,
+ "MIRACLReranking (fr)": 55.93,
+ "MIRACLReranking (hi)": 58.09,
+ "MIRACLReranking (id)": 58.12,
+ "MIRACLReranking (ja)": 63.56,
+ "MIRACLReranking (ko)": 55.38,
+ "MIRACLReranking (ru)": 63.67,
+ "MIRACLReranking (sw)": 61.87,
+ "MIRACLReranking (te)": 74.0,
+ "MIRACLReranking (th)": 72.49,
+ "MIRACLReranking (yo)": 66.0,
+ "MIRACLReranking (zh)": 49.75
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "SFR-Embedding-Mistral",
+ "AILACasedocs": 36.67,
+ "AILAStatutes": 37.47,
+ "ARCChallenge": 20.39,
+ "AlloprofRetrieval": 56.74,
+ "AlphaNLI": 29.36,
+ "AppsRetrieval": 26.11,
+ "BSARDRetrieval": 26.28,
"BrightRetrieval (sustainable_living)": 19.79,
"BrightRetrieval (economics)": 17.84,
"BrightRetrieval (theoremqa_theorems)": 24.32,
@@ -4263,7 +11446,105 @@
"BrightRetrieval (leetcode)": 27.35,
"BrightRetrieval (biology)": 19.49,
"BrightRetrieval (earth_science)": 26.63,
- "BrightRetrieval (robotics)": 16.7
+ "BrightRetrieval (robotics)": 16.7,
+ "CmedqaRetrieval": 35.12,
+ "CodeFeedbackMT": 40.33,
+ "CodeFeedbackST": 78.07,
+ "CodeSearchNetCCRetrieval (python)": 70.38,
+ "CodeSearchNetCCRetrieval (javascript)": 69.09,
+ "CodeSearchNetCCRetrieval (go)": 62.02,
+ "CodeSearchNetCCRetrieval (ruby)": 65.21,
+ "CodeSearchNetCCRetrieval (java)": 67.75,
+ "CodeSearchNetCCRetrieval (php)": 52.44,
+ "CodeSearchNetRetrieval (python)": 92.26,
+ "CodeSearchNetRetrieval (javascript)": 81.47,
+ "CodeSearchNetRetrieval (go)": 93.28,
+ "CodeSearchNetRetrieval (ruby)": 85.94,
+ "CodeSearchNetRetrieval (java)": 84.03,
+ "CodeSearchNetRetrieval (php)": 83.39,
+ "CodeTransOceanContest": 83.52,
+ "CodeTransOceanDL": 32.93,
+ "CosQA": 34.31,
+ "CovidRetrieval": 73.07,
+ "GerDaLIR": 8.22,
+ "GerDaLIRSmall": 18.39,
+ "GermanQuAD-Retrieval": 95.22,
+ "HellaSwag": 37.68,
+ "LEMBNarrativeQARetrieval": 39.2,
+ "LEMBQMSumRetrieval": 29.43,
+ "LEMBSummScreenFDRetrieval": 77.73,
+ "LEMBWikimQARetrieval": 59.28,
+ "LeCaRDv2": 59.58,
+ "LegalBenchConsumerContractsQA": 79.15,
+ "LegalBenchCorporateLobbying": 94.83,
+ "LegalQuAD": 45.08,
+ "LegalSummarization": 71.05,
+ "MintakaRetrieval (ar)": 24.63,
+ "MintakaRetrieval (de)": 51.48,
+ "MintakaRetrieval (es)": 48.5,
+ "MintakaRetrieval (fr)": 49.62,
+ "MintakaRetrieval (hi)": 25.72,
+ "MintakaRetrieval (it)": 47.58,
+ "MintakaRetrieval (ja)": 33.34,
+ "MintakaRetrieval (pt)": 51.01,
+ "PIQA": 42.46,
+ "Quail": 8.19,
+ "RARbCode": 77.97,
+ "RARbMath": 79.36,
+ "RiaNewsRetrieval": 81.14,
+ "RuBQRetrieval": 77.16,
+ "SIQA": 5.96,
+ "SciFact-PL": 69.64,
+ "SpartQA": 8.81,
+ "StackOverflowQA": 92.28,
+ "SyntecRetrieval": 91.49,
+ "SyntheticText2SQL": 59.29,
+ "TRECCOVID-PL": 75.27,
+ "TempReasonL1": 3.59,
+ "TempReasonL2Fact": 40.15,
+ "TempReasonL2Pure": 9.64,
+ "TempReasonL3Fact": 31.24,
+ "TempReasonL3Pure": 14.14,
+ "WinoGrande": 45.65,
+ "XMarket (de)": 27.3,
+ "XMarket (en)": 33.95,
+ "XMarket (es)": 28.26,
+ "XPQARetrieval (ara-ara)": 47.29,
+ "XPQARetrieval (eng-ara)": 31.26,
+ "XPQARetrieval (ara-eng)": 42.44,
+ "XPQARetrieval (deu-deu)": 79.35,
+ "XPQARetrieval (eng-deu)": 49.52,
+ "XPQARetrieval (deu-eng)": 74.65,
+ "XPQARetrieval (spa-spa)": 63.0,
+ "XPQARetrieval (eng-spa)": 36.73,
+ "XPQARetrieval (spa-eng)": 60.38,
+ "XPQARetrieval (fra-fra)": 71.58,
+ "XPQARetrieval (eng-fra)": 44.35,
+ "XPQARetrieval (fra-eng)": 67.19,
+ "XPQARetrieval (hin-hin)": 74.64,
+ "XPQARetrieval (eng-hin)": 22.02,
+ "XPQARetrieval (hin-eng)": 66.84,
+ "XPQARetrieval (ita-ita)": 75.67,
+ "XPQARetrieval (eng-ita)": 33.01,
+ "XPQARetrieval (ita-eng)": 69.6,
+ "XPQARetrieval (jpn-jpn)": 73.75,
+ "XPQARetrieval (eng-jpn)": 43.54,
+ "XPQARetrieval (jpn-eng)": 70.8,
+ "XPQARetrieval (kor-kor)": 40.03,
+ "XPQARetrieval (eng-kor)": 33.38,
+ "XPQARetrieval (kor-eng)": 35.08,
+ "XPQARetrieval (pol-pol)": 48.25,
+ "XPQARetrieval (eng-pol)": 35.62,
+ "XPQARetrieval (pol-eng)": 44.53,
+ "XPQARetrieval (por-por)": 50.61,
+ "XPQARetrieval (eng-por)": 28.2,
+ "XPQARetrieval (por-eng)": 49.27,
+ "XPQARetrieval (tam-tam)": 43.19,
+ "XPQARetrieval (eng-tam)": 4.27,
+ "XPQARetrieval (tam-eng)": 22.14,
+ "XPQARetrieval (cmn-cmn)": 64.0,
+ "XPQARetrieval (eng-cmn)": 35.37,
+ "XPQARetrieval (cmn-eng)": 57.31
}
],
"recall_at_1": [
@@ -4281,16 +11562,115 @@
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "CDSC-R": 92.59,
+ "GermanSTSBenchmark": 85.74,
+ "RUParaPhraserSTS": 75.99,
+ "RuSTSBenchmarkSTS": 84.57,
+ "SICK-R-PL": 76.59,
+ "SICKFr": 81.27,
+ "STS22 (en)": 68.63,
+ "STS22 (es-it)": 75.55,
+ "STS22 (it)": 76.79,
+ "STS22 (ar)": 55.04,
+ "STS22 (pl-en)": 74.65,
+ "STS22 (fr)": 81.03,
+ "STS22 (de-en)": 61.61,
+ "STS22 (pl)": 40.47,
+ "STS22 (zh-en)": 72.83,
+ "STS22 (es)": 67.38,
+ "STS22 (zh)": 64.65,
+ "STS22 (ru)": 62.38,
+ "STS22 (es-en)": 77.51,
+ "STS22 (de-fr)": 62.9,
+ "STS22 (de)": 52.72,
+ "STS22 (tr)": 68.81,
+ "STS22 (de-pl)": 58.39,
+ "STS22 (fr-pl)": 84.52,
+ "STSB": 81.97,
+ "STSBenchmarkMultilingualSTS (pl)": 84.24,
+ "STSBenchmarkMultilingualSTS (ru)": 84.68,
+ "STSBenchmarkMultilingualSTS (nl)": 84.23,
+ "STSBenchmarkMultilingualSTS (en)": 89.0,
+ "STSBenchmarkMultilingualSTS (it)": 85.04,
+ "STSBenchmarkMultilingualSTS (zh)": 82.95,
+ "STSBenchmarkMultilingualSTS (es)": 86.69,
+ "STSBenchmarkMultilingualSTS (de)": 86.02,
+ "STSBenchmarkMultilingualSTS (pt)": 85.24,
+ "STSBenchmarkMultilingualSTS (fr)": 85.87
+ },
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "CDSC-R": 92.59,
+ "GermanSTSBenchmark": 85.74,
+ "RUParaPhraserSTS": 75.99,
+ "RuSTSBenchmarkSTS": 84.57,
+ "SICK-R-PL": 76.59,
+ "SICKFr": 81.27,
+ "STS22 (en)": 68.63,
+ "STS22 (es-it)": 75.55,
+ "STS22 (it)": 76.79,
+ "STS22 (ar)": 55.04,
+ "STS22 (pl-en)": 74.65,
+ "STS22 (fr)": 81.03,
+ "STS22 (de-en)": 61.61,
+ "STS22 (pl)": 40.47,
+ "STS22 (zh-en)": 72.83,
+ "STS22 (es)": 67.38,
+ "STS22 (zh)": 64.65,
+ "STS22 (ru)": 62.38,
+ "STS22 (es-en)": 77.51,
+ "STS22 (de-fr)": 62.9,
+ "STS22 (de)": 52.72,
+ "STS22 (tr)": 68.81,
+ "STS22 (de-pl)": 58.39,
+ "STS22 (fr-pl)": 84.52,
+ "STSB": 81.97,
+ "STSBenchmarkMultilingualSTS (pl)": 84.24,
+ "STSBenchmarkMultilingualSTS (ru)": 84.68,
+ "STSBenchmarkMultilingualSTS (nl)": 84.23,
+ "STSBenchmarkMultilingualSTS (en)": 89.0,
+ "STSBenchmarkMultilingualSTS (it)": 85.04,
+ "STSBenchmarkMultilingualSTS (zh)": 82.95,
+ "STSBenchmarkMultilingualSTS (es)": 86.69,
+ "STSBenchmarkMultilingualSTS (de)": 86.02,
+ "STSBenchmarkMultilingualSTS (pt)": 85.24,
+ "STSBenchmarkMultilingualSTS (fr)": 85.87
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "SummEvalFr": 30.44
+ },
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "SummEvalFr": 30.44
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "CEDRClassification": 51.74,
+ "SensitiveTopicsClassification": 34.15
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "SFR-Embedding-Mistral",
+ "Core17InstructionRetrieval": 4.55,
+ "News21InstructionRetrieval": 1.38,
+ "Robust04InstructionRetrieval": -5.45
+ }
+ ]
}
},
"T-Systems-onsite__cross-en-de-roberta-sentence-transformer": {
@@ -4426,21 +11806,292 @@
},
"ai-forever__sbert_large_mt_nlu_ru": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "sbert_large_mt_nlu_ru",
+ "BornholmBitextMining": 17.0,
+ "Tatoeba (aze-eng)": 0.49,
+ "Tatoeba (pes-eng)": 0.14,
+ "Tatoeba (kaz-eng)": 1.97,
+ "Tatoeba (bel-eng)": 11.51,
+ "Tatoeba (hye-eng)": 0.23,
+ "Tatoeba (cmn-eng)": 0.43,
+ "Tatoeba (tat-eng)": 1.93,
+ "Tatoeba (max-eng)": 1.41,
+ "Tatoeba (oci-eng)": 0.88,
+ "Tatoeba (lat-eng)": 0.49,
+ "Tatoeba (gla-eng)": 0.62,
+ "Tatoeba (awa-eng)": 0.17,
+ "Tatoeba (ron-eng)": 0.66,
+ "Tatoeba (ast-eng)": 6.21,
+ "Tatoeba (vie-eng)": 0.63,
+ "Tatoeba (ang-eng)": 5.98,
+ "Tatoeba (nno-eng)": 1.5,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (arz-eng)": 0.47,
+ "Tatoeba (kur-eng)": 0.14,
+ "Tatoeba (kzj-eng)": 0.27,
+ "Tatoeba (glg-eng)": 1.21,
+ "Tatoeba (ben-eng)": 0.0,
+ "Tatoeba (dan-eng)": 1.64,
+ "Tatoeba (jpn-eng)": 0.0,
+ "Tatoeba (ces-eng)": 0.14,
+ "Tatoeba (ind-eng)": 0.61,
+ "Tatoeba (hrv-eng)": 0.53,
+ "Tatoeba (orv-eng)": 4.82,
+ "Tatoeba (fra-eng)": 1.81,
+ "Tatoeba (arq-eng)": 0.18,
+ "Tatoeba (nld-eng)": 2.56,
+ "Tatoeba (rus-eng)": 28.16,
+ "Tatoeba (hin-eng)": 0.02,
+ "Tatoeba (bos-eng)": 1.9,
+ "Tatoeba (cym-eng)": 1.16,
+ "Tatoeba (cor-eng)": 0.4,
+ "Tatoeba (ido-eng)": 0.94,
+ "Tatoeba (cha-eng)": 4.68,
+ "Tatoeba (gsw-eng)": 8.64,
+ "Tatoeba (epo-eng)": 0.4,
+ "Tatoeba (nob-eng)": 1.01,
+ "Tatoeba (ceb-eng)": 0.27,
+ "Tatoeba (uzb-eng)": 1.71,
+ "Tatoeba (pms-eng)": 1.88,
+ "Tatoeba (cat-eng)": 2.71,
+ "Tatoeba (kab-eng)": 0.28,
+ "Tatoeba (heb-eng)": 0.37,
+ "Tatoeba (uig-eng)": 0.11,
+ "Tatoeba (ara-eng)": 0.07,
+ "Tatoeba (gle-eng)": 0.73,
+ "Tatoeba (zsm-eng)": 0.5,
+ "Tatoeba (kat-eng)": 0.38,
+ "Tatoeba (srp-eng)": 4.58,
+ "Tatoeba (dsb-eng)": 0.11,
+ "Tatoeba (cbk-eng)": 0.41,
+ "Tatoeba (nds-eng)": 3.5,
+ "Tatoeba (ita-eng)": 1.04,
+ "Tatoeba (ber-eng)": 0.68,
+ "Tatoeba (ile-eng)": 4.41,
+ "Tatoeba (tel-eng)": 0.54,
+ "Tatoeba (kor-eng)": 0.15,
+ "Tatoeba (fin-eng)": 0.2,
+ "Tatoeba (yue-eng)": 0.2,
+ "Tatoeba (fao-eng)": 1.03,
+ "Tatoeba (jav-eng)": 2.43,
+ "Tatoeba (ukr-eng)": 25.12,
+ "Tatoeba (tuk-eng)": 1.26,
+ "Tatoeba (wuu-eng)": 0.28,
+ "Tatoeba (swg-eng)": 0.04,
+ "Tatoeba (pam-eng)": 0.51,
+ "Tatoeba (tur-eng)": 0.21,
+ "Tatoeba (bre-eng)": 0.63,
+ "Tatoeba (hun-eng)": 0.71,
+ "Tatoeba (isl-eng)": 0.51,
+ "Tatoeba (tzl-eng)": 0.59,
+ "Tatoeba (mhr-eng)": 1.64,
+ "Tatoeba (nov-eng)": 4.41,
+ "Tatoeba (dtp-eng)": 0.6,
+ "Tatoeba (xho-eng)": 1.22,
+ "Tatoeba (mar-eng)": 0.05,
+ "Tatoeba (khm-eng)": 0.01,
+ "Tatoeba (slv-eng)": 0.85,
+ "Tatoeba (hsb-eng)": 0.45,
+ "Tatoeba (csb-eng)": 1.21,
+ "Tatoeba (mon-eng)": 1.64,
+ "Tatoeba (war-eng)": 0.43,
+ "Tatoeba (tam-eng)": 0.0,
+ "Tatoeba (lfn-eng)": 1.8,
+ "Tatoeba (spa-eng)": 1.0,
+ "Tatoeba (fry-eng)": 5.52,
+ "Tatoeba (deu-eng)": 2.42,
+ "Tatoeba (amh-eng)": 1.2,
+ "Tatoeba (ell-eng)": 0.13,
+ "Tatoeba (swe-eng)": 0.86,
+ "Tatoeba (lit-eng)": 0.21,
+ "Tatoeba (yid-eng)": 0.15,
+ "Tatoeba (eus-eng)": 0.99,
+ "Tatoeba (est-eng)": 0.37,
+ "Tatoeba (tgl-eng)": 0.27,
+ "Tatoeba (pol-eng)": 0.73,
+ "Tatoeba (mkd-eng)": 5.36,
+ "Tatoeba (ina-eng)": 2.96,
+ "Tatoeba (sqi-eng)": 0.78,
+ "Tatoeba (swh-eng)": 0.58,
+ "Tatoeba (urd-eng)": 0.03,
+ "Tatoeba (por-eng)": 0.73,
+ "Tatoeba (bul-eng)": 7.69,
+ "Tatoeba (lvs-eng)": 0.38,
+ "Tatoeba (tha-eng)": 0.18,
+ "Tatoeba (afr-eng)": 2.42,
+ "Tatoeba (slk-eng)": 0.39
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "sbert_large_mt_nlu_ru",
+ "AllegroReviews": 21.21,
+ "AmazonCounterfactualClassification (en-ext)": 59.12,
+ "AmazonCounterfactualClassification (en)": 61.57,
+ "AmazonCounterfactualClassification (de)": 60.52,
+ "AmazonCounterfactualClassification (ja)": 50.22,
+ "AmazonPolarityClassification": 60.24,
+ "AmazonReviewsClassification (en)": 28.26,
+ "AmazonReviewsClassification (de)": 24.32,
+ "AmazonReviewsClassification (es)": 23.78,
+ "AmazonReviewsClassification (fr)": 24.55,
+ "AmazonReviewsClassification (ja)": 20.16,
+ "AmazonReviewsClassification (zh)": 20.7,
+ "AngryTweetsClassification": 44.35,
+ "Banking77Classification": 39.12,
+ "CBD": 49.87,
+ "DanishPoliticalCommentsClassification": 27.26,
+ "EmotionClassification": 19.28,
"GeoreviewClassification": 39.67,
"HeadlineClassification": 77.19,
+ "ImdbClassification": 56.13,
"InappropriatenessClassification": 64.64,
"KinopoiskClassification": 50.33,
+ "LccSentimentClassification": 42.0,
+ "MTOPDomainClassification (en)": 72.42,
+ "MTOPDomainClassification (de)": 42.69,
+ "MTOPDomainClassification (es)": 42.65,
+ "MTOPDomainClassification (fr)": 44.15,
+ "MTOPDomainClassification (hi)": 22.55,
+ "MTOPDomainClassification (th)": 15.97,
+ "MTOPIntentClassification (en)": 40.1,
+ "MTOPIntentClassification (de)": 23.36,
+ "MTOPIntentClassification (es)": 19.5,
+ "MTOPIntentClassification (fr)": 23.37,
+ "MTOPIntentClassification (hi)": 4.45,
+ "MTOPIntentClassification (th)": 5.56,
+ "MasakhaNEWSClassification (amh)": 30.24,
+ "MasakhaNEWSClassification (eng)": 60.19,
+ "MasakhaNEWSClassification (fra)": 45.24,
+ "MasakhaNEWSClassification (hau)": 32.75,
+ "MasakhaNEWSClassification (ibo)": 30.21,
+ "MasakhaNEWSClassification (lin)": 40.11,
+ "MasakhaNEWSClassification (lug)": 34.3,
+ "MasakhaNEWSClassification (orm)": 32.06,
+ "MasakhaNEWSClassification (pcm)": 79.21,
+ "MasakhaNEWSClassification (run)": 38.51,
+ "MasakhaNEWSClassification (sna)": 50.0,
+ "MasakhaNEWSClassification (som)": 30.85,
+ "MasakhaNEWSClassification (swa)": 30.17,
+ "MasakhaNEWSClassification (tir)": 25.4,
+ "MasakhaNEWSClassification (xho)": 42.83,
+ "MasakhaNEWSClassification (yor)": 36.13,
"MassiveIntentClassification (ru)": 61.42,
+ "MassiveIntentClassification (ur)": 2.66,
+ "MassiveIntentClassification (nl)": 19.71,
+ "MassiveIntentClassification (ar)": 4.44,
+ "MassiveIntentClassification (pt)": 21.61,
+ "MassiveIntentClassification (tr)": 18.12,
+ "MassiveIntentClassification (bn)": 3.43,
+ "MassiveIntentClassification (ro)": 18.92,
+ "MassiveIntentClassification (jv)": 18.97,
+ "MassiveIntentClassification (af)": 16.74,
+ "MassiveIntentClassification (hu)": 18.03,
+ "MassiveIntentClassification (ja)": 4.32,
+ "MassiveIntentClassification (pl)": 17.96,
+ "MassiveIntentClassification (es)": 18.79,
+ "MassiveIntentClassification (fa)": 3.44,
+ "MassiveIntentClassification (de)": 22.32,
+ "MassiveIntentClassification (en)": 38.41,
+ "MassiveIntentClassification (lv)": 16.7,
+ "MassiveIntentClassification (zh-CN)": 2.12,
+ "MassiveIntentClassification (vi)": 19.68,
+ "MassiveIntentClassification (hy)": 3.28,
+ "MassiveIntentClassification (nb)": 18.97,
+ "MassiveIntentClassification (tl)": 18.93,
+ "MassiveIntentClassification (my)": 4.16,
+ "MassiveIntentClassification (zh-TW)": 3.87,
+ "MassiveIntentClassification (he)": 2.54,
+ "MassiveIntentClassification (id)": 22.3,
+ "MassiveIntentClassification (is)": 14.84,
+ "MassiveIntentClassification (hi)": 2.66,
+ "MassiveIntentClassification (el)": 13.64,
+ "MassiveIntentClassification (am)": 2.68,
+ "MassiveIntentClassification (sl)": 18.98,
+ "MassiveIntentClassification (th)": 4.06,
+ "MassiveIntentClassification (kn)": 3.08,
+ "MassiveIntentClassification (km)": 4.83,
+ "MassiveIntentClassification (it)": 21.19,
+ "MassiveIntentClassification (sv)": 18.94,
+ "MassiveIntentClassification (az)": 16.05,
+ "MassiveIntentClassification (fi)": 17.73,
+ "MassiveIntentClassification (ka)": 2.79,
+ "MassiveIntentClassification (ml)": 3.05,
+ "MassiveIntentClassification (ms)": 16.25,
+ "MassiveIntentClassification (te)": 2.37,
+ "MassiveIntentClassification (sw)": 18.46,
+ "MassiveIntentClassification (cy)": 17.52,
+ "MassiveIntentClassification (mn)": 22.36,
+ "MassiveIntentClassification (sq)": 20.08,
+ "MassiveIntentClassification (ta)": 2.37,
+ "MassiveIntentClassification (ko)": 2.4,
+ "MassiveIntentClassification (da)": 21.51,
+ "MassiveIntentClassification (fr)": 20.94,
"MassiveScenarioClassification (ru)": 68.13,
+ "MassiveScenarioClassification (sl)": 23.28,
+ "MassiveScenarioClassification (sv)": 23.79,
+ "MassiveScenarioClassification (bn)": 8.97,
+ "MassiveScenarioClassification (ar)": 11.91,
+ "MassiveScenarioClassification (hu)": 24.64,
+ "MassiveScenarioClassification (ko)": 6.91,
+ "MassiveScenarioClassification (kn)": 7.58,
+ "MassiveScenarioClassification (am)": 7.24,
+ "MassiveScenarioClassification (ka)": 7.51,
+ "MassiveScenarioClassification (it)": 26.18,
+ "MassiveScenarioClassification (my)": 10.3,
+ "MassiveScenarioClassification (jv)": 27.39,
+ "MassiveScenarioClassification (te)": 7.8,
+ "MassiveScenarioClassification (fi)": 23.82,
+ "MassiveScenarioClassification (ja)": 9.13,
+ "MassiveScenarioClassification (af)": 24.34,
+ "MassiveScenarioClassification (pl)": 27.01,
+ "MassiveScenarioClassification (da)": 29.86,
+ "MassiveScenarioClassification (th)": 8.94,
+ "MassiveScenarioClassification (hy)": 9.14,
+ "MassiveScenarioClassification (id)": 27.32,
+ "MassiveScenarioClassification (nl)": 26.8,
+ "MassiveScenarioClassification (fa)": 6.97,
+ "MassiveScenarioClassification (ro)": 24.28,
+ "MassiveScenarioClassification (ur)": 9.68,
+ "MassiveScenarioClassification (tl)": 25.1,
+ "MassiveScenarioClassification (az)": 20.32,
+ "MassiveScenarioClassification (ta)": 6.95,
+ "MassiveScenarioClassification (el)": 20.43,
+ "MassiveScenarioClassification (km)": 8.92,
+ "MassiveScenarioClassification (nb)": 24.55,
+ "MassiveScenarioClassification (es)": 23.82,
+ "MassiveScenarioClassification (he)": 7.29,
+ "MassiveScenarioClassification (ms)": 23.08,
+ "MassiveScenarioClassification (de)": 31.89,
+ "MassiveScenarioClassification (sw)": 25.07,
+ "MassiveScenarioClassification (is)": 22.25,
+ "MassiveScenarioClassification (pt)": 26.39,
+ "MassiveScenarioClassification (zh-TW)": 9.31,
+ "MassiveScenarioClassification (hi)": 7.99,
+ "MassiveScenarioClassification (sq)": 27.3,
+ "MassiveScenarioClassification (vi)": 26.56,
+ "MassiveScenarioClassification (ml)": 7.28,
+ "MassiveScenarioClassification (fr)": 28.63,
+ "MassiveScenarioClassification (lv)": 19.48,
+ "MassiveScenarioClassification (mn)": 24.93,
+ "MassiveScenarioClassification (cy)": 22.42,
+ "MassiveScenarioClassification (tr)": 21.86,
+ "MassiveScenarioClassification (zh-CN)": 9.71,
+ "MassiveScenarioClassification (en)": 49.23,
+ "NoRecClassification": 38.42,
+ "NordicLangClassification": 38.2,
+ "PAC": 63.64,
+ "PolEmo2.0-IN": 41.48,
+ "PolEmo2.0-OUT": 33.79,
"RuReviewsClassification": 58.29,
"RuSciBenchGRNTIClassification": 54.19,
- "RuSciBenchOECDClassification": 43.8
+ "RuSciBenchOECDClassification": 43.8,
+ "ToxicConversationsClassification": 56.2,
+ "TweetSentimentExtractionClassification": 47.37
}
]
},
@@ -4448,9 +12099,67 @@
"v_measure": [
{
"Model": "sbert_large_mt_nlu_ru",
+ "AlloProfClusteringP2P": 35.78,
+ "AlloProfClusteringS2S": 23.21,
+ "ArxivClusteringP2P": 18.86,
+ "ArxivClusteringS2S": 13.17,
+ "BiorxivClusteringP2P": 11.85,
+ "BiorxivClusteringS2S": 7.62,
+ "BlurbsClusteringP2P": 10.62,
+ "BlurbsClusteringS2S": 8.8,
"GeoreviewClusteringP2P": 57.07,
+ "HALClusteringS2S": 6.28,
+ "MLSUMClusteringP2P (de)": 14.6,
+ "MLSUMClusteringP2P (fr)": 25.26,
+ "MLSUMClusteringP2P (ru)": 50.88,
+ "MLSUMClusteringP2P (es)": 28.3,
+ "MLSUMClusteringS2S (de)": 15.56,
+ "MLSUMClusteringS2S (fr)": 24.85,
+ "MLSUMClusteringS2S (ru)": 48.17,
+ "MLSUMClusteringS2S (es)": 28.07,
+ "MasakhaNEWSClusteringP2P (amh)": 40.5,
+ "MasakhaNEWSClusteringP2P (eng)": 28.6,
+ "MasakhaNEWSClusteringP2P (fra)": 24.46,
+ "MasakhaNEWSClusteringP2P (hau)": 4.7,
+ "MasakhaNEWSClusteringP2P (ibo)": 21.98,
+ "MasakhaNEWSClusteringP2P (lin)": 47.61,
+ "MasakhaNEWSClusteringP2P (lug)": 44.57,
+ "MasakhaNEWSClusteringP2P (orm)": 21.68,
+ "MasakhaNEWSClusteringP2P (pcm)": 36.56,
+ "MasakhaNEWSClusteringP2P (run)": 44.46,
+ "MasakhaNEWSClusteringP2P (sna)": 43.14,
+ "MasakhaNEWSClusteringP2P (som)": 24.25,
+ "MasakhaNEWSClusteringP2P (swa)": 17.51,
+ "MasakhaNEWSClusteringP2P (tir)": 42.96,
+ "MasakhaNEWSClusteringP2P (xho)": 23.45,
+ "MasakhaNEWSClusteringP2P (yor)": 20.87,
+ "MasakhaNEWSClusteringS2S (amh)": 43.32,
+ "MasakhaNEWSClusteringS2S (eng)": 11.71,
+ "MasakhaNEWSClusteringS2S (fra)": 27.34,
+ "MasakhaNEWSClusteringS2S (hau)": 4.58,
+ "MasakhaNEWSClusteringS2S (ibo)": 25.82,
+ "MasakhaNEWSClusteringS2S (lin)": 49.46,
+ "MasakhaNEWSClusteringS2S (lug)": 48.05,
+ "MasakhaNEWSClusteringS2S (orm)": 22.57,
+ "MasakhaNEWSClusteringS2S (pcm)": 34.83,
+ "MasakhaNEWSClusteringS2S (run)": 44.33,
+ "MasakhaNEWSClusteringS2S (sna)": 40.15,
+ "MasakhaNEWSClusteringS2S (som)": 25.33,
+ "MasakhaNEWSClusteringS2S (swa)": 12.2,
+ "MasakhaNEWSClusteringS2S (tir)": 43.12,
+ "MasakhaNEWSClusteringS2S (xho)": 25.53,
+ "MasakhaNEWSClusteringS2S (yor)": 23.17,
+ "MedrxivClusteringP2P": 18.77,
+ "MedrxivClusteringS2S": 16.42,
+ "RedditClustering": 16.05,
+ "RedditClusteringP2P": 30.9,
"RuSciBenchGRNTIClusteringP2P": 51.44,
- "RuSciBenchOECDClusteringP2P": 45.36
+ "RuSciBenchOECDClusteringP2P": 45.36,
+ "StackExchangeClustering": 23.38,
+ "StackExchangeClusteringP2P": 24.29,
+ "TenKGnadClusteringP2P": 13.99,
+ "TenKGnadClusteringS2S": 9.3,
+ "TwentyNewsgroupsClustering": 13.4
}
]
},
@@ -4458,11 +12167,51 @@
"max_ap": [
{
"Model": "sbert_large_mt_nlu_ru",
- "TERRa": 51.97
+ "CDSC-E": 37.2,
+ "FalseFriendsGermanEnglish": 47.48,
+ "OpusparcusPC (de)": 88.85,
+ "OpusparcusPC (en)": 96.31,
+ "OpusparcusPC (fi)": 83.7,
+ "OpusparcusPC (fr)": 83.31,
+ "OpusparcusPC (ru)": 90.3,
+ "OpusparcusPC (sv)": 81.9,
+ "PSC": 54.62,
+ "PawsXPairClassification (de)": 50.91,
+ "PawsXPairClassification (en)": 51.82,
+ "PawsXPairClassification (es)": 51.22,
+ "PawsXPairClassification (fr)": 53.01,
+ "PawsXPairClassification (ja)": 46.23,
+ "PawsXPairClassification (ko)": 47.32,
+ "PawsXPairClassification (zh)": 49.44,
+ "SICK-E-PL": 44.93,
+ "SprintDuplicateQuestions": 32.48,
+ "TERRa": 51.97,
+ "TwitterSemEval2015": 53.21,
+ "TwitterURLCorpus": 74.79
},
{
"Model": "sbert_large_mt_nlu_ru",
- "TERRa": 51.97
+ "CDSC-E": 37.27,
+ "FalseFriendsGermanEnglish": 47.51,
+ "OpusparcusPC (de)": 88.85,
+ "OpusparcusPC (en)": 96.38,
+ "OpusparcusPC (fi)": 83.79,
+ "OpusparcusPC (fr)": 83.34,
+ "OpusparcusPC (ru)": 90.37,
+ "OpusparcusPC (sv)": 81.9,
+ "PSC": 54.94,
+ "PawsXPairClassification (de)": 51.15,
+ "PawsXPairClassification (en)": 51.91,
+ "PawsXPairClassification (es)": 51.28,
+ "PawsXPairClassification (fr)": 53.06,
+ "PawsXPairClassification (ja)": 46.7,
+ "PawsXPairClassification (ko)": 47.38,
+ "PawsXPairClassification (zh)": 49.7,
+ "SICK-E-PL": 44.97,
+ "SprintDuplicateQuestions": 32.92,
+ "TERRa": 51.97,
+ "TwitterSemEval2015": 53.21,
+ "TwitterURLCorpus": 74.79
}
]
},
@@ -4470,11 +12219,35 @@
"map": [
{
"Model": "sbert_large_mt_nlu_ru",
- "MIRACLReranking (ru)": 24.99
+ "AlloprofReranking": 31.62,
+ "AskUbuntuDupQuestions": 46.55,
+ "MindSmallReranking": 26.72,
+ "RuBQReranking": 56.14,
+ "SciDocsRR": 48.16,
+ "StackOverflowDupQuestions": 32.85,
+ "SyntecReranking": 35.97,
+ "T2Reranking": 50.61
},
{
"Model": "sbert_large_mt_nlu_ru",
- "RuBQReranking": 56.14
+ "MIRACLReranking (ru)": 24.99,
+ "MIRACLReranking (ar)": 2.12,
+ "MIRACLReranking (bn)": 1.32,
+ "MIRACLReranking (de)": 5.55,
+ "MIRACLReranking (en)": 9.94,
+ "MIRACLReranking (es)": 6.11,
+ "MIRACLReranking (fa)": 3.1,
+ "MIRACLReranking (fi)": 8.8,
+ "MIRACLReranking (fr)": 4.94,
+ "MIRACLReranking (hi)": 4.01,
+ "MIRACLReranking (id)": 4.71,
+ "MIRACLReranking (ja)": 1.92,
+ "MIRACLReranking (ko)": 6.18,
+ "MIRACLReranking (sw)": 6.32,
+ "MIRACLReranking (te)": 1.8,
+ "MIRACLReranking (th)": 2.54,
+ "MIRACLReranking (yo)": 6.59,
+ "MIRACLReranking (zh)": 2.42
}
]
},
@@ -4482,9 +12255,125 @@
"ndcg_at_10": [
{
"Model": "sbert_large_mt_nlu_ru",
+ "AILACasedocs": 6.54,
+ "AILAStatutes": 11.76,
+ "ARCChallenge": 1.9,
+ "AlloprofRetrieval": 0.26,
+ "AlphaNLI": 0.71,
+ "AppsRetrieval": 0.23,
+ "ArguAna": 15.62,
+ "BSARDRetrieval": 0.13,
+ "ClimateFEVER": 0.16,
+ "CmedqaRetrieval": 0.45,
+ "CodeFeedbackMT": 5.6,
+ "CodeFeedbackST": 5.33,
+ "CodeSearchNetCCRetrieval (python)": 5.89,
+ "CodeSearchNetCCRetrieval (javascript)": 8.44,
+ "CodeSearchNetCCRetrieval (go)": 3.82,
+ "CodeSearchNetCCRetrieval (ruby)": 12.6,
+ "CodeSearchNetCCRetrieval (java)": 5.22,
+ "CodeSearchNetCCRetrieval (php)": 3.76,
+ "CodeSearchNetRetrieval (python)": 15.0,
+ "CodeSearchNetRetrieval (javascript)": 9.82,
+ "CodeSearchNetRetrieval (go)": 10.42,
+ "CodeSearchNetRetrieval (ruby)": 11.82,
+ "CodeSearchNetRetrieval (java)": 4.77,
+ "CodeSearchNetRetrieval (php)": 9.82,
+ "CodeTransOceanContest": 11.01,
+ "CodeTransOceanDL": 28.95,
+ "CosQA": 0.56,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 0.27,
+ "FEVER": 0.22,
+ "FiQA2018": 0.52,
+ "GerDaLIR": 0.14,
+ "GerDaLIRSmall": 0.38,
+ "GermanQuAD-Retrieval": 4.86,
+ "HellaSwag": 2.87,
+ "HotpotQA": 1.63,
+ "LEMBNarrativeQARetrieval": 2.67,
+ "LEMBQMSumRetrieval": 6.84,
+ "LEMBSummScreenFDRetrieval": 6.53,
+ "LEMBWikimQARetrieval": 9.2,
+ "LeCaRDv2": 6.4,
+ "LegalBenchConsumerContractsQA": 15.84,
+ "LegalBenchCorporateLobbying": 21.07,
+ "LegalQuAD": 2.6,
+ "LegalSummarization": 20.57,
"MIRACLRetrieval (ru)": 6.2,
+ "MintakaRetrieval (ar)": 0.25,
+ "MintakaRetrieval (de)": 1.1,
+ "MintakaRetrieval (es)": 0.56,
+ "MintakaRetrieval (fr)": 1.07,
+ "MintakaRetrieval (hi)": 0.51,
+ "MintakaRetrieval (it)": 0.71,
+ "MintakaRetrieval (ja)": 0.34,
+ "MintakaRetrieval (pt)": 0.94,
+ "NFCorpus": 2.35,
+ "NQ": 0.39,
+ "PIQA": 1.81,
+ "Quail": 0.13,
+ "QuoraRetrieval": 57.97,
+ "RARbCode": 0.09,
+ "RARbMath": 6.37,
"RiaNewsRetrieval": 21.4,
- "RuBQRetrieval": 29.8
+ "RuBQRetrieval": 29.8,
+ "SCIDOCS": 0.37,
+ "SIQA": 0.35,
+ "SciFact": 1.27,
+ "SciFact-PL": 0.12,
+ "SpartQA": 0.0,
+ "StackOverflowQA": 8.95,
+ "SyntecRetrieval": 9.18,
+ "SyntheticText2SQL": 8.82,
+ "TRECCOVID": 7.16,
+ "TRECCOVID-PL": 1.17,
+ "TempReasonL1": 0.17,
+ "TempReasonL2Fact": 0.4,
+ "TempReasonL2Pure": 0.11,
+ "TempReasonL3Fact": 0.5,
+ "TempReasonL3Pure": 0.6,
+ "Touche2020": 1.33,
+ "WinoGrande": 0.71,
+ "XMarket (de)": 0.6,
+ "XMarket (en)": 0.81,
+ "XMarket (es)": 0.52,
+ "XPQARetrieval (ara-ara)": 2.35,
+ "XPQARetrieval (eng-ara)": 0.52,
+ "XPQARetrieval (ara-eng)": 2.27,
+ "XPQARetrieval (deu-deu)": 12.57,
+ "XPQARetrieval (eng-deu)": 0.8,
+ "XPQARetrieval (deu-eng)": 4.53,
+ "XPQARetrieval (spa-spa)": 5.96,
+ "XPQARetrieval (eng-spa)": 0.61,
+ "XPQARetrieval (spa-eng)": 2.01,
+ "XPQARetrieval (fra-fra)": 9.68,
+ "XPQARetrieval (eng-fra)": 1.03,
+ "XPQARetrieval (fra-eng)": 5.62,
+ "XPQARetrieval (hin-hin)": 5.47,
+ "XPQARetrieval (eng-hin)": 2.81,
+ "XPQARetrieval (hin-eng)": 1.68,
+ "XPQARetrieval (ita-ita)": 14.3,
+ "XPQARetrieval (eng-ita)": 1.23,
+ "XPQARetrieval (ita-eng)": 4.44,
+ "XPQARetrieval (jpn-jpn)": 4.2,
+ "XPQARetrieval (eng-jpn)": 1.04,
+ "XPQARetrieval (jpn-eng)": 1.47,
+ "XPQARetrieval (kor-kor)": 1.86,
+ "XPQARetrieval (eng-kor)": 0.83,
+ "XPQARetrieval (kor-eng)": 0.85,
+ "XPQARetrieval (pol-pol)": 7.34,
+ "XPQARetrieval (eng-pol)": 1.5,
+ "XPQARetrieval (pol-eng)": 2.4,
+ "XPQARetrieval (por-por)": 6.56,
+ "XPQARetrieval (eng-por)": 0.99,
+ "XPQARetrieval (por-eng)": 2.48,
+ "XPQARetrieval (tam-tam)": 1.09,
+ "XPQARetrieval (eng-tam)": 1.49,
+ "XPQARetrieval (tam-eng)": 1.06,
+ "XPQARetrieval (cmn-cmn)": 6.79,
+ "XPQARetrieval (eng-cmn)": 1.38,
+ "XPQARetrieval (cmn-eng)": 1.86
}
]
},
@@ -4492,14 +12381,130 @@
"cosine_spearman": [
{
"Model": "sbert_large_mt_nlu_ru",
+ "BIOSSES": 44.98,
+ "CDSC-R": 59.74,
+ "GermanSTSBenchmark": 36.44,
+ "SICK-R": 65.91,
+ "SICK-R-PL": 42.58,
+ "SICKFr": 53.29,
+ "STS12": 48.13,
+ "STS13": 48.04,
+ "STS14": 46.6,
+ "STS15": 65.34,
+ "STS16": 61.94,
+ "STS17 (ar-ar)": 12.08,
+ "STS17 (it-en)": 11.01,
+ "STS17 (es-es)": 39.85,
+ "STS17 (en-ar)": 4.47,
+ "STS17 (en-tr)": -6.65,
+ "STS17 (es-en)": 25.72,
+ "STS17 (en-de)": 14.55,
+ "STS17 (fr-en)": 17.21,
+ "STS17 (nl-en)": 19.39,
+ "STS17 (en-en)": 68.58,
+ "STS17 (ko-ko)": 8.05,
+ "STS22 (de)": 17.08,
+ "STS22 (de-pl)": -22.02,
+ "STS22 (zh-en)": 12.37,
+ "STS22 (pl-en)": 28.42,
+ "STS22 (tr)": 25.85,
+ "STS22 (fr)": 36.5,
+ "STS22 (es)": 37.89,
+ "STS22 (de-en)": 23.56,
+ "STS22 (es-en)": 23.75,
+ "STS22 (pl)": 5.41,
+ "STS22 (fr-pl)": -28.17,
+ "STS22 (it)": 36.74,
+ "STS22 (es-it)": 28.02,
+ "STS22 (de-fr)": 16.37,
+ "STS22 (zh)": 14.23,
+ "STS22 (ar)": 23.71,
+ "STS22 (en)": 51.46,
+ "STSB": 4.92,
+ "STSBenchmark": 58.81,
+ "STSBenchmarkMultilingualSTS (es)": 37.36,
+ "STSBenchmarkMultilingualSTS (zh)": 5.23,
+ "STSBenchmarkMultilingualSTS (it)": 38.85,
+ "STSBenchmarkMultilingualSTS (nl)": 37.38,
+ "STSBenchmarkMultilingualSTS (en)": 58.81,
+ "STSBenchmarkMultilingualSTS (fr)": 44.41,
+ "STSBenchmarkMultilingualSTS (pl)": 41.04,
+ "STSBenchmarkMultilingualSTS (ru)": 70.91,
+ "STSBenchmarkMultilingualSTS (pt)": 35.18,
+ "STSBenchmarkMultilingualSTS (de)": 39.48
+ },
+ {
+ "Model": "sbert_large_mt_nlu_ru",
+ "BIOSSES": 44.98,
+ "CDSC-R": 59.74,
+ "GermanSTSBenchmark": 36.44,
"RUParaPhraserSTS": 65.17,
"RuSTSBenchmarkSTS": 71.22,
- "STS22 (ru)": 56.82
+ "SICK-R": 65.91,
+ "SICK-R-PL": 42.58,
+ "SICKFr": 53.3,
+ "STS12": 48.13,
+ "STS13": 48.04,
+ "STS14": 46.6,
+ "STS15": 65.34,
+ "STS16": 61.94,
+ "STS17 (ar-ar)": 12.1,
+ "STS17 (it-en)": 11.01,
+ "STS17 (es-es)": 39.86,
+ "STS17 (en-ar)": 4.47,
+ "STS17 (en-tr)": -6.65,
+ "STS17 (es-en)": 25.72,
+ "STS17 (en-de)": 14.55,
+ "STS17 (fr-en)": 17.21,
+ "STS17 (nl-en)": 19.39,
+ "STS17 (en-en)": 68.58,
+ "STS17 (ko-ko)": 8.11,
+ "STS22 (ru)": 56.82,
+ "STS22 (de)": 17.08,
+ "STS22 (de-pl)": -22.02,
+ "STS22 (zh-en)": 12.37,
+ "STS22 (pl-en)": 28.42,
+ "STS22 (tr)": 25.85,
+ "STS22 (fr)": 36.5,
+ "STS22 (es)": 37.89,
+ "STS22 (de-en)": 23.56,
+ "STS22 (es-en)": 23.75,
+ "STS22 (pl)": 5.51,
+ "STS22 (fr-pl)": -28.17,
+ "STS22 (it)": 36.74,
+ "STS22 (es-it)": 28.02,
+ "STS22 (de-fr)": 16.37,
+ "STS22 (zh)": 14.22,
+ "STS22 (ar)": 23.73,
+ "STS22 (en)": 51.46,
+ "STSB": 4.9,
+ "STSBenchmark": 58.81,
+ "STSBenchmarkMultilingualSTS (es)": 37.36,
+ "STSBenchmarkMultilingualSTS (zh)": 5.26,
+ "STSBenchmarkMultilingualSTS (it)": 38.85,
+ "STSBenchmarkMultilingualSTS (nl)": 37.38,
+ "STSBenchmarkMultilingualSTS (en)": 58.81,
+ "STSBenchmarkMultilingualSTS (fr)": 44.41,
+ "STSBenchmarkMultilingualSTS (pl)": 41.04,
+ "STSBenchmarkMultilingualSTS (ru)": 70.91,
+ "STSBenchmarkMultilingualSTS (pt)": 35.18,
+ "STSBenchmarkMultilingualSTS (de)": 39.48
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "sbert_large_mt_nlu_ru",
+ "SummEval": 27.17,
+ "SummEvalFr": 30.39
+ },
+ {
+ "Model": "sbert_large_mt_nlu_ru",
+ "SummEval": 27.17,
+ "SummEvalFr": 30.39
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -4511,26 +12516,304 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "sbert_large_mt_nlu_ru",
+ "Core17InstructionRetrieval": 0.08,
+ "News21InstructionRetrieval": -0.36,
+ "Robust04InstructionRetrieval": -1.16
+ }
+ ]
}
},
"ai-forever__sbert_large_nlu_ru": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "sbert_large_nlu_ru",
+ "BornholmBitextMining": 12.6,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (arq-eng)": 0.29,
+ "Tatoeba (cat-eng)": 0.58,
+ "Tatoeba (gle-eng)": 0.24,
+ "Tatoeba (epo-eng)": 0.5,
+ "Tatoeba (mon-eng)": 1.96,
+ "Tatoeba (ita-eng)": 0.86,
+ "Tatoeba (nds-eng)": 1.72,
+ "Tatoeba (tzl-eng)": 0.03,
+ "Tatoeba (hin-eng)": 0.0,
+ "Tatoeba (xho-eng)": 1.04,
+ "Tatoeba (nno-eng)": 0.65,
+ "Tatoeba (hye-eng)": 0.13,
+ "Tatoeba (awa-eng)": 0.25,
+ "Tatoeba (ber-eng)": 0.35,
+ "Tatoeba (amh-eng)": 0.6,
+ "Tatoeba (swg-eng)": 1.05,
+ "Tatoeba (uzb-eng)": 1.16,
+ "Tatoeba (mhr-eng)": 2.08,
+ "Tatoeba (lit-eng)": 0.04,
+ "Tatoeba (jav-eng)": 1.17,
+ "Tatoeba (eus-eng)": 0.33,
+ "Tatoeba (lfn-eng)": 1.34,
+ "Tatoeba (tat-eng)": 1.43,
+ "Tatoeba (slv-eng)": 0.47,
+ "Tatoeba (tha-eng)": 0.12,
+ "Tatoeba (ceb-eng)": 0.56,
+ "Tatoeba (ile-eng)": 1.75,
+ "Tatoeba (vie-eng)": 0.34,
+ "Tatoeba (ces-eng)": 0.2,
+ "Tatoeba (tgl-eng)": 0.1,
+ "Tatoeba (kzj-eng)": 0.1,
+ "Tatoeba (ell-eng)": 0.17,
+ "Tatoeba (orv-eng)": 6.42,
+ "Tatoeba (swe-eng)": 0.55,
+ "Tatoeba (bel-eng)": 17.74,
+ "Tatoeba (nov-eng)": 2.34,
+ "Tatoeba (max-eng)": 0.79,
+ "Tatoeba (ast-eng)": 0.56,
+ "Tatoeba (ara-eng)": 0.2,
+ "Tatoeba (dan-eng)": 0.79,
+ "Tatoeba (pms-eng)": 0.92,
+ "Tatoeba (kab-eng)": 0.1,
+ "Tatoeba (tur-eng)": 0.01,
+ "Tatoeba (nld-eng)": 1.35,
+ "Tatoeba (dtp-eng)": 0.21,
+ "Tatoeba (est-eng)": 0.21,
+ "Tatoeba (ind-eng)": 0.32,
+ "Tatoeba (pes-eng)": 0.07,
+ "Tatoeba (ina-eng)": 1.37,
+ "Tatoeba (gla-eng)": 0.13,
+ "Tatoeba (tel-eng)": 0.76,
+ "Tatoeba (pol-eng)": 0.19,
+ "Tatoeba (yue-eng)": 0.01,
+ "Tatoeba (fin-eng)": 0.07,
+ "Tatoeba (war-eng)": 0.22,
+ "Tatoeba (sqi-eng)": 0.2,
+ "Tatoeba (yid-eng)": 0.1,
+ "Tatoeba (khm-eng)": 0.44,
+ "Tatoeba (cmn-eng)": 0.33,
+ "Tatoeba (fry-eng)": 3.06,
+ "Tatoeba (kat-eng)": 0.61,
+ "Tatoeba (ben-eng)": 0.0,
+ "Tatoeba (zsm-eng)": 0.24,
+ "Tatoeba (cym-eng)": 0.32,
+ "Tatoeba (kaz-eng)": 2.55,
+ "Tatoeba (lvs-eng)": 0.4,
+ "Tatoeba (spa-eng)": 0.34,
+ "Tatoeba (hsb-eng)": 0.22,
+ "Tatoeba (tam-eng)": 0.22,
+ "Tatoeba (arz-eng)": 0.01,
+ "Tatoeba (kur-eng)": 0.02,
+ "Tatoeba (hrv-eng)": 0.28,
+ "Tatoeba (slk-eng)": 0.12,
+ "Tatoeba (kor-eng)": 0.29,
+ "Tatoeba (wuu-eng)": 0.35,
+ "Tatoeba (jpn-eng)": 0.0,
+ "Tatoeba (aze-eng)": 0.13,
+ "Tatoeba (cor-eng)": 0.19,
+ "Tatoeba (isl-eng)": 0.11,
+ "Tatoeba (bul-eng)": 11.98,
+ "Tatoeba (ido-eng)": 0.74,
+ "Tatoeba (nob-eng)": 0.73,
+ "Tatoeba (fra-eng)": 0.45,
+ "Tatoeba (bos-eng)": 0.04,
+ "Tatoeba (deu-eng)": 1.61,
+ "Tatoeba (ron-eng)": 0.37,
+ "Tatoeba (rus-eng)": 38.98,
+ "Tatoeba (ang-eng)": 5.77,
+ "Tatoeba (pam-eng)": 0.34,
+ "Tatoeba (fao-eng)": 0.63,
+ "Tatoeba (mal-eng)": 0.0,
+ "Tatoeba (dsb-eng)": 0.16,
+ "Tatoeba (oci-eng)": 0.63,
+ "Tatoeba (srp-eng)": 5.54,
+ "Tatoeba (lat-eng)": 0.21,
+ "Tatoeba (afr-eng)": 1.24,
+ "Tatoeba (cha-eng)": 1.05,
+ "Tatoeba (bre-eng)": 0.31,
+ "Tatoeba (hun-eng)": 0.37,
+ "Tatoeba (swh-eng)": 0.0,
+ "Tatoeba (mar-eng)": 0.03,
+ "Tatoeba (csb-eng)": 0.16,
+ "Tatoeba (tuk-eng)": 0.51,
+ "Tatoeba (uig-eng)": 0.08,
+ "Tatoeba (glg-eng)": 0.42,
+ "Tatoeba (heb-eng)": 0.3,
+ "Tatoeba (por-eng)": 0.21,
+ "Tatoeba (cbk-eng)": 0.0,
+ "Tatoeba (ukr-eng)": 35.48,
+ "Tatoeba (mkd-eng)": 6.83,
+ "Tatoeba (gsw-eng)": 3.43
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "sbert_large_nlu_ru",
+ "AllegroReviews": 21.01,
+ "AmazonCounterfactualClassification (en-ext)": 62.03,
+ "AmazonCounterfactualClassification (en)": 62.37,
+ "AmazonCounterfactualClassification (de)": 53.43,
+ "AmazonCounterfactualClassification (ja)": 52.0,
+ "AmazonPolarityClassification": 59.33,
+ "AmazonReviewsClassification (en)": 27.26,
+ "AmazonReviewsClassification (de)": 23.78,
+ "AmazonReviewsClassification (es)": 23.42,
+ "AmazonReviewsClassification (fr)": 23.95,
+ "AmazonReviewsClassification (ja)": 20.12,
+ "AmazonReviewsClassification (zh)": 20.91,
+ "AngryTweetsClassification": 44.11,
+ "Banking77Classification": 34.79,
+ "CBD": 51.0,
+ "DanishPoliticalCommentsClassification": 26.55,
+ "EmotionClassification": 18.68,
"GeoreviewClassification": 39.97,
"HeadlineClassification": 79.26,
+ "ImdbClassification": 55.97,
"InappropriatenessClassification": 62.52,
"KinopoiskClassification": 49.51,
+ "LccSentimentClassification": 38.87,
+ "MTOPDomainClassification (en)": 69.22,
+ "MTOPDomainClassification (de)": 38.11,
+ "MTOPDomainClassification (es)": 39.46,
+ "MTOPDomainClassification (fr)": 38.27,
+ "MTOPDomainClassification (hi)": 22.61,
+ "MTOPDomainClassification (th)": 15.48,
+ "MTOPIntentClassification (en)": 37.01,
+ "MTOPIntentClassification (de)": 17.73,
+ "MTOPIntentClassification (es)": 15.75,
+ "MTOPIntentClassification (fr)": 16.34,
+ "MTOPIntentClassification (hi)": 4.66,
+ "MTOPIntentClassification (th)": 5.57,
+ "MasakhaNEWSClassification (amh)": 31.7,
+ "MasakhaNEWSClassification (eng)": 65.42,
+ "MasakhaNEWSClassification (fra)": 56.61,
+ "MasakhaNEWSClassification (hau)": 34.62,
+ "MasakhaNEWSClassification (ibo)": 30.54,
+ "MasakhaNEWSClassification (lin)": 50.4,
+ "MasakhaNEWSClassification (lug)": 30.31,
+ "MasakhaNEWSClassification (orm)": 32.55,
+ "MasakhaNEWSClassification (pcm)": 84.23,
+ "MasakhaNEWSClassification (run)": 32.17,
+ "MasakhaNEWSClassification (sna)": 47.67,
+ "MasakhaNEWSClassification (som)": 22.55,
+ "MasakhaNEWSClassification (swa)": 29.43,
+ "MasakhaNEWSClassification (tir)": 29.96,
+ "MasakhaNEWSClassification (xho)": 44.04,
+ "MasakhaNEWSClassification (yor)": 35.47,
"MassiveIntentClassification (ru)": 61.09,
+ "MassiveIntentClassification (fr)": 18.21,
+ "MassiveIntentClassification (az)": 11.82,
+ "MassiveIntentClassification (bn)": 3.05,
+ "MassiveIntentClassification (hu)": 14.69,
+ "MassiveIntentClassification (cy)": 15.27,
+ "MassiveIntentClassification (ar)": 3.9,
+ "MassiveIntentClassification (sl)": 15.13,
+ "MassiveIntentClassification (ms)": 14.04,
+ "MassiveIntentClassification (en)": 36.64,
+ "MassiveIntentClassification (af)": 14.72,
+ "MassiveIntentClassification (ta)": 2.34,
+ "MassiveIntentClassification (nl)": 17.77,
+ "MassiveIntentClassification (my)": 4.45,
+ "MassiveIntentClassification (nb)": 14.4,
+ "MassiveIntentClassification (sw)": 13.76,
+ "MassiveIntentClassification (es)": 14.85,
+ "MassiveIntentClassification (ml)": 2.85,
+ "MassiveIntentClassification (is)": 11.27,
+ "MassiveIntentClassification (km)": 4.86,
+ "MassiveIntentClassification (pl)": 18.13,
+ "MassiveIntentClassification (hy)": 3.19,
+ "MassiveIntentClassification (hi)": 3.06,
+ "MassiveIntentClassification (tr)": 13.43,
+ "MassiveIntentClassification (zh-CN)": 2.52,
+ "MassiveIntentClassification (zh-TW)": 3.33,
+ "MassiveIntentClassification (ja)": 4.14,
+ "MassiveIntentClassification (de)": 21.1,
+ "MassiveIntentClassification (kn)": 3.07,
+ "MassiveIntentClassification (el)": 12.06,
+ "MassiveIntentClassification (th)": 3.96,
+ "MassiveIntentClassification (sq)": 17.11,
+ "MassiveIntentClassification (ko)": 2.62,
+ "MassiveIntentClassification (mn)": 16.16,
+ "MassiveIntentClassification (am)": 2.74,
+ "MassiveIntentClassification (id)": 17.75,
+ "MassiveIntentClassification (fa)": 3.42,
+ "MassiveIntentClassification (da)": 18.77,
+ "MassiveIntentClassification (it)": 17.9,
+ "MassiveIntentClassification (vi)": 16.58,
+ "MassiveIntentClassification (tl)": 16.85,
+ "MassiveIntentClassification (ur)": 2.59,
+ "MassiveIntentClassification (ro)": 15.75,
+ "MassiveIntentClassification (sv)": 14.53,
+ "MassiveIntentClassification (he)": 2.51,
+ "MassiveIntentClassification (lv)": 15.26,
+ "MassiveIntentClassification (jv)": 14.86,
+ "MassiveIntentClassification (ka)": 2.44,
+ "MassiveIntentClassification (pt)": 17.47,
+ "MassiveIntentClassification (fi)": 12.62,
+ "MassiveIntentClassification (te)": 2.18,
"MassiveScenarioClassification (ru)": 67.6,
+ "MassiveScenarioClassification (ml)": 7.72,
+ "MassiveScenarioClassification (hi)": 7.63,
+ "MassiveScenarioClassification (fi)": 17.24,
+ "MassiveScenarioClassification (pl)": 26.4,
+ "MassiveScenarioClassification (ur)": 9.33,
+ "MassiveScenarioClassification (sl)": 19.84,
+ "MassiveScenarioClassification (nl)": 25.23,
+ "MassiveScenarioClassification (fa)": 6.75,
+ "MassiveScenarioClassification (id)": 23.56,
+ "MassiveScenarioClassification (de)": 29.53,
+ "MassiveScenarioClassification (ka)": 7.74,
+ "MassiveScenarioClassification (km)": 9.32,
+ "MassiveScenarioClassification (tr)": 18.96,
+ "MassiveScenarioClassification (ms)": 19.85,
+ "MassiveScenarioClassification (en)": 49.26,
+ "MassiveScenarioClassification (lv)": 18.1,
+ "MassiveScenarioClassification (th)": 8.9,
+ "MassiveScenarioClassification (am)": 7.3,
+ "MassiveScenarioClassification (el)": 19.95,
+ "MassiveScenarioClassification (is)": 17.48,
+ "MassiveScenarioClassification (cy)": 21.95,
+ "MassiveScenarioClassification (fr)": 24.94,
+ "MassiveScenarioClassification (az)": 18.88,
+ "MassiveScenarioClassification (pt)": 25.3,
+ "MassiveScenarioClassification (ro)": 21.17,
+ "MassiveScenarioClassification (ja)": 9.7,
+ "MassiveScenarioClassification (bn)": 8.49,
+ "MassiveScenarioClassification (mn)": 19.35,
+ "MassiveScenarioClassification (da)": 27.23,
+ "MassiveScenarioClassification (es)": 21.27,
+ "MassiveScenarioClassification (hy)": 8.86,
+ "MassiveScenarioClassification (vi)": 22.54,
+ "MassiveScenarioClassification (tl)": 24.39,
+ "MassiveScenarioClassification (nb)": 20.82,
+ "MassiveScenarioClassification (ko)": 6.71,
+ "MassiveScenarioClassification (kn)": 7.7,
+ "MassiveScenarioClassification (ta)": 7.14,
+ "MassiveScenarioClassification (ar)": 11.92,
+ "MassiveScenarioClassification (zh-TW)": 9.25,
+ "MassiveScenarioClassification (sq)": 22.42,
+ "MassiveScenarioClassification (zh-CN)": 9.3,
+ "MassiveScenarioClassification (he)": 8.12,
+ "MassiveScenarioClassification (jv)": 22.46,
+ "MassiveScenarioClassification (sw)": 22.07,
+ "MassiveScenarioClassification (my)": 10.27,
+ "MassiveScenarioClassification (hu)": 20.7,
+ "MassiveScenarioClassification (it)": 26.82,
+ "MassiveScenarioClassification (sv)": 19.74,
+ "MassiveScenarioClassification (af)": 21.73,
+ "MassiveScenarioClassification (te)": 7.88,
+ "NoRecClassification": 39.06,
+ "NordicLangClassification": 39.41,
+ "PAC": 68.93,
+ "PolEmo2.0-IN": 40.28,
+ "PolEmo2.0-OUT": 30.67,
"RuReviewsClassification": 58.27,
"RuSciBenchGRNTIClassification": 53.9,
- "RuSciBenchOECDClassification": 43.04
+ "RuSciBenchOECDClassification": 43.04,
+ "ToxicConversationsClassification": 57.76,
+ "TweetSentimentExtractionClassification": 47.21
}
]
},
@@ -4538,9 +12821,67 @@
"v_measure": [
{
"Model": "sbert_large_nlu_ru",
+ "AlloProfClusteringP2P": 39.96,
+ "AlloProfClusteringS2S": 23.7,
+ "ArxivClusteringP2P": 18.57,
+ "ArxivClusteringS2S": 11.83,
+ "BiorxivClusteringP2P": 12.51,
+ "BiorxivClusteringS2S": 6.79,
+ "BlurbsClusteringP2P": 11.42,
+ "BlurbsClusteringS2S": 8.6,
"GeoreviewClusteringP2P": 57.12,
+ "HALClusteringS2S": 6.03,
+ "MLSUMClusteringP2P (de)": 15.09,
+ "MLSUMClusteringP2P (fr)": 27.16,
+ "MLSUMClusteringP2P (ru)": 49.13,
+ "MLSUMClusteringP2P (es)": 29.37,
+ "MLSUMClusteringS2S (de)": 15.85,
+ "MLSUMClusteringS2S (fr)": 27.09,
+ "MLSUMClusteringS2S (ru)": 49.58,
+ "MLSUMClusteringS2S (es)": 28.74,
+ "MasakhaNEWSClusteringP2P (amh)": 43.17,
+ "MasakhaNEWSClusteringP2P (eng)": 41.53,
+ "MasakhaNEWSClusteringP2P (fra)": 38.45,
+ "MasakhaNEWSClusteringP2P (hau)": 8.06,
+ "MasakhaNEWSClusteringP2P (ibo)": 21.37,
+ "MasakhaNEWSClusteringP2P (lin)": 50.33,
+ "MasakhaNEWSClusteringP2P (lug)": 43.55,
+ "MasakhaNEWSClusteringP2P (orm)": 20.41,
+ "MasakhaNEWSClusteringP2P (pcm)": 71.26,
+ "MasakhaNEWSClusteringP2P (run)": 41.88,
+ "MasakhaNEWSClusteringP2P (sna)": 46.77,
+ "MasakhaNEWSClusteringP2P (som)": 24.45,
+ "MasakhaNEWSClusteringP2P (swa)": 12.32,
+ "MasakhaNEWSClusteringP2P (tir)": 43.45,
+ "MasakhaNEWSClusteringP2P (xho)": 22.84,
+ "MasakhaNEWSClusteringP2P (yor)": 21.23,
+ "MasakhaNEWSClusteringS2S (amh)": 43.26,
+ "MasakhaNEWSClusteringS2S (eng)": 9.15,
+ "MasakhaNEWSClusteringS2S (fra)": 24.96,
+ "MasakhaNEWSClusteringS2S (hau)": 4.21,
+ "MasakhaNEWSClusteringS2S (ibo)": 26.62,
+ "MasakhaNEWSClusteringS2S (lin)": 51.88,
+ "MasakhaNEWSClusteringS2S (lug)": 45.69,
+ "MasakhaNEWSClusteringS2S (orm)": 21.66,
+ "MasakhaNEWSClusteringS2S (pcm)": 31.8,
+ "MasakhaNEWSClusteringS2S (run)": 44.07,
+ "MasakhaNEWSClusteringS2S (sna)": 52.21,
+ "MasakhaNEWSClusteringS2S (som)": 23.6,
+ "MasakhaNEWSClusteringS2S (swa)": 14.33,
+ "MasakhaNEWSClusteringS2S (tir)": 43.31,
+ "MasakhaNEWSClusteringS2S (xho)": 24.5,
+ "MasakhaNEWSClusteringS2S (yor)": 23.26,
+ "MedrxivClusteringP2P": 19.63,
+ "MedrxivClusteringS2S": 14.79,
+ "RedditClustering": 15.97,
+ "RedditClusteringP2P": 33.1,
"RuSciBenchGRNTIClusteringP2P": 49.7,
- "RuSciBenchOECDClusteringP2P": 44.48
+ "RuSciBenchOECDClusteringP2P": 44.48,
+ "StackExchangeClustering": 21.14,
+ "StackExchangeClusteringP2P": 23.58,
+ "TenKGnadClusteringP2P": 18.55,
+ "TenKGnadClusteringS2S": 10.24,
+ "TwentyNewsgroupsClustering": 13.55
}
]
},
@@ -4548,11 +12889,51 @@
"max_ap": [
{
"Model": "sbert_large_nlu_ru",
- "TERRa": 50.17
+ "CDSC-E": 35.64,
+ "FalseFriendsGermanEnglish": 48.33,
+ "OpusparcusPC (de)": 88.54,
+ "OpusparcusPC (en)": 96.42,
+ "OpusparcusPC (fi)": 83.14,
+ "OpusparcusPC (fr)": 82.75,
+ "OpusparcusPC (ru)": 89.72,
+ "OpusparcusPC (sv)": 80.13,
+ "PSC": 57.84,
+ "PawsXPairClassification (de)": 50.88,
+ "PawsXPairClassification (en)": 50.62,
+ "PawsXPairClassification (es)": 51.74,
+ "PawsXPairClassification (fr)": 53.8,
+ "PawsXPairClassification (ja)": 46.11,
+ "PawsXPairClassification (ko)": 47.25,
+ "PawsXPairClassification (zh)": 48.87,
+ "SICK-E-PL": 44.12,
+ "SprintDuplicateQuestions": 15.22,
+ "TERRa": 50.17,
+ "TwitterSemEval2015": 51.4,
+ "TwitterURLCorpus": 73.98
},
{
"Model": "sbert_large_nlu_ru",
- "TERRa": 50.17
+ "CDSC-E": 35.69,
+ "FalseFriendsGermanEnglish": 48.34,
+ "OpusparcusPC (de)": 88.55,
+ "OpusparcusPC (en)": 96.45,
+ "OpusparcusPC (fi)": 83.15,
+ "OpusparcusPC (fr)": 82.75,
+ "OpusparcusPC (ru)": 89.72,
+ "OpusparcusPC (sv)": 80.13,
+ "PSC": 57.84,
+ "PawsXPairClassification (de)": 51.01,
+ "PawsXPairClassification (en)": 50.64,
+ "PawsXPairClassification (es)": 51.74,
+ "PawsXPairClassification (fr)": 53.84,
+ "PawsXPairClassification (ja)": 46.43,
+ "PawsXPairClassification (ko)": 47.67,
+ "PawsXPairClassification (zh)": 48.93,
+ "SICK-E-PL": 44.15,
+ "SprintDuplicateQuestions": 15.34,
+ "TERRa": 50.17,
+ "TwitterSemEval2015": 51.4,
+ "TwitterURLCorpus": 73.98
}
]
},
@@ -4560,11 +12941,35 @@
"map": [
{
"Model": "sbert_large_nlu_ru",
- "MIRACLReranking (ru)": 18.8
+ "AlloprofReranking": 31.06,
+ "AskUbuntuDupQuestions": 43.65,
+ "MindSmallReranking": 26.86,
+ "RuBQReranking": 46.81,
+ "SciDocsRR": 44.72,
+ "StackOverflowDupQuestions": 29.72,
+ "SyntecReranking": 35.78,
+ "T2Reranking": 50.7
},
{
"Model": "sbert_large_nlu_ru",
- "RuBQReranking": 46.81
+ "MIRACLReranking (ru)": 18.8,
+ "MIRACLReranking (ar)": 2.13,
+ "MIRACLReranking (bn)": 1.59,
+ "MIRACLReranking (de)": 3.86,
+ "MIRACLReranking (en)": 8.82,
+ "MIRACLReranking (es)": 4.97,
+ "MIRACLReranking (fa)": 3.24,
+ "MIRACLReranking (fi)": 4.84,
+ "MIRACLReranking (fr)": 3.15,
+ "MIRACLReranking (hi)": 4.13,
+ "MIRACLReranking (id)": 3.65,
+ "MIRACLReranking (ja)": 1.94,
+ "MIRACLReranking (ko)": 4.9,
+ "MIRACLReranking (sw)": 3.1,
+ "MIRACLReranking (te)": 2.57,
+ "MIRACLReranking (th)": 2.5,
+ "MIRACLReranking (yo)": 4.17,
+ "MIRACLReranking (zh)": 2.58
}
]
},
@@ -4572,9 +12977,142 @@
"ndcg_at_10": [
{
"Model": "sbert_large_nlu_ru",
+ "AILACasedocs": 12.96,
+ "AILAStatutes": 19.38,
+ "ARCChallenge": 1.8,
+ "AlloprofRetrieval": 0.34,
+ "AlphaNLI": 1.1,
+ "AppsRetrieval": 0.19,
+ "ArguAna": 17.29,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 0.03,
+ "CmedqaRetrieval": 0.28,
+ "CodeFeedbackMT": 5.2,
+ "CodeFeedbackST": 3.78,
+ "CodeSearchNetCCRetrieval (python)": 4.27,
+ "CodeSearchNetCCRetrieval (javascript)": 4.77,
+ "CodeSearchNetCCRetrieval (go)": 3.2,
+ "CodeSearchNetCCRetrieval (ruby)": 11.04,
+ "CodeSearchNetCCRetrieval (java)": 4.12,
+ "CodeSearchNetCCRetrieval (php)": 2.43,
+ "CodeSearchNetRetrieval (python)": 10.29,
+ "CodeSearchNetRetrieval (javascript)": 6.59,
+ "CodeSearchNetRetrieval (go)": 7.18,
+ "CodeSearchNetRetrieval (ruby)": 7.29,
+ "CodeSearchNetRetrieval (java)": 5.06,
+ "CodeSearchNetRetrieval (php)": 6.07,
+ "CodeTransOceanContest": 9.79,
+ "CodeTransOceanDL": 31.48,
+ "CosQA": 0.35,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 0.23,
+ "FEVER": 0.2,
+ "FiQA2018": 0.33,
+ "GerDaLIR": 0.25,
+ "GerDaLIRSmall": 0.72,
+ "GermanQuAD-Retrieval": 5.61,
+ "HellaSwag": 2.48,
+ "HotpotQA": 0.93,
+ "LEMBNarrativeQARetrieval": 2.65,
+ "LEMBQMSumRetrieval": 7.24,
+ "LEMBSummScreenFDRetrieval": 7.8,
+ "LEMBWikimQARetrieval": 10.12,
+ "LeCaRDv2": 9.14,
+ "LegalBenchConsumerContractsQA": 12.36,
+ "LegalBenchCorporateLobbying": 22.42,
+ "LegalQuAD": 3.1,
+ "LegalSummarization": 15.01,
"MIRACLRetrieval (ru)": 1.98,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.0,
+ "MIRACLRetrieval (en)": 0.02,
+ "MIRACLRetrieval (es)": 0.0,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.13,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.05,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.31,
+ "MIRACLRetrieval (sw)": 0.18,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 0.49,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MintakaRetrieval (ar)": 0.26,
+ "MintakaRetrieval (de)": 0.85,
+ "MintakaRetrieval (es)": 0.9,
+ "MintakaRetrieval (fr)": 0.85,
+ "MintakaRetrieval (hi)": 0.6,
+ "MintakaRetrieval (it)": 0.68,
+ "MintakaRetrieval (ja)": 0.26,
+ "MintakaRetrieval (pt)": 0.99,
+ "NFCorpus": 1.99,
+ "NQ": 0.2,
+ "PIQA": 1.65,
+ "Quail": 0.19,
+ "QuoraRetrieval": 44.36,
+ "RARbCode": 0.21,
+ "RARbMath": 4.27,
"RiaNewsRetrieval": 11.11,
- "RuBQRetrieval": 12.45
+ "RuBQRetrieval": 12.45,
+ "SCIDOCS": 0.23,
+ "SIQA": 0.13,
+ "SciFact": 1.73,
+ "SciFact-PL": 0.66,
+ "SpartQA": 0.02,
+ "StackOverflowQA": 6.87,
+ "SyntecRetrieval": 11.87,
+ "SyntheticText2SQL": 4.57,
+ "TRECCOVID": 3.91,
+ "TRECCOVID-PL": 1.16,
+ "TempReasonL1": 0.19,
+ "TempReasonL2Fact": 0.35,
+ "TempReasonL2Pure": 0.09,
+ "TempReasonL3Fact": 0.45,
+ "TempReasonL3Pure": 0.3,
+ "Touche2020": 0.29,
+ "WinoGrande": 0.38,
+ "XMarket (de)": 0.49,
+ "XMarket (en)": 0.7,
+ "XMarket (es)": 0.39,
+ "XPQARetrieval (ara-ara)": 2.35,
+ "XPQARetrieval (eng-ara)": 0.76,
+ "XPQARetrieval (ara-eng)": 2.03,
+ "XPQARetrieval (deu-deu)": 10.72,
+ "XPQARetrieval (eng-deu)": 0.63,
+ "XPQARetrieval (deu-eng)": 4.67,
+ "XPQARetrieval (spa-spa)": 4.19,
+ "XPQARetrieval (eng-spa)": 0.7,
+ "XPQARetrieval (spa-eng)": 2.37,
+ "XPQARetrieval (fra-fra)": 9.13,
+ "XPQARetrieval (eng-fra)": 0.71,
+ "XPQARetrieval (fra-eng)": 3.77,
+ "XPQARetrieval (hin-hin)": 4.85,
+ "XPQARetrieval (eng-hin)": 2.3,
+ "XPQARetrieval (hin-eng)": 0.92,
+ "XPQARetrieval (ita-ita)": 8.48,
+ "XPQARetrieval (eng-ita)": 1.0,
+ "XPQARetrieval (ita-eng)": 2.85,
+ "XPQARetrieval (jpn-jpn)": 3.62,
+ "XPQARetrieval (eng-jpn)": 1.13,
+ "XPQARetrieval (jpn-eng)": 1.32,
+ "XPQARetrieval (kor-kor)": 1.82,
+ "XPQARetrieval (eng-kor)": 0.54,
+ "XPQARetrieval (kor-eng)": 0.91,
+ "XPQARetrieval (pol-pol)": 6.15,
+ "XPQARetrieval (eng-pol)": 1.0,
+ "XPQARetrieval (pol-eng)": 1.89,
+ "XPQARetrieval (por-por)": 5.66,
+ "XPQARetrieval (eng-por)": 0.75,
+ "XPQARetrieval (por-eng)": 1.76,
+ "XPQARetrieval (tam-tam)": 1.18,
+ "XPQARetrieval (eng-tam)": 1.73,
+ "XPQARetrieval (tam-eng)": 0.76,
+ "XPQARetrieval (cmn-cmn)": 6.07,
+ "XPQARetrieval (eng-cmn)": 1.94,
+ "XPQARetrieval (cmn-eng)": 1.18
}
]
},
@@ -4582,14 +13120,130 @@
"cosine_spearman": [
{
"Model": "sbert_large_nlu_ru",
+ "BIOSSES": 39.69,
+ "CDSC-R": 58.92,
+ "GermanSTSBenchmark": 25.4,
+ "SICK-R": 58.86,
+ "SICK-R-PL": 39.82,
+ "SICKFr": 48.31,
+ "STS12": 31.48,
+ "STS13": 37.11,
+ "STS14": 36.65,
+ "STS15": 58.25,
+ "STS16": 58.5,
+ "STS17 (en-tr)": 1.31,
+ "STS17 (nl-en)": 13.17,
+ "STS17 (it-en)": 11.05,
+ "STS17 (fr-en)": 20.49,
+ "STS17 (es-es)": 44.07,
+ "STS17 (ko-ko)": 7.57,
+ "STS17 (en-ar)": 21.74,
+ "STS17 (en-en)": 62.5,
+ "STS17 (es-en)": 10.26,
+ "STS17 (ar-ar)": 11.58,
+ "STS17 (en-de)": 9.52,
+ "STS22 (es-it)": 29.06,
+ "STS22 (de)": 14.13,
+ "STS22 (es-en)": 13.97,
+ "STS22 (de-en)": 18.76,
+ "STS22 (pl)": 10.48,
+ "STS22 (ar)": 24.15,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (es)": 38.98,
+ "STS22 (tr)": 29.01,
+ "STS22 (it)": 39.38,
+ "STS22 (zh)": 20.71,
+ "STS22 (en)": 57.74,
+ "STS22 (pl-en)": 6.58,
+ "STS22 (fr)": 55.23,
+ "STS22 (zh-en)": 7.36,
+ "STS22 (de-fr)": 29.79,
+ "STS22 (de-pl)": -20.42,
+ "STSB": 4.05,
+ "STSBenchmark": 46.8,
+ "STSBenchmarkMultilingualSTS (de)": 29.29,
+ "STSBenchmarkMultilingualSTS (zh)": 5.4,
+ "STSBenchmarkMultilingualSTS (pl)": 39.01,
+ "STSBenchmarkMultilingualSTS (es)": 30.71,
+ "STSBenchmarkMultilingualSTS (pt)": 33.45,
+ "STSBenchmarkMultilingualSTS (en)": 46.8,
+ "STSBenchmarkMultilingualSTS (ru)": 58.45,
+ "STSBenchmarkMultilingualSTS (nl)": 33.46,
+ "STSBenchmarkMultilingualSTS (fr)": 37.91,
+ "STSBenchmarkMultilingualSTS (it)": 33.1
+ },
+ {
+ "Model": "sbert_large_nlu_ru",
+ "BIOSSES": 39.69,
+ "CDSC-R": 58.92,
+ "GermanSTSBenchmark": 25.4,
"RUParaPhraserSTS": 62.06,
"RuSTSBenchmarkSTS": 58.82,
- "STS22 (ru)": 50.75
+ "SICK-R": 58.86,
+ "SICK-R-PL": 39.82,
+ "SICKFr": 48.31,
+ "STS12": 31.48,
+ "STS13": 37.11,
+ "STS14": 36.65,
+ "STS15": 58.25,
+ "STS16": 58.5,
+ "STS17 (en-tr)": 1.31,
+ "STS17 (nl-en)": 13.17,
+ "STS17 (it-en)": 11.05,
+ "STS17 (fr-en)": 20.49,
+ "STS17 (es-es)": 44.07,
+ "STS17 (ko-ko)": 7.62,
+ "STS17 (en-ar)": 21.74,
+ "STS17 (en-en)": 62.5,
+ "STS17 (es-en)": 10.26,
+ "STS17 (ar-ar)": 12.45,
+ "STS17 (en-de)": 9.52,
+ "STS22 (ru)": 50.75,
+ "STS22 (es-it)": 29.06,
+ "STS22 (de)": 14.12,
+ "STS22 (es-en)": 13.97,
+ "STS22 (de-en)": 18.76,
+ "STS22 (pl)": 10.29,
+ "STS22 (ar)": 24.13,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (es)": 38.98,
+ "STS22 (tr)": 29.01,
+ "STS22 (it)": 39.38,
+ "STS22 (zh)": 20.71,
+ "STS22 (en)": 57.74,
+ "STS22 (pl-en)": 6.58,
+ "STS22 (fr)": 55.23,
+ "STS22 (zh-en)": 7.36,
+ "STS22 (de-fr)": 29.79,
+ "STS22 (de-pl)": -20.42,
+ "STSB": 4.09,
+ "STSBenchmark": 46.8,
+ "STSBenchmarkMultilingualSTS (de)": 29.29,
+ "STSBenchmarkMultilingualSTS (zh)": 5.41,
+ "STSBenchmarkMultilingualSTS (pl)": 39.01,
+ "STSBenchmarkMultilingualSTS (es)": 30.71,
+ "STSBenchmarkMultilingualSTS (pt)": 33.45,
+ "STSBenchmarkMultilingualSTS (en)": 46.8,
+ "STSBenchmarkMultilingualSTS (ru)": 58.46,
+ "STSBenchmarkMultilingualSTS (nl)": 33.46,
+ "STSBenchmarkMultilingualSTS (fr)": 37.91,
+ "STSBenchmarkMultilingualSTS (it)": 33.1
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "sbert_large_nlu_ru",
+ "SummEval": 28.2,
+ "SummEvalFr": 28.06
+ },
+ {
+ "Model": "sbert_large_nlu_ru",
+ "SummEval": 28.2,
+ "SummEvalFr": 28.06
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -4601,7 +13255,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "sbert_large_nlu_ru",
+ "Core17InstructionRetrieval": -2.17,
+ "News21InstructionRetrieval": 3.38,
+ "Robust04InstructionRetrieval": 0.46
+ }
+ ]
}
},
"aliyun__OpenSearch-text-hybrid": {
@@ -4953,6 +13614,7 @@
"ArguAna": 49.28,
"CQADupstackRetrieval": 31.86,
"ClimateFEVER": 13.62,
+ "CmedqaRetrieval": 1.34,
"DBPedia": 29.91,
"FEVER": 48.09,
"FiQA2018": 25.14,
@@ -4962,8 +13624,10 @@
"NQ": 28.5,
"QuoraRetrieval": 80.42,
"SCIDOCS": 15.78,
- "SciFact": 68.7,
+ "SciFact": 68.74,
+ "SciFact-PL": 56.36,
"TRECCOVID": 62.31,
+ "TRECCOVID-PL": 44.6,
"Touche2020": 33.05
}
]
@@ -5159,6 +13823,7 @@
"f1": [
{
"Model": "LaBSE-en-ru",
+ "BornholmBitextMining": 37.36,
"Tatoeba (rus-eng)": 93.62
}
]
@@ -5167,10 +13832,57 @@
"accuracy": [
{
"Model": "LaBSE-en-ru",
+ "AllegroReviews": 28.22,
+ "AmazonCounterfactualClassification (en-ext)": 76.12,
+ "AmazonCounterfactualClassification (en)": 76.06,
+ "AmazonCounterfactualClassification (de)": 52.69,
+ "AmazonCounterfactualClassification (ja)": 56.98,
+ "AmazonPolarityClassification": 68.35,
+ "AmazonReviewsClassification (en)": 35.53,
+ "AmazonReviewsClassification (de)": 29.83,
+ "AmazonReviewsClassification (es)": 33.68,
+ "AmazonReviewsClassification (fr)": 32.75,
+ "AmazonReviewsClassification (ja)": 20.65,
+ "AmazonReviewsClassification (zh)": 20.77,
+ "AngryTweetsClassification": 46.23,
+ "Banking77Classification": 69.6,
+ "CBD": 52.86,
+ "DanishPoliticalCommentsClassification": 30.4,
+ "EmotionClassification": 36.15,
"GeoreviewClassification": 40.89,
"HeadlineClassification": 68.75,
+ "ImdbClassification": 62.87,
"InappropriatenessClassification": 58.48,
"KinopoiskClassification": 49.85,
+ "LccSentimentClassification": 44.53,
+ "MTOPDomainClassification (en)": 85.6,
+ "MTOPDomainClassification (de)": 59.47,
+ "MTOPDomainClassification (es)": 61.23,
+ "MTOPDomainClassification (fr)": 64.84,
+ "MTOPDomainClassification (hi)": 20.35,
+ "MTOPDomainClassification (th)": 14.86,
+ "MTOPIntentClassification (en)": 62.39,
+ "MTOPIntentClassification (de)": 35.1,
+ "MTOPIntentClassification (es)": 42.27,
+ "MTOPIntentClassification (fr)": 41.03,
+ "MTOPIntentClassification (hi)": 4.17,
+ "MTOPIntentClassification (th)": 4.44,
+ "MasakhaNEWSClassification (amh)": 34.89,
+ "MasakhaNEWSClassification (eng)": 78.43,
+ "MasakhaNEWSClassification (fra)": 73.44,
+ "MasakhaNEWSClassification (hau)": 59.15,
+ "MasakhaNEWSClassification (ibo)": 50.46,
+ "MasakhaNEWSClassification (lin)": 67.09,
+ "MasakhaNEWSClassification (lug)": 52.91,
+ "MasakhaNEWSClassification (orm)": 43.51,
+ "MasakhaNEWSClassification (pcm)": 90.23,
+ "MasakhaNEWSClassification (run)": 57.14,
+ "MasakhaNEWSClassification (sna)": 74.61,
+ "MasakhaNEWSClassification (som)": 44.18,
+ "MasakhaNEWSClassification (swa)": 54.39,
+ "MasakhaNEWSClassification (tir)": 24.82,
+ "MasakhaNEWSClassification (xho)": 60.51,
+ "MasakhaNEWSClassification (yor)": 63.77,
"MassiveIntentClassification (sw)": 19.98,
"MassiveIntentClassification (az)": 19.52,
"MassiveIntentClassification (tr)": 24.12,
@@ -5273,9 +13985,16 @@
"MassiveScenarioClassification (hu)": 33.92,
"MassiveScenarioClassification (ko)": 7.37,
"MassiveScenarioClassification (ar)": 12.43,
+ "NoRecClassification": 40.21,
+ "NordicLangClassification": 38.84,
+ "PAC": 68.61,
+ "PolEmo2.0-IN": 55.44,
+ "PolEmo2.0-OUT": 33.64,
"RuReviewsClassification": 58.01,
"RuSciBenchGRNTIClassification": 52.8,
- "RuSciBenchOECDClassification": 40.36
+ "RuSciBenchOECDClassification": 40.36,
+ "ToxicConversationsClassification": 60.26,
+ "TweetSentimentExtractionClassification": 57.75
}
]
},
@@ -5283,11 +14002,67 @@
"v_measure": [
{
"Model": "LaBSE-en-ru",
+ "AlloProfClusteringP2P": 55.09,
+ "AlloProfClusteringS2S": 22.69,
+ "ArxivClusteringP2P": 31.41,
+ "ArxivClusteringS2S": 20.66,
+ "BiorxivClusteringP2P": 28.94,
+ "BiorxivClusteringS2S": 18.04,
+ "BlurbsClusteringP2P": 30.05,
+ "BlurbsClusteringS2S": 11.13,
"GeoreviewClusteringP2P": 51.89,
+ "HALClusteringS2S": 13.46,
"MLSUMClusteringP2P (ru)": 37.87,
+ "MLSUMClusteringP2P (de)": 34.6,
+ "MLSUMClusteringP2P (fr)": 41.44,
+ "MLSUMClusteringP2P (es)": 41.36,
"MLSUMClusteringS2S (ru)": 41.24,
+ "MLSUMClusteringS2S (de)": 37.56,
+ "MLSUMClusteringS2S (fr)": 40.85,
+ "MLSUMClusteringS2S (es)": 41.08,
+ "MasakhaNEWSClusteringP2P (amh)": 40.17,
+ "MasakhaNEWSClusteringP2P (eng)": 51.39,
+ "MasakhaNEWSClusteringP2P (fra)": 58.64,
+ "MasakhaNEWSClusteringP2P (hau)": 37.52,
+ "MasakhaNEWSClusteringP2P (ibo)": 33.42,
+ "MasakhaNEWSClusteringP2P (lin)": 54.8,
+ "MasakhaNEWSClusteringP2P (lug)": 44.12,
+ "MasakhaNEWSClusteringP2P (orm)": 24.77,
+ "MasakhaNEWSClusteringP2P (pcm)": 67.72,
+ "MasakhaNEWSClusteringP2P (run)": 50.52,
+ "MasakhaNEWSClusteringP2P (sna)": 57.22,
+ "MasakhaNEWSClusteringP2P (som)": 27.57,
+ "MasakhaNEWSClusteringP2P (swa)": 20.27,
+ "MasakhaNEWSClusteringP2P (tir)": 48.36,
+ "MasakhaNEWSClusteringP2P (xho)": 39.56,
+ "MasakhaNEWSClusteringP2P (yor)": 29.12,
+ "MasakhaNEWSClusteringS2S (amh)": 44.07,
+ "MasakhaNEWSClusteringS2S (eng)": 21.34,
+ "MasakhaNEWSClusteringS2S (fra)": 26.33,
+ "MasakhaNEWSClusteringS2S (hau)": 12.88,
+ "MasakhaNEWSClusteringS2S (ibo)": 29.53,
+ "MasakhaNEWSClusteringS2S (lin)": 51.47,
+ "MasakhaNEWSClusteringS2S (lug)": 44.0,
+ "MasakhaNEWSClusteringS2S (orm)": 21.96,
+ "MasakhaNEWSClusteringS2S (pcm)": 69.63,
+ "MasakhaNEWSClusteringS2S (run)": 46.67,
+ "MasakhaNEWSClusteringS2S (sna)": 44.44,
+ "MasakhaNEWSClusteringS2S (som)": 24.45,
+ "MasakhaNEWSClusteringS2S (swa)": 13.6,
+ "MasakhaNEWSClusteringS2S (tir)": 45.41,
+ "MasakhaNEWSClusteringS2S (xho)": 27.49,
+ "MasakhaNEWSClusteringS2S (yor)": 27.71,
+ "MedrxivClusteringP2P": 28.51,
+ "MedrxivClusteringS2S": 23.45,
+ "RedditClustering": 25.45,
+ "RedditClusteringP2P": 47.49,
"RuSciBenchGRNTIClusteringP2P": 47.48,
- "RuSciBenchOECDClusteringP2P": 41.16
+ "RuSciBenchOECDClusteringP2P": 41.16,
+ "StackExchangeClustering": 32.87,
+ "StackExchangeClusteringP2P": 27.69,
+ "TenKGnadClusteringP2P": 40.15,
+ "TenKGnadClusteringS2S": 13.25,
+ "TwentyNewsgroupsClustering": 21.92
}
]
},
@@ -5295,13 +14070,51 @@
"max_ap": [
{
"Model": "LaBSE-en-ru",
+ "CDSC-E": 46.83,
+ "FalseFriendsGermanEnglish": 45.84,
"OpusparcusPC (ru)": 87.18,
- "TERRa": 55.61
+ "OpusparcusPC (de)": 92.16,
+ "OpusparcusPC (en)": 98.01,
+ "OpusparcusPC (fi)": 85.3,
+ "OpusparcusPC (fr)": 88.76,
+ "OpusparcusPC (sv)": 86.89,
+ "PSC": 88.51,
+ "PawsXPairClassification (de)": 49.25,
+ "PawsXPairClassification (en)": 54.46,
+ "PawsXPairClassification (es)": 49.41,
+ "PawsXPairClassification (fr)": 51.62,
+ "PawsXPairClassification (ja)": 47.43,
+ "PawsXPairClassification (ko)": 46.91,
+ "PawsXPairClassification (zh)": 50.78,
+ "SICK-E-PL": 46.03,
+ "SprintDuplicateQuestions": 88.53,
+ "TERRa": 55.61,
+ "TwitterSemEval2015": 60.61,
+ "TwitterURLCorpus": 84.04
},
{
"Model": "LaBSE-en-ru",
+ "CDSC-E": 46.83,
+ "FalseFriendsGermanEnglish": 45.86,
"OpusparcusPC (ru)": 87.18,
- "TERRa": 55.61
+ "OpusparcusPC (de)": 92.16,
+ "OpusparcusPC (en)": 98.01,
+ "OpusparcusPC (fi)": 85.3,
+ "OpusparcusPC (fr)": 88.76,
+ "OpusparcusPC (sv)": 86.89,
+ "PSC": 88.51,
+ "PawsXPairClassification (de)": 49.72,
+ "PawsXPairClassification (en)": 54.46,
+ "PawsXPairClassification (es)": 49.41,
+ "PawsXPairClassification (fr)": 51.69,
+ "PawsXPairClassification (ja)": 47.43,
+ "PawsXPairClassification (ko)": 47.05,
+ "PawsXPairClassification (zh)": 51.37,
+ "SICK-E-PL": 46.12,
+ "SprintDuplicateQuestions": 88.53,
+ "TERRa": 55.61,
+ "TwitterSemEval2015": 60.61,
+ "TwitterURLCorpus": 84.04
}
]
},
@@ -5309,11 +14122,35 @@
"map": [
{
"Model": "LaBSE-en-ru",
- "MIRACLReranking (ru)": 28.86
+ "AlloprofReranking": 42.29,
+ "AskUbuntuDupQuestions": 53.02,
+ "MindSmallReranking": 29.22,
+ "RuBQReranking": 54.83,
+ "SciDocsRR": 67.31,
+ "StackOverflowDupQuestions": 42.36,
+ "SyntecReranking": 53.7,
+ "T2Reranking": 57.1
},
{
"Model": "LaBSE-en-ru",
- "RuBQReranking": 54.83
+ "MIRACLReranking (ru)": 28.86,
+ "MIRACLReranking (ar)": 3.36,
+ "MIRACLReranking (bn)": 2.99,
+ "MIRACLReranking (de)": 18.12,
+ "MIRACLReranking (en)": 23.94,
+ "MIRACLReranking (es)": 18.08,
+ "MIRACLReranking (fa)": 3.51,
+ "MIRACLReranking (fi)": 30.5,
+ "MIRACLReranking (fr)": 18.36,
+ "MIRACLReranking (hi)": 2.65,
+ "MIRACLReranking (id)": 15.56,
+ "MIRACLReranking (ja)": 2.73,
+ "MIRACLReranking (ko)": 4.15,
+ "MIRACLReranking (sw)": 18.93,
+ "MIRACLReranking (te)": 3.04,
+ "MIRACLReranking (th)": 2.32,
+ "MIRACLReranking (yo)": 40.52,
+ "MIRACLReranking (zh)": 4.15
}
]
},
@@ -5321,9 +14158,143 @@
"ndcg_at_10": [
{
"Model": "LaBSE-en-ru",
+ "AILACasedocs": 22.41,
+ "AILAStatutes": 15.94,
+ "ARCChallenge": 3.46,
+ "AlloprofRetrieval": 10.73,
+ "AlphaNLI": 12.55,
+ "AppsRetrieval": 2.26,
+ "ArguAna": 31.91,
+ "BSARDRetrieval": 1.61,
+ "ClimateFEVER": 3.16,
+ "CmedqaRetrieval": 1.55,
+ "CodeFeedbackMT": 24.65,
+ "CodeFeedbackST": 39.86,
+ "CodeSearchNetCCRetrieval (python)": 29.56,
+ "CodeSearchNetCCRetrieval (javascript)": 39.74,
+ "CodeSearchNetCCRetrieval (go)": 27.72,
+ "CodeSearchNetCCRetrieval (ruby)": 36.38,
+ "CodeSearchNetCCRetrieval (java)": 33.75,
+ "CodeSearchNetCCRetrieval (php)": 24.25,
+ "CodeSearchNetRetrieval (python)": 59.24,
+ "CodeSearchNetRetrieval (javascript)": 45.1,
+ "CodeSearchNetRetrieval (go)": 49.06,
+ "CodeSearchNetRetrieval (ruby)": 49.67,
+ "CodeSearchNetRetrieval (java)": 33.58,
+ "CodeSearchNetRetrieval (php)": 44.36,
+ "CodeTransOceanContest": 32.93,
+ "CodeTransOceanDL": 32.37,
+ "CosQA": 8.76,
+ "CovidRetrieval": 0.35,
+ "DBPedia": 13.51,
+ "FEVER": 8.6,
+ "FiQA2018": 6.8,
+ "GerDaLIR": 0.92,
+ "GerDaLIRSmall": 2.24,
+ "GermanQuAD-Retrieval": 64.84,
+ "HellaSwag": 5.57,
+ "HotpotQA": 17.02,
+ "LEMBNarrativeQARetrieval": 11.1,
+ "LEMBQMSumRetrieval": 18.32,
+ "LEMBSummScreenFDRetrieval": 45.33,
+ "LEMBWikimQARetrieval": 25.1,
+ "LeCaRDv2": 12.13,
+ "LegalBenchConsumerContractsQA": 56.69,
+ "LegalBenchCorporateLobbying": 66.24,
+ "LegalQuAD": 12.78,
+ "LegalSummarization": 52.49,
"MIRACLRetrieval (ru)": 10.58,
+ "MIRACLRetrieval (ar)": 0.04,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 4.18,
+ "MIRACLRetrieval (en)": 4.0,
+ "MIRACLRetrieval (es)": 2.34,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 11.13,
+ "MIRACLRetrieval (fr)": 4.4,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 2.75,
+ "MIRACLRetrieval (ja)": 0.04,
+ "MIRACLRetrieval (ko)": 2.06,
+ "MIRACLRetrieval (sw)": 7.82,
+ "MIRACLRetrieval (te)": 0.04,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 16.31,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 6.78,
+ "MintakaRetrieval (ar)": 0.48,
+ "MintakaRetrieval (de)": 15.01,
+ "MintakaRetrieval (es)": 14.71,
+ "MintakaRetrieval (fr)": 14.4,
+ "MintakaRetrieval (hi)": 0.88,
+ "MintakaRetrieval (it)": 15.28,
+ "MintakaRetrieval (ja)": 0.97,
+ "MintakaRetrieval (pt)": 14.97,
+ "NFCorpus": 12.59,
+ "NQ": 7.01,
+ "PIQA": 6.47,
+ "Quail": 1.55,
+ "QuoraRetrieval": 76.76,
+ "RARbCode": 2.11,
+ "RARbMath": 27.24,
"RiaNewsRetrieval": 34.73,
- "RuBQRetrieval": 29.03
+ "RuBQRetrieval": 29.03,
+ "SCIDOCS": 4.96,
+ "SIQA": 1.05,
+ "SciFact": 33.78,
+ "SciFact-PL": 22.71,
+ "SpartQA": 1.52,
+ "StackOverflowQA": 38.16,
+ "SyntecRetrieval": 32.97,
+ "SyntheticText2SQL": 39.52,
+ "TRECCOVID": 14.9,
+ "TRECCOVID-PL": 8.43,
+ "TempReasonL1": 1.65,
+ "TempReasonL2Fact": 7.07,
+ "TempReasonL2Pure": 0.1,
+ "TempReasonL3Fact": 8.34,
+ "TempReasonL3Pure": 4.69,
+ "Touche2020": 4.16,
+ "WinoGrande": 53.29,
+ "XMarket (de)": 1.71,
+ "XMarket (en)": 2.38,
+ "XMarket (es)": 1.98,
+ "XPQARetrieval (ara-ara)": 4.65,
+ "XPQARetrieval (eng-ara)": 4.38,
+ "XPQARetrieval (ara-eng)": 6.93,
+ "XPQARetrieval (deu-deu)": 32.76,
+ "XPQARetrieval (eng-deu)": 15.1,
+ "XPQARetrieval (deu-eng)": 29.14,
+ "XPQARetrieval (spa-spa)": 26.94,
+ "XPQARetrieval (eng-spa)": 15.15,
+ "XPQARetrieval (spa-eng)": 24.44,
+ "XPQARetrieval (fra-fra)": 34.15,
+ "XPQARetrieval (eng-fra)": 13.19,
+ "XPQARetrieval (fra-eng)": 31.59,
+ "XPQARetrieval (hin-hin)": 7.03,
+ "XPQARetrieval (eng-hin)": 6.64,
+ "XPQARetrieval (hin-eng)": 7.09,
+ "XPQARetrieval (ita-ita)": 38.17,
+ "XPQARetrieval (eng-ita)": 15.56,
+ "XPQARetrieval (ita-eng)": 30.28,
+ "XPQARetrieval (jpn-jpn)": 6.77,
+ "XPQARetrieval (eng-jpn)": 3.35,
+ "XPQARetrieval (jpn-eng)": 8.17,
+ "XPQARetrieval (kor-kor)": 2.68,
+ "XPQARetrieval (eng-kor)": 4.24,
+ "XPQARetrieval (kor-eng)": 2.89,
+ "XPQARetrieval (pol-pol)": 17.92,
+ "XPQARetrieval (eng-pol)": 9.24,
+ "XPQARetrieval (pol-eng)": 15.74,
+ "XPQARetrieval (por-por)": 20.3,
+ "XPQARetrieval (eng-por)": 10.69,
+ "XPQARetrieval (por-eng)": 19.59,
+ "XPQARetrieval (tam-tam)": 3.01,
+ "XPQARetrieval (eng-tam)": 5.01,
+ "XPQARetrieval (tam-eng)": 2.93,
+ "XPQARetrieval (cmn-cmn)": 14.24,
+ "XPQARetrieval (eng-cmn)": 7.1,
+ "XPQARetrieval (cmn-eng)": 12.57
}
]
},
@@ -5331,8 +14302,66 @@
"cosine_spearman": [
{
"Model": "LaBSE-en-ru",
+ "BIOSSES": 78.82,
+ "CDSC-R": 70.29,
+ "GermanSTSBenchmark": 51.6,
+ "SICK-R": 69.13,
+ "SICK-R-PL": 47.05,
+ "SICKFr": 60.31,
+ "STS12": 65.16,
+ "STS13": 67.66,
+ "STS14": 63.32,
+ "STS15": 76.21,
+ "STS16": 73.27,
+ "STS17 (en-en)": 79.48,
+ "STS17 (en-ar)": 1.85,
+ "STS17 (en-tr)": 30.01,
+ "STS17 (fr-en)": 49.54,
+ "STS17 (ar-ar)": 15.84,
+ "STS17 (es-es)": 68.88,
+ "STS17 (es-en)": 46.6,
+ "STS17 (it-en)": 48.58,
+ "STS17 (en-de)": 45.81,
+ "STS17 (nl-en)": 42.33,
+ "STS17 (ko-ko)": 10.13,
+ "STSB": 7.69,
+ "STSBenchmark": 71.91,
+ "STSBenchmarkMultilingualSTS (de)": 54.55,
+ "STSBenchmarkMultilingualSTS (it)": 54.52,
+ "STSBenchmarkMultilingualSTS (en)": 71.91,
+ "STSBenchmarkMultilingualSTS (pl)": 51.02,
+ "STSBenchmarkMultilingualSTS (fr)": 58.4,
+ "STSBenchmarkMultilingualSTS (es)": 53.05,
+ "STSBenchmarkMultilingualSTS (pt)": 51.64,
+ "STSBenchmarkMultilingualSTS (nl)": 52.8,
+ "STSBenchmarkMultilingualSTS (zh)": 11.39
+ },
+ {
+ "Model": "LaBSE-en-ru",
+ "BIOSSES": 78.82,
+ "CDSC-R": 70.29,
+ "GermanSTSBenchmark": 51.6,
"RUParaPhraserSTS": 65.87,
"RuSTSBenchmarkSTS": 73.32,
+ "SICK-R": 69.13,
+ "SICK-R-PL": 47.05,
+ "SICKFr": 60.32,
+ "STS12": 65.16,
+ "STS13": 67.66,
+ "STS14": 63.32,
+ "STS15": 76.21,
+ "STS16": 73.27,
+ "STS17 (en-en)": 79.48,
+ "STS17 (en-ar)": 1.85,
+ "STS17 (en-tr)": 30.01,
+ "STS17 (fr-en)": 49.54,
+ "STS17 (ar-ar)": 15.51,
+ "STS17 (es-es)": 68.87,
+ "STS17 (es-en)": 46.6,
+ "STS17 (it-en)": 48.58,
+ "STS17 (en-de)": 45.81,
+ "STS17 (nl-en)": 42.33,
+ "STS17 (ko-ko)": 9.85,
"STS22 (de)": 38.9,
"STS22 (en)": 59.47,
"STS22 (pl-en)": 58.73,
@@ -5351,12 +14380,34 @@
"STS22 (zh-en)": 24.98,
"STS22 (ar)": 31.85,
"STS22 (zh)": 35.1,
- "STSBenchmarkMultilingualSTS (ru)": 73.02
+ "STSB": 7.53,
+ "STSBenchmark": 71.91,
+ "STSBenchmarkMultilingualSTS (ru)": 73.02,
+ "STSBenchmarkMultilingualSTS (de)": 54.55,
+ "STSBenchmarkMultilingualSTS (it)": 54.52,
+ "STSBenchmarkMultilingualSTS (en)": 71.91,
+ "STSBenchmarkMultilingualSTS (pl)": 51.02,
+ "STSBenchmarkMultilingualSTS (fr)": 58.4,
+ "STSBenchmarkMultilingualSTS (es)": 53.05,
+ "STSBenchmarkMultilingualSTS (pt)": 51.64,
+ "STSBenchmarkMultilingualSTS (nl)": 52.8,
+ "STSBenchmarkMultilingualSTS (zh)": 11.31
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "LaBSE-en-ru",
+ "SummEval": 30.58,
+ "SummEvalFr": 31.63
+ },
+ {
+ "Model": "LaBSE-en-ru",
+ "SummEval": 30.58,
+ "SummEvalFr": 31.63
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -5368,26 +14419,304 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "LaBSE-en-ru",
+ "Core17InstructionRetrieval": -1.6,
+ "News21InstructionRetrieval": -0.39,
+ "Robust04InstructionRetrieval": -9.07
+ }
+ ]
}
},
"cointegrated__rubert-tiny": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "rubert-tiny",
+ "BornholmBitextMining": 22.12,
+ "Tatoeba (aze-eng)": 0.54,
+ "Tatoeba (mar-eng)": 0.01,
+ "Tatoeba (kab-eng)": 0.14,
+ "Tatoeba (ile-eng)": 10.55,
+ "Tatoeba (ell-eng)": 0.2,
+ "Tatoeba (slv-eng)": 1.62,
+ "Tatoeba (dan-eng)": 3.21,
+ "Tatoeba (slk-eng)": 0.76,
+ "Tatoeba (ceb-eng)": 1.07,
+ "Tatoeba (swg-eng)": 2.2,
+ "Tatoeba (war-eng)": 0.97,
+ "Tatoeba (eus-eng)": 0.86,
+ "Tatoeba (ido-eng)": 2.59,
+ "Tatoeba (arz-eng)": 0.07,
+ "Tatoeba (kur-eng)": 1.73,
+ "Tatoeba (nds-eng)": 3.17,
+ "Tatoeba (awa-eng)": 0.11,
+ "Tatoeba (heb-eng)": 0.43,
+ "Tatoeba (fao-eng)": 2.21,
+ "Tatoeba (cmn-eng)": 0.6,
+ "Tatoeba (cym-eng)": 0.76,
+ "Tatoeba (hun-eng)": 0.36,
+ "Tatoeba (tel-eng)": 0.06,
+ "Tatoeba (amh-eng)": 0.61,
+ "Tatoeba (ita-eng)": 3.99,
+ "Tatoeba (nno-eng)": 1.65,
+ "Tatoeba (uzb-eng)": 2.94,
+ "Tatoeba (sqi-eng)": 1.38,
+ "Tatoeba (pms-eng)": 1.81,
+ "Tatoeba (ben-eng)": 0.15,
+ "Tatoeba (nov-eng)": 18.1,
+ "Tatoeba (ast-eng)": 5.05,
+ "Tatoeba (cha-eng)": 2.63,
+ "Tatoeba (wuu-eng)": 0.17,
+ "Tatoeba (dsb-eng)": 1.4,
+ "Tatoeba (kat-eng)": 0.2,
+ "Tatoeba (tam-eng)": 0.36,
+ "Tatoeba (khm-eng)": 0.03,
+ "Tatoeba (afr-eng)": 3.2,
+ "Tatoeba (epo-eng)": 1.58,
+ "Tatoeba (fra-eng)": 5.04,
+ "Tatoeba (tha-eng)": 0.19,
+ "Tatoeba (swe-eng)": 2.77,
+ "Tatoeba (tzl-eng)": 1.78,
+ "Tatoeba (isl-eng)": 0.83,
+ "Tatoeba (bel-eng)": 9.88,
+ "Tatoeba (max-eng)": 3.1,
+ "Tatoeba (rus-eng)": 78.56,
+ "Tatoeba (ang-eng)": 6.76,
+ "Tatoeba (kaz-eng)": 3.22,
+ "Tatoeba (ces-eng)": 0.8,
+ "Tatoeba (cbk-eng)": 2.38,
+ "Tatoeba (gle-eng)": 0.38,
+ "Tatoeba (kor-eng)": 0.25,
+ "Tatoeba (cor-eng)": 0.58,
+ "Tatoeba (hsb-eng)": 1.71,
+ "Tatoeba (hin-eng)": 0.03,
+ "Tatoeba (xho-eng)": 1.25,
+ "Tatoeba (orv-eng)": 5.51,
+ "Tatoeba (oci-eng)": 1.91,
+ "Tatoeba (fin-eng)": 0.4,
+ "Tatoeba (ukr-eng)": 20.11,
+ "Tatoeba (nld-eng)": 4.39,
+ "Tatoeba (mhr-eng)": 1.99,
+ "Tatoeba (ind-eng)": 1.02,
+ "Tatoeba (tuk-eng)": 1.84,
+ "Tatoeba (fry-eng)": 6.18,
+ "Tatoeba (tur-eng)": 0.44,
+ "Tatoeba (mon-eng)": 1.34,
+ "Tatoeba (vie-eng)": 0.89,
+ "Tatoeba (ara-eng)": 0.0,
+ "Tatoeba (lat-eng)": 3.23,
+ "Tatoeba (ber-eng)": 0.66,
+ "Tatoeba (glg-eng)": 2.41,
+ "Tatoeba (est-eng)": 0.86,
+ "Tatoeba (arq-eng)": 0.17,
+ "Tatoeba (bos-eng)": 2.31,
+ "Tatoeba (jav-eng)": 1.71,
+ "Tatoeba (tat-eng)": 1.82,
+ "Tatoeba (swh-eng)": 1.04,
+ "Tatoeba (mkd-eng)": 10.82,
+ "Tatoeba (ron-eng)": 2.69,
+ "Tatoeba (uig-eng)": 0.2,
+ "Tatoeba (dtp-eng)": 0.46,
+ "Tatoeba (pam-eng)": 0.4,
+ "Tatoeba (srp-eng)": 4.8,
+ "Tatoeba (hye-eng)": 0.0,
+ "Tatoeba (yid-eng)": 0.15,
+ "Tatoeba (cat-eng)": 3.31,
+ "Tatoeba (bul-eng)": 19.67,
+ "Tatoeba (zsm-eng)": 1.1,
+ "Tatoeba (lvs-eng)": 0.65,
+ "Tatoeba (gsw-eng)": 4.01,
+ "Tatoeba (lfn-eng)": 4.36,
+ "Tatoeba (por-eng)": 2.54,
+ "Tatoeba (nob-eng)": 2.32,
+ "Tatoeba (mal-eng)": 0.01,
+ "Tatoeba (hrv-eng)": 1.97,
+ "Tatoeba (pol-eng)": 0.92,
+ "Tatoeba (yue-eng)": 0.0,
+ "Tatoeba (kzj-eng)": 0.45,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (gla-eng)": 0.79,
+ "Tatoeba (deu-eng)": 2.04,
+ "Tatoeba (spa-eng)": 3.18,
+ "Tatoeba (lit-eng)": 0.71,
+ "Tatoeba (tgl-eng)": 0.79,
+ "Tatoeba (pes-eng)": 0.0,
+ "Tatoeba (csb-eng)": 0.86,
+ "Tatoeba (ina-eng)": 8.61,
+ "Tatoeba (bre-eng)": 0.56,
+ "Tatoeba (jpn-eng)": 0.0
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "rubert-tiny",
+ "AllegroReviews": 23.22,
+ "AmazonCounterfactualClassification (en-ext)": 67.74,
+ "AmazonCounterfactualClassification (en)": 69.09,
+ "AmazonCounterfactualClassification (de)": 66.01,
+ "AmazonCounterfactualClassification (ja)": 50.63,
+ "AmazonPolarityClassification": 62.69,
+ "AmazonReviewsClassification (en)": 30.32,
+ "AmazonReviewsClassification (de)": 24.9,
+ "AmazonReviewsClassification (es)": 26.09,
+ "AmazonReviewsClassification (fr)": 26.03,
+ "AmazonReviewsClassification (ja)": 21.16,
+ "AmazonReviewsClassification (zh)": 21.09,
+ "AngryTweetsClassification": 41.09,
+ "Banking77Classification": 61.03,
+ "CBD": 50.55,
+ "DanishPoliticalCommentsClassification": 27.63,
+ "EmotionClassification": 27.94,
"GeoreviewClassification": 33.45,
"HeadlineClassification": 57.65,
+ "ImdbClassification": 60.52,
"InappropriatenessClassification": 54.5,
"KinopoiskClassification": 41.36,
+ "LccSentimentClassification": 36.2,
+ "MTOPDomainClassification (en)": 81.03,
+ "MTOPDomainClassification (de)": 59.95,
+ "MTOPDomainClassification (es)": 62.76,
+ "MTOPDomainClassification (fr)": 58.29,
+ "MTOPDomainClassification (hi)": 23.3,
+ "MTOPDomainClassification (th)": 16.71,
+ "MTOPIntentClassification (en)": 61.7,
+ "MTOPIntentClassification (de)": 45.91,
+ "MTOPIntentClassification (es)": 47.12,
+ "MTOPIntentClassification (fr)": 42.63,
+ "MTOPIntentClassification (hi)": 4.95,
+ "MTOPIntentClassification (th)": 4.81,
+ "MasakhaNEWSClassification (amh)": 31.52,
+ "MasakhaNEWSClassification (eng)": 69.47,
+ "MasakhaNEWSClassification (fra)": 55.52,
+ "MasakhaNEWSClassification (hau)": 50.35,
+ "MasakhaNEWSClassification (ibo)": 39.18,
+ "MasakhaNEWSClassification (lin)": 50.91,
+ "MasakhaNEWSClassification (lug)": 44.89,
+ "MasakhaNEWSClassification (orm)": 44.55,
+ "MasakhaNEWSClassification (pcm)": 82.43,
+ "MasakhaNEWSClassification (run)": 42.58,
+ "MasakhaNEWSClassification (sna)": 63.06,
+ "MasakhaNEWSClassification (som)": 30.78,
+ "MasakhaNEWSClassification (swa)": 37.92,
+ "MasakhaNEWSClassification (tir)": 26.18,
+ "MasakhaNEWSClassification (xho)": 55.25,
+ "MasakhaNEWSClassification (yor)": 44.04,
"MassiveIntentClassification (ru)": 50.1,
+ "MassiveIntentClassification (el)": 11.82,
+ "MassiveIntentClassification (it)": 43.42,
+ "MassiveIntentClassification (zh-CN)": 7.35,
+ "MassiveIntentClassification (az)": 35.1,
+ "MassiveIntentClassification (nb)": 37.0,
+ "MassiveIntentClassification (te)": 2.86,
+ "MassiveIntentClassification (pt)": 39.64,
+ "MassiveIntentClassification (sw)": 37.25,
+ "MassiveIntentClassification (sl)": 39.27,
+ "MassiveIntentClassification (ja)": 7.35,
+ "MassiveIntentClassification (ka)": 2.94,
+ "MassiveIntentClassification (bn)": 3.54,
+ "MassiveIntentClassification (ta)": 3.05,
+ "MassiveIntentClassification (tr)": 38.14,
+ "MassiveIntentClassification (ko)": 2.66,
+ "MassiveIntentClassification (fr)": 38.64,
+ "MassiveIntentClassification (kn)": 3.81,
+ "MassiveIntentClassification (ms)": 38.99,
+ "MassiveIntentClassification (tl)": 37.39,
+ "MassiveIntentClassification (vi)": 30.18,
+ "MassiveIntentClassification (lv)": 28.27,
+ "MassiveIntentClassification (ro)": 37.64,
+ "MassiveIntentClassification (my)": 4.21,
+ "MassiveIntentClassification (sq)": 38.01,
+ "MassiveIntentClassification (es)": 38.18,
+ "MassiveIntentClassification (af)": 37.64,
+ "MassiveIntentClassification (th)": 4.54,
+ "MassiveIntentClassification (zh-TW)": 9.27,
+ "MassiveIntentClassification (id)": 39.87,
+ "MassiveIntentClassification (he)": 2.73,
+ "MassiveIntentClassification (mn)": 37.02,
+ "MassiveIntentClassification (hi)": 3.71,
+ "MassiveIntentClassification (hy)": 3.49,
+ "MassiveIntentClassification (hu)": 35.29,
+ "MassiveIntentClassification (cy)": 35.89,
+ "MassiveIntentClassification (km)": 4.66,
+ "MassiveIntentClassification (sv)": 38.81,
+ "MassiveIntentClassification (da)": 40.76,
+ "MassiveIntentClassification (en)": 53.87,
+ "MassiveIntentClassification (is)": 32.71,
+ "MassiveIntentClassification (nl)": 38.91,
+ "MassiveIntentClassification (ur)": 3.45,
+ "MassiveIntentClassification (pl)": 33.66,
+ "MassiveIntentClassification (ar)": 5.6,
+ "MassiveIntentClassification (am)": 2.96,
+ "MassiveIntentClassification (fa)": 3.31,
+ "MassiveIntentClassification (ml)": 2.49,
+ "MassiveIntentClassification (de)": 38.61,
+ "MassiveIntentClassification (jv)": 36.44,
+ "MassiveIntentClassification (fi)": 39.21,
"MassiveScenarioClassification (ru)": 52.15,
+ "MassiveScenarioClassification (ko)": 7.26,
+ "MassiveScenarioClassification (lv)": 29.87,
+ "MassiveScenarioClassification (hu)": 36.51,
+ "MassiveScenarioClassification (es)": 39.99,
+ "MassiveScenarioClassification (az)": 35.95,
+ "MassiveScenarioClassification (te)": 7.69,
+ "MassiveScenarioClassification (tl)": 38.89,
+ "MassiveScenarioClassification (el)": 19.12,
+ "MassiveScenarioClassification (zh-CN)": 12.03,
+ "MassiveScenarioClassification (pt)": 39.54,
+ "MassiveScenarioClassification (ml)": 7.32,
+ "MassiveScenarioClassification (fi)": 38.52,
+ "MassiveScenarioClassification (fr)": 40.47,
+ "MassiveScenarioClassification (de)": 41.26,
+ "MassiveScenarioClassification (km)": 9.8,
+ "MassiveScenarioClassification (ms)": 41.21,
+ "MassiveScenarioClassification (sl)": 39.95,
+ "MassiveScenarioClassification (mn)": 33.93,
+ "MassiveScenarioClassification (sw)": 39.17,
+ "MassiveScenarioClassification (id)": 40.86,
+ "MassiveScenarioClassification (th)": 9.37,
+ "MassiveScenarioClassification (vi)": 33.37,
+ "MassiveScenarioClassification (ro)": 40.6,
+ "MassiveScenarioClassification (af)": 38.1,
+ "MassiveScenarioClassification (my)": 9.76,
+ "MassiveScenarioClassification (kn)": 7.92,
+ "MassiveScenarioClassification (ka)": 6.98,
+ "MassiveScenarioClassification (sv)": 37.58,
+ "MassiveScenarioClassification (he)": 7.87,
+ "MassiveScenarioClassification (ar)": 11.34,
+ "MassiveScenarioClassification (fa)": 7.05,
+ "MassiveScenarioClassification (tr)": 38.96,
+ "MassiveScenarioClassification (bn)": 7.71,
+ "MassiveScenarioClassification (am)": 8.08,
+ "MassiveScenarioClassification (en)": 58.9,
+ "MassiveScenarioClassification (nl)": 39.06,
+ "MassiveScenarioClassification (is)": 34.57,
+ "MassiveScenarioClassification (hy)": 9.29,
+ "MassiveScenarioClassification (da)": 41.12,
+ "MassiveScenarioClassification (sq)": 39.18,
+ "MassiveScenarioClassification (it)": 44.54,
+ "MassiveScenarioClassification (jv)": 38.75,
+ "MassiveScenarioClassification (zh-TW)": 14.03,
+ "MassiveScenarioClassification (ur)": 8.54,
+ "MassiveScenarioClassification (pl)": 34.41,
+ "MassiveScenarioClassification (nb)": 37.43,
+ "MassiveScenarioClassification (hi)": 8.51,
+ "MassiveScenarioClassification (ja)": 11.31,
+ "MassiveScenarioClassification (ta)": 6.91,
+ "MassiveScenarioClassification (cy)": 38.09,
+ "NoRecClassification": 39.16,
+ "NordicLangClassification": 54.8,
+ "PAC": 61.59,
+ "PolEmo2.0-IN": 41.09,
+ "PolEmo2.0-OUT": 33.81,
"RuReviewsClassification": 49.56,
"RuSciBenchGRNTIClassification": 35.71,
- "RuSciBenchOECDClassification": 26.51
+ "RuSciBenchOECDClassification": 26.51,
+ "ToxicConversationsClassification": 59.95,
+ "TweetSentimentExtractionClassification": 50.65
}
]
},
@@ -5395,9 +14724,67 @@
"v_measure": [
{
"Model": "rubert-tiny",
+ "AlloProfClusteringP2P": 42.77,
+ "AlloProfClusteringS2S": 24.76,
+ "ArxivClusteringP2P": 22.47,
+ "ArxivClusteringS2S": 15.5,
+ "BiorxivClusteringP2P": 19.69,
+ "BiorxivClusteringS2S": 11.72,
+ "BlurbsClusteringP2P": 9.69,
+ "BlurbsClusteringS2S": 8.78,
"GeoreviewClusteringP2P": 34.4,
+ "HALClusteringS2S": 7.39,
+ "MLSUMClusteringP2P (de)": 10.31,
+ "MLSUMClusteringP2P (fr)": 27.42,
+ "MLSUMClusteringP2P (ru)": 43.77,
+ "MLSUMClusteringP2P (es)": 30.69,
+ "MLSUMClusteringS2S (de)": 9.7,
+ "MLSUMClusteringS2S (fr)": 26.52,
+ "MLSUMClusteringS2S (ru)": 43.04,
+ "MLSUMClusteringS2S (es)": 30.54,
+ "MasakhaNEWSClusteringP2P (amh)": 40.23,
+ "MasakhaNEWSClusteringP2P (eng)": 44.77,
+ "MasakhaNEWSClusteringP2P (fra)": 34.35,
+ "MasakhaNEWSClusteringP2P (hau)": 9.03,
+ "MasakhaNEWSClusteringP2P (ibo)": 21.9,
+ "MasakhaNEWSClusteringP2P (lin)": 47.93,
+ "MasakhaNEWSClusteringP2P (lug)": 47.18,
+ "MasakhaNEWSClusteringP2P (orm)": 26.34,
+ "MasakhaNEWSClusteringP2P (pcm)": 51.64,
+ "MasakhaNEWSClusteringP2P (run)": 44.15,
+ "MasakhaNEWSClusteringP2P (sna)": 43.87,
+ "MasakhaNEWSClusteringP2P (som)": 23.83,
+ "MasakhaNEWSClusteringP2P (swa)": 6.81,
+ "MasakhaNEWSClusteringP2P (tir)": 44.47,
+ "MasakhaNEWSClusteringP2P (xho)": 33.89,
+ "MasakhaNEWSClusteringP2P (yor)": 23.12,
+ "MasakhaNEWSClusteringS2S (amh)": 40.03,
+ "MasakhaNEWSClusteringS2S (eng)": 17.24,
+ "MasakhaNEWSClusteringS2S (fra)": 20.82,
+ "MasakhaNEWSClusteringS2S (hau)": 9.8,
+ "MasakhaNEWSClusteringS2S (ibo)": 25.65,
+ "MasakhaNEWSClusteringS2S (lin)": 50.3,
+ "MasakhaNEWSClusteringS2S (lug)": 42.74,
+ "MasakhaNEWSClusteringS2S (orm)": 21.84,
+ "MasakhaNEWSClusteringS2S (pcm)": 38.52,
+ "MasakhaNEWSClusteringS2S (run)": 44.78,
+ "MasakhaNEWSClusteringS2S (sna)": 44.25,
+ "MasakhaNEWSClusteringS2S (som)": 26.25,
+ "MasakhaNEWSClusteringS2S (swa)": 13.15,
+ "MasakhaNEWSClusteringS2S (tir)": 42.57,
+ "MasakhaNEWSClusteringS2S (xho)": 23.88,
+ "MasakhaNEWSClusteringS2S (yor)": 21.71,
+ "MedrxivClusteringP2P": 21.25,
+ "MedrxivClusteringS2S": 17.78,
+ "RedditClustering": 16.23,
+ "RedditClusteringP2P": 31.54,
"RuSciBenchGRNTIClusteringP2P": 29.89,
- "RuSciBenchOECDClusteringP2P": 27.98
+ "RuSciBenchOECDClusteringP2P": 27.98,
+ "StackExchangeClustering": 31.47,
+ "StackExchangeClusteringP2P": 27.14,
+ "TenKGnadClusteringP2P": 15.1,
+ "TenKGnadClusteringS2S": 8.62,
+ "TwentyNewsgroupsClustering": 15.43
}
]
},
@@ -5405,11 +14792,54 @@
"max_ap": [
{
"Model": "rubert-tiny",
- "TERRa": 51.06
+ "CDSC-E": 43.2,
+ "FalseFriendsGermanEnglish": 48.27,
+ "OpusparcusPC (de)": 90.13,
+ "OpusparcusPC (en)": 96.89,
+ "OpusparcusPC (fi)": 84.26,
+ "OpusparcusPC (fr)": 86.19,
+ "OpusparcusPC (ru)": 82.95,
+ "OpusparcusPC (sv)": 81.52,
+ "PSC": 57.47,
+ "PawsXPairClassification (de)": 48.32,
+ "PawsXPairClassification (en)": 44.09,
+ "PawsXPairClassification (es)": 47.03,
+ "PawsXPairClassification (fr)": 49.15,
+ "PawsXPairClassification (ja)": 48.29,
+ "PawsXPairClassification (ko)": 46.45,
+ "PawsXPairClassification (zh)": 50.89,
+ "SICK-E-PL": 45.51,
+ "SprintDuplicateQuestions": 53.27,
+ "TwitterSemEval2015": 54.51,
+ "TwitterURLCorpus": 71.97
+ },
+ {
+ "Model": "rubert-tiny",
+ "CDSC-E": 44.08,
+ "FalseFriendsGermanEnglish": 48.41,
+ "OpusparcusPC (de)": 90.6,
+ "OpusparcusPC (en)": 96.89,
+ "OpusparcusPC (fi)": 84.97,
+ "OpusparcusPC (fr)": 86.61,
+ "OpusparcusPC (ru)": 83.43,
+ "OpusparcusPC (sv)": 82.2,
+ "PSC": 59.31,
+ "PawsXPairClassification (de)": 48.32,
+ "PawsXPairClassification (en)": 44.24,
+ "PawsXPairClassification (es)": 47.16,
+ "PawsXPairClassification (fr)": 49.4,
+ "PawsXPairClassification (ja)": 48.29,
+ "PawsXPairClassification (ko)": 46.68,
+ "PawsXPairClassification (zh)": 50.97,
+ "SICK-E-PL": 45.58,
+ "SprintDuplicateQuestions": 53.27,
+ "TERRa": 52.85,
+ "TwitterSemEval2015": 54.51,
+ "TwitterURLCorpus": 71.97
},
{
"Model": "rubert-tiny",
- "TERRa": 52.85
+ "TERRa": 51.06
}
]
},
@@ -5417,7 +14847,35 @@
"map": [
{
"Model": "rubert-tiny",
- "RuBQReranking": 35.44
+ "AlloprofReranking": 31.42,
+ "AskUbuntuDupQuestions": 47.47,
+ "MindSmallReranking": 25.98,
+ "RuBQReranking": 35.44,
+ "SciDocsRR": 51.43,
+ "StackOverflowDupQuestions": 35.42,
+ "SyntecReranking": 42.77,
+ "T2Reranking": 55.86
+ },
+ {
+ "Model": "rubert-tiny",
+ "MIRACLReranking (ar)": 1.96,
+ "MIRACLReranking (bn)": 1.68,
+ "MIRACLReranking (de)": 6.36,
+ "MIRACLReranking (en)": 9.34,
+ "MIRACLReranking (es)": 6.58,
+ "MIRACLReranking (fa)": 3.45,
+ "MIRACLReranking (fi)": 8.24,
+ "MIRACLReranking (fr)": 4.62,
+ "MIRACLReranking (hi)": 4.05,
+ "MIRACLReranking (id)": 5.85,
+ "MIRACLReranking (ja)": 2.12,
+ "MIRACLReranking (ko)": 4.88,
+ "MIRACLReranking (ru)": 7.08,
+ "MIRACLReranking (sw)": 6.62,
+ "MIRACLReranking (te)": 2.5,
+ "MIRACLReranking (th)": 2.02,
+ "MIRACLReranking (yo)": 8.39,
+ "MIRACLReranking (zh)": 2.27
}
]
},
@@ -5425,13 +14883,256 @@
"ndcg_at_10": [
{
"Model": "rubert-tiny",
+ "AILACasedocs": 11.96,
+ "AILAStatutes": 12.64,
+ "ARCChallenge": 2.13,
+ "AlloprofRetrieval": 1.39,
+ "AlphaNLI": 1.48,
+ "AppsRetrieval": 0.27,
+ "ArguAna": 24.16,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 1.5,
+ "CmedqaRetrieval": 0.25,
+ "CodeFeedbackMT": 13.47,
+ "CodeFeedbackST": 8.09,
+ "CodeSearchNetCCRetrieval (python)": 11.11,
+ "CodeSearchNetCCRetrieval (javascript)": 20.43,
+ "CodeSearchNetCCRetrieval (go)": 10.31,
+ "CodeSearchNetCCRetrieval (ruby)": 21.66,
+ "CodeSearchNetCCRetrieval (java)": 15.11,
+ "CodeSearchNetCCRetrieval (php)": 11.37,
+ "CodeSearchNetRetrieval (python)": 20.82,
+ "CodeSearchNetRetrieval (javascript)": 9.78,
+ "CodeSearchNetRetrieval (go)": 13.54,
+ "CodeSearchNetRetrieval (ruby)": 17.34,
+ "CodeSearchNetRetrieval (java)": 10.23,
+ "CodeSearchNetRetrieval (php)": 9.12,
+ "CodeTransOceanContest": 13.36,
+ "CodeTransOceanDL": 33.44,
+ "CosQA": 3.73,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 1.56,
+ "FEVER": 0.7,
+ "FiQA2018": 1.92,
+ "GerDaLIR": 0.23,
+ "GerDaLIRSmall": 0.62,
+ "GermanQuAD-Retrieval": 19.33,
+ "HellaSwag": 3.74,
+ "HotpotQA": 2.19,
+ "LEMBNarrativeQARetrieval": 2.46,
+ "LEMBQMSumRetrieval": 8.72,
+ "LEMBSummScreenFDRetrieval": 5.39,
+ "LEMBWikimQARetrieval": 12.42,
+ "LeCaRDv2": 11.58,
+ "LegalBenchConsumerContractsQA": 26.36,
+ "LegalBenchCorporateLobbying": 53.56,
+ "LegalQuAD": 5.11,
+ "LegalSummarization": 38.02,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.03,
+ "MIRACLRetrieval (en)": 0.06,
+ "MIRACLRetrieval (es)": 0.15,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.77,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.19,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.5,
+ "MIRACLRetrieval (ru)": 0.58,
+ "MIRACLRetrieval (sw)": 0.83,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.02,
+ "MIRACLRetrieval (yo)": 0.25,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 1.27,
+ "MintakaRetrieval (ar)": 0.3,
+ "MintakaRetrieval (de)": 1.07,
+ "MintakaRetrieval (es)": 1.17,
+ "MintakaRetrieval (fr)": 1.4,
+ "MintakaRetrieval (hi)": 0.62,
+ "MintakaRetrieval (it)": 1.68,
+ "MintakaRetrieval (ja)": 0.44,
+ "MintakaRetrieval (pt)": 1.61,
+ "NFCorpus": 2.82,
+ "NQ": 0.75,
+ "PIQA": 3.51,
+ "Quail": 0.41,
+ "QuoraRetrieval": 58.8,
+ "RARbCode": 0.63,
+ "RARbMath": 4.68,
"RiaNewsRetrieval": 0.79,
- "RuBQRetrieval": 3.24
+ "RuBQRetrieval": 3.24,
+ "SCIDOCS": 0.86,
+ "SIQA": 0.23,
+ "SciFact": 12.55,
+ "SciFact-PL": 3.63,
+ "SpartQA": 3.34,
+ "StackOverflowQA": 16.66,
+ "SyntecRetrieval": 17.8,
+ "SyntheticText2SQL": 4.72,
+ "TRECCOVID": 9.47,
+ "TRECCOVID-PL": 5.75,
+ "TempReasonL1": 0.16,
+ "TempReasonL2Fact": 0.77,
+ "TempReasonL2Pure": 0.05,
+ "TempReasonL3Fact": 1.66,
+ "TempReasonL3Pure": 2.69,
+ "Touche2020": 2.36,
+ "WinoGrande": 2.34,
+ "XMarket (de)": 1.35,
+ "XMarket (en)": 1.77,
+ "XMarket (es)": 1.45,
+ "XPQARetrieval (ara-ara)": 2.46,
+ "XPQARetrieval (eng-ara)": 0.35,
+ "XPQARetrieval (ara-eng)": 1.44,
+ "XPQARetrieval (deu-deu)": 15.49,
+ "XPQARetrieval (eng-deu)": 1.11,
+ "XPQARetrieval (deu-eng)": 5.18,
+ "XPQARetrieval (spa-spa)": 13.85,
+ "XPQARetrieval (eng-spa)": 0.99,
+ "XPQARetrieval (spa-eng)": 5.73,
+ "XPQARetrieval (fra-fra)": 17.15,
+ "XPQARetrieval (eng-fra)": 1.39,
+ "XPQARetrieval (fra-eng)": 7.88,
+ "XPQARetrieval (hin-hin)": 4.78,
+ "XPQARetrieval (eng-hin)": 1.99,
+ "XPQARetrieval (hin-eng)": 1.22,
+ "XPQARetrieval (ita-ita)": 25.22,
+ "XPQARetrieval (eng-ita)": 2.02,
+ "XPQARetrieval (ita-eng)": 8.16,
+ "XPQARetrieval (jpn-jpn)": 3.57,
+ "XPQARetrieval (eng-jpn)": 0.42,
+ "XPQARetrieval (jpn-eng)": 1.29,
+ "XPQARetrieval (kor-kor)": 1.75,
+ "XPQARetrieval (eng-kor)": 1.26,
+ "XPQARetrieval (kor-eng)": 1.48,
+ "XPQARetrieval (pol-pol)": 8.06,
+ "XPQARetrieval (eng-pol)": 0.75,
+ "XPQARetrieval (pol-eng)": 4.61,
+ "XPQARetrieval (por-por)": 11.81,
+ "XPQARetrieval (eng-por)": 1.03,
+ "XPQARetrieval (por-eng)": 3.32,
+ "XPQARetrieval (tam-tam)": 1.94,
+ "XPQARetrieval (eng-tam)": 1.11,
+ "XPQARetrieval (tam-eng)": 0.93,
+ "XPQARetrieval (cmn-cmn)": 5.96,
+ "XPQARetrieval (eng-cmn)": 0.81,
+ "XPQARetrieval (cmn-eng)": 1.56
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "rubert-tiny",
+ "BIOSSES": 58.0,
+ "CDSC-R": 64.0,
+ "GermanSTSBenchmark": 48.0,
+ "SICK-R": 60.39,
+ "SICK-R-PL": 43.68,
+ "SICKFr": 54.08,
+ "STS12": 49.66,
+ "STS13": 63.4,
+ "STS14": 54.88,
+ "STS15": 67.32,
+ "STS16": 66.87,
+ "STS17 (nl-en)": 10.21,
+ "STS17 (ar-ar)": 13.35,
+ "STS17 (es-en)": 14.43,
+ "STS17 (en-tr)": -0.72,
+ "STS17 (fr-en)": 11.82,
+ "STS17 (en-de)": 2.33,
+ "STS17 (it-en)": 7.59,
+ "STS17 (en-en)": 67.66,
+ "STS17 (en-ar)": 20.94,
+ "STS17 (ko-ko)": 10.74,
+ "STS17 (es-es)": 62.45,
+ "STS22 (tr)": 20.65,
+ "STS22 (es-it)": 32.31,
+ "STS22 (en)": 52.63,
+ "STS22 (ar)": 18.64,
+ "STS22 (pl)": 8.46,
+ "STS22 (it)": 46.79,
+ "STS22 (fr-pl)": -5.63,
+ "STS22 (es)": 47.32,
+ "STS22 (de-fr)": 10.26,
+ "STS22 (fr)": 45.86,
+ "STS22 (pl-en)": 16.24,
+ "STS22 (de-en)": 22.47,
+ "STS22 (zh-en)": 1.05,
+ "STS22 (de)": 18.79,
+ "STS22 (zh)": 26.99,
+ "STS22 (de-pl)": -5.36,
+ "STS22 (es-en)": 27.71,
+ "STSB": 21.79,
+ "STSBenchmark": 55.86,
+ "STSBenchmarkMultilingualSTS (pl)": 49.04,
+ "STSBenchmarkMultilingualSTS (nl)": 46.82,
+ "STSBenchmarkMultilingualSTS (de)": 49.2,
+ "STSBenchmarkMultilingualSTS (fr)": 52.85,
+ "STSBenchmarkMultilingualSTS (en)": 55.86,
+ "STSBenchmarkMultilingualSTS (pt)": 44.54,
+ "STSBenchmarkMultilingualSTS (es)": 49.77,
+ "STSBenchmarkMultilingualSTS (zh)": 19.6,
+ "STSBenchmarkMultilingualSTS (it)": 49.68,
+ "STSBenchmarkMultilingualSTS (ru)": 58.23
+ },
+ {
+ "Model": "rubert-tiny",
+ "BIOSSES": 58.0,
+ "CDSC-R": 64.0,
+ "GermanSTSBenchmark": 48.0,
+ "SICK-R": 60.39,
+ "SICK-R-PL": 43.68,
+ "SICKFr": 54.08,
+ "STS12": 49.66,
+ "STS13": 63.4,
+ "STS14": 54.88,
+ "STS15": 67.32,
+ "STS16": 66.87,
+ "STS17 (nl-en)": 10.21,
+ "STS17 (ar-ar)": 13.18,
+ "STS17 (es-en)": 14.43,
+ "STS17 (en-tr)": -0.72,
+ "STS17 (fr-en)": 11.82,
+ "STS17 (en-de)": 2.33,
+ "STS17 (it-en)": 7.59,
+ "STS17 (en-en)": 67.66,
+ "STS17 (en-ar)": 20.94,
+ "STS17 (ko-ko)": 10.98,
+ "STS17 (es-es)": 62.45,
+ "STS22 (tr)": 20.65,
+ "STS22 (es-it)": 32.31,
+ "STS22 (en)": 52.63,
+ "STS22 (ar)": 18.64,
+ "STS22 (pl)": 8.57,
+ "STS22 (it)": 46.79,
+ "STS22 (fr-pl)": -5.63,
+ "STS22 (es)": 47.32,
+ "STS22 (de-fr)": 10.26,
+ "STS22 (fr)": 45.86,
+ "STS22 (pl-en)": 16.24,
+ "STS22 (de-en)": 22.47,
+ "STS22 (zh-en)": 1.05,
+ "STS22 (de)": 18.79,
+ "STS22 (zh)": 26.99,
+ "STS22 (de-pl)": -5.36,
+ "STS22 (es-en)": 27.71,
+ "STSB": 21.79,
+ "STSBenchmark": 55.86,
+ "STSBenchmarkMultilingualSTS (pl)": 49.04,
+ "STSBenchmarkMultilingualSTS (nl)": 46.82,
+ "STSBenchmarkMultilingualSTS (de)": 49.2,
+ "STSBenchmarkMultilingualSTS (fr)": 52.85,
+ "STSBenchmarkMultilingualSTS (en)": 55.86,
+ "STSBenchmarkMultilingualSTS (pt)": 44.54,
+ "STSBenchmarkMultilingualSTS (es)": 49.77,
+ "STSBenchmarkMultilingualSTS (zh)": 19.62,
+ "STSBenchmarkMultilingualSTS (it)": 49.68,
+ "STSBenchmarkMultilingualSTS (ru)": 58.23
+ },
{
"Model": "rubert-tiny",
"RUParaPhraserSTS": 53.41,
@@ -5441,7 +15142,18 @@
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "rubert-tiny",
+ "SummEval": 29.54,
+ "SummEvalFr": 28.57
+ },
+ {
+ "Model": "rubert-tiny",
+ "SummEval": 29.54,
+ "SummEvalFr": 28.57
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -5453,26 +15165,304 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "rubert-tiny",
+ "Core17InstructionRetrieval": 0.41,
+ "News21InstructionRetrieval": 1.04,
+ "Robust04InstructionRetrieval": -4.12
+ }
+ ]
}
},
"cointegrated__rubert-tiny2": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "rubert-tiny2",
+ "BornholmBitextMining": 37.66,
+ "Tatoeba (sqi-eng)": 3.46,
+ "Tatoeba (orv-eng)": 9.61,
+ "Tatoeba (swe-eng)": 4.08,
+ "Tatoeba (deu-eng)": 3.94,
+ "Tatoeba (nov-eng)": 23.41,
+ "Tatoeba (isl-eng)": 3.1,
+ "Tatoeba (ina-eng)": 12.44,
+ "Tatoeba (yid-eng)": 0.02,
+ "Tatoeba (pes-eng)": 0.7,
+ "Tatoeba (fra-eng)": 6.21,
+ "Tatoeba (ben-eng)": 0.01,
+ "Tatoeba (pam-eng)": 2.4,
+ "Tatoeba (epo-eng)": 2.98,
+ "Tatoeba (kat-eng)": 1.0,
+ "Tatoeba (nno-eng)": 4.35,
+ "Tatoeba (gsw-eng)": 9.92,
+ "Tatoeba (glg-eng)": 4.34,
+ "Tatoeba (mkd-eng)": 13.4,
+ "Tatoeba (bre-eng)": 2.27,
+ "Tatoeba (max-eng)": 7.3,
+ "Tatoeba (fry-eng)": 10.42,
+ "Tatoeba (jav-eng)": 2.29,
+ "Tatoeba (ron-eng)": 4.44,
+ "Tatoeba (ara-eng)": 0.28,
+ "Tatoeba (ast-eng)": 12.51,
+ "Tatoeba (hye-eng)": 0.54,
+ "Tatoeba (kab-eng)": 0.64,
+ "Tatoeba (gle-eng)": 2.76,
+ "Tatoeba (dsb-eng)": 2.13,
+ "Tatoeba (hun-eng)": 2.03,
+ "Tatoeba (yue-eng)": 0.36,
+ "Tatoeba (mon-eng)": 7.39,
+ "Tatoeba (dtp-eng)": 1.49,
+ "Tatoeba (por-eng)": 4.5,
+ "Tatoeba (kor-eng)": 0.35,
+ "Tatoeba (amh-eng)": 0.65,
+ "Tatoeba (mhr-eng)": 6.0,
+ "Tatoeba (fin-eng)": 1.28,
+ "Tatoeba (hin-eng)": 0.03,
+ "Tatoeba (kur-eng)": 3.96,
+ "Tatoeba (gla-eng)": 2.06,
+ "Tatoeba (bel-eng)": 16.75,
+ "Tatoeba (heb-eng)": 0.58,
+ "Tatoeba (jpn-eng)": 0.26,
+ "Tatoeba (vie-eng)": 3.96,
+ "Tatoeba (dan-eng)": 6.31,
+ "Tatoeba (ita-eng)": 7.38,
+ "Tatoeba (ang-eng)": 16.64,
+ "Tatoeba (wuu-eng)": 0.14,
+ "Tatoeba (tgl-eng)": 2.5,
+ "Tatoeba (nld-eng)": 9.18,
+ "Tatoeba (tam-eng)": 0.02,
+ "Tatoeba (ceb-eng)": 2.55,
+ "Tatoeba (zsm-eng)": 2.78,
+ "Tatoeba (hrv-eng)": 5.06,
+ "Tatoeba (lvs-eng)": 1.93,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (lit-eng)": 1.35,
+ "Tatoeba (spa-eng)": 5.9,
+ "Tatoeba (pol-eng)": 2.75,
+ "Tatoeba (arz-eng)": 0.03,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (ido-eng)": 5.98,
+ "Tatoeba (hsb-eng)": 2.48,
+ "Tatoeba (ces-eng)": 1.68,
+ "Tatoeba (ukr-eng)": 25.99,
+ "Tatoeba (tzl-eng)": 16.56,
+ "Tatoeba (war-eng)": 2.07,
+ "Tatoeba (cha-eng)": 8.89,
+ "Tatoeba (slv-eng)": 3.44,
+ "Tatoeba (tha-eng)": 0.3,
+ "Tatoeba (ber-eng)": 3.6,
+ "Tatoeba (cbk-eng)": 3.55,
+ "Tatoeba (tat-eng)": 6.52,
+ "Tatoeba (nob-eng)": 4.68,
+ "Tatoeba (cmn-eng)": 0.71,
+ "Tatoeba (uig-eng)": 0.42,
+ "Tatoeba (bul-eng)": 30.57,
+ "Tatoeba (ile-eng)": 13.8,
+ "Tatoeba (lat-eng)": 6.13,
+ "Tatoeba (cym-eng)": 3.09,
+ "Tatoeba (cat-eng)": 7.52,
+ "Tatoeba (kaz-eng)": 8.08,
+ "Tatoeba (awa-eng)": 0.45,
+ "Tatoeba (pms-eng)": 6.26,
+ "Tatoeba (swg-eng)": 10.32,
+ "Tatoeba (lfn-eng)": 7.11,
+ "Tatoeba (uzb-eng)": 4.59,
+ "Tatoeba (cor-eng)": 3.04,
+ "Tatoeba (slk-eng)": 2.62,
+ "Tatoeba (nds-eng)": 6.88,
+ "Tatoeba (csb-eng)": 4.88,
+ "Tatoeba (tur-eng)": 1.67,
+ "Tatoeba (bos-eng)": 6.19,
+ "Tatoeba (eus-eng)": 3.9,
+ "Tatoeba (kzj-eng)": 1.39,
+ "Tatoeba (afr-eng)": 5.01,
+ "Tatoeba (swh-eng)": 3.76,
+ "Tatoeba (ell-eng)": 0.44,
+ "Tatoeba (khm-eng)": 0.0,
+ "Tatoeba (est-eng)": 2.19,
+ "Tatoeba (ind-eng)": 2.26,
+ "Tatoeba (mar-eng)": 0.02,
+ "Tatoeba (tel-eng)": 0.4,
+ "Tatoeba (aze-eng)": 2.91,
+ "Tatoeba (srp-eng)": 8.91,
+ "Tatoeba (oci-eng)": 4.32,
+ "Tatoeba (fao-eng)": 4.85,
+ "Tatoeba (arq-eng)": 0.3,
+ "Tatoeba (rus-eng)": 82.92,
+ "Tatoeba (tuk-eng)": 3.56,
+ "Tatoeba (xho-eng)": 5.52
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "rubert-tiny2",
+ "AllegroReviews": 23.03,
+ "AmazonCounterfactualClassification (en-ext)": 66.64,
+ "AmazonCounterfactualClassification (en)": 67.42,
+ "AmazonCounterfactualClassification (de)": 55.34,
+ "AmazonCounterfactualClassification (ja)": 56.48,
+ "AmazonPolarityClassification": 63.8,
+ "AmazonReviewsClassification (en)": 33.02,
+ "AmazonReviewsClassification (de)": 25.63,
+ "AmazonReviewsClassification (es)": 27.9,
+ "AmazonReviewsClassification (fr)": 26.79,
+ "AmazonReviewsClassification (ja)": 21.02,
+ "AmazonReviewsClassification (zh)": 20.64,
+ "AngryTweetsClassification": 43.84,
+ "Banking77Classification": 50.97,
+ "CBD": 52.68,
+ "DanishPoliticalCommentsClassification": 27.9,
+ "EmotionClassification": 24.77,
"GeoreviewClassification": 39.64,
"HeadlineClassification": 74.19,
+ "ImdbClassification": 60.71,
"InappropriatenessClassification": 58.57,
"KinopoiskClassification": 49.06,
+ "LccSentimentClassification": 38.73,
+ "MTOPDomainClassification (en)": 76.85,
+ "MTOPDomainClassification (de)": 54.64,
+ "MTOPDomainClassification (es)": 55.03,
+ "MTOPDomainClassification (fr)": 55.17,
+ "MTOPDomainClassification (hi)": 23.34,
+ "MTOPDomainClassification (th)": 15.88,
+ "MTOPIntentClassification (en)": 41.31,
+ "MTOPIntentClassification (de)": 27.44,
+ "MTOPIntentClassification (es)": 29.09,
+ "MTOPIntentClassification (fr)": 28.69,
+ "MTOPIntentClassification (hi)": 3.77,
+ "MTOPIntentClassification (th)": 4.54,
+ "MasakhaNEWSClassification (amh)": 31.12,
+ "MasakhaNEWSClassification (eng)": 68.91,
+ "MasakhaNEWSClassification (fra)": 39.67,
+ "MasakhaNEWSClassification (hau)": 37.58,
+ "MasakhaNEWSClassification (ibo)": 30.44,
+ "MasakhaNEWSClassification (lin)": 46.8,
+ "MasakhaNEWSClassification (lug)": 41.61,
+ "MasakhaNEWSClassification (orm)": 42.58,
+ "MasakhaNEWSClassification (pcm)": 83.44,
+ "MasakhaNEWSClassification (run)": 38.76,
+ "MasakhaNEWSClassification (sna)": 55.72,
+ "MasakhaNEWSClassification (som)": 32.69,
+ "MasakhaNEWSClassification (swa)": 29.05,
+ "MasakhaNEWSClassification (tir)": 24.26,
+ "MasakhaNEWSClassification (xho)": 41.72,
+ "MasakhaNEWSClassification (yor)": 35.84,
"MassiveIntentClassification (ru)": 50.83,
+ "MassiveIntentClassification (cy)": 25.27,
+ "MassiveIntentClassification (hi)": 2.78,
+ "MassiveIntentClassification (te)": 1.79,
+ "MassiveIntentClassification (fi)": 26.26,
+ "MassiveIntentClassification (hy)": 2.42,
+ "MassiveIntentClassification (es)": 27.46,
+ "MassiveIntentClassification (en)": 44.53,
+ "MassiveIntentClassification (it)": 31.83,
+ "MassiveIntentClassification (bn)": 2.99,
+ "MassiveIntentClassification (fr)": 27.01,
+ "MassiveIntentClassification (nl)": 28.88,
+ "MassiveIntentClassification (pl)": 23.3,
+ "MassiveIntentClassification (jv)": 25.99,
+ "MassiveIntentClassification (ka)": 2.71,
+ "MassiveIntentClassification (id)": 29.35,
+ "MassiveIntentClassification (nb)": 25.68,
+ "MassiveIntentClassification (mn)": 22.97,
+ "MassiveIntentClassification (ko)": 2.32,
+ "MassiveIntentClassification (da)": 28.28,
+ "MassiveIntentClassification (tl)": 27.22,
+ "MassiveIntentClassification (km)": 4.96,
+ "MassiveIntentClassification (hu)": 23.69,
+ "MassiveIntentClassification (ar)": 3.9,
+ "MassiveIntentClassification (de)": 27.96,
+ "MassiveIntentClassification (af)": 23.46,
+ "MassiveIntentClassification (kn)": 2.44,
+ "MassiveIntentClassification (pt)": 28.77,
+ "MassiveIntentClassification (sw)": 23.95,
+ "MassiveIntentClassification (he)": 2.01,
+ "MassiveIntentClassification (lv)": 18.07,
+ "MassiveIntentClassification (ml)": 2.51,
+ "MassiveIntentClassification (el)": 9.85,
+ "MassiveIntentClassification (sq)": 28.1,
+ "MassiveIntentClassification (vi)": 19.48,
+ "MassiveIntentClassification (am)": 2.54,
+ "MassiveIntentClassification (my)": 3.54,
+ "MassiveIntentClassification (ms)": 25.64,
+ "MassiveIntentClassification (zh-TW)": 6.41,
+ "MassiveIntentClassification (is)": 20.61,
+ "MassiveIntentClassification (sl)": 25.4,
+ "MassiveIntentClassification (tr)": 23.04,
+ "MassiveIntentClassification (th)": 3.79,
+ "MassiveIntentClassification (ur)": 2.54,
+ "MassiveIntentClassification (sv)": 27.48,
+ "MassiveIntentClassification (ja)": 4.77,
+ "MassiveIntentClassification (ro)": 27.57,
+ "MassiveIntentClassification (fa)": 3.22,
+ "MassiveIntentClassification (az)": 20.28,
+ "MassiveIntentClassification (zh-CN)": 5.02,
+ "MassiveIntentClassification (ta)": 1.58,
"MassiveScenarioClassification (ru)": 59.15,
+ "MassiveScenarioClassification (zh-CN)": 10.73,
+ "MassiveScenarioClassification (de)": 35.28,
+ "MassiveScenarioClassification (cy)": 33.91,
+ "MassiveScenarioClassification (te)": 6.52,
+ "MassiveScenarioClassification (tl)": 33.49,
+ "MassiveScenarioClassification (sw)": 32.49,
+ "MassiveScenarioClassification (my)": 10.86,
+ "MassiveScenarioClassification (ms)": 33.05,
+ "MassiveScenarioClassification (nb)": 31.09,
+ "MassiveScenarioClassification (zh-TW)": 11.59,
+ "MassiveScenarioClassification (fr)": 34.68,
+ "MassiveScenarioClassification (da)": 35.77,
+ "MassiveScenarioClassification (is)": 26.28,
+ "MassiveScenarioClassification (fa)": 6.71,
+ "MassiveScenarioClassification (ro)": 34.52,
+ "MassiveScenarioClassification (pt)": 33.75,
+ "MassiveScenarioClassification (sv)": 30.99,
+ "MassiveScenarioClassification (it)": 37.19,
+ "MassiveScenarioClassification (es)": 33.12,
+ "MassiveScenarioClassification (lv)": 25.09,
+ "MassiveScenarioClassification (ko)": 6.43,
+ "MassiveScenarioClassification (ml)": 6.61,
+ "MassiveScenarioClassification (km)": 9.07,
+ "MassiveScenarioClassification (bn)": 8.09,
+ "MassiveScenarioClassification (el)": 17.02,
+ "MassiveScenarioClassification (kn)": 7.28,
+ "MassiveScenarioClassification (ka)": 6.64,
+ "MassiveScenarioClassification (fi)": 31.31,
+ "MassiveScenarioClassification (vi)": 25.7,
+ "MassiveScenarioClassification (sq)": 35.62,
+ "MassiveScenarioClassification (ar)": 11.78,
+ "MassiveScenarioClassification (hy)": 8.09,
+ "MassiveScenarioClassification (sl)": 30.45,
+ "MassiveScenarioClassification (th)": 7.79,
+ "MassiveScenarioClassification (az)": 27.56,
+ "MassiveScenarioClassification (pl)": 27.65,
+ "MassiveScenarioClassification (ta)": 6.88,
+ "MassiveScenarioClassification (jv)": 32.17,
+ "MassiveScenarioClassification (nl)": 31.82,
+ "MassiveScenarioClassification (id)": 33.72,
+ "MassiveScenarioClassification (ur)": 8.49,
+ "MassiveScenarioClassification (af)": 29.85,
+ "MassiveScenarioClassification (en)": 55.36,
+ "MassiveScenarioClassification (hu)": 31.34,
+ "MassiveScenarioClassification (ja)": 9.53,
+ "MassiveScenarioClassification (am)": 7.66,
+ "MassiveScenarioClassification (mn)": 24.63,
+ "MassiveScenarioClassification (hi)": 7.29,
+ "MassiveScenarioClassification (he)": 7.83,
+ "MassiveScenarioClassification (tr)": 30.63,
+ "NoRecClassification": 40.82,
+ "NordicLangClassification": 46.52,
+ "PAC": 62.19,
+ "PolEmo2.0-IN": 44.18,
+ "PolEmo2.0-OUT": 31.56,
"RuReviewsClassification": 56.99,
"RuSciBenchGRNTIClassification": 45.63,
- "RuSciBenchOECDClassification": 35.48
+ "RuSciBenchOECDClassification": 35.48,
+ "ToxicConversationsClassification": 57.44,
+ "TweetSentimentExtractionClassification": 50.37
}
]
},
@@ -5480,9 +15470,67 @@
"v_measure": [
{
"Model": "rubert-tiny2",
+ "AlloProfClusteringP2P": 36.88,
+ "AlloProfClusteringS2S": 21.97,
+ "ArxivClusteringP2P": 23.06,
+ "ArxivClusteringS2S": 15.44,
+ "BiorxivClusteringP2P": 21.46,
+ "BiorxivClusteringS2S": 12.43,
+ "BlurbsClusteringP2P": 12.5,
+ "BlurbsClusteringS2S": 9.41,
"GeoreviewClusteringP2P": 41.58,
+ "HALClusteringS2S": 5.81,
+ "MLSUMClusteringP2P (de)": 17.92,
+ "MLSUMClusteringP2P (fr)": 29.01,
+ "MLSUMClusteringP2P (ru)": 51.63,
+ "MLSUMClusteringP2P (es)": 33.53,
+ "MLSUMClusteringS2S (de)": 18.67,
+ "MLSUMClusteringS2S (fr)": 28.05,
+ "MLSUMClusteringS2S (ru)": 47.72,
+ "MLSUMClusteringS2S (es)": 32.34,
+ "MasakhaNEWSClusteringP2P (amh)": 40.65,
+ "MasakhaNEWSClusteringP2P (eng)": 23.62,
+ "MasakhaNEWSClusteringP2P (fra)": 26.3,
+ "MasakhaNEWSClusteringP2P (hau)": 16.93,
+ "MasakhaNEWSClusteringP2P (ibo)": 22.81,
+ "MasakhaNEWSClusteringP2P (lin)": 45.22,
+ "MasakhaNEWSClusteringP2P (lug)": 44.31,
+ "MasakhaNEWSClusteringP2P (orm)": 24.05,
+ "MasakhaNEWSClusteringP2P (pcm)": 50.73,
+ "MasakhaNEWSClusteringP2P (run)": 46.44,
+ "MasakhaNEWSClusteringP2P (sna)": 40.81,
+ "MasakhaNEWSClusteringP2P (som)": 23.35,
+ "MasakhaNEWSClusteringP2P (swa)": 8.7,
+ "MasakhaNEWSClusteringP2P (tir)": 43.15,
+ "MasakhaNEWSClusteringP2P (xho)": 21.63,
+ "MasakhaNEWSClusteringP2P (yor)": 23.88,
+ "MasakhaNEWSClusteringS2S (amh)": 45.44,
+ "MasakhaNEWSClusteringS2S (eng)": 25.98,
+ "MasakhaNEWSClusteringS2S (fra)": 30.07,
+ "MasakhaNEWSClusteringS2S (hau)": 10.62,
+ "MasakhaNEWSClusteringS2S (ibo)": 36.2,
+ "MasakhaNEWSClusteringS2S (lin)": 50.06,
+ "MasakhaNEWSClusteringS2S (lug)": 42.19,
+ "MasakhaNEWSClusteringS2S (orm)": 26.05,
+ "MasakhaNEWSClusteringS2S (pcm)": 46.52,
+ "MasakhaNEWSClusteringS2S (run)": 44.3,
+ "MasakhaNEWSClusteringS2S (sna)": 48.29,
+ "MasakhaNEWSClusteringS2S (som)": 24.56,
+ "MasakhaNEWSClusteringS2S (swa)": 15.48,
+ "MasakhaNEWSClusteringS2S (tir)": 46.56,
+ "MasakhaNEWSClusteringS2S (xho)": 26.57,
+ "MasakhaNEWSClusteringS2S (yor)": 26.75,
+ "MedrxivClusteringP2P": 22.56,
+ "MedrxivClusteringS2S": 18.65,
+ "RedditClustering": 17.72,
+ "RedditClusteringP2P": 32.99,
"RuSciBenchGRNTIClusteringP2P": 39.78,
- "RuSciBenchOECDClusteringP2P": 35.98
+ "RuSciBenchOECDClusteringP2P": 35.98,
+ "StackExchangeClustering": 29.01,
+ "StackExchangeClusteringP2P": 25.71,
+ "TenKGnadClusteringP2P": 11.05,
+ "TenKGnadClusteringS2S": 8.74,
+ "TwentyNewsgroupsClustering": 15.63
}
]
},
@@ -5490,11 +15538,51 @@
"max_ap": [
{
"Model": "rubert-tiny2",
- "TERRa": 51.87
+ "CDSC-E": 45.08,
+ "FalseFriendsGermanEnglish": 46.98,
+ "OpusparcusPC (de)": 91.7,
+ "OpusparcusPC (en)": 97.3,
+ "OpusparcusPC (fi)": 86.73,
+ "OpusparcusPC (fr)": 86.53,
+ "OpusparcusPC (ru)": 85.11,
+ "OpusparcusPC (sv)": 85.1,
+ "PSC": 70.84,
+ "PawsXPairClassification (de)": 48.65,
+ "PawsXPairClassification (en)": 45.47,
+ "PawsXPairClassification (es)": 47.56,
+ "PawsXPairClassification (fr)": 49.3,
+ "PawsXPairClassification (ja)": 48.24,
+ "PawsXPairClassification (ko)": 47.0,
+ "PawsXPairClassification (zh)": 51.8,
+ "SICK-E-PL": 47.84,
+ "SprintDuplicateQuestions": 70.8,
+ "TERRa": 51.87,
+ "TwitterSemEval2015": 55.49,
+ "TwitterURLCorpus": 77.98
},
{
"Model": "rubert-tiny2",
- "TERRa": 51.87
+ "CDSC-E": 45.22,
+ "FalseFriendsGermanEnglish": 46.98,
+ "OpusparcusPC (de)": 91.7,
+ "OpusparcusPC (en)": 97.34,
+ "OpusparcusPC (fi)": 86.73,
+ "OpusparcusPC (fr)": 86.53,
+ "OpusparcusPC (ru)": 85.24,
+ "OpusparcusPC (sv)": 85.1,
+ "PSC": 71.22,
+ "PawsXPairClassification (de)": 48.67,
+ "PawsXPairClassification (en)": 45.47,
+ "PawsXPairClassification (es)": 47.61,
+ "PawsXPairClassification (fr)": 49.3,
+ "PawsXPairClassification (ja)": 48.41,
+ "PawsXPairClassification (ko)": 47.48,
+ "PawsXPairClassification (zh)": 51.83,
+ "SICK-E-PL": 47.87,
+ "SprintDuplicateQuestions": 71.16,
+ "TERRa": 51.87,
+ "TwitterSemEval2015": 55.49,
+ "TwitterURLCorpus": 77.98
}
]
},
@@ -5502,11 +15590,35 @@
"map": [
{
"Model": "rubert-tiny2",
- "MIRACLReranking (ru)": 15.81
+ "AlloprofReranking": 27.94,
+ "AskUbuntuDupQuestions": 48.11,
+ "MindSmallReranking": 26.74,
+ "RuBQReranking": 46.09,
+ "SciDocsRR": 54.67,
+ "StackOverflowDupQuestions": 36.0,
+ "SyntecReranking": 38.15,
+ "T2Reranking": 53.19
},
{
"Model": "rubert-tiny2",
- "RuBQReranking": 46.09
+ "MIRACLReranking (ru)": 15.81,
+ "MIRACLReranking (ar)": 1.94,
+ "MIRACLReranking (bn)": 1.56,
+ "MIRACLReranking (de)": 7.23,
+ "MIRACLReranking (en)": 14.0,
+ "MIRACLReranking (es)": 7.49,
+ "MIRACLReranking (fa)": 3.65,
+ "MIRACLReranking (fi)": 14.18,
+ "MIRACLReranking (fr)": 5.5,
+ "MIRACLReranking (hi)": 3.61,
+ "MIRACLReranking (id)": 8.22,
+ "MIRACLReranking (ja)": 1.99,
+ "MIRACLReranking (ko)": 5.1,
+ "MIRACLReranking (sw)": 9.07,
+ "MIRACLReranking (te)": 4.07,
+ "MIRACLReranking (th)": 1.93,
+ "MIRACLReranking (yo)": 12.97,
+ "MIRACLReranking (zh)": 2.64
}
]
},
@@ -5514,9 +15626,143 @@
"ndcg_at_10": [
{
"Model": "rubert-tiny2",
+ "AILACasedocs": 16.05,
+ "AILAStatutes": 13.81,
+ "ARCChallenge": 2.93,
+ "AlloprofRetrieval": 0.55,
+ "AlphaNLI": 6.83,
+ "AppsRetrieval": 0.44,
+ "ArguAna": 27.66,
+ "BSARDRetrieval": 0.15,
+ "ClimateFEVER": 2.08,
+ "CmedqaRetrieval": 1.09,
+ "CodeFeedbackMT": 26.04,
+ "CodeFeedbackST": 20.27,
+ "CodeSearchNetCCRetrieval (python)": 19.13,
+ "CodeSearchNetCCRetrieval (javascript)": 25.81,
+ "CodeSearchNetCCRetrieval (go)": 13.02,
+ "CodeSearchNetCCRetrieval (ruby)": 28.35,
+ "CodeSearchNetCCRetrieval (java)": 15.62,
+ "CodeSearchNetCCRetrieval (php)": 16.02,
+ "CodeSearchNetRetrieval (python)": 34.54,
+ "CodeSearchNetRetrieval (javascript)": 16.11,
+ "CodeSearchNetRetrieval (go)": 22.91,
+ "CodeSearchNetRetrieval (ruby)": 26.85,
+ "CodeSearchNetRetrieval (java)": 16.8,
+ "CodeSearchNetRetrieval (php)": 18.93,
+ "CodeTransOceanContest": 19.41,
+ "CodeTransOceanDL": 34.02,
+ "CosQA": 4.48,
+ "CovidRetrieval": 0.1,
+ "DBPedia": 2.66,
+ "FEVER": 3.01,
+ "FiQA2018": 2.42,
+ "GerDaLIR": 0.13,
+ "GerDaLIRSmall": 0.41,
+ "GermanQuAD-Retrieval": 16.79,
+ "HellaSwag": 5.48,
+ "HotpotQA": 8.85,
+ "LEMBNarrativeQARetrieval": 4.78,
+ "LEMBQMSumRetrieval": 16.95,
+ "LEMBSummScreenFDRetrieval": 19.52,
+ "LEMBWikimQARetrieval": 14.17,
+ "LeCaRDv2": 11.28,
+ "LegalBenchConsumerContractsQA": 39.51,
+ "LegalBenchCorporateLobbying": 60.19,
+ "LegalQuAD": 4.11,
+ "LegalSummarization": 34.19,
"MIRACLRetrieval (ru)": 1.89,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.19,
+ "MIRACLRetrieval (en)": 0.68,
+ "MIRACLRetrieval (es)": 0.0,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.62,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.12,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.97,
+ "MIRACLRetrieval (sw)": 0.76,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 1.94,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 1.13,
+ "MintakaRetrieval (ar)": 0.45,
+ "MintakaRetrieval (de)": 3.56,
+ "MintakaRetrieval (es)": 2.95,
+ "MintakaRetrieval (fr)": 3.07,
+ "MintakaRetrieval (hi)": 1.03,
+ "MintakaRetrieval (it)": 4.3,
+ "MintakaRetrieval (ja)": 0.93,
+ "MintakaRetrieval (pt)": 4.42,
+ "NFCorpus": 5.03,
+ "NQ": 0.62,
+ "PIQA": 3.01,
+ "Quail": 0.41,
+ "QuoraRetrieval": 66.99,
+ "RARbCode": 1.44,
+ "RARbMath": 10.72,
"RiaNewsRetrieval": 13.92,
- "RuBQRetrieval": 10.87
+ "RuBQRetrieval": 10.87,
+ "SCIDOCS": 1.18,
+ "SIQA": 0.65,
+ "SciFact": 20.19,
+ "SciFact-PL": 7.73,
+ "SpartQA": 6.28,
+ "StackOverflowQA": 19.97,
+ "SyntecRetrieval": 14.22,
+ "SyntheticText2SQL": 19.8,
+ "TRECCOVID": 8.67,
+ "TRECCOVID-PL": 4.61,
+ "TempReasonL1": 0.9,
+ "TempReasonL2Fact": 3.59,
+ "TempReasonL2Pure": 0.03,
+ "TempReasonL3Fact": 4.76,
+ "TempReasonL3Pure": 4.66,
+ "Touche2020": 0.91,
+ "WinoGrande": 1.42,
+ "XMarket (de)": 0.98,
+ "XMarket (en)": 1.17,
+ "XMarket (es)": 0.93,
+ "XPQARetrieval (ara-ara)": 3.67,
+ "XPQARetrieval (eng-ara)": 1.24,
+ "XPQARetrieval (ara-eng)": 4.28,
+ "XPQARetrieval (deu-deu)": 20.93,
+ "XPQARetrieval (eng-deu)": 2.09,
+ "XPQARetrieval (deu-eng)": 8.32,
+ "XPQARetrieval (spa-spa)": 17.06,
+ "XPQARetrieval (eng-spa)": 2.7,
+ "XPQARetrieval (spa-eng)": 10.16,
+ "XPQARetrieval (fra-fra)": 21.91,
+ "XPQARetrieval (eng-fra)": 2.93,
+ "XPQARetrieval (fra-eng)": 10.07,
+ "XPQARetrieval (hin-hin)": 5.61,
+ "XPQARetrieval (eng-hin)": 3.34,
+ "XPQARetrieval (hin-eng)": 3.96,
+ "XPQARetrieval (ita-ita)": 29.66,
+ "XPQARetrieval (eng-ita)": 3.76,
+ "XPQARetrieval (ita-eng)": 12.15,
+ "XPQARetrieval (jpn-jpn)": 4.65,
+ "XPQARetrieval (eng-jpn)": 1.38,
+ "XPQARetrieval (jpn-eng)": 4.31,
+ "XPQARetrieval (kor-kor)": 2.23,
+ "XPQARetrieval (eng-kor)": 2.19,
+ "XPQARetrieval (kor-eng)": 2.06,
+ "XPQARetrieval (pol-pol)": 12.18,
+ "XPQARetrieval (eng-pol)": 2.14,
+ "XPQARetrieval (pol-eng)": 7.82,
+ "XPQARetrieval (por-por)": 11.97,
+ "XPQARetrieval (eng-por)": 2.25,
+ "XPQARetrieval (por-eng)": 6.15,
+ "XPQARetrieval (tam-tam)": 2.57,
+ "XPQARetrieval (eng-tam)": 2.2,
+ "XPQARetrieval (tam-eng)": 2.44,
+ "XPQARetrieval (cmn-cmn)": 12.1,
+ "XPQARetrieval (eng-cmn)": 3.64,
+ "XPQARetrieval (cmn-eng)": 5.93
}
]
},
@@ -5524,14 +15770,131 @@
"cosine_spearman": [
{
"Model": "rubert-tiny2",
+ "BIOSSES": 71.48,
+ "CDSC-R": 67.95,
+ "GermanSTSBenchmark": 54.76,
+ "SICK-R": 61.07,
+ "SICK-R-PL": 48.45,
+ "SICKFr": 55.65,
+ "STS12": 65.12,
+ "STS13": 59.97,
+ "STS14": 60.28,
+ "STS15": 74.38,
+ "STS16": 69.19,
+ "STS17 (ar-ar)": 17.31,
+ "STS17 (es-en)": 17.23,
+ "STS17 (en-ar)": 13.41,
+ "STS17 (en-tr)": 14.88,
+ "STS17 (en-de)": 27.75,
+ "STS17 (es-es)": 66.85,
+ "STS17 (nl-en)": 30.93,
+ "STS17 (ko-ko)": 10.32,
+ "STS17 (en-en)": 73.66,
+ "STS17 (it-en)": 28.0,
+ "STS17 (fr-en)": 26.87,
+ "STS22 (ru)": 54.38,
+ "STS22 (pl-en)": 26.05,
+ "STS22 (es-it)": 27.11,
+ "STS22 (de)": 14.59,
+ "STS22 (de-pl)": 1.2,
+ "STS22 (es)": 44.49,
+ "STS22 (pl)": 9.38,
+ "STS22 (fr)": 42.82,
+ "STS22 (ar)": 21.36,
+ "STS22 (de-en)": 16.12,
+ "STS22 (fr-pl)": 28.17,
+ "STS22 (en)": 49.25,
+ "STS22 (es-en)": 20.13,
+ "STS22 (de-fr)": 13.24,
+ "STS22 (zh-en)": 27.07,
+ "STS22 (zh)": 35.12,
+ "STS22 (tr)": 20.52,
+ "STS22 (it)": 43.84,
+ "STSB": 16.68,
+ "STSBenchmark": 63.02,
+ "STSBenchmarkMultilingualSTS (pt)": 49.26,
+ "STSBenchmarkMultilingualSTS (fr)": 56.61,
+ "STSBenchmarkMultilingualSTS (nl)": 51.52,
+ "STSBenchmarkMultilingualSTS (zh)": 16.6,
+ "STSBenchmarkMultilingualSTS (ru)": 69.29,
+ "STSBenchmarkMultilingualSTS (it)": 53.15,
+ "STSBenchmarkMultilingualSTS (es)": 53.78,
+ "STSBenchmarkMultilingualSTS (pl)": 52.1,
+ "STSBenchmarkMultilingualSTS (de)": 56.97,
+ "STSBenchmarkMultilingualSTS (en)": 63.02
+ },
+ {
+ "Model": "rubert-tiny2",
+ "BIOSSES": 71.48,
+ "CDSC-R": 67.95,
+ "GermanSTSBenchmark": 54.77,
"RUParaPhraserSTS": 65.14,
"RuSTSBenchmarkSTS": 69.43,
- "STS22 (ru)": 50.23
+ "SICK-R": 61.07,
+ "SICK-R-PL": 48.45,
+ "SICKFr": 55.65,
+ "STS12": 65.12,
+ "STS13": 59.97,
+ "STS14": 60.28,
+ "STS15": 74.38,
+ "STS16": 69.19,
+ "STS17 (ar-ar)": 16.79,
+ "STS17 (es-en)": 17.23,
+ "STS17 (en-ar)": 13.41,
+ "STS17 (en-tr)": 14.88,
+ "STS17 (en-de)": 27.75,
+ "STS17 (es-es)": 66.86,
+ "STS17 (nl-en)": 30.93,
+ "STS17 (ko-ko)": 10.4,
+ "STS17 (en-en)": 73.66,
+ "STS17 (it-en)": 28.0,
+ "STS17 (fr-en)": 26.87,
+ "STS22 (ru)": 54.38,
+ "STS22 (pl-en)": 26.05,
+ "STS22 (es-it)": 27.11,
+ "STS22 (de)": 14.6,
+ "STS22 (de-pl)": 1.2,
+ "STS22 (es)": 44.49,
+ "STS22 (pl)": 9.27,
+ "STS22 (fr)": 42.82,
+ "STS22 (ar)": 21.38,
+ "STS22 (de-en)": 16.12,
+ "STS22 (fr-pl)": 28.17,
+ "STS22 (en)": 49.25,
+ "STS22 (es-en)": 20.13,
+ "STS22 (de-fr)": 13.24,
+ "STS22 (zh-en)": 27.07,
+ "STS22 (zh)": 35.12,
+ "STS22 (tr)": 20.52,
+ "STS22 (it)": 43.84,
+ "STSB": 16.6,
+ "STSBenchmark": 63.02,
+ "STSBenchmarkMultilingualSTS (pt)": 49.26,
+ "STSBenchmarkMultilingualSTS (fr)": 56.61,
+ "STSBenchmarkMultilingualSTS (nl)": 51.52,
+ "STSBenchmarkMultilingualSTS (zh)": 16.54,
+ "STSBenchmarkMultilingualSTS (ru)": 69.29,
+ "STSBenchmarkMultilingualSTS (it)": 53.15,
+ "STSBenchmarkMultilingualSTS (es)": 53.78,
+ "STSBenchmarkMultilingualSTS (pl)": 52.1,
+ "STSBenchmarkMultilingualSTS (de)": 56.97,
+ "STSBenchmarkMultilingualSTS (en)": 63.02
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "rubert-tiny2",
+ "SummEval": 28.46,
+ "SummEvalFr": 32.52
+ },
+ {
+ "Model": "rubert-tiny2",
+ "SummEval": 28.46,
+ "SummEvalFr": 32.52
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -5543,7 +15906,14 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "rubert-tiny2",
+ "Core17InstructionRetrieval": 0.88,
+ "News21InstructionRetrieval": 1.39,
+ "Robust04InstructionRetrieval": -2.54
+ }
+ ]
}
},
"dangvantuan__sentence-camembert-base": {
@@ -5893,6 +16263,7 @@
"f1": [
{
"Model": "USER-base",
+ "BornholmBitextMining": 34.8,
"Tatoeba (rus-eng)": 90.2
}
]
@@ -5901,15 +16272,169 @@
"accuracy": [
{
"Model": "USER-base",
- "GeoreviewClassification": 47.23,
- "HeadlineClassification": 74.88,
- "InappropriatenessClassification": 61.94,
- "KinopoiskClassification": 55.69,
- "MassiveIntentClassification (ru)": 65.57,
- "MassiveScenarioClassification (ru)": 68.33,
- "RuReviewsClassification": 66.44,
+ "AllegroReviews": 25.6,
+ "AmazonCounterfactualClassification (en-ext)": 67.92,
+ "AmazonCounterfactualClassification (en)": 68.37,
+ "AmazonCounterfactualClassification (de)": 55.33,
+ "AmazonCounterfactualClassification (ja)": 49.24,
+ "AmazonPolarityClassification": 66.69,
+ "AmazonReviewsClassification (en)": 35.19,
+ "AmazonReviewsClassification (de)": 26.36,
+ "AmazonReviewsClassification (es)": 27.0,
+ "AmazonReviewsClassification (fr)": 27.62,
+ "AmazonReviewsClassification (ja)": 20.55,
+ "AmazonReviewsClassification (zh)": 22.46,
+ "AngryTweetsClassification": 46.11,
+ "Banking77Classification": 65.5,
+ "CBD": 51.2,
+ "DanishPoliticalCommentsClassification": 28.0,
+ "EmotionClassification": 27.16,
+ "GeoreviewClassification": 46.88,
+ "HeadlineClassification": 75.0,
+ "ImdbClassification": 58.56,
+ "InappropriatenessClassification": 61.83,
+ "KinopoiskClassification": 56.03,
+ "LccSentimentClassification": 37.6,
+ "MTOPDomainClassification (en)": 81.46,
+ "MTOPDomainClassification (de)": 61.34,
+ "MTOPDomainClassification (es)": 61.05,
+ "MTOPDomainClassification (fr)": 59.72,
+ "MTOPDomainClassification (hi)": 25.63,
+ "MTOPDomainClassification (th)": 25.03,
+ "MTOPIntentClassification (en)": 50.21,
+ "MTOPIntentClassification (de)": 36.4,
+ "MTOPIntentClassification (es)": 34.85,
+ "MTOPIntentClassification (fr)": 35.07,
+ "MTOPIntentClassification (hi)": 4.62,
+ "MTOPIntentClassification (th)": 8.12,
+ "MasakhaNEWSClassification (amh)": 29.65,
+ "MasakhaNEWSClassification (eng)": 72.13,
+ "MasakhaNEWSClassification (fra)": 67.44,
+ "MasakhaNEWSClassification (hau)": 54.95,
+ "MasakhaNEWSClassification (ibo)": 47.59,
+ "MasakhaNEWSClassification (lin)": 61.09,
+ "MasakhaNEWSClassification (lug)": 45.61,
+ "MasakhaNEWSClassification (orm)": 47.05,
+ "MasakhaNEWSClassification (pcm)": 91.74,
+ "MasakhaNEWSClassification (run)": 53.07,
+ "MasakhaNEWSClassification (sna)": 69.43,
+ "MasakhaNEWSClassification (som)": 40.95,
+ "MasakhaNEWSClassification (swa)": 44.94,
+ "MasakhaNEWSClassification (tir)": 24.49,
+ "MasakhaNEWSClassification (xho)": 58.69,
+ "MasakhaNEWSClassification (yor)": 57.91,
+ "MassiveIntentClassification (ru)": 65.92,
+ "MassiveIntentClassification (hu)": 30.6,
+ "MassiveIntentClassification (sv)": 35.62,
+ "MassiveIntentClassification (hy)": 7.88,
+ "MassiveIntentClassification (te)": 5.31,
+ "MassiveIntentClassification (af)": 30.74,
+ "MassiveIntentClassification (ms)": 32.88,
+ "MassiveIntentClassification (en)": 50.95,
+ "MassiveIntentClassification (az)": 34.76,
+ "MassiveIntentClassification (km)": 7.48,
+ "MassiveIntentClassification (it)": 39.85,
+ "MassiveIntentClassification (id)": 35.62,
+ "MassiveIntentClassification (fa)": 12.65,
+ "MassiveIntentClassification (th)": 6.54,
+ "MassiveIntentClassification (sl)": 36.21,
+ "MassiveIntentClassification (ur)": 8.46,
+ "MassiveIntentClassification (fi)": 32.53,
+ "MassiveIntentClassification (he)": 4.71,
+ "MassiveIntentClassification (el)": 19.77,
+ "MassiveIntentClassification (ka)": 5.46,
+ "MassiveIntentClassification (am)": 5.65,
+ "MassiveIntentClassification (is)": 32.58,
+ "MassiveIntentClassification (zh-CN)": 14.69,
+ "MassiveIntentClassification (ar)": 14.28,
+ "MassiveIntentClassification (ml)": 6.04,
+ "MassiveIntentClassification (lv)": 34.91,
+ "MassiveIntentClassification (zh-TW)": 12.74,
+ "MassiveIntentClassification (nb)": 33.1,
+ "MassiveIntentClassification (ta)": 5.97,
+ "MassiveIntentClassification (ja)": 12.32,
+ "MassiveIntentClassification (ko)": 11.02,
+ "MassiveIntentClassification (tl)": 33.3,
+ "MassiveIntentClassification (pl)": 36.95,
+ "MassiveIntentClassification (de)": 33.92,
+ "MassiveIntentClassification (my)": 7.19,
+ "MassiveIntentClassification (jv)": 31.16,
+ "MassiveIntentClassification (kn)": 5.36,
+ "MassiveIntentClassification (vi)": 23.92,
+ "MassiveIntentClassification (hi)": 4.28,
+ "MassiveIntentClassification (da)": 35.68,
+ "MassiveIntentClassification (mn)": 30.97,
+ "MassiveIntentClassification (nl)": 33.41,
+ "MassiveIntentClassification (es)": 34.06,
+ "MassiveIntentClassification (sq)": 38.32,
+ "MassiveIntentClassification (pt)": 35.15,
+ "MassiveIntentClassification (fr)": 35.14,
+ "MassiveIntentClassification (cy)": 34.48,
+ "MassiveIntentClassification (bn)": 6.14,
+ "MassiveIntentClassification (ro)": 34.63,
+ "MassiveIntentClassification (tr)": 32.85,
+ "MassiveIntentClassification (sw)": 34.69,
+ "MassiveScenarioClassification (ru)": 69.06,
+ "MassiveScenarioClassification (fr)": 42.95,
+ "MassiveScenarioClassification (az)": 39.16,
+ "MassiveScenarioClassification (zh-TW)": 19.39,
+ "MassiveScenarioClassification (sw)": 40.11,
+ "MassiveScenarioClassification (ar)": 20.44,
+ "MassiveScenarioClassification (tl)": 38.91,
+ "MassiveScenarioClassification (zh-CN)": 20.84,
+ "MassiveScenarioClassification (sv)": 40.71,
+ "MassiveScenarioClassification (bn)": 11.01,
+ "MassiveScenarioClassification (nl)": 39.77,
+ "MassiveScenarioClassification (km)": 13.46,
+ "MassiveScenarioClassification (te)": 9.99,
+ "MassiveScenarioClassification (da)": 42.54,
+ "MassiveScenarioClassification (el)": 27.39,
+ "MassiveScenarioClassification (my)": 12.72,
+ "MassiveScenarioClassification (mn)": 35.46,
+ "MassiveScenarioClassification (es)": 40.44,
+ "MassiveScenarioClassification (hu)": 38.84,
+ "MassiveScenarioClassification (cy)": 37.02,
+ "MassiveScenarioClassification (kn)": 11.12,
+ "MassiveScenarioClassification (hy)": 13.67,
+ "MassiveScenarioClassification (is)": 38.64,
+ "MassiveScenarioClassification (pl)": 43.88,
+ "MassiveScenarioClassification (nb)": 37.15,
+ "MassiveScenarioClassification (lv)": 39.17,
+ "MassiveScenarioClassification (vi)": 31.5,
+ "MassiveScenarioClassification (en)": 59.93,
+ "MassiveScenarioClassification (id)": 41.48,
+ "MassiveScenarioClassification (ro)": 41.77,
+ "MassiveScenarioClassification (fi)": 37.31,
+ "MassiveScenarioClassification (jv)": 37.93,
+ "MassiveScenarioClassification (ko)": 15.33,
+ "MassiveScenarioClassification (sq)": 43.78,
+ "MassiveScenarioClassification (ta)": 11.1,
+ "MassiveScenarioClassification (ms)": 39.48,
+ "MassiveScenarioClassification (tr)": 37.28,
+ "MassiveScenarioClassification (af)": 37.03,
+ "MassiveScenarioClassification (fa)": 17.22,
+ "MassiveScenarioClassification (ka)": 10.49,
+ "MassiveScenarioClassification (sl)": 39.28,
+ "MassiveScenarioClassification (it)": 45.96,
+ "MassiveScenarioClassification (he)": 10.44,
+ "MassiveScenarioClassification (hi)": 8.93,
+ "MassiveScenarioClassification (ja)": 17.52,
+ "MassiveScenarioClassification (pt)": 40.92,
+ "MassiveScenarioClassification (th)": 11.93,
+ "MassiveScenarioClassification (ml)": 10.56,
+ "MassiveScenarioClassification (ur)": 14.4,
+ "MassiveScenarioClassification (de)": 42.37,
+ "MassiveScenarioClassification (am)": 11.75,
+ "NoRecClassification": 40.97,
+ "NordicLangClassification": 45.21,
+ "PAC": 70.41,
+ "PolEmo2.0-IN": 43.77,
+ "PolEmo2.0-OUT": 23.18,
+ "RuReviewsClassification": 65.48,
"RuSciBenchGRNTIClassification": 55.55,
- "RuSciBenchOECDClassification": 43.28
+ "RuSciBenchOECDClassification": 43.28,
+ "ToxicConversationsClassification": 62.4,
+ "TweetSentimentExtractionClassification": 55.35
}
]
},
@@ -5917,11 +16442,67 @@
"v_measure": [
{
"Model": "USER-base",
- "GeoreviewClusteringP2P": 64.16,
+ "AlloProfClusteringP2P": 44.92,
+ "AlloProfClusteringS2S": 23.19,
+ "ArxivClusteringP2P": 24.06,
+ "ArxivClusteringS2S": 14.62,
+ "BiorxivClusteringP2P": 19.57,
+ "BiorxivClusteringS2S": 10.96,
+ "BlurbsClusteringP2P": 15.08,
+ "BlurbsClusteringS2S": 9.12,
+ "GeoreviewClusteringP2P": 63.41,
+ "HALClusteringS2S": 8.99,
"MLSUMClusteringP2P (ru)": 48.09,
+ "MLSUMClusteringP2P (de)": 22.41,
+ "MLSUMClusteringP2P (fr)": 33.8,
+ "MLSUMClusteringP2P (es)": 34.12,
"MLSUMClusteringS2S (ru)": 45.73,
- "RuSciBenchGRNTIClusteringP2P": 51.38,
- "RuSciBenchOECDClusteringP2P": 44.73
+ "MLSUMClusteringS2S (de)": 23.53,
+ "MLSUMClusteringS2S (fr)": 32.41,
+ "MLSUMClusteringS2S (es)": 33.42,
+ "MasakhaNEWSClusteringP2P (amh)": 41.86,
+ "MasakhaNEWSClusteringP2P (eng)": 51.77,
+ "MasakhaNEWSClusteringP2P (fra)": 46.27,
+ "MasakhaNEWSClusteringP2P (hau)": 29.94,
+ "MasakhaNEWSClusteringP2P (ibo)": 32.63,
+ "MasakhaNEWSClusteringP2P (lin)": 53.65,
+ "MasakhaNEWSClusteringP2P (lug)": 52.56,
+ "MasakhaNEWSClusteringP2P (orm)": 27.51,
+ "MasakhaNEWSClusteringP2P (pcm)": 85.52,
+ "MasakhaNEWSClusteringP2P (run)": 52.92,
+ "MasakhaNEWSClusteringP2P (sna)": 52.22,
+ "MasakhaNEWSClusteringP2P (som)": 30.37,
+ "MasakhaNEWSClusteringP2P (swa)": 22.23,
+ "MasakhaNEWSClusteringP2P (tir)": 43.83,
+ "MasakhaNEWSClusteringP2P (xho)": 27.21,
+ "MasakhaNEWSClusteringP2P (yor)": 26.66,
+ "MasakhaNEWSClusteringS2S (amh)": 40.86,
+ "MasakhaNEWSClusteringS2S (eng)": 20.37,
+ "MasakhaNEWSClusteringS2S (fra)": 25.39,
+ "MasakhaNEWSClusteringS2S (hau)": 15.37,
+ "MasakhaNEWSClusteringS2S (ibo)": 34.49,
+ "MasakhaNEWSClusteringS2S (lin)": 43.66,
+ "MasakhaNEWSClusteringS2S (lug)": 41.45,
+ "MasakhaNEWSClusteringS2S (orm)": 25.63,
+ "MasakhaNEWSClusteringS2S (pcm)": 63.41,
+ "MasakhaNEWSClusteringS2S (run)": 49.77,
+ "MasakhaNEWSClusteringS2S (sna)": 43.07,
+ "MasakhaNEWSClusteringS2S (som)": 26.34,
+ "MasakhaNEWSClusteringS2S (swa)": 15.66,
+ "MasakhaNEWSClusteringS2S (tir)": 42.96,
+ "MasakhaNEWSClusteringS2S (xho)": 27.54,
+ "MasakhaNEWSClusteringS2S (yor)": 29.03,
+ "MedrxivClusteringP2P": 23.8,
+ "MedrxivClusteringS2S": 19.43,
+ "RedditClustering": 26.95,
+ "RedditClusteringP2P": 44.11,
+ "RuSciBenchGRNTIClusteringP2P": 51.5,
+ "RuSciBenchOECDClusteringP2P": 44.87,
+ "StackExchangeClustering": 34.21,
+ "StackExchangeClusteringP2P": 26.54,
+ "TenKGnadClusteringP2P": 29.7,
+ "TenKGnadClusteringS2S": 10.51,
+ "TwentyNewsgroupsClustering": 21.41
}
]
},
@@ -5929,13 +16510,51 @@
"max_ap": [
{
"Model": "USER-base",
+ "CDSC-E": 51.61,
+ "FalseFriendsGermanEnglish": 46.84,
"OpusparcusPC (ru)": 91.65,
- "TERRa": 60.02
+ "OpusparcusPC (de)": 90.82,
+ "OpusparcusPC (en)": 97.49,
+ "OpusparcusPC (fi)": 84.61,
+ "OpusparcusPC (fr)": 88.32,
+ "OpusparcusPC (sv)": 84.08,
+ "PSC": 88.54,
+ "PawsXPairClassification (de)": 52.51,
+ "PawsXPairClassification (en)": 61.15,
+ "PawsXPairClassification (es)": 54.72,
+ "PawsXPairClassification (fr)": 56.35,
+ "PawsXPairClassification (ja)": 49.39,
+ "PawsXPairClassification (ko)": 51.82,
+ "PawsXPairClassification (zh)": 54.65,
+ "SICK-E-PL": 54.43,
+ "SprintDuplicateQuestions": 83.25,
+ "TERRa": 59.76,
+ "TwitterSemEval2015": 61.72,
+ "TwitterURLCorpus": 81.61
},
{
"Model": "USER-base",
+ "CDSC-E": 51.93,
+ "FalseFriendsGermanEnglish": 46.84,
"OpusparcusPC (ru)": 91.65,
- "TERRa": 60.11
+ "OpusparcusPC (de)": 90.82,
+ "OpusparcusPC (en)": 97.49,
+ "OpusparcusPC (fi)": 84.61,
+ "OpusparcusPC (fr)": 88.32,
+ "OpusparcusPC (sv)": 84.11,
+ "PSC": 88.71,
+ "PawsXPairClassification (de)": 52.74,
+ "PawsXPairClassification (en)": 61.15,
+ "PawsXPairClassification (es)": 54.75,
+ "PawsXPairClassification (fr)": 56.39,
+ "PawsXPairClassification (ja)": 49.39,
+ "PawsXPairClassification (ko)": 51.82,
+ "PawsXPairClassification (zh)": 54.88,
+ "SICK-E-PL": 54.43,
+ "SprintDuplicateQuestions": 83.25,
+ "TERRa": 59.76,
+ "TwitterSemEval2015": 61.72,
+ "TwitterURLCorpus": 81.61
}
]
},
@@ -5943,11 +16562,35 @@
"map": [
{
"Model": "USER-base",
- "MIRACLReranking (ru)": 46.75
+ "AlloprofReranking": 38.67,
+ "AskUbuntuDupQuestions": 50.31,
+ "MindSmallReranking": 28.0,
+ "RuBQReranking": 64.42,
+ "SciDocsRR": 59.74,
+ "StackOverflowDupQuestions": 38.95,
+ "SyntecReranking": 52.11,
+ "T2Reranking": 54.18
},
{
"Model": "USER-base",
- "RuBQReranking": 64.42
+ "MIRACLReranking (ru)": 46.75,
+ "MIRACLReranking (ar)": 4.51,
+ "MIRACLReranking (bn)": 4.54,
+ "MIRACLReranking (de)": 14.58,
+ "MIRACLReranking (en)": 35.41,
+ "MIRACLReranking (es)": 20.5,
+ "MIRACLReranking (fa)": 5.79,
+ "MIRACLReranking (fi)": 29.37,
+ "MIRACLReranking (fr)": 15.83,
+ "MIRACLReranking (hi)": 5.04,
+ "MIRACLReranking (id)": 18.1,
+ "MIRACLReranking (ja)": 3.31,
+ "MIRACLReranking (ko)": 5.26,
+ "MIRACLReranking (sw)": 22.13,
+ "MIRACLReranking (te)": 4.89,
+ "MIRACLReranking (th)": 4.26,
+ "MIRACLReranking (yo)": 34.77,
+ "MIRACLReranking (zh)": 2.96
}
]
},
@@ -5955,9 +16598,143 @@
"ndcg_at_10": [
{
"Model": "USER-base",
- "MIRACLRetrieval (ru)": 35.22,
- "RiaNewsRetrieval": 77.83,
- "RuBQRetrieval": 56.86
+ "AILACasedocs": 17.26,
+ "AILAStatutes": 10.27,
+ "ARCChallenge": 4.32,
+ "AlloprofRetrieval": 4.53,
+ "AlphaNLI": 17.69,
+ "AppsRetrieval": 1.06,
+ "ArguAna": 39.93,
+ "BSARDRetrieval": 0.5,
+ "ClimateFEVER": 8.04,
+ "CmedqaRetrieval": 0.43,
+ "CodeFeedbackMT": 11.83,
+ "CodeFeedbackST": 27.39,
+ "CodeSearchNetCCRetrieval (python)": 15.45,
+ "CodeSearchNetCCRetrieval (javascript)": 22.88,
+ "CodeSearchNetCCRetrieval (go)": 13.48,
+ "CodeSearchNetCCRetrieval (ruby)": 25.8,
+ "CodeSearchNetCCRetrieval (java)": 16.81,
+ "CodeSearchNetCCRetrieval (php)": 9.08,
+ "CodeSearchNetRetrieval (python)": 44.22,
+ "CodeSearchNetRetrieval (javascript)": 30.73,
+ "CodeSearchNetRetrieval (go)": 45.46,
+ "CodeSearchNetRetrieval (ruby)": 37.01,
+ "CodeSearchNetRetrieval (java)": 17.15,
+ "CodeSearchNetRetrieval (php)": 23.3,
+ "CodeTransOceanContest": 18.25,
+ "CodeTransOceanDL": 26.66,
+ "CosQA": 2.89,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 16.39,
+ "FEVER": 35.62,
+ "FiQA2018": 8.47,
+ "GerDaLIR": 0.41,
+ "GerDaLIRSmall": 1.14,
+ "GermanQuAD-Retrieval": 54.92,
+ "HellaSwag": 9.64,
+ "HotpotQA": 24.5,
+ "LEMBNarrativeQARetrieval": 11.11,
+ "LEMBQMSumRetrieval": 13.38,
+ "LEMBSummScreenFDRetrieval": 36.32,
+ "LEMBWikimQARetrieval": 32.33,
+ "LeCaRDv2": 7.32,
+ "LegalBenchConsumerContractsQA": 43.78,
+ "LegalBenchCorporateLobbying": 79.29,
+ "LegalQuAD": 3.57,
+ "LegalSummarization": 47.35,
+ "MIRACLRetrieval (ru)": 33.84,
+ "MIRACLRetrieval (ar)": 0.06,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 1.53,
+ "MIRACLRetrieval (en)": 13.77,
+ "MIRACLRetrieval (es)": 1.22,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 6.42,
+ "MIRACLRetrieval (fr)": 2.55,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 2.15,
+ "MIRACLRetrieval (ja)": 0.17,
+ "MIRACLRetrieval (ko)": 0.94,
+ "MIRACLRetrieval (sw)": 7.38,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.16,
+ "MIRACLRetrieval (yo)": 14.33,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 12.58,
+ "MintakaRetrieval (ar)": 0.54,
+ "MintakaRetrieval (de)": 7.82,
+ "MintakaRetrieval (es)": 10.59,
+ "MintakaRetrieval (fr)": 9.65,
+ "MintakaRetrieval (hi)": 0.88,
+ "MintakaRetrieval (it)": 10.93,
+ "MintakaRetrieval (ja)": 0.95,
+ "MintakaRetrieval (pt)": 11.38,
+ "NFCorpus": 16.6,
+ "NQ": 15.21,
+ "PIQA": 10.26,
+ "Quail": 2.06,
+ "QuoraRetrieval": 76.33,
+ "RARbCode": 0.55,
+ "RARbMath": 22.48,
+ "RiaNewsRetrieval": 77.72,
+ "RuBQRetrieval": 56.86,
+ "SCIDOCS": 6.36,
+ "SIQA": 0.83,
+ "SciFact": 35.61,
+ "SciFact-PL": 23.27,
+ "SpartQA": 2.99,
+ "StackOverflowQA": 25.25,
+ "SyntecRetrieval": 36.67,
+ "SyntheticText2SQL": 29.15,
+ "TRECCOVID": 20.17,
+ "TRECCOVID-PL": 4.99,
+ "TempReasonL1": 1.21,
+ "TempReasonL2Fact": 9.85,
+ "TempReasonL2Pure": 0.18,
+ "TempReasonL3Fact": 11.14,
+ "TempReasonL3Pure": 6.49,
+ "Touche2020": 12.97,
+ "WinoGrande": 44.54,
+ "XMarket (de)": 4.03,
+ "XMarket (en)": 5.25,
+ "XMarket (es)": 5.15,
+ "XPQARetrieval (ara-ara)": 4.77,
+ "XPQARetrieval (eng-ara)": 2.29,
+ "XPQARetrieval (ara-eng)": 6.71,
+ "XPQARetrieval (deu-deu)": 32.64,
+ "XPQARetrieval (eng-deu)": 5.29,
+ "XPQARetrieval (deu-eng)": 17.68,
+ "XPQARetrieval (spa-spa)": 26.81,
+ "XPQARetrieval (eng-spa)": 6.1,
+ "XPQARetrieval (spa-eng)": 16.67,
+ "XPQARetrieval (fra-fra)": 33.11,
+ "XPQARetrieval (eng-fra)": 6.89,
+ "XPQARetrieval (fra-eng)": 23.99,
+ "XPQARetrieval (hin-hin)": 6.78,
+ "XPQARetrieval (eng-hin)": 3.72,
+ "XPQARetrieval (hin-eng)": 5.56,
+ "XPQARetrieval (ita-ita)": 39.67,
+ "XPQARetrieval (eng-ita)": 6.36,
+ "XPQARetrieval (ita-eng)": 18.65,
+ "XPQARetrieval (jpn-jpn)": 12.33,
+ "XPQARetrieval (eng-jpn)": 3.85,
+ "XPQARetrieval (jpn-eng)": 10.44,
+ "XPQARetrieval (kor-kor)": 5.99,
+ "XPQARetrieval (eng-kor)": 5.45,
+ "XPQARetrieval (kor-eng)": 4.52,
+ "XPQARetrieval (pol-pol)": 16.48,
+ "XPQARetrieval (eng-pol)": 5.39,
+ "XPQARetrieval (pol-eng)": 12.87,
+ "XPQARetrieval (por-por)": 20.89,
+ "XPQARetrieval (eng-por)": 4.53,
+ "XPQARetrieval (por-eng)": 15.26,
+ "XPQARetrieval (tam-tam)": 3.3,
+ "XPQARetrieval (eng-tam)": 2.59,
+ "XPQARetrieval (tam-eng)": 3.13,
+ "XPQARetrieval (cmn-cmn)": 7.42,
+ "XPQARetrieval (eng-cmn)": 1.75,
+ "XPQARetrieval (cmn-eng)": 6.27
}
]
},
@@ -5965,34 +16742,152 @@
"cosine_spearman": [
{
"Model": "USER-base",
- "RUParaPhraserSTS": 73.56,
- "RuSTSBenchmarkSTS": 82.26,
- "STS22 (ru)": 63.39,
- "STSBenchmarkMultilingualSTS (ru)": 81.81
+ "BIOSSES": 77.26,
+ "CDSC-R": 74.23,
+ "GermanSTSBenchmark": 60.5,
+ "RUParaPhraserSTS": 73.07,
+ "RuSTSBenchmarkSTS": 81.91,
+ "SICK-R": 68.34,
+ "SICK-R-PL": 54.01,
+ "SICKFr": 60.8,
+ "STS12": 66.31,
+ "STS13": 68.77,
+ "STS14": 67.14,
+ "STS15": 79.88,
+ "STS16": 73.39,
+ "STS17 (fr-en)": 32.17,
+ "STS17 (nl-en)": 39.12,
+ "STS17 (it-en)": 24.74,
+ "STS17 (en-ar)": 5.67,
+ "STS17 (ko-ko)": 21.1,
+ "STS17 (en-tr)": 14.19,
+ "STS17 (en-en)": 81.88,
+ "STS17 (en-de)": 25.18,
+ "STS17 (es-en)": 29.2,
+ "STS17 (es-es)": 68.94,
+ "STS17 (ar-ar)": 32.72,
+ "STS22 (ru)": 68.06,
+ "STS22 (pl)": 24.85,
+ "STS22 (fr)": 64.37,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (pl-en)": 66.72,
+ "STS22 (tr)": 45.0,
+ "STS22 (de-pl)": 4.65,
+ "STS22 (en)": 61.64,
+ "STS22 (de-en)": 46.23,
+ "STS22 (de-fr)": 44.72,
+ "STS22 (es-en)": 60.25,
+ "STS22 (ar)": 15.45,
+ "STS22 (de)": 18.13,
+ "STS22 (it)": 50.8,
+ "STS22 (zh)": 15.1,
+ "STS22 (es-it)": 53.02,
+ "STS22 (es)": 56.85,
+ "STS22 (zh-en)": 26.88,
+ "STSB": 21.81,
+ "STSBenchmark": 76.03,
+ "STSBenchmarkMultilingualSTS (ru)": 81.81,
+ "STSBenchmarkMultilingualSTS (es)": 59.76,
+ "STSBenchmarkMultilingualSTS (en)": 76.03,
+ "STSBenchmarkMultilingualSTS (de)": 62.5,
+ "STSBenchmarkMultilingualSTS (nl)": 56.71,
+ "STSBenchmarkMultilingualSTS (zh)": 21.78,
+ "STSBenchmarkMultilingualSTS (pl)": 57.09,
+ "STSBenchmarkMultilingualSTS (pt)": 55.18,
+ "STSBenchmarkMultilingualSTS (it)": 57.98,
+ "STSBenchmarkMultilingualSTS (fr)": 61.91
},
{
"Model": "USER-base",
- "RUParaPhraserSTS": 73.56,
- "RuSTSBenchmarkSTS": 82.26,
- "STS22 (ru)": 63.39,
- "STSBenchmarkMultilingualSTS (ru)": 81.81
+ "BIOSSES": 77.26,
+ "CDSC-R": 74.23,
+ "GermanSTSBenchmark": 60.5,
+ "RUParaPhraserSTS": 73.07,
+ "RuSTSBenchmarkSTS": 81.91,
+ "SICK-R": 68.34,
+ "SICK-R-PL": 54.01,
+ "SICKFr": 60.8,
+ "STS12": 66.31,
+ "STS13": 68.77,
+ "STS14": 67.14,
+ "STS15": 79.88,
+ "STS16": 73.39,
+ "STS17 (fr-en)": 32.17,
+ "STS17 (nl-en)": 39.12,
+ "STS17 (it-en)": 24.74,
+ "STS17 (en-ar)": 5.67,
+ "STS17 (ko-ko)": 21.1,
+ "STS17 (en-tr)": 14.19,
+ "STS17 (en-en)": 81.88,
+ "STS17 (en-de)": 25.18,
+ "STS17 (es-en)": 29.2,
+ "STS17 (es-es)": 68.94,
+ "STS17 (ar-ar)": 32.74,
+ "STS22 (ru)": 68.06,
+ "STS22 (pl)": 24.74,
+ "STS22 (fr)": 64.37,
+ "STS22 (fr-pl)": 39.44,
+ "STS22 (pl-en)": 66.72,
+ "STS22 (tr)": 45.0,
+ "STS22 (de-pl)": 4.65,
+ "STS22 (en)": 61.64,
+ "STS22 (de-en)": 46.23,
+ "STS22 (de-fr)": 44.72,
+ "STS22 (es-en)": 60.25,
+ "STS22 (ar)": 15.45,
+ "STS22 (de)": 18.13,
+ "STS22 (it)": 50.79,
+ "STS22 (zh)": 15.1,
+ "STS22 (es-it)": 53.02,
+ "STS22 (es)": 56.85,
+ "STS22 (zh-en)": 26.88,
+ "STSB": 21.81,
+ "STSBenchmark": 76.03,
+ "STSBenchmarkMultilingualSTS (ru)": 81.81,
+ "STSBenchmarkMultilingualSTS (es)": 59.75,
+ "STSBenchmarkMultilingualSTS (en)": 76.03,
+ "STSBenchmarkMultilingualSTS (de)": 62.5,
+ "STSBenchmarkMultilingualSTS (nl)": 56.71,
+ "STSBenchmarkMultilingualSTS (zh)": 21.78,
+ "STSBenchmarkMultilingualSTS (pl)": 57.09,
+ "STSBenchmarkMultilingualSTS (pt)": 55.18,
+ "STSBenchmarkMultilingualSTS (it)": 57.98,
+ "STSBenchmarkMultilingualSTS (fr)": 61.91
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "USER-base",
+ "SummEval": 32.46,
+ "SummEvalFr": 31.02
+ },
+ {
+ "Model": "USER-base",
+ "SummEval": 32.46,
+ "SummEvalFr": 31.02
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
{
"Model": "USER-base",
- "CEDRClassification": 46.47,
- "SensitiveTopicsClassification": 27.5
+ "CEDRClassification": 46.78,
+ "SensitiveTopicsClassification": 28.65
}
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "USER-base",
+ "Core17InstructionRetrieval": -1.42,
+ "News21InstructionRetrieval": 1.51,
+ "Robust04InstructionRetrieval": -5.96
+ }
+ ]
}
},
"deepvk__USER-bge-m3": {
@@ -6000,6 +16895,7 @@
"f1": [
{
"Model": "USER-bge-m3",
+ "BornholmBitextMining": 42.44,
"Tatoeba (rus-eng)": 93.52
}
]
@@ -6008,15 +16904,169 @@
"accuracy": [
{
"Model": "USER-bge-m3",
+ "AllegroReviews": 48.2,
+ "AmazonCounterfactualClassification (en-ext)": 73.79,
+ "AmazonCounterfactualClassification (en)": 74.21,
+ "AmazonCounterfactualClassification (de)": 67.93,
+ "AmazonCounterfactualClassification (ja)": 52.86,
+ "AmazonPolarityClassification": 93.62,
+ "AmazonReviewsClassification (en)": 48.69,
+ "AmazonReviewsClassification (de)": 45.56,
+ "AmazonReviewsClassification (es)": 45.35,
+ "AmazonReviewsClassification (fr)": 45.16,
+ "AmazonReviewsClassification (ja)": 22.27,
+ "AmazonReviewsClassification (zh)": 21.58,
+ "AngryTweetsClassification": 56.91,
+ "Banking77Classification": 81.15,
+ "CBD": 64.42,
+ "DanishPoliticalCommentsClassification": 37.99,
+ "EmotionClassification": 50.25,
"GeoreviewClassification": 50.98,
"HeadlineClassification": 70.09,
+ "ImdbClassification": 90.88,
"InappropriatenessClassification": 60.76,
"KinopoiskClassification": 63.33,
+ "LccSentimentClassification": 63.0,
+ "MTOPDomainClassification (en)": 92.44,
+ "MTOPDomainClassification (de)": 86.55,
+ "MTOPDomainClassification (es)": 89.57,
+ "MTOPDomainClassification (fr)": 85.44,
+ "MTOPDomainClassification (hi)": 21.23,
+ "MTOPDomainClassification (th)": 16.21,
+ "MTOPIntentClassification (en)": 65.61,
+ "MTOPIntentClassification (de)": 60.79,
+ "MTOPIntentClassification (es)": 65.95,
+ "MTOPIntentClassification (fr)": 58.59,
+ "MTOPIntentClassification (hi)": 4.41,
+ "MTOPIntentClassification (th)": 5.32,
+ "MasakhaNEWSClassification (amh)": 39.23,
+ "MasakhaNEWSClassification (eng)": 79.93,
+ "MasakhaNEWSClassification (fra)": 77.54,
+ "MasakhaNEWSClassification (hau)": 76.7,
+ "MasakhaNEWSClassification (ibo)": 61.15,
+ "MasakhaNEWSClassification (lin)": 72.11,
+ "MasakhaNEWSClassification (lug)": 65.52,
+ "MasakhaNEWSClassification (orm)": 75.63,
+ "MasakhaNEWSClassification (pcm)": 91.51,
+ "MasakhaNEWSClassification (run)": 76.06,
+ "MasakhaNEWSClassification (sna)": 84.77,
+ "MasakhaNEWSClassification (som)": 62.55,
+ "MasakhaNEWSClassification (swa)": 71.85,
+ "MasakhaNEWSClassification (tir)": 29.45,
+ "MasakhaNEWSClassification (xho)": 77.74,
+ "MasakhaNEWSClassification (yor)": 79.29,
"MassiveIntentClassification (ru)": 68.85,
+ "MassiveIntentClassification (zh-CN)": 3.03,
+ "MassiveIntentClassification (vi)": 34.1,
+ "MassiveIntentClassification (pl)": 58.31,
+ "MassiveIntentClassification (pt)": 62.67,
+ "MassiveIntentClassification (mn)": 40.26,
+ "MassiveIntentClassification (ms)": 54.77,
+ "MassiveIntentClassification (sq)": 51.26,
+ "MassiveIntentClassification (ro)": 58.32,
+ "MassiveIntentClassification (ml)": 2.74,
+ "MassiveIntentClassification (tr)": 53.8,
+ "MassiveIntentClassification (nb)": 61.11,
+ "MassiveIntentClassification (zh-TW)": 4.62,
+ "MassiveIntentClassification (is)": 45.99,
+ "MassiveIntentClassification (fr)": 64.25,
+ "MassiveIntentClassification (ka)": 7.12,
+ "MassiveIntentClassification (ko)": 2.91,
+ "MassiveIntentClassification (ar)": 5.53,
+ "MassiveIntentClassification (cy)": 38.19,
+ "MassiveIntentClassification (am)": 2.37,
+ "MassiveIntentClassification (hu)": 40.52,
+ "MassiveIntentClassification (hy)": 3.18,
+ "MassiveIntentClassification (de)": 60.56,
+ "MassiveIntentClassification (ta)": 2.97,
+ "MassiveIntentClassification (km)": 5.39,
+ "MassiveIntentClassification (my)": 4.28,
+ "MassiveIntentClassification (es)": 63.18,
+ "MassiveIntentClassification (ur)": 3.34,
+ "MassiveIntentClassification (sv)": 61.69,
+ "MassiveIntentClassification (en)": 70.67,
+ "MassiveIntentClassification (te)": 2.17,
+ "MassiveIntentClassification (af)": 53.29,
+ "MassiveIntentClassification (az)": 44.21,
+ "MassiveIntentClassification (el)": 27.53,
+ "MassiveIntentClassification (jv)": 42.42,
+ "MassiveIntentClassification (lv)": 45.12,
+ "MassiveIntentClassification (sw)": 44.4,
+ "MassiveIntentClassification (fa)": 3.65,
+ "MassiveIntentClassification (it)": 63.58,
+ "MassiveIntentClassification (fi)": 56.34,
+ "MassiveIntentClassification (da)": 62.24,
+ "MassiveIntentClassification (he)": 3.06,
+ "MassiveIntentClassification (id)": 58.18,
+ "MassiveIntentClassification (kn)": 3.71,
+ "MassiveIntentClassification (th)": 4.07,
+ "MassiveIntentClassification (tl)": 47.04,
+ "MassiveIntentClassification (hi)": 3.78,
+ "MassiveIntentClassification (ja)": 2.44,
+ "MassiveIntentClassification (sl)": 56.11,
+ "MassiveIntentClassification (bn)": 3.05,
+ "MassiveIntentClassification (nl)": 59.3,
"MassiveScenarioClassification (ru)": 72.9,
+ "MassiveScenarioClassification (de)": 67.22,
+ "MassiveScenarioClassification (fr)": 69.41,
+ "MassiveScenarioClassification (id)": 63.48,
+ "MassiveScenarioClassification (nl)": 65.78,
+ "MassiveScenarioClassification (cy)": 43.85,
+ "MassiveScenarioClassification (my)": 9.36,
+ "MassiveScenarioClassification (ms)": 60.81,
+ "MassiveScenarioClassification (lv)": 50.58,
+ "MassiveScenarioClassification (sq)": 59.86,
+ "MassiveScenarioClassification (ro)": 65.69,
+ "MassiveScenarioClassification (sv)": 68.53,
+ "MassiveScenarioClassification (tl)": 56.47,
+ "MassiveScenarioClassification (ur)": 9.41,
+ "MassiveScenarioClassification (zh-CN)": 7.85,
+ "MassiveScenarioClassification (jv)": 50.85,
+ "MassiveScenarioClassification (hi)": 7.6,
+ "MassiveScenarioClassification (pl)": 61.86,
+ "MassiveScenarioClassification (ar)": 12.98,
+ "MassiveScenarioClassification (vi)": 38.48,
+ "MassiveScenarioClassification (is)": 54.05,
+ "MassiveScenarioClassification (hy)": 9.5,
+ "MassiveScenarioClassification (th)": 9.23,
+ "MassiveScenarioClassification (te)": 7.39,
+ "MassiveScenarioClassification (ta)": 7.62,
+ "MassiveScenarioClassification (fi)": 59.7,
+ "MassiveScenarioClassification (am)": 6.71,
+ "MassiveScenarioClassification (fa)": 8.4,
+ "MassiveScenarioClassification (az)": 50.34,
+ "MassiveScenarioClassification (el)": 34.63,
+ "MassiveScenarioClassification (ja)": 7.51,
+ "MassiveScenarioClassification (es)": 67.88,
+ "MassiveScenarioClassification (he)": 7.7,
+ "MassiveScenarioClassification (kn)": 8.32,
+ "MassiveScenarioClassification (af)": 60.7,
+ "MassiveScenarioClassification (pt)": 65.38,
+ "MassiveScenarioClassification (sl)": 61.13,
+ "MassiveScenarioClassification (da)": 69.64,
+ "MassiveScenarioClassification (nb)": 67.6,
+ "MassiveScenarioClassification (tr)": 60.02,
+ "MassiveScenarioClassification (km)": 9.61,
+ "MassiveScenarioClassification (it)": 67.92,
+ "MassiveScenarioClassification (ko)": 6.81,
+ "MassiveScenarioClassification (en)": 75.85,
+ "MassiveScenarioClassification (mn)": 45.93,
+ "MassiveScenarioClassification (sw)": 52.36,
+ "MassiveScenarioClassification (ml)": 7.55,
+ "MassiveScenarioClassification (zh-TW)": 8.71,
+ "MassiveScenarioClassification (hu)": 47.43,
+ "MassiveScenarioClassification (ka)": 13.81,
+ "MassiveScenarioClassification (bn)": 8.65,
+ "NoRecClassification": 54.06,
+ "NordicLangClassification": 40.53,
+ "PAC": 67.58,
+ "PolEmo2.0-IN": 75.48,
+ "PolEmo2.0-OUT": 50.0,
"RuReviewsClassification": 68.52,
"RuSciBenchGRNTIClassification": 57.67,
- "RuSciBenchOECDClassification": 44.2
+ "RuSciBenchOECDClassification": 44.2,
+ "ToxicConversationsClassification": 69.19,
+ "TweetSentimentExtractionClassification": 63.88
}
]
},
@@ -6024,9 +17074,67 @@
"v_measure": [
{
"Model": "USER-bge-m3",
- "GeoreviewClusteringP2P": 62.79,
+ "AlloProfClusteringP2P": 58.28,
+ "AlloProfClusteringS2S": 37.82,
+ "ArxivClusteringP2P": 39.68,
+ "ArxivClusteringS2S": 26.76,
+ "BiorxivClusteringP2P": 33.43,
+ "BiorxivClusteringS2S": 25.01,
+ "BlurbsClusteringP2P": 38.7,
+ "BlurbsClusteringS2S": 15.58,
+ "GeoreviewClusteringP2P": 62.94,
+ "HALClusteringS2S": 23.72,
+ "MLSUMClusteringP2P (de)": 42.31,
+ "MLSUMClusteringP2P (fr)": 43.76,
+ "MLSUMClusteringP2P (ru)": 45.48,
+ "MLSUMClusteringP2P (es)": 46.2,
+ "MLSUMClusteringS2S (de)": 40.27,
+ "MLSUMClusteringS2S (fr)": 43.92,
+ "MLSUMClusteringS2S (ru)": 45.53,
+ "MLSUMClusteringS2S (es)": 45.94,
+ "MasakhaNEWSClusteringP2P (amh)": 42.38,
+ "MasakhaNEWSClusteringP2P (eng)": 55.13,
+ "MasakhaNEWSClusteringP2P (fra)": 48.26,
+ "MasakhaNEWSClusteringP2P (hau)": 61.96,
+ "MasakhaNEWSClusteringP2P (ibo)": 49.7,
+ "MasakhaNEWSClusteringP2P (lin)": 60.62,
+ "MasakhaNEWSClusteringP2P (lug)": 53.66,
+ "MasakhaNEWSClusteringP2P (orm)": 30.62,
+ "MasakhaNEWSClusteringP2P (pcm)": 73.49,
+ "MasakhaNEWSClusteringP2P (run)": 51.27,
+ "MasakhaNEWSClusteringP2P (sna)": 58.8,
+ "MasakhaNEWSClusteringP2P (som)": 38.46,
+ "MasakhaNEWSClusteringP2P (swa)": 27.52,
+ "MasakhaNEWSClusteringP2P (tir)": 43.84,
+ "MasakhaNEWSClusteringP2P (xho)": 40.83,
+ "MasakhaNEWSClusteringP2P (yor)": 31.35,
+ "MasakhaNEWSClusteringS2S (amh)": 45.04,
+ "MasakhaNEWSClusteringS2S (eng)": 21.37,
+ "MasakhaNEWSClusteringS2S (fra)": 37.36,
+ "MasakhaNEWSClusteringS2S (hau)": 31.24,
+ "MasakhaNEWSClusteringS2S (ibo)": 35.78,
+ "MasakhaNEWSClusteringS2S (lin)": 47.57,
+ "MasakhaNEWSClusteringS2S (lug)": 43.93,
+ "MasakhaNEWSClusteringS2S (orm)": 24.82,
+ "MasakhaNEWSClusteringS2S (pcm)": 74.85,
+ "MasakhaNEWSClusteringS2S (run)": 47.56,
+ "MasakhaNEWSClusteringS2S (sna)": 46.45,
+ "MasakhaNEWSClusteringS2S (som)": 30.07,
+ "MasakhaNEWSClusteringS2S (swa)": 27.98,
+ "MasakhaNEWSClusteringS2S (tir)": 43.77,
+ "MasakhaNEWSClusteringS2S (xho)": 33.9,
+ "MasakhaNEWSClusteringS2S (yor)": 27.7,
+ "MedrxivClusteringP2P": 30.53,
+ "MedrxivClusteringS2S": 27.04,
+ "RedditClustering": 46.3,
+ "RedditClusteringP2P": 56.92,
"RuSciBenchGRNTIClusteringP2P": 53.11,
- "RuSciBenchOECDClusteringP2P": 44.93
+ "RuSciBenchOECDClusteringP2P": 44.93,
+ "StackExchangeClustering": 55.22,
+ "StackExchangeClusteringP2P": 32.11,
+ "TenKGnadClusteringP2P": 42.3,
+ "TenKGnadClusteringS2S": 22.82,
+ "TwentyNewsgroupsClustering": 40.51
}
]
},
@@ -6034,13 +17142,51 @@
"max_ap": [
{
"Model": "USER-bge-m3",
+ "CDSC-E": 73.41,
+ "FalseFriendsGermanEnglish": 50.01,
"OpusparcusPC (ru)": 90.73,
- "TERRa": 64.99
+ "OpusparcusPC (de)": 94.76,
+ "OpusparcusPC (en)": 98.87,
+ "OpusparcusPC (fi)": 89.88,
+ "OpusparcusPC (fr)": 91.78,
+ "OpusparcusPC (sv)": 92.17,
+ "PSC": 99.13,
+ "PawsXPairClassification (de)": 57.78,
+ "PawsXPairClassification (en)": 66.16,
+ "PawsXPairClassification (es)": 58.62,
+ "PawsXPairClassification (fr)": 60.62,
+ "PawsXPairClassification (ja)": 48.05,
+ "PawsXPairClassification (ko)": 48.83,
+ "PawsXPairClassification (zh)": 48.72,
+ "SICK-E-PL": 70.91,
+ "SprintDuplicateQuestions": 97.24,
+ "TERRa": 64.99,
+ "TwitterSemEval2015": 71.7,
+ "TwitterURLCorpus": 85.71
},
{
"Model": "USER-bge-m3",
+ "CDSC-E": 73.41,
+ "FalseFriendsGermanEnglish": 50.01,
"OpusparcusPC (ru)": 90.73,
- "TERRa": 65.07
+ "OpusparcusPC (de)": 94.76,
+ "OpusparcusPC (en)": 98.88,
+ "OpusparcusPC (fi)": 89.9,
+ "OpusparcusPC (fr)": 91.78,
+ "OpusparcusPC (sv)": 92.17,
+ "PSC": 99.13,
+ "PawsXPairClassification (de)": 58.27,
+ "PawsXPairClassification (en)": 66.22,
+ "PawsXPairClassification (es)": 58.68,
+ "PawsXPairClassification (fr)": 60.71,
+ "PawsXPairClassification (ja)": 48.4,
+ "PawsXPairClassification (ko)": 48.83,
+ "PawsXPairClassification (zh)": 49.02,
+ "SICK-E-PL": 70.91,
+ "SprintDuplicateQuestions": 97.24,
+ "TERRa": 65.07,
+ "TwitterSemEval2015": 71.7,
+ "TwitterURLCorpus": 85.71
}
]
},
@@ -6048,11 +17194,18 @@
"map": [
{
"Model": "USER-bge-m3",
- "MIRACLReranking (ru)": 64.35
+ "AlloprofReranking": 73.25,
+ "AskUbuntuDupQuestions": 61.89,
+ "MindSmallReranking": 31.09,
+ "RuBQReranking": 73.08,
+ "SciDocsRR": 78.23,
+ "StackOverflowDupQuestions": 51.19,
+ "SyntecReranking": 85.83,
+ "T2Reranking": 56.97
},
{
"Model": "USER-bge-m3",
- "RuBQReranking": 73.08
+ "MIRACLReranking (ru)": 64.35
}
]
},
@@ -6060,9 +17213,113 @@
"ndcg_at_10": [
{
"Model": "USER-bge-m3",
+ "AILACasedocs": 34.69,
+ "AILAStatutes": 26.69,
+ "AlloprofRetrieval": 46.17,
+ "AppsRetrieval": 14.17,
+ "ArguAna": 49.95,
+ "BSARDRetrieval": 16.57,
+ "ClimateFEVER": 25.9,
+ "CmedqaRetrieval": 1.26,
+ "CodeFeedbackMT": 46.59,
+ "CodeFeedbackST": 69.42,
+ "CodeSearchNetCCRetrieval (python)": 57.2,
+ "CodeSearchNetCCRetrieval (javascript)": 56.58,
+ "CodeSearchNetCCRetrieval (go)": 45.59,
+ "CodeSearchNetCCRetrieval (ruby)": 56.2,
+ "CodeSearchNetCCRetrieval (java)": 51.14,
+ "CodeSearchNetCCRetrieval (php)": 40.48,
+ "CodeSearchNetRetrieval (python)": 83.58,
+ "CodeSearchNetRetrieval (javascript)": 70.26,
+ "CodeSearchNetRetrieval (go)": 85.71,
+ "CodeSearchNetRetrieval (ruby)": 72.3,
+ "CodeSearchNetRetrieval (java)": 63.08,
+ "CodeSearchNetRetrieval (php)": 76.48,
+ "CodeTransOceanContest": 65.55,
+ "CodeTransOceanDL": 29.16,
+ "CosQA": 28.36,
+ "CovidRetrieval": 0.12,
+ "DBPedia": 38.22,
+ "FEVER": 73.08,
+ "FiQA2018": 40.8,
+ "GerDaLIR": 11.12,
+ "GerDaLIRSmall": 25.79,
+ "GermanQuAD-Retrieval": 94.78,
+ "HotpotQA": 64.32,
+ "LEMBNarrativeQARetrieval": 45.78,
+ "LEMBQMSumRetrieval": 37.22,
+ "LEMBSummScreenFDRetrieval": 93.67,
+ "LEMBWikimQARetrieval": 78.06,
+ "LeCaRDv2": 14.3,
+ "LegalBenchConsumerContractsQA": 78.5,
+ "LegalBenchCorporateLobbying": 90.59,
+ "LegalQuAD": 45.86,
+ "LegalSummarization": 62.78,
"MIRACLRetrieval (ru)": 67.33,
+ "MintakaRetrieval (ar)": 0.51,
+ "MintakaRetrieval (de)": 24.22,
+ "MintakaRetrieval (es)": 23.68,
+ "MintakaRetrieval (fr)": 24.43,
+ "MintakaRetrieval (hi)": 0.87,
+ "MintakaRetrieval (it)": 24.29,
+ "MintakaRetrieval (ja)": 1.52,
+ "MintakaRetrieval (pt)": 24.31,
+ "NFCorpus": 31.29,
+ "NQ": 58.9,
+ "QuoraRetrieval": 87.95,
"RiaNewsRetrieval": 83.53,
- "RuBQRetrieval": 70.03
+ "RuBQRetrieval": 70.03,
+ "SCIDOCS": 16.15,
+ "SciFact": 62.43,
+ "SciFact-PL": 52.67,
+ "SpartQA": 7.58,
+ "StackOverflowQA": 79.29,
+ "SyntecRetrieval": 83.99,
+ "SyntheticText2SQL": 45.44,
+ "TRECCOVID": 51.02,
+ "TRECCOVID-PL": 38.22,
+ "TempReasonL1": 0.9,
+ "Touche2020": 18.96,
+ "WinoGrande": 32.79,
+ "XMarket (de)": 11.45,
+ "XMarket (en)": 12.49,
+ "XMarket (es)": 14.29,
+ "XPQARetrieval (ara-ara)": 6.08,
+ "XPQARetrieval (eng-ara)": 4.76,
+ "XPQARetrieval (ara-eng)": 10.47,
+ "XPQARetrieval (deu-deu)": 69.1,
+ "XPQARetrieval (eng-deu)": 42.06,
+ "XPQARetrieval (deu-eng)": 60.33,
+ "XPQARetrieval (spa-spa)": 57.54,
+ "XPQARetrieval (eng-spa)": 36.34,
+ "XPQARetrieval (spa-eng)": 49.39,
+ "XPQARetrieval (fra-fra)": 65.52,
+ "XPQARetrieval (eng-fra)": 37.81,
+ "XPQARetrieval (fra-eng)": 56.03,
+ "XPQARetrieval (hin-hin)": 8.22,
+ "XPQARetrieval (eng-hin)": 6.53,
+ "XPQARetrieval (hin-eng)": 8.65,
+ "XPQARetrieval (ita-ita)": 71.13,
+ "XPQARetrieval (eng-ita)": 37.85,
+ "XPQARetrieval (ita-eng)": 57.46,
+ "XPQARetrieval (jpn-jpn)": 16.58,
+ "XPQARetrieval (eng-jpn)": 8.46,
+ "XPQARetrieval (jpn-eng)": 16.92,
+ "XPQARetrieval (kor-kor)": 6.5,
+ "XPQARetrieval (eng-kor)": 10.44,
+ "XPQARetrieval (kor-eng)": 7.53,
+ "XPQARetrieval (pol-pol)": 41.18,
+ "XPQARetrieval (eng-pol)": 25.85,
+ "XPQARetrieval (pol-eng)": 35.92,
+ "XPQARetrieval (por-por)": 43.92,
+ "XPQARetrieval (eng-por)": 26.94,
+ "XPQARetrieval (por-eng)": 39.11,
+ "XPQARetrieval (tam-tam)": 3.6,
+ "XPQARetrieval (eng-tam)": 5.88,
+ "XPQARetrieval (tam-eng)": 5.11,
+ "XPQARetrieval (cmn-cmn)": 13.98,
+ "XPQARetrieval (eng-cmn)": 6.93,
+ "XPQARetrieval (cmn-eng)": 14.13
}
]
},
@@ -6070,27 +17327,149 @@
"cosine_spearman": [
{
"Model": "USER-bge-m3",
+ "BIOSSES": 82.26,
+ "CDSC-R": 87.11,
+ "GermanSTSBenchmark": 79.99,
+ "SICK-R": 81.0,
+ "SICK-R-PL": 68.1,
+ "SICKFr": 77.52,
+ "STS12": 81.09,
+ "STS13": 83.85,
+ "STS14": 82.11,
+ "STS15": 89.0,
+ "STS16": 85.8,
+ "STS17 (en-de)": 79.44,
+ "STS17 (es-en)": 73.1,
+ "STS17 (en-tr)": 64.37,
+ "STS17 (nl-en)": 73.16,
+ "STS17 (it-en)": 75.26,
+ "STS17 (en-en)": 87.18,
+ "STS17 (fr-en)": 75.33,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (es-es)": 85.32,
+ "STS17 (ar-ar)": 19.32,
+ "STS17 (ko-ko)": 9.41,
+ "STS22 (ru)": 70.55,
+ "STS22 (pl-en)": 74.88,
+ "STS22 (tr)": 66.46,
+ "STS22 (zh-en)": 17.82,
+ "STS22 (es-en)": 79.25,
+ "STS22 (fr)": 82.53,
+ "STS22 (es)": 70.65,
+ "STS22 (zh)": 37.88,
+ "STS22 (de-pl)": 58.48,
+ "STS22 (de)": 61.46,
+ "STS22 (de-en)": 58.35,
+ "STS22 (it)": 81.45,
+ "STS22 (ar)": 33.34,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-it)": 74.77,
+ "STS22 (pl)": 40.59,
+ "STS22 (en)": 68.08,
+ "STS22 (de-fr)": 61.09,
+ "STSB": 0.87,
+ "STSBenchmark": 86.85,
+ "STSBenchmarkMultilingualSTS (it)": 81.62,
+ "STSBenchmarkMultilingualSTS (de)": 80.64,
+ "STSBenchmarkMultilingualSTS (es)": 81.15,
+ "STSBenchmarkMultilingualSTS (nl)": 78.03,
+ "STSBenchmarkMultilingualSTS (pl)": 73.68,
+ "STSBenchmarkMultilingualSTS (pt)": 79.64,
+ "STSBenchmarkMultilingualSTS (fr)": 81.74,
+ "STSBenchmarkMultilingualSTS (en)": 86.85,
+ "STSBenchmarkMultilingualSTS (zh)": 1.42
+ },
+ {
+ "Model": "USER-bge-m3",
+ "BIOSSES": 82.26,
+ "CDSC-R": 87.11,
+ "GermanSTSBenchmark": 79.99,
"RUParaPhraserSTS": 76.36,
"RuSTSBenchmarkSTS": 83.35,
- "STS22 (ru)": 66.42,
- "STSBenchmarkMultilingualSTS (ru)": 82.96
+ "SICK-R": 81.0,
+ "SICK-R-PL": 68.1,
+ "SICKFr": 77.53,
+ "STS12": 81.1,
+ "STS13": 83.85,
+ "STS14": 82.11,
+ "STS15": 89.0,
+ "STS16": 85.8,
+ "STS17 (en-de)": 79.44,
+ "STS17 (es-en)": 73.1,
+ "STS17 (en-tr)": 64.37,
+ "STS17 (nl-en)": 73.16,
+ "STS17 (it-en)": 75.26,
+ "STS17 (en-en)": 87.18,
+ "STS17 (fr-en)": 75.33,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (es-es)": 85.32,
+ "STS17 (ar-ar)": 19.24,
+ "STS17 (ko-ko)": 9.22,
+ "STS22 (ru)": 70.55,
+ "STS22 (pl-en)": 74.88,
+ "STS22 (tr)": 66.46,
+ "STS22 (zh-en)": 17.82,
+ "STS22 (es-en)": 79.25,
+ "STS22 (fr)": 82.53,
+ "STS22 (es)": 70.65,
+ "STS22 (zh)": 37.88,
+ "STS22 (de-pl)": 58.48,
+ "STS22 (de)": 61.47,
+ "STS22 (de-en)": 58.35,
+ "STS22 (it)": 81.45,
+ "STS22 (ar)": 33.36,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (es-it)": 74.77,
+ "STS22 (pl)": 40.49,
+ "STS22 (en)": 68.08,
+ "STS22 (de-fr)": 61.09,
+ "STSB": 4.61,
+ "STSBenchmark": 86.85,
+ "STSBenchmarkMultilingualSTS (ru)": 82.96,
+ "STSBenchmarkMultilingualSTS (it)": 81.62,
+ "STSBenchmarkMultilingualSTS (de)": 80.64,
+ "STSBenchmarkMultilingualSTS (es)": 81.15,
+ "STSBenchmarkMultilingualSTS (nl)": 78.03,
+ "STSBenchmarkMultilingualSTS (pl)": 73.68,
+ "STSBenchmarkMultilingualSTS (pt)": 79.65,
+ "STSBenchmarkMultilingualSTS (fr)": 81.74,
+ "STSBenchmarkMultilingualSTS (en)": 86.85,
+ "STSBenchmarkMultilingualSTS (zh)": 5.27
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "USER-bge-m3",
+ "SummEval": 31.82,
+ "SummEvalFr": 29.94
+ },
+ {
+ "Model": "USER-bge-m3",
+ "SummEval": 31.82,
+ "SummEvalFr": 29.94
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
{
"Model": "USER-bge-m3",
"CEDRClassification": 45.48,
- "SensitiveTopicsClassification": 26.29
+ "SensitiveTopicsClassification": 26.88
}
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "USER-bge-m3",
+ "Core17InstructionRetrieval": 0.04,
+ "News21InstructionRetrieval": -0.93,
+ "Robust04InstructionRetrieval": -7.07
+ }
+ ]
}
},
"deepvk__deberta-v1-base": {
@@ -6098,6 +17477,7 @@
"f1": [
{
"Model": "deberta-v1-base",
+ "BornholmBitextMining": 14.11,
"Tatoeba (rus-eng)": 13.21
}
]
@@ -6106,15 +17486,169 @@
"accuracy": [
{
"Model": "deberta-v1-base",
+ "AllegroReviews": 23.34,
+ "AmazonCounterfactualClassification (en-ext)": 65.06,
+ "AmazonCounterfactualClassification (en)": 64.33,
+ "AmazonCounterfactualClassification (de)": 58.92,
+ "AmazonCounterfactualClassification (ja)": 56.77,
+ "AmazonPolarityClassification": 57.15,
+ "AmazonReviewsClassification (en)": 26.81,
+ "AmazonReviewsClassification (de)": 24.26,
+ "AmazonReviewsClassification (es)": 24.58,
+ "AmazonReviewsClassification (fr)": 25.23,
+ "AmazonReviewsClassification (ja)": 21.08,
+ "AmazonReviewsClassification (zh)": 23.14,
+ "AngryTweetsClassification": 42.98,
+ "Banking77Classification": 54.06,
+ "CBD": 50.33,
+ "DanishPoliticalCommentsClassification": 28.36,
+ "EmotionClassification": 20.53,
"GeoreviewClassification": 40.19,
"HeadlineClassification": 78.75,
+ "ImdbClassification": 52.62,
"InappropriatenessClassification": 61.33,
"KinopoiskClassification": 48.78,
+ "LccSentimentClassification": 40.87,
+ "MTOPDomainClassification (en)": 66.39,
+ "MTOPDomainClassification (de)": 49.69,
+ "MTOPDomainClassification (es)": 56.75,
+ "MTOPDomainClassification (fr)": 49.63,
+ "MTOPDomainClassification (hi)": 32.99,
+ "MTOPDomainClassification (th)": 33.1,
+ "MTOPIntentClassification (en)": 52.32,
+ "MTOPIntentClassification (de)": 39.95,
+ "MTOPIntentClassification (es)": 42.92,
+ "MTOPIntentClassification (fr)": 36.76,
+ "MTOPIntentClassification (hi)": 15.57,
+ "MTOPIntentClassification (th)": 19.52,
+ "MasakhaNEWSClassification (amh)": 32.34,
+ "MasakhaNEWSClassification (eng)": 58.89,
+ "MasakhaNEWSClassification (fra)": 48.89,
+ "MasakhaNEWSClassification (hau)": 48.87,
+ "MasakhaNEWSClassification (ibo)": 41.56,
+ "MasakhaNEWSClassification (lin)": 48.97,
+ "MasakhaNEWSClassification (lug)": 45.87,
+ "MasakhaNEWSClassification (orm)": 46.12,
+ "MasakhaNEWSClassification (pcm)": 74.79,
+ "MasakhaNEWSClassification (run)": 39.16,
+ "MasakhaNEWSClassification (sna)": 63.93,
+ "MasakhaNEWSClassification (som)": 29.29,
+ "MasakhaNEWSClassification (swa)": 37.96,
+ "MasakhaNEWSClassification (tir)": 22.76,
+ "MasakhaNEWSClassification (xho)": 59.43,
+ "MasakhaNEWSClassification (yor)": 45.74,
"MassiveIntentClassification (ru)": 61.32,
+ "MassiveIntentClassification (nb)": 31.01,
+ "MassiveIntentClassification (sw)": 33.17,
+ "MassiveIntentClassification (zh-CN)": 26.9,
+ "MassiveIntentClassification (ko)": 23.74,
+ "MassiveIntentClassification (ar)": 22.52,
+ "MassiveIntentClassification (te)": 12.72,
+ "MassiveIntentClassification (mn)": 29.22,
+ "MassiveIntentClassification (id)": 35.91,
+ "MassiveIntentClassification (is)": 29.9,
+ "MassiveIntentClassification (ml)": 17.03,
+ "MassiveIntentClassification (ka)": 15.38,
+ "MassiveIntentClassification (af)": 31.58,
+ "MassiveIntentClassification (tl)": 32.28,
+ "MassiveIntentClassification (nl)": 33.04,
+ "MassiveIntentClassification (zh-TW)": 24.87,
+ "MassiveIntentClassification (hy)": 14.31,
+ "MassiveIntentClassification (it)": 35.84,
+ "MassiveIntentClassification (es)": 30.09,
+ "MassiveIntentClassification (he)": 15.96,
+ "MassiveIntentClassification (lv)": 31.19,
+ "MassiveIntentClassification (sq)": 34.43,
+ "MassiveIntentClassification (ur)": 15.68,
+ "MassiveIntentClassification (bn)": 13.67,
+ "MassiveIntentClassification (my)": 11.9,
+ "MassiveIntentClassification (sl)": 33.48,
+ "MassiveIntentClassification (en)": 44.81,
+ "MassiveIntentClassification (fr)": 31.26,
+ "MassiveIntentClassification (hi)": 13.06,
+ "MassiveIntentClassification (am)": 15.42,
+ "MassiveIntentClassification (jv)": 32.3,
+ "MassiveIntentClassification (cy)": 34.92,
+ "MassiveIntentClassification (fa)": 22.87,
+ "MassiveIntentClassification (ta)": 12.99,
+ "MassiveIntentClassification (hu)": 29.21,
+ "MassiveIntentClassification (vi)": 30.6,
+ "MassiveIntentClassification (pt)": 32.59,
+ "MassiveIntentClassification (da)": 33.38,
+ "MassiveIntentClassification (tr)": 28.04,
+ "MassiveIntentClassification (fi)": 30.73,
+ "MassiveIntentClassification (th)": 16.83,
+ "MassiveIntentClassification (ja)": 20.97,
+ "MassiveIntentClassification (sv)": 31.97,
+ "MassiveIntentClassification (ro)": 28.66,
+ "MassiveIntentClassification (az)": 31.04,
+ "MassiveIntentClassification (de)": 31.68,
+ "MassiveIntentClassification (km)": 15.76,
+ "MassiveIntentClassification (ms)": 34.09,
+ "MassiveIntentClassification (kn)": 12.66,
+ "MassiveIntentClassification (pl)": 33.5,
+ "MassiveIntentClassification (el)": 23.2,
"MassiveScenarioClassification (ru)": 64.71,
+ "MassiveScenarioClassification (ko)": 25.21,
+ "MassiveScenarioClassification (vi)": 30.39,
+ "MassiveScenarioClassification (da)": 32.6,
+ "MassiveScenarioClassification (te)": 15.72,
+ "MassiveScenarioClassification (lv)": 29.84,
+ "MassiveScenarioClassification (ja)": 21.65,
+ "MassiveScenarioClassification (nb)": 28.7,
+ "MassiveScenarioClassification (sl)": 34.81,
+ "MassiveScenarioClassification (bn)": 16.64,
+ "MassiveScenarioClassification (sq)": 32.41,
+ "MassiveScenarioClassification (ka)": 16.96,
+ "MassiveScenarioClassification (pt)": 29.57,
+ "MassiveScenarioClassification (he)": 17.7,
+ "MassiveScenarioClassification (jv)": 33.16,
+ "MassiveScenarioClassification (tr)": 28.1,
+ "MassiveScenarioClassification (es)": 29.32,
+ "MassiveScenarioClassification (fa)": 24.25,
+ "MassiveScenarioClassification (mn)": 30.46,
+ "MassiveScenarioClassification (ar)": 24.92,
+ "MassiveScenarioClassification (sv)": 31.37,
+ "MassiveScenarioClassification (hy)": 17.57,
+ "MassiveScenarioClassification (zh-CN)": 29.22,
+ "MassiveScenarioClassification (az)": 31.36,
+ "MassiveScenarioClassification (de)": 32.3,
+ "MassiveScenarioClassification (it)": 35.66,
+ "MassiveScenarioClassification (km)": 20.19,
+ "MassiveScenarioClassification (en)": 45.39,
+ "MassiveScenarioClassification (pl)": 32.46,
+ "MassiveScenarioClassification (th)": 20.26,
+ "MassiveScenarioClassification (nl)": 32.45,
+ "MassiveScenarioClassification (is)": 28.84,
+ "MassiveScenarioClassification (am)": 17.93,
+ "MassiveScenarioClassification (my)": 15.23,
+ "MassiveScenarioClassification (ur)": 19.66,
+ "MassiveScenarioClassification (ms)": 33.73,
+ "MassiveScenarioClassification (kn)": 16.33,
+ "MassiveScenarioClassification (hi)": 16.47,
+ "MassiveScenarioClassification (ml)": 18.71,
+ "MassiveScenarioClassification (sw)": 35.19,
+ "MassiveScenarioClassification (hu)": 29.7,
+ "MassiveScenarioClassification (af)": 31.98,
+ "MassiveScenarioClassification (cy)": 35.27,
+ "MassiveScenarioClassification (fi)": 29.71,
+ "MassiveScenarioClassification (tl)": 31.7,
+ "MassiveScenarioClassification (fr)": 29.73,
+ "MassiveScenarioClassification (id)": 35.47,
+ "MassiveScenarioClassification (zh-TW)": 26.79,
+ "MassiveScenarioClassification (ro)": 28.91,
+ "MassiveScenarioClassification (ta)": 16.85,
+ "MassiveScenarioClassification (el)": 24.88,
+ "NoRecClassification": 38.22,
+ "NordicLangClassification": 57.91,
+ "PAC": 62.79,
+ "PolEmo2.0-IN": 40.42,
+ "PolEmo2.0-OUT": 30.22,
"RuReviewsClassification": 55.66,
"RuSciBenchGRNTIClassification": 53.53,
- "RuSciBenchOECDClassification": 41.34
+ "RuSciBenchOECDClassification": 41.34,
+ "ToxicConversationsClassification": 56.61,
+ "TweetSentimentExtractionClassification": 44.62
}
]
},
@@ -6122,11 +17656,67 @@
"v_measure": [
{
"Model": "deberta-v1-base",
+ "AlloProfClusteringP2P": 32.16,
+ "AlloProfClusteringS2S": 22.88,
+ "ArxivClusteringP2P": 16.05,
+ "ArxivClusteringS2S": 10.67,
+ "BiorxivClusteringP2P": 11.67,
+ "BiorxivClusteringS2S": 5.98,
+ "BlurbsClusteringP2P": 8.62,
+ "BlurbsClusteringS2S": 7.93,
"GeoreviewClusteringP2P": 58.79,
+ "HALClusteringS2S": 3.26,
"MLSUMClusteringP2P (ru)": 47.33,
+ "MLSUMClusteringP2P (de)": 6.45,
+ "MLSUMClusteringP2P (fr)": 24.07,
+ "MLSUMClusteringP2P (es)": 27.72,
"MLSUMClusteringS2S (ru)": 44.6,
+ "MLSUMClusteringS2S (de)": 6.0,
+ "MLSUMClusteringS2S (fr)": 22.71,
+ "MLSUMClusteringS2S (es)": 27.16,
+ "MasakhaNEWSClusteringP2P (amh)": 40.75,
+ "MasakhaNEWSClusteringP2P (eng)": 17.6,
+ "MasakhaNEWSClusteringP2P (fra)": 25.68,
+ "MasakhaNEWSClusteringP2P (hau)": 9.55,
+ "MasakhaNEWSClusteringP2P (ibo)": 25.62,
+ "MasakhaNEWSClusteringP2P (lin)": 47.81,
+ "MasakhaNEWSClusteringP2P (lug)": 49.21,
+ "MasakhaNEWSClusteringP2P (orm)": 25.11,
+ "MasakhaNEWSClusteringP2P (pcm)": 61.04,
+ "MasakhaNEWSClusteringP2P (run)": 41.38,
+ "MasakhaNEWSClusteringP2P (sna)": 44.57,
+ "MasakhaNEWSClusteringP2P (som)": 24.65,
+ "MasakhaNEWSClusteringP2P (swa)": 10.56,
+ "MasakhaNEWSClusteringP2P (tir)": 41.9,
+ "MasakhaNEWSClusteringP2P (xho)": 29.12,
+ "MasakhaNEWSClusteringP2P (yor)": 21.53,
+ "MasakhaNEWSClusteringS2S (amh)": 41.49,
+ "MasakhaNEWSClusteringS2S (eng)": 7.12,
+ "MasakhaNEWSClusteringS2S (fra)": 30.34,
+ "MasakhaNEWSClusteringS2S (hau)": 9.86,
+ "MasakhaNEWSClusteringS2S (ibo)": 23.11,
+ "MasakhaNEWSClusteringS2S (lin)": 47.24,
+ "MasakhaNEWSClusteringS2S (lug)": 43.53,
+ "MasakhaNEWSClusteringS2S (orm)": 21.42,
+ "MasakhaNEWSClusteringS2S (pcm)": 32.22,
+ "MasakhaNEWSClusteringS2S (run)": 44.58,
+ "MasakhaNEWSClusteringS2S (sna)": 41.15,
+ "MasakhaNEWSClusteringS2S (som)": 27.5,
+ "MasakhaNEWSClusteringS2S (swa)": 13.0,
+ "MasakhaNEWSClusteringS2S (tir)": 42.49,
+ "MasakhaNEWSClusteringS2S (xho)": 23.83,
+ "MasakhaNEWSClusteringS2S (yor)": 21.0,
+ "MedrxivClusteringP2P": 18.12,
+ "MedrxivClusteringS2S": 15.2,
+ "RedditClustering": 12.77,
+ "RedditClusteringP2P": 21.5,
"RuSciBenchGRNTIClusteringP2P": 36.66,
- "RuSciBenchOECDClusteringP2P": 33.31
+ "RuSciBenchOECDClusteringP2P": 33.31,
+ "StackExchangeClustering": 20.03,
+ "StackExchangeClusteringP2P": 20.05,
+ "TenKGnadClusteringP2P": 11.94,
+ "TenKGnadClusteringS2S": 5.7,
+ "TwentyNewsgroupsClustering": 11.68
}
]
},
@@ -6134,13 +17724,51 @@
"max_ap": [
{
"Model": "deberta-v1-base",
+ "CDSC-E": 43.0,
+ "FalseFriendsGermanEnglish": 49.28,
"OpusparcusPC (ru)": 83.31,
- "TERRa": 53.78
+ "OpusparcusPC (de)": 87.3,
+ "OpusparcusPC (en)": 93.71,
+ "OpusparcusPC (fi)": 77.56,
+ "OpusparcusPC (fr)": 85.28,
+ "OpusparcusPC (sv)": 77.47,
+ "PSC": 55.97,
+ "PawsXPairClassification (de)": 51.36,
+ "PawsXPairClassification (en)": 51.62,
+ "PawsXPairClassification (es)": 51.5,
+ "PawsXPairClassification (fr)": 54.75,
+ "PawsXPairClassification (ja)": 49.06,
+ "PawsXPairClassification (ko)": 49.3,
+ "PawsXPairClassification (zh)": 54.11,
+ "SICK-E-PL": 43.35,
+ "SprintDuplicateQuestions": 35.51,
+ "TERRa": 53.78,
+ "TwitterSemEval2015": 45.73,
+ "TwitterURLCorpus": 64.83
},
{
"Model": "deberta-v1-base",
+ "CDSC-E": 43.0,
+ "FalseFriendsGermanEnglish": 49.33,
"OpusparcusPC (ru)": 83.69,
- "TERRa": 56.49
+ "OpusparcusPC (de)": 87.56,
+ "OpusparcusPC (en)": 93.83,
+ "OpusparcusPC (fi)": 77.72,
+ "OpusparcusPC (fr)": 85.53,
+ "OpusparcusPC (sv)": 77.85,
+ "PSC": 58.6,
+ "PawsXPairClassification (de)": 51.6,
+ "PawsXPairClassification (en)": 51.62,
+ "PawsXPairClassification (es)": 51.67,
+ "PawsXPairClassification (fr)": 54.82,
+ "PawsXPairClassification (ja)": 49.29,
+ "PawsXPairClassification (ko)": 49.52,
+ "PawsXPairClassification (zh)": 54.14,
+ "SICK-E-PL": 43.35,
+ "SprintDuplicateQuestions": 35.51,
+ "TERRa": 56.49,
+ "TwitterSemEval2015": 45.73,
+ "TwitterURLCorpus": 64.83
}
]
},
@@ -6148,7 +17776,35 @@
"map": [
{
"Model": "deberta-v1-base",
- "RuBQReranking": 34.01
+ "AlloprofReranking": 27.81,
+ "AskUbuntuDupQuestions": 43.92,
+ "MindSmallReranking": 27.66,
+ "RuBQReranking": 34.01,
+ "SciDocsRR": 43.56,
+ "StackOverflowDupQuestions": 31.42,
+ "SyntecReranking": 33.19,
+ "T2Reranking": 50.29
+ },
+ {
+ "Model": "deberta-v1-base",
+ "MIRACLReranking (ar)": 2.44,
+ "MIRACLReranking (bn)": 3.57,
+ "MIRACLReranking (de)": 3.51,
+ "MIRACLReranking (en)": 4.73,
+ "MIRACLReranking (es)": 4.5,
+ "MIRACLReranking (fa)": 3.87,
+ "MIRACLReranking (fi)": 5.18,
+ "MIRACLReranking (fr)": 4.07,
+ "MIRACLReranking (hi)": 4.35,
+ "MIRACLReranking (id)": 4.58,
+ "MIRACLReranking (ja)": 3.08,
+ "MIRACLReranking (ko)": 4.3,
+ "MIRACLReranking (ru)": 13.36,
+ "MIRACLReranking (sw)": 4.92,
+ "MIRACLReranking (te)": 2.02,
+ "MIRACLReranking (th)": 3.48,
+ "MIRACLReranking (yo)": 5.55,
+ "MIRACLReranking (zh)": 2.03
}
]
},
@@ -6156,8 +17812,142 @@
"ndcg_at_10": [
{
"Model": "deberta-v1-base",
+ "AILACasedocs": 6.61,
+ "AILAStatutes": 9.71,
+ "ARCChallenge": 1.06,
+ "AlloprofRetrieval": 0.12,
+ "AlphaNLI": 0.64,
+ "AppsRetrieval": 0.05,
+ "ArguAna": 12.39,
+ "BSARDRetrieval": 0.0,
+ "ClimateFEVER": 0.03,
+ "CmedqaRetrieval": 0.27,
+ "CodeFeedbackMT": 3.15,
+ "CodeFeedbackST": 2.06,
+ "CodeSearchNetCCRetrieval (python)": 2.74,
+ "CodeSearchNetCCRetrieval (javascript)": 11.29,
+ "CodeSearchNetCCRetrieval (go)": 7.66,
+ "CodeSearchNetCCRetrieval (ruby)": 10.7,
+ "CodeSearchNetCCRetrieval (java)": 10.38,
+ "CodeSearchNetCCRetrieval (php)": 4.16,
+ "CodeSearchNetRetrieval (python)": 7.02,
+ "CodeSearchNetRetrieval (javascript)": 4.17,
+ "CodeSearchNetRetrieval (go)": 7.74,
+ "CodeSearchNetRetrieval (ruby)": 4.92,
+ "CodeSearchNetRetrieval (java)": 6.66,
+ "CodeSearchNetRetrieval (php)": 2.69,
+ "CodeTransOceanContest": 5.93,
+ "CodeTransOceanDL": 28.11,
+ "CosQA": 0.16,
+ "CovidRetrieval": 0.0,
+ "DBPedia": 0.16,
+ "FEVER": 0.1,
+ "FiQA2018": 0.32,
+ "GerDaLIR": 0.14,
+ "GerDaLIRSmall": 0.41,
+ "GermanQuAD-Retrieval": 2.67,
+ "HellaSwag": 1.89,
+ "HotpotQA": 0.35,
+ "LEMBNarrativeQARetrieval": 2.01,
+ "LEMBQMSumRetrieval": 5.27,
+ "LEMBSummScreenFDRetrieval": 3.95,
+ "LEMBWikimQARetrieval": 7.79,
+ "LeCaRDv2": 22.77,
+ "LegalBenchConsumerContractsQA": 8.18,
+ "LegalBenchCorporateLobbying": 10.45,
+ "LegalQuAD": 2.11,
+ "LegalSummarization": 13.98,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 0.0,
+ "MIRACLRetrieval (en)": 0.0,
+ "MIRACLRetrieval (es)": 0.0,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 0.24,
+ "MIRACLRetrieval (fr)": 0.0,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 0.0,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.0,
+ "MIRACLRetrieval (ru)": 3.02,
+ "MIRACLRetrieval (sw)": 0.71,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.0,
+ "MIRACLRetrieval (yo)": 0.36,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MintakaRetrieval (ar)": 0.62,
+ "MintakaRetrieval (de)": 0.69,
+ "MintakaRetrieval (es)": 0.75,
+ "MintakaRetrieval (fr)": 1.02,
+ "MintakaRetrieval (hi)": 0.66,
+ "MintakaRetrieval (it)": 0.85,
+ "MintakaRetrieval (ja)": 0.46,
+ "MintakaRetrieval (pt)": 0.61,
+ "NFCorpus": 1.59,
+ "NQ": 0.06,
+ "PIQA": 1.59,
+ "Quail": 0.04,
+ "QuoraRetrieval": 49.21,
+ "RARbCode": 0.0,
+ "RARbMath": 4.04,
"RiaNewsRetrieval": 4.84,
- "RuBQRetrieval": 10.15
+ "RuBQRetrieval": 10.15,
+ "SCIDOCS": 0.18,
+ "SIQA": 0.03,
+ "SciFact": 0.35,
+ "SciFact-PL": 0.14,
+ "SpartQA": 0.28,
+ "StackOverflowQA": 6.63,
+ "SyntecRetrieval": 5.82,
+ "SyntheticText2SQL": 0.76,
+ "TRECCOVID": 3.56,
+ "TRECCOVID-PL": 2.91,
+ "TempReasonL1": 0.16,
+ "TempReasonL2Fact": 0.84,
+ "TempReasonL2Pure": 0.04,
+ "TempReasonL3Fact": 0.93,
+ "TempReasonL3Pure": 0.52,
+ "Touche2020": 0.0,
+ "WinoGrande": 0.03,
+ "XMarket (de)": 0.49,
+ "XMarket (en)": 0.65,
+ "XMarket (es)": 0.3,
+ "XPQARetrieval (ara-ara)": 2.45,
+ "XPQARetrieval (eng-ara)": 0.52,
+ "XPQARetrieval (ara-eng)": 0.85,
+ "XPQARetrieval (deu-deu)": 8.48,
+ "XPQARetrieval (eng-deu)": 0.46,
+ "XPQARetrieval (deu-eng)": 2.26,
+ "XPQARetrieval (spa-spa)": 4.58,
+ "XPQARetrieval (eng-spa)": 0.66,
+ "XPQARetrieval (spa-eng)": 1.32,
+ "XPQARetrieval (fra-fra)": 8.72,
+ "XPQARetrieval (eng-fra)": 0.55,
+ "XPQARetrieval (fra-eng)": 1.91,
+ "XPQARetrieval (hin-hin)": 14.72,
+ "XPQARetrieval (eng-hin)": 5.7,
+ "XPQARetrieval (hin-eng)": 0.66,
+ "XPQARetrieval (ita-ita)": 12.1,
+ "XPQARetrieval (eng-ita)": 0.92,
+ "XPQARetrieval (ita-eng)": 1.8,
+ "XPQARetrieval (jpn-jpn)": 5.58,
+ "XPQARetrieval (eng-jpn)": 0.47,
+ "XPQARetrieval (jpn-eng)": 0.46,
+ "XPQARetrieval (kor-kor)": 3.8,
+ "XPQARetrieval (eng-kor)": 0.88,
+ "XPQARetrieval (kor-eng)": 0.75,
+ "XPQARetrieval (pol-pol)": 4.63,
+ "XPQARetrieval (eng-pol)": 0.7,
+ "XPQARetrieval (pol-eng)": 0.63,
+ "XPQARetrieval (por-por)": 3.99,
+ "XPQARetrieval (eng-por)": 0.6,
+ "XPQARetrieval (por-eng)": 0.91,
+ "XPQARetrieval (tam-tam)": 1.97,
+ "XPQARetrieval (eng-tam)": 0.77,
+ "XPQARetrieval (tam-eng)": 0.48,
+ "XPQARetrieval (cmn-cmn)": 5.04,
+ "XPQARetrieval (eng-cmn)": 0.33,
+ "XPQARetrieval (cmn-eng)": 0.49
}
]
},
@@ -6165,15 +17955,129 @@
"cosine_spearman": [
{
"Model": "deberta-v1-base",
+ "BIOSSES": 36.15,
+ "CDSC-R": 62.16,
+ "GermanSTSBenchmark": 31.4,
+ "SICK-R": 49.94,
+ "SICK-R-PL": 36.4,
+ "SICKFr": 50.14,
+ "STS12": 44.76,
+ "STS13": 46.65,
+ "STS14": 38.15,
+ "STS15": 54.85,
+ "STS16": 49.15,
+ "STS17 (en-en)": 60.31,
+ "STS17 (es-es)": 53.94,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (nl-en)": 17.23,
+ "STS17 (en-de)": 10.74,
+ "STS17 (it-en)": 15.7,
+ "STS17 (fr-en)": 11.55,
+ "STS17 (ar-ar)": 44.8,
+ "STS17 (es-en)": 5.34,
+ "STS17 (en-tr)": -1.09,
+ "STS17 (ko-ko)": 32.38,
+ "STS22 (it)": 32.04,
+ "STS22 (de-pl)": -14.13,
+ "STS22 (fr-pl)": 16.9,
+ "STS22 (ar)": 28.79,
+ "STS22 (de-fr)": -1.08,
+ "STS22 (pl-en)": 16.21,
+ "STS22 (zh-en)": 19.26,
+ "STS22 (en)": 44.81,
+ "STS22 (de)": 9.44,
+ "STS22 (es-it)": 24.56,
+ "STS22 (es)": 38.62,
+ "STS22 (pl)": 13.21,
+ "STS22 (es-en)": 21.02,
+ "STS22 (de-en)": 28.79,
+ "STS22 (fr)": 20.41,
+ "STS22 (zh)": 12.51,
+ "STS22 (tr)": 9.4,
+ "STSB": 24.8,
+ "STSBenchmark": 41.32,
+ "STSBenchmarkMultilingualSTS (it)": 37.39,
+ "STSBenchmarkMultilingualSTS (pl)": 38.12,
+ "STSBenchmarkMultilingualSTS (pt)": 28.76,
+ "STSBenchmarkMultilingualSTS (de)": 35.64,
+ "STSBenchmarkMultilingualSTS (nl)": 40.24,
+ "STSBenchmarkMultilingualSTS (zh)": 24.09,
+ "STSBenchmarkMultilingualSTS (es)": 38.78,
+ "STSBenchmarkMultilingualSTS (en)": 41.32,
+ "STSBenchmarkMultilingualSTS (fr)": 41.16
+ },
+ {
+ "Model": "deberta-v1-base",
+ "BIOSSES": 36.15,
+ "CDSC-R": 62.16,
+ "GermanSTSBenchmark": 31.4,
"RUParaPhraserSTS": 54.03,
"RuSTSBenchmarkSTS": 58.47,
+ "SICK-R": 49.94,
+ "SICK-R-PL": 36.4,
+ "SICKFr": 50.14,
+ "STS12": 44.76,
+ "STS13": 46.65,
+ "STS14": 38.15,
+ "STS15": 54.85,
+ "STS16": 49.15,
+ "STS17 (en-en)": 60.31,
+ "STS17 (es-es)": 53.95,
+ "STS17 (en-ar)": 1.36,
+ "STS17 (nl-en)": 17.23,
+ "STS17 (en-de)": 10.74,
+ "STS17 (it-en)": 15.7,
+ "STS17 (fr-en)": 11.55,
+ "STS17 (ar-ar)": 44.8,
+ "STS17 (es-en)": 5.34,
+ "STS17 (en-tr)": -1.09,
+ "STS17 (ko-ko)": 32.38,
"STS22 (ru)": 47.67,
- "STSBenchmarkMultilingualSTS (ru)": 58.45
+ "STS22 (it)": 32.04,
+ "STS22 (de-pl)": -14.13,
+ "STS22 (fr-pl)": 16.9,
+ "STS22 (ar)": 28.76,
+ "STS22 (de-fr)": -1.08,
+ "STS22 (pl-en)": 16.21,
+ "STS22 (zh-en)": 19.26,
+ "STS22 (en)": 44.81,
+ "STS22 (de)": 9.45,
+ "STS22 (es-it)": 24.56,
+ "STS22 (es)": 38.62,
+ "STS22 (pl)": 13.11,
+ "STS22 (es-en)": 21.02,
+ "STS22 (de-en)": 28.79,
+ "STS22 (fr)": 20.41,
+ "STS22 (zh)": 12.51,
+ "STS22 (tr)": 9.4,
+ "STSB": 24.8,
+ "STSBenchmark": 41.32,
+ "STSBenchmarkMultilingualSTS (ru)": 58.45,
+ "STSBenchmarkMultilingualSTS (it)": 37.39,
+ "STSBenchmarkMultilingualSTS (pl)": 38.13,
+ "STSBenchmarkMultilingualSTS (pt)": 28.76,
+ "STSBenchmarkMultilingualSTS (de)": 35.64,
+ "STSBenchmarkMultilingualSTS (nl)": 40.24,
+ "STSBenchmarkMultilingualSTS (zh)": 24.09,
+ "STSBenchmarkMultilingualSTS (es)": 38.78,
+ "STSBenchmarkMultilingualSTS (en)": 41.32,
+ "STSBenchmarkMultilingualSTS (fr)": 41.16
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "deberta-v1-base",
+ "SummEval": 28.91,
+ "SummEvalFr": 28.45
+ },
+ {
+ "Model": "deberta-v1-base",
+ "SummEval": 28.91,
+ "SummEvalFr": 28.43
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
@@ -6185,47 +18089,676 @@
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "deberta-v1-base",
+ "Core17InstructionRetrieval": -4.16,
+ "News21InstructionRetrieval": -0.79,
+ "Robust04InstructionRetrieval": -2.16
+ }
+ ]
}
},
"dwzhu__e5-base-4k": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "e5-base-4k",
+ "BornholmBitextMining": 37.44,
+ "Tatoeba (cym-eng)": 6.6,
+ "Tatoeba (kzj-eng)": 4.21,
+ "Tatoeba (ita-eng)": 17.65,
+ "Tatoeba (gla-eng)": 2.55,
+ "Tatoeba (hsb-eng)": 5.04,
+ "Tatoeba (cat-eng)": 17.25,
+ "Tatoeba (ina-eng)": 32.22,
+ "Tatoeba (deu-eng)": 24.09,
+ "Tatoeba (kab-eng)": 0.8,
+ "Tatoeba (fin-eng)": 2.98,
+ "Tatoeba (awa-eng)": 0.09,
+ "Tatoeba (kaz-eng)": 0.43,
+ "Tatoeba (yue-eng)": 1.35,
+ "Tatoeba (wuu-eng)": 1.25,
+ "Tatoeba (orv-eng)": 0.22,
+ "Tatoeba (ell-eng)": 0.57,
+ "Tatoeba (spa-eng)": 21.17,
+ "Tatoeba (dsb-eng)": 5.42,
+ "Tatoeba (glg-eng)": 19.87,
+ "Tatoeba (aze-eng)": 3.55,
+ "Tatoeba (ben-eng)": 0.01,
+ "Tatoeba (slv-eng)": 5.9,
+ "Tatoeba (ceb-eng)": 4.75,
+ "Tatoeba (khm-eng)": 0.17,
+ "Tatoeba (cha-eng)": 14.24,
+ "Tatoeba (lat-eng)": 7.74,
+ "Tatoeba (swh-eng)": 5.56,
+ "Tatoeba (pms-eng)": 12.14,
+ "Tatoeba (lit-eng)": 2.8,
+ "Tatoeba (hun-eng)": 3.16,
+ "Tatoeba (pol-eng)": 5.18,
+ "Tatoeba (bel-eng)": 1.62,
+ "Tatoeba (fra-eng)": 24.72,
+ "Tatoeba (cor-eng)": 2.78,
+ "Tatoeba (ukr-eng)": 1.09,
+ "Tatoeba (nld-eng)": 14.96,
+ "Tatoeba (cmn-eng)": 1.66,
+ "Tatoeba (xho-eng)": 3.12,
+ "Tatoeba (jav-eng)": 4.89,
+ "Tatoeba (tha-eng)": 0.67,
+ "Tatoeba (zsm-eng)": 7.68,
+ "Tatoeba (mar-eng)": 0.05,
+ "Tatoeba (hin-eng)": 0.1,
+ "Tatoeba (ang-eng)": 15.17,
+ "Tatoeba (mhr-eng)": 0.14,
+ "Tatoeba (kat-eng)": 0.31,
+ "Tatoeba (eus-eng)": 4.35,
+ "Tatoeba (bre-eng)": 3.8,
+ "Tatoeba (hye-eng)": 0.4,
+ "Tatoeba (kor-eng)": 0.81,
+ "Tatoeba (afr-eng)": 7.69,
+ "Tatoeba (oci-eng)": 11.11,
+ "Tatoeba (mon-eng)": 1.34,
+ "Tatoeba (swg-eng)": 14.58,
+ "Tatoeba (amh-eng)": 0.65,
+ "Tatoeba (ido-eng)": 15.55,
+ "Tatoeba (isl-eng)": 3.54,
+ "Tatoeba (gle-eng)": 3.15,
+ "Tatoeba (pam-eng)": 4.17,
+ "Tatoeba (est-eng)": 3.4,
+ "Tatoeba (srp-eng)": 2.59,
+ "Tatoeba (lfn-eng)": 13.48,
+ "Tatoeba (pes-eng)": 0.34,
+ "Tatoeba (nob-eng)": 12.85,
+ "Tatoeba (heb-eng)": 0.65,
+ "Tatoeba (mal-eng)": 0.0,
+ "Tatoeba (gsw-eng)": 16.05,
+ "Tatoeba (ber-eng)": 4.23,
+ "Tatoeba (uig-eng)": 0.36,
+ "Tatoeba (uzb-eng)": 1.78,
+ "Tatoeba (cbk-eng)": 14.44,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (ron-eng)": 11.04,
+ "Tatoeba (tam-eng)": 0.84,
+ "Tatoeba (tur-eng)": 3.5,
+ "Tatoeba (ara-eng)": 0.37,
+ "Tatoeba (max-eng)": 11.27,
+ "Tatoeba (slk-eng)": 5.34,
+ "Tatoeba (tat-eng)": 1.06,
+ "Tatoeba (ind-eng)": 6.76,
+ "Tatoeba (hrv-eng)": 6.57,
+ "Tatoeba (dan-eng)": 11.31,
+ "Tatoeba (arq-eng)": 0.51,
+ "Tatoeba (mkd-eng)": 0.24,
+ "Tatoeba (ces-eng)": 4.11,
+ "Tatoeba (tgl-eng)": 4.54,
+ "Tatoeba (fao-eng)": 7.05,
+ "Tatoeba (tzl-eng)": 14.85,
+ "Tatoeba (bul-eng)": 1.4,
+ "Tatoeba (kur-eng)": 7.91,
+ "Tatoeba (fry-eng)": 18.03,
+ "Tatoeba (por-eng)": 20.03,
+ "Tatoeba (sqi-eng)": 5.52,
+ "Tatoeba (epo-eng)": 11.31,
+ "Tatoeba (arz-eng)": 0.42,
+ "Tatoeba (lvs-eng)": 3.57,
+ "Tatoeba (war-eng)": 6.97,
+ "Tatoeba (tel-eng)": 0.53,
+ "Tatoeba (csb-eng)": 7.24,
+ "Tatoeba (swe-eng)": 10.78,
+ "Tatoeba (nov-eng)": 25.6,
+ "Tatoeba (ast-eng)": 17.6,
+ "Tatoeba (ile-eng)": 20.4,
+ "Tatoeba (yid-eng)": 0.24,
+ "Tatoeba (rus-eng)": 0.83,
+ "Tatoeba (nds-eng)": 12.48,
+ "Tatoeba (jpn-eng)": 0.92,
+ "Tatoeba (bos-eng)": 7.11,
+ "Tatoeba (vie-eng)": 4.39,
+ "Tatoeba (tuk-eng)": 2.82,
+ "Tatoeba (dtp-eng)": 3.12,
+ "Tatoeba (nno-eng)": 8.52
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-base-4k",
+ "AllegroReviews": 25.0,
+ "AmazonCounterfactualClassification (en-ext)": 77.13,
+ "AmazonCounterfactualClassification (en)": 75.42,
+ "AmazonCounterfactualClassification (de)": 56.3,
+ "AmazonCounterfactualClassification (ja)": 59.45,
+ "AmazonReviewsClassification (en)": 42.61,
+ "AmazonReviewsClassification (de)": 28.9,
+ "AmazonReviewsClassification (es)": 33.41,
+ "AmazonReviewsClassification (fr)": 29.98,
+ "AmazonReviewsClassification (ja)": 22.06,
+ "AmazonReviewsClassification (zh)": 23.31,
+ "AngryTweetsClassification": 45.42,
+ "CBD": 54.25,
+ "DanishPoliticalCommentsClassification": 28.73,
+ "GeoreviewClassification": 29.03,
+ "HeadlineClassification": 34.62,
+ "InappropriatenessClassification": 52.6,
+ "KinopoiskClassification": 38.3,
+ "LccSentimentClassification": 36.73,
+ "MTOPDomainClassification (en)": 91.79,
+ "MTOPDomainClassification (de)": 76.14,
+ "MTOPDomainClassification (es)": 79.22,
+ "MTOPDomainClassification (fr)": 80.24,
+ "MTOPDomainClassification (hi)": 37.21,
+ "MTOPDomainClassification (th)": 17.87,
+ "MTOPIntentClassification (en)": 71.07,
+ "MTOPIntentClassification (de)": 53.67,
+ "MTOPIntentClassification (es)": 53.94,
+ "MTOPIntentClassification (fr)": 51.56,
+ "MTOPIntentClassification (hi)": 23.09,
+ "MTOPIntentClassification (th)": 4.97,
+ "MasakhaNEWSClassification (amh)": 34.41,
+ "MasakhaNEWSClassification (eng)": 78.88,
+ "MasakhaNEWSClassification (fra)": 78.41,
+ "MasakhaNEWSClassification (hau)": 65.27,
+ "MasakhaNEWSClassification (ibo)": 60.33,
+ "MasakhaNEWSClassification (lin)": 74.57,
+ "MasakhaNEWSClassification (lug)": 59.1,
+ "MasakhaNEWSClassification (orm)": 60.98,
+ "MasakhaNEWSClassification (pcm)": 93.05,
+ "MasakhaNEWSClassification (run)": 66.71,
+ "MasakhaNEWSClassification (sna)": 78.75,
+ "MasakhaNEWSClassification (som)": 56.26,
+ "MasakhaNEWSClassification (swa)": 58.05,
+ "MasakhaNEWSClassification (tir)": 24.45,
+ "MasakhaNEWSClassification (xho)": 62.12,
+ "MasakhaNEWSClassification (yor)": 66.86,
+ "MassiveIntentClassification (pl)": 48.05,
+ "MassiveIntentClassification (it)": 51.27,
+ "MassiveIntentClassification (ms)": 46.17,
+ "MassiveIntentClassification (ka)": 16.05,
+ "MassiveIntentClassification (id)": 48.44,
+ "MassiveIntentClassification (zh-CN)": 24.16,
+ "MassiveIntentClassification (ko)": 22.7,
+ "MassiveIntentClassification (zh-TW)": 22.92,
+ "MassiveIntentClassification (tl)": 48.06,
+ "MassiveIntentClassification (te)": 2.89,
+ "MassiveIntentClassification (bn)": 21.63,
+ "MassiveIntentClassification (kn)": 3.69,
+ "MassiveIntentClassification (jv)": 44.23,
+ "MassiveIntentClassification (en)": 68.79,
+ "MassiveIntentClassification (th)": 14.7,
+ "MassiveIntentClassification (af)": 46.12,
+ "MassiveIntentClassification (sv)": 48.32,
+ "MassiveIntentClassification (de)": 52.16,
+ "MassiveIntentClassification (km)": 4.44,
+ "MassiveIntentClassification (sq)": 48.83,
+ "MassiveIntentClassification (mn)": 32.05,
+ "MassiveIntentClassification (hy)": 13.87,
+ "MassiveIntentClassification (ru)": 40.19,
+ "MassiveIntentClassification (ja)": 35.46,
+ "MassiveIntentClassification (lv)": 46.85,
+ "MassiveIntentClassification (is)": 42.95,
+ "MassiveIntentClassification (nl)": 48.78,
+ "MassiveIntentClassification (ur)": 22.85,
+ "MassiveIntentClassification (ro)": 47.9,
+ "MassiveIntentClassification (az)": 45.89,
+ "MassiveIntentClassification (ta)": 15.46,
+ "MassiveIntentClassification (he)": 25.69,
+ "MassiveIntentClassification (sl)": 46.21,
+ "MassiveIntentClassification (fr)": 51.28,
+ "MassiveIntentClassification (ml)": 2.99,
+ "MassiveIntentClassification (hi)": 21.33,
+ "MassiveIntentClassification (fa)": 33.16,
+ "MassiveIntentClassification (es)": 50.58,
+ "MassiveIntentClassification (sw)": 42.91,
+ "MassiveIntentClassification (nb)": 47.51,
+ "MassiveIntentClassification (hu)": 45.44,
+ "MassiveIntentClassification (my)": 3.77,
+ "MassiveIntentClassification (fi)": 46.94,
+ "MassiveIntentClassification (el)": 37.56,
+ "MassiveIntentClassification (pt)": 52.64,
+ "MassiveIntentClassification (vi)": 42.19,
+ "MassiveIntentClassification (ar)": 27.37,
+ "MassiveIntentClassification (da)": 50.82,
+ "MassiveIntentClassification (tr)": 48.14,
+ "MassiveIntentClassification (cy)": 41.94,
+ "MassiveIntentClassification (am)": 3.14,
+ "MassiveScenarioClassification (tr)": 49.88,
+ "MassiveScenarioClassification (nl)": 53.28,
+ "MassiveScenarioClassification (ru)": 40.39,
+ "MassiveScenarioClassification (es)": 56.93,
+ "MassiveScenarioClassification (bn)": 25.42,
+ "MassiveScenarioClassification (en)": 74.89,
+ "MassiveScenarioClassification (ms)": 50.69,
+ "MassiveScenarioClassification (az)": 48.41,
+ "MassiveScenarioClassification (hy)": 17.81,
+ "MassiveScenarioClassification (ro)": 54.43,
+ "MassiveScenarioClassification (it)": 56.36,
+ "MassiveScenarioClassification (he)": 24.88,
+ "MassiveScenarioClassification (pl)": 50.65,
+ "MassiveScenarioClassification (th)": 23.86,
+ "MassiveScenarioClassification (ar)": 30.17,
+ "MassiveScenarioClassification (da)": 54.18,
+ "MassiveScenarioClassification (ta)": 19.69,
+ "MassiveScenarioClassification (km)": 9.53,
+ "MassiveScenarioClassification (is)": 46.28,
+ "MassiveScenarioClassification (id)": 51.86,
+ "MassiveScenarioClassification (ja)": 42.15,
+ "MassiveScenarioClassification (ko)": 24.97,
+ "MassiveScenarioClassification (mn)": 32.55,
+ "MassiveScenarioClassification (my)": 9.53,
+ "MassiveScenarioClassification (kn)": 8.33,
+ "MassiveScenarioClassification (fa)": 32.18,
+ "MassiveScenarioClassification (tl)": 52.71,
+ "MassiveScenarioClassification (vi)": 41.53,
+ "MassiveScenarioClassification (lv)": 47.51,
+ "MassiveScenarioClassification (de)": 58.81,
+ "MassiveScenarioClassification (sq)": 52.08,
+ "MassiveScenarioClassification (zh-TW)": 29.49,
+ "MassiveScenarioClassification (af)": 50.27,
+ "MassiveScenarioClassification (te)": 7.42,
+ "MassiveScenarioClassification (zh-CN)": 31.79,
+ "MassiveScenarioClassification (fi)": 48.38,
+ "MassiveScenarioClassification (ka)": 20.37,
+ "MassiveScenarioClassification (sl)": 48.83,
+ "MassiveScenarioClassification (fr)": 57.84,
+ "MassiveScenarioClassification (sw)": 45.52,
+ "MassiveScenarioClassification (cy)": 43.49,
+ "MassiveScenarioClassification (nb)": 52.14,
+ "MassiveScenarioClassification (el)": 42.01,
+ "MassiveScenarioClassification (am)": 7.88,
+ "MassiveScenarioClassification (jv)": 47.79,
+ "MassiveScenarioClassification (ml)": 6.72,
+ "MassiveScenarioClassification (ur)": 27.49,
+ "MassiveScenarioClassification (hu)": 49.43,
+ "MassiveScenarioClassification (hi)": 23.89,
+ "MassiveScenarioClassification (sv)": 51.18,
+ "MassiveScenarioClassification (pt)": 56.74,
+ "NoRecClassification": 39.24,
+ "NordicLangClassification": 59.99,
+ "PAC": 65.23,
+ "PolEmo2.0-IN": 44.43,
+ "PolEmo2.0-OUT": 28.2,
+ "RuReviewsClassification": 44.29,
+ "RuSciBenchGRNTIClassification": 17.9,
+ "RuSciBenchOECDClassification": 14.1
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "e5-base-4k",
+ "AlloProfClusteringP2P": 58.85,
+ "AlloProfClusteringS2S": 31.56,
+ "BlurbsClusteringP2P": 26.39,
+ "BlurbsClusteringS2S": 9.82,
+ "GeoreviewClusteringP2P": 16.68,
+ "HALClusteringS2S": 21.16,
+ "MLSUMClusteringP2P (de)": 39.77,
+ "MLSUMClusteringP2P (fr)": 43.92,
+ "MLSUMClusteringP2P (ru)": 24.68,
+ "MLSUMClusteringP2P (es)": 46.46,
+ "MLSUMClusteringS2S (de)": 41.62,
+ "MLSUMClusteringS2S (fr)": 43.87,
+ "MLSUMClusteringS2S (ru)": 23.86,
+ "MLSUMClusteringS2S (es)": 47.17,
+ "MasakhaNEWSClusteringP2P (amh)": 40.56,
+ "MasakhaNEWSClusteringP2P (eng)": 64.2,
+ "MasakhaNEWSClusteringP2P (fra)": 64.85,
+ "MasakhaNEWSClusteringP2P (hau)": 43.08,
+ "MasakhaNEWSClusteringP2P (ibo)": 37.42,
+ "MasakhaNEWSClusteringP2P (lin)": 61.0,
+ "MasakhaNEWSClusteringP2P (lug)": 48.71,
+ "MasakhaNEWSClusteringP2P (orm)": 26.61,
+ "MasakhaNEWSClusteringP2P (pcm)": 87.61,
+ "MasakhaNEWSClusteringP2P (run)": 54.56,
+ "MasakhaNEWSClusteringP2P (sna)": 52.73,
+ "MasakhaNEWSClusteringP2P (som)": 34.27,
+ "MasakhaNEWSClusteringP2P (swa)": 23.29,
+ "MasakhaNEWSClusteringP2P (tir)": 42.26,
+ "MasakhaNEWSClusteringP2P (xho)": 30.04,
+ "MasakhaNEWSClusteringP2P (yor)": 26.99,
+ "MasakhaNEWSClusteringS2S (amh)": 43.55,
+ "MasakhaNEWSClusteringS2S (eng)": 45.61,
+ "MasakhaNEWSClusteringS2S (fra)": 43.42,
+ "MasakhaNEWSClusteringS2S (hau)": 14.98,
+ "MasakhaNEWSClusteringS2S (ibo)": 34.4,
+ "MasakhaNEWSClusteringS2S (lin)": 47.29,
+ "MasakhaNEWSClusteringS2S (lug)": 43.89,
+ "MasakhaNEWSClusteringS2S (orm)": 26.09,
+ "MasakhaNEWSClusteringS2S (pcm)": 62.8,
+ "MasakhaNEWSClusteringS2S (run)": 46.22,
+ "MasakhaNEWSClusteringS2S (sna)": 49.67,
+ "MasakhaNEWSClusteringS2S (som)": 30.8,
+ "MasakhaNEWSClusteringS2S (swa)": 16.16,
+ "MasakhaNEWSClusteringS2S (tir)": 49.22,
+ "MasakhaNEWSClusteringS2S (xho)": 27.73,
+ "MasakhaNEWSClusteringS2S (yor)": 29.45,
+ "RuSciBenchGRNTIClusteringP2P": 15.63,
+ "RuSciBenchOECDClusteringP2P": 13.86,
+ "TenKGnadClusteringP2P": 36.97,
+ "TenKGnadClusteringS2S": 19.89
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "e5-base-4k",
+ "CDSC-E": 55.27,
+ "FalseFriendsGermanEnglish": 47.99,
+ "OpusparcusPC (de)": 91.41,
+ "OpusparcusPC (en)": 98.49,
+ "OpusparcusPC (fi)": 87.15,
+ "OpusparcusPC (fr)": 87.64,
+ "OpusparcusPC (ru)": 78.8,
+ "OpusparcusPC (sv)": 85.15,
+ "PSC": 95.48,
+ "PawsXPairClassification (de)": 52.25,
+ "PawsXPairClassification (en)": 55.42,
+ "PawsXPairClassification (es)": 54.44,
+ "PawsXPairClassification (fr)": 55.38,
+ "PawsXPairClassification (ja)": 49.52,
+ "PawsXPairClassification (ko)": 53.39,
+ "PawsXPairClassification (zh)": 52.8,
+ "SICK-E-PL": 48.65,
+ "TERRa": 47.66
+ },
+ {
+ "Model": "e5-base-4k",
+ "CDSC-E": 55.27,
+ "FalseFriendsGermanEnglish": 48.49,
+ "OpusparcusPC (de)": 91.69,
+ "OpusparcusPC (en)": 98.49,
+ "OpusparcusPC (fi)": 87.19,
+ "OpusparcusPC (fr)": 87.77,
+ "OpusparcusPC (ru)": 78.8,
+ "OpusparcusPC (sv)": 85.15,
+ "PSC": 95.48,
+ "PawsXPairClassification (de)": 52.57,
+ "PawsXPairClassification (en)": 55.42,
+ "PawsXPairClassification (es)": 54.44,
+ "PawsXPairClassification (fr)": 55.38,
+ "PawsXPairClassification (ja)": 49.99,
+ "PawsXPairClassification (ko)": 53.39,
+ "PawsXPairClassification (zh)": 52.92,
+ "SICK-E-PL": 48.72,
+ "TERRa": 50.83
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-base-4k",
+ "AlloprofReranking": 58.33,
+ "RuBQReranking": 30.66,
+ "SyntecReranking": 66.58,
+ "T2Reranking": 58.61
+ },
+ {
+ "Model": "e5-base-4k",
+ "MIRACLReranking (ar)": 4.39,
+ "MIRACLReranking (bn)": 5.48,
+ "MIRACLReranking (de)": 20.91,
+ "MIRACLReranking (en)": 50.59,
+ "MIRACLReranking (es)": 31.16,
+ "MIRACLReranking (fa)": 7.34,
+ "MIRACLReranking (fi)": 35.17,
+ "MIRACLReranking (fr)": 22.29,
+ "MIRACLReranking (hi)": 8.13,
+ "MIRACLReranking (id)": 18.96,
+ "MIRACLReranking (ja)": 9.21,
+ "MIRACLReranking (ko)": 10.34,
+ "MIRACLReranking (ru)": 8.03,
+ "MIRACLReranking (sw)": 23.58,
+ "MIRACLReranking (te)": 1.48,
+ "MIRACLReranking (th)": 3.42,
+ "MIRACLReranking (yo)": 46.31,
+ "MIRACLReranking (zh)": 8.16
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "e5-base-4k",
- "LEMBNarrativeQARetrieval": 30.35,
- "LEMBQMSumRetrieval": 35.6,
- "LEMBSummScreenFDRetrieval": 95.23,
- "LEMBWikimQARetrieval": 69.19
+ "AILACasedocs": 33.09,
+ "AILAStatutes": 21.14,
+ "ARCChallenge": 7.69,
+ "AlloprofRetrieval": 29.3,
+ "AlphaNLI": 15.44,
+ "AppsRetrieval": 9.64,
+ "BSARDRetrieval": 8.77,
+ "CmedqaRetrieval": 2.97,
+ "CodeFeedbackMT": 50.26,
+ "CodeFeedbackST": 68.77,
+ "CodeSearchNetCCRetrieval (python)": 61.3,
+ "CodeSearchNetCCRetrieval (javascript)": 58.62,
+ "CodeSearchNetCCRetrieval (go)": 43.79,
+ "CodeSearchNetCCRetrieval (ruby)": 55.57,
+ "CodeSearchNetCCRetrieval (java)": 56.55,
+ "CodeSearchNetCCRetrieval (php)": 44.61,
+ "CodeSearchNetRetrieval (python)": 81.0,
+ "CodeSearchNetRetrieval (javascript)": 62.39,
+ "CodeSearchNetRetrieval (go)": 85.89,
+ "CodeSearchNetRetrieval (ruby)": 70.06,
+ "CodeSearchNetRetrieval (java)": 68.1,
+ "CodeSearchNetRetrieval (php)": 76.27,
+ "CodeTransOceanContest": 66.52,
+ "CodeTransOceanDL": 21.23,
+ "CosQA": 27.05,
+ "CovidRetrieval": 4.76,
+ "GerDaLIR": 4.99,
+ "GerDaLIRSmall": 12.6,
+ "GermanQuAD-Retrieval": 79.75,
+ "HellaSwag": 23.04,
+ "LEMBNarrativeQARetrieval": 30.03,
+ "LEMBQMSumRetrieval": 31.27,
+ "LEMBSummScreenFDRetrieval": 93.87,
+ "LEMBWikimQARetrieval": 68.88,
+ "LeCaRDv2": 23.17,
+ "LegalBenchConsumerContractsQA": 71.44,
+ "LegalBenchCorporateLobbying": 90.17,
+ "LegalQuAD": 26.9,
+ "LegalSummarization": 56.85,
+ "MIRACLRetrieval (ar)": 0.02,
+ "MIRACLRetrieval (bn)": 0.04,
+ "MIRACLRetrieval (de)": 7.87,
+ "MIRACLRetrieval (en)": 38.19,
+ "MIRACLRetrieval (es)": 13.31,
+ "MIRACLRetrieval (fa)": 0.13,
+ "MIRACLRetrieval (fi)": 15.01,
+ "MIRACLRetrieval (fr)": 6.84,
+ "MIRACLRetrieval (hi)": 0.27,
+ "MIRACLRetrieval (id)": 7.37,
+ "MIRACLRetrieval (ja)": 0.71,
+ "MIRACLRetrieval (ko)": 2.23,
+ "MIRACLRetrieval (ru)": 0.89,
+ "MIRACLRetrieval (sw)": 12.56,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.5,
+ "MIRACLRetrieval (yo)": 28.13,
+ "MIRACLRetrieval (zh)": 0.16,
+ "MintakaRetrieval (ar)": 1.52,
+ "MintakaRetrieval (de)": 14.29,
+ "MintakaRetrieval (es)": 14.17,
+ "MintakaRetrieval (fr)": 13.58,
+ "MintakaRetrieval (hi)": 3.77,
+ "MintakaRetrieval (it)": 12.69,
+ "MintakaRetrieval (ja)": 7.43,
+ "MintakaRetrieval (pt)": 13.59,
+ "PIQA": 21.78,
+ "Quail": 4.21,
+ "RARbCode": 43.05,
+ "RARbMath": 60.52,
+ "RiaNewsRetrieval": 2.94,
+ "RuBQRetrieval": 3.12,
+ "SIQA": 2.94,
+ "SciFact-PL": 32.21,
+ "SpartQA": 3.54,
+ "StackOverflowQA": 79.92,
+ "SyntecRetrieval": 60.84,
+ "SyntheticText2SQL": 46.72,
+ "TRECCOVID-PL": 16.5,
+ "TempReasonL1": 1.47,
+ "TempReasonL2Fact": 21.62,
+ "TempReasonL2Pure": 2.09,
+ "TempReasonL3Fact": 19.08,
+ "TempReasonL3Pure": 9.6,
+ "WinoGrande": 37.33,
+ "XMarket (de)": 6.28,
+ "XMarket (en)": 9.09,
+ "XMarket (es)": 8.74,
+ "XPQARetrieval (ara-ara)": 7.66,
+ "XPQARetrieval (eng-ara)": 3.29,
+ "XPQARetrieval (ara-eng)": 8.24,
+ "XPQARetrieval (deu-deu)": 55.65,
+ "XPQARetrieval (eng-deu)": 13.61,
+ "XPQARetrieval (deu-eng)": 29.81,
+ "XPQARetrieval (spa-spa)": 41.97,
+ "XPQARetrieval (eng-spa)": 12.56,
+ "XPQARetrieval (spa-eng)": 23.27,
+ "XPQARetrieval (fra-fra)": 49.52,
+ "XPQARetrieval (eng-fra)": 15.6,
+ "XPQARetrieval (fra-eng)": 30.23,
+ "XPQARetrieval (hin-hin)": 24.77,
+ "XPQARetrieval (eng-hin)": 7.98,
+ "XPQARetrieval (hin-eng)": 7.88,
+ "XPQARetrieval (ita-ita)": 51.7,
+ "XPQARetrieval (eng-ita)": 11.12,
+ "XPQARetrieval (ita-eng)": 26.41,
+ "XPQARetrieval (jpn-jpn)": 33.68,
+ "XPQARetrieval (eng-jpn)": 4.41,
+ "XPQARetrieval (jpn-eng)": 16.85,
+ "XPQARetrieval (kor-kor)": 8.61,
+ "XPQARetrieval (eng-kor)": 7.06,
+ "XPQARetrieval (kor-eng)": 7.1,
+ "XPQARetrieval (pol-pol)": 32.38,
+ "XPQARetrieval (eng-pol)": 11.33,
+ "XPQARetrieval (pol-eng)": 18.59,
+ "XPQARetrieval (por-por)": 35.01,
+ "XPQARetrieval (eng-por)": 8.38,
+ "XPQARetrieval (por-eng)": 22.92,
+ "XPQARetrieval (tam-tam)": 5.89,
+ "XPQARetrieval (eng-tam)": 3.29,
+ "XPQARetrieval (tam-eng)": 3.85,
+ "XPQARetrieval (cmn-cmn)": 19.17,
+ "XPQARetrieval (eng-cmn)": 4.99,
+ "XPQARetrieval (cmn-eng)": 9.77
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base-4k",
+ "CDSC-R": 84.98,
+ "GermanSTSBenchmark": 61.53,
+ "RUParaPhraserSTS": 48.39,
+ "RuSTSBenchmarkSTS": 55.63,
+ "SICK-R-PL": 56.01,
+ "SICKFr": 62.07,
+ "STS22 (it)": 70.79,
+ "STS22 (ru)": 24.97,
+ "STS22 (de-pl)": 16.84,
+ "STS22 (es)": 61.84,
+ "STS22 (de)": 31.41,
+ "STS22 (es-en)": 61.41,
+ "STS22 (es-it)": 55.06,
+ "STS22 (ar)": 34.56,
+ "STS22 (fr-pl)": 61.98,
+ "STS22 (tr)": 48.3,
+ "STS22 (fr)": 72.42,
+ "STS22 (de-en)": 48.14,
+ "STS22 (pl-en)": 55.43,
+ "STS22 (de-fr)": 47.22,
+ "STS22 (zh-en)": 33.19,
+ "STS22 (pl)": 21.75,
+ "STS22 (en)": 61.92,
+ "STS22 (zh)": 52.55,
+ "STSB": 30.49,
+ "STSBenchmarkMultilingualSTS (zh)": 31.46,
+ "STSBenchmarkMultilingualSTS (fr)": 65.61,
+ "STSBenchmarkMultilingualSTS (en)": 82.93,
+ "STSBenchmarkMultilingualSTS (nl)": 63.39,
+ "STSBenchmarkMultilingualSTS (es)": 66.75,
+ "STSBenchmarkMultilingualSTS (ru)": 56.1,
+ "STSBenchmarkMultilingualSTS (de)": 62.41,
+ "STSBenchmarkMultilingualSTS (pt)": 63.81,
+ "STSBenchmarkMultilingualSTS (it)": 64.35,
+ "STSBenchmarkMultilingualSTS (pl)": 57.73
+ },
+ {
+ "Model": "e5-base-4k",
+ "CDSC-R": 84.98,
+ "GermanSTSBenchmark": 61.53,
+ "RUParaPhraserSTS": 48.39,
+ "RuSTSBenchmarkSTS": 55.64,
+ "SICK-R-PL": 56.01,
+ "SICKFr": 62.07,
+ "STS22 (it)": 70.79,
+ "STS22 (ru)": 24.96,
+ "STS22 (de-pl)": 16.84,
+ "STS22 (es)": 61.84,
+ "STS22 (de)": 31.42,
+ "STS22 (es-en)": 61.41,
+ "STS22 (es-it)": 55.06,
+ "STS22 (ar)": 34.55,
+ "STS22 (fr-pl)": 61.98,
+ "STS22 (tr)": 48.3,
+ "STS22 (fr)": 72.42,
+ "STS22 (de-en)": 48.14,
+ "STS22 (pl-en)": 55.43,
+ "STS22 (de-fr)": 47.22,
+ "STS22 (zh-en)": 33.19,
+ "STS22 (pl)": 21.86,
+ "STS22 (en)": 61.92,
+ "STS22 (zh)": 52.55,
+ "STSB": 30.48,
+ "STSBenchmarkMultilingualSTS (zh)": 31.46,
+ "STSBenchmarkMultilingualSTS (fr)": 65.61,
+ "STSBenchmarkMultilingualSTS (en)": 82.93,
+ "STSBenchmarkMultilingualSTS (nl)": 63.39,
+ "STSBenchmarkMultilingualSTS (es)": 66.75,
+ "STSBenchmarkMultilingualSTS (ru)": 56.1,
+ "STSBenchmarkMultilingualSTS (de)": 62.41,
+ "STSBenchmarkMultilingualSTS (pt)": 63.81,
+ "STSBenchmarkMultilingualSTS (it)": 64.35,
+ "STSBenchmarkMultilingualSTS (pl)": 57.73
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base-4k",
+ "SummEvalFr": 29.55
+ },
+ {
+ "Model": "e5-base-4k",
+ "SummEvalFr": 29.55
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-base-4k",
+ "CEDRClassification": 33.96,
+ "SensitiveTopicsClassification": 17.86
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "e5-base-4k",
+ "Core17InstructionRetrieval": -2.14,
+ "News21InstructionRetrieval": -0.43,
+ "Robust04InstructionRetrieval": -6.2
+ }
+ ]
}
},
"elastic__elser-v2": {
@@ -8105,7 +20638,119 @@
"f1": [
{
"Model": "e5-base",
- "BornholmBitextMining": 40.09
+ "BornholmBitextMining": 40.09,
+ "Tatoeba (ast-eng)": 16.87,
+ "Tatoeba (tzl-eng)": 15.98,
+ "Tatoeba (csb-eng)": 8.62,
+ "Tatoeba (est-eng)": 3.75,
+ "Tatoeba (nov-eng)": 34.73,
+ "Tatoeba (por-eng)": 30.46,
+ "Tatoeba (eus-eng)": 7.28,
+ "Tatoeba (hun-eng)": 5.8,
+ "Tatoeba (xho-eng)": 4.37,
+ "Tatoeba (fra-eng)": 33.47,
+ "Tatoeba (rus-eng)": 0.3,
+ "Tatoeba (pam-eng)": 5.21,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (nld-eng)": 18.58,
+ "Tatoeba (pol-eng)": 7.24,
+ "Tatoeba (kzj-eng)": 5.03,
+ "Tatoeba (tam-eng)": 0.0,
+ "Tatoeba (kaz-eng)": 0.73,
+ "Tatoeba (hin-eng)": 0.0,
+ "Tatoeba (yid-eng)": 0.05,
+ "Tatoeba (max-eng)": 11.72,
+ "Tatoeba (nob-eng)": 15.27,
+ "Tatoeba (slk-eng)": 6.67,
+ "Tatoeba (hye-eng)": 0.67,
+ "Tatoeba (nds-eng)": 16.31,
+ "Tatoeba (khm-eng)": 0.28,
+ "Tatoeba (tat-eng)": 0.68,
+ "Tatoeba (tgl-eng)": 6.8,
+ "Tatoeba (lit-eng)": 3.32,
+ "Tatoeba (spa-eng)": 30.23,
+ "Tatoeba (cha-eng)": 14.81,
+ "Tatoeba (gle-eng)": 3.74,
+ "Tatoeba (deu-eng)": 24.57,
+ "Tatoeba (arq-eng)": 0.48,
+ "Tatoeba (kur-eng)": 7.76,
+ "Tatoeba (cmn-eng)": 2.06,
+ "Tatoeba (jpn-eng)": 0.24,
+ "Tatoeba (glg-eng)": 25.31,
+ "Tatoeba (vie-eng)": 5.91,
+ "Tatoeba (swe-eng)": 11.32,
+ "Tatoeba (heb-eng)": 0.61,
+ "Tatoeba (war-eng)": 6.66,
+ "Tatoeba (zsm-eng)": 9.47,
+ "Tatoeba (ina-eng)": 36.55,
+ "Tatoeba (pes-eng)": 0.6,
+ "Tatoeba (dsb-eng)": 6.67,
+ "Tatoeba (dan-eng)": 15.22,
+ "Tatoeba (cbk-eng)": 18.11,
+ "Tatoeba (fin-eng)": 4.66,
+ "Tatoeba (cat-eng)": 20.57,
+ "Tatoeba (afr-eng)": 8.9,
+ "Tatoeba (yue-eng)": 1.15,
+ "Tatoeba (bel-eng)": 1.63,
+ "Tatoeba (orv-eng)": 0.01,
+ "Tatoeba (kor-eng)": 1.31,
+ "Tatoeba (hrv-eng)": 9.04,
+ "Tatoeba (srp-eng)": 4.41,
+ "Tatoeba (ron-eng)": 14.49,
+ "Tatoeba (mon-eng)": 1.46,
+ "Tatoeba (ceb-eng)": 6.24,
+ "Tatoeba (ile-eng)": 27.93,
+ "Tatoeba (tur-eng)": 4.92,
+ "Tatoeba (ber-eng)": 5.41,
+ "Tatoeba (uig-eng)": 0.5,
+ "Tatoeba (gla-eng)": 2.87,
+ "Tatoeba (awa-eng)": 0.03,
+ "Tatoeba (uzb-eng)": 4.12,
+ "Tatoeba (slv-eng)": 6.94,
+ "Tatoeba (bul-eng)": 0.77,
+ "Tatoeba (lfn-eng)": 19.71,
+ "Tatoeba (bos-eng)": 11.19,
+ "Tatoeba (ben-eng)": 0.0,
+ "Tatoeba (cym-eng)": 6.44,
+ "Tatoeba (fao-eng)": 9.59,
+ "Tatoeba (bre-eng)": 4.45,
+ "Tatoeba (fry-eng)": 18.26,
+ "Tatoeba (sqi-eng)": 7.02,
+ "Tatoeba (ita-eng)": 22.75,
+ "Tatoeba (kat-eng)": 0.81,
+ "Tatoeba (ces-eng)": 5.56,
+ "Tatoeba (oci-eng)": 14.39,
+ "Tatoeba (hsb-eng)": 5.43,
+ "Tatoeba (mkd-eng)": 0.19,
+ "Tatoeba (ara-eng)": 0.39,
+ "Tatoeba (ell-eng)": 0.5,
+ "Tatoeba (mhr-eng)": 0.11,
+ "Tatoeba (isl-eng)": 5.16,
+ "Tatoeba (ang-eng)": 17.07,
+ "Tatoeba (ido-eng)": 22.37,
+ "Tatoeba (lvs-eng)": 5.23,
+ "Tatoeba (cor-eng)": 2.91,
+ "Tatoeba (kab-eng)": 1.2,
+ "Tatoeba (arz-eng)": 0.45,
+ "Tatoeba (swh-eng)": 6.96,
+ "Tatoeba (ind-eng)": 8.47,
+ "Tatoeba (jav-eng)": 6.0,
+ "Tatoeba (ukr-eng)": 1.0,
+ "Tatoeba (amh-eng)": 0.01,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (tel-eng)": 0.02,
+ "Tatoeba (epo-eng)": 14.41,
+ "Tatoeba (aze-eng)": 4.7,
+ "Tatoeba (tha-eng)": 1.22,
+ "Tatoeba (tuk-eng)": 4.64,
+ "Tatoeba (dtp-eng)": 3.63,
+ "Tatoeba (lat-eng)": 10.81,
+ "Tatoeba (wuu-eng)": 1.36,
+ "Tatoeba (pms-eng)": 14.19,
+ "Tatoeba (nno-eng)": 10.62,
+ "Tatoeba (swg-eng)": 14.61,
+ "Tatoeba (mar-eng)": 0.01,
+ "Tatoeba (gsw-eng)": 16.17
}
]
},
@@ -8113,105 +20758,1256 @@
"accuracy": [
{
"Model": "e5-base",
+ "AllegroReviews": 25.57,
+ "AmazonCounterfactualClassification (en-ext)": 79.76,
+ "AmazonCounterfactualClassification (en)": 78.16,
+ "AmazonCounterfactualClassification (de)": 55.84,
+ "AmazonCounterfactualClassification (ja)": 61.0,
+ "AmazonReviewsClassification (en)": 42.32,
+ "AmazonReviewsClassification (de)": 28.15,
+ "AmazonReviewsClassification (es)": 33.78,
+ "AmazonReviewsClassification (fr)": 30.98,
+ "AmazonReviewsClassification (ja)": 21.86,
+ "AmazonReviewsClassification (zh)": 22.48,
"AngryTweetsClassification": 45.06,
+ "CBD": 50.95,
"DKHateClassification": 58.51,
"DanishPoliticalCommentsClassification": 28.43,
+ "GeoreviewClassification": 27.81,
+ "HeadlineClassification": 29.8,
+ "InappropriatenessClassification": 51.95,
+ "KinopoiskClassification": 34.33,
"LccSentimentClassification": 37.47,
- "MassiveIntentClassification (da)": 44.25,
- "MassiveIntentClassification (nb)": 41.57,
+ "MTOPDomainClassification (en)": 90.44,
+ "MTOPDomainClassification (de)": 75.4,
+ "MTOPDomainClassification (es)": 77.44,
+ "MTOPDomainClassification (fr)": 79.77,
+ "MTOPDomainClassification (hi)": 33.01,
+ "MTOPDomainClassification (th)": 15.24,
+ "MTOPIntentClassification (en)": 60.07,
+ "MTOPIntentClassification (de)": 46.75,
+ "MTOPIntentClassification (es)": 44.19,
+ "MTOPIntentClassification (fr)": 40.11,
+ "MTOPIntentClassification (hi)": 11.91,
+ "MTOPIntentClassification (th)": 5.26,
+ "MasakhaNEWSClassification (amh)": 34.36,
+ "MasakhaNEWSClassification (eng)": 76.82,
+ "MasakhaNEWSClassification (fra)": 75.0,
+ "MasakhaNEWSClassification (hau)": 61.32,
+ "MasakhaNEWSClassification (ibo)": 58.13,
+ "MasakhaNEWSClassification (lin)": 73.77,
+ "MasakhaNEWSClassification (lug)": 55.38,
+ "MasakhaNEWSClassification (orm)": 58.46,
+ "MasakhaNEWSClassification (pcm)": 91.64,
+ "MasakhaNEWSClassification (run)": 61.89,
+ "MasakhaNEWSClassification (sna)": 74.5,
+ "MasakhaNEWSClassification (som)": 50.88,
+ "MasakhaNEWSClassification (swa)": 54.96,
+ "MasakhaNEWSClassification (tir)": 28.57,
+ "MasakhaNEWSClassification (xho)": 60.2,
+ "MasakhaNEWSClassification (yor)": 67.69,
+ "MassiveIntentClassification (mn)": 18.38,
+ "MassiveIntentClassification (cy)": 37.34,
+ "MassiveIntentClassification (pl)": 40.23,
+ "MassiveIntentClassification (zh-TW)": 18.05,
+ "MassiveIntentClassification (ro)": 43.84,
+ "MassiveIntentClassification (sq)": 43.14,
+ "MassiveIntentClassification (hy)": 7.64,
+ "MassiveIntentClassification (zh-CN)": 18.45,
+ "MassiveIntentClassification (hi)": 11.95,
+ "MassiveIntentClassification (fr)": 47.51,
+ "MassiveIntentClassification (th)": 10.45,
+ "MassiveIntentClassification (fi)": 41.0,
+ "MassiveIntentClassification (af)": 40.46,
+ "MassiveIntentClassification (sw)": 39.67,
+ "MassiveIntentClassification (he)": 18.66,
+ "MassiveIntentClassification (az)": 34.85,
+ "MassiveIntentClassification (ka)": 9.79,
+ "MassiveIntentClassification (ru)": 21.3,
"MassiveIntentClassification (sv)": 41.34,
- "MassiveScenarioClassification (da)": 52.99,
+ "MassiveIntentClassification (am)": 2.64,
+ "MassiveIntentClassification (ar)": 19.69,
+ "MassiveIntentClassification (ml)": 2.72,
+ "MassiveIntentClassification (jv)": 38.65,
+ "MassiveIntentClassification (vi)": 36.42,
+ "MassiveIntentClassification (es)": 45.51,
+ "MassiveIntentClassification (ja)": 21.63,
+ "MassiveIntentClassification (en)": 66.65,
+ "MassiveIntentClassification (fa)": 24.17,
+ "MassiveIntentClassification (pt)": 48.65,
+ "MassiveIntentClassification (lv)": 41.61,
+ "MassiveIntentClassification (sl)": 40.01,
+ "MassiveIntentClassification (nb)": 41.57,
+ "MassiveIntentClassification (id)": 42.07,
+ "MassiveIntentClassification (bn)": 14.29,
+ "MassiveIntentClassification (nl)": 42.0,
+ "MassiveIntentClassification (it)": 45.7,
+ "MassiveIntentClassification (my)": 3.5,
+ "MassiveIntentClassification (da)": 44.25,
+ "MassiveIntentClassification (ta)": 9.46,
+ "MassiveIntentClassification (is)": 36.7,
+ "MassiveIntentClassification (tr)": 40.9,
+ "MassiveIntentClassification (km)": 4.41,
+ "MassiveIntentClassification (de)": 45.41,
+ "MassiveIntentClassification (hu)": 39.2,
+ "MassiveIntentClassification (kn)": 3.1,
+ "MassiveIntentClassification (tl)": 41.07,
+ "MassiveIntentClassification (ms)": 38.71,
+ "MassiveIntentClassification (te)": 2.29,
+ "MassiveIntentClassification (el)": 21.94,
+ "MassiveIntentClassification (ko)": 18.8,
+ "MassiveIntentClassification (ur)": 14.28,
+ "MassiveScenarioClassification (id)": 48.83,
+ "MassiveScenarioClassification (ka)": 16.56,
+ "MassiveScenarioClassification (ru)": 27.27,
+ "MassiveScenarioClassification (lv)": 46.2,
+ "MassiveScenarioClassification (jv)": 46.01,
+ "MassiveScenarioClassification (sw)": 47.12,
+ "MassiveScenarioClassification (ur)": 21.3,
+ "MassiveScenarioClassification (th)": 19.11,
+ "MassiveScenarioClassification (pt)": 56.73,
+ "MassiveScenarioClassification (sl)": 46.15,
+ "MassiveScenarioClassification (km)": 8.75,
+ "MassiveScenarioClassification (te)": 6.83,
+ "MassiveScenarioClassification (my)": 10.36,
+ "MassiveScenarioClassification (ta)": 15.15,
+ "MassiveScenarioClassification (fi)": 45.22,
+ "MassiveScenarioClassification (kn)": 7.87,
"MassiveScenarioClassification (nb)": 50.33,
+ "MassiveScenarioClassification (am)": 7.21,
+ "MassiveScenarioClassification (hi)": 16.29,
+ "MassiveScenarioClassification (pl)": 47.8,
+ "MassiveScenarioClassification (it)": 54.55,
+ "MassiveScenarioClassification (ko)": 25.82,
+ "MassiveScenarioClassification (tr)": 48.74,
+ "MassiveScenarioClassification (ar)": 27.66,
+ "MassiveScenarioClassification (he)": 23.55,
"MassiveScenarioClassification (sv)": 50.0,
+ "MassiveScenarioClassification (el)": 29.77,
+ "MassiveScenarioClassification (es)": 54.38,
+ "MassiveScenarioClassification (hu)": 46.8,
+ "MassiveScenarioClassification (af)": 51.01,
+ "MassiveScenarioClassification (zh-TW)": 25.6,
+ "MassiveScenarioClassification (ro)": 53.32,
+ "MassiveScenarioClassification (cy)": 44.14,
+ "MassiveScenarioClassification (hy)": 14.18,
+ "MassiveScenarioClassification (is)": 46.12,
+ "MassiveScenarioClassification (bn)": 18.33,
+ "MassiveScenarioClassification (da)": 52.99,
+ "MassiveScenarioClassification (fr)": 57.6,
+ "MassiveScenarioClassification (ms)": 48.26,
+ "MassiveScenarioClassification (sq)": 50.04,
+ "MassiveScenarioClassification (nl)": 51.15,
+ "MassiveScenarioClassification (vi)": 42.46,
+ "MassiveScenarioClassification (tl)": 50.06,
+ "MassiveScenarioClassification (en)": 71.52,
+ "MassiveScenarioClassification (ja)": 27.89,
+ "MassiveScenarioClassification (zh-CN)": 25.12,
+ "MassiveScenarioClassification (fa)": 30.11,
+ "MassiveScenarioClassification (ml)": 6.99,
+ "MassiveScenarioClassification (mn)": 25.07,
+ "MassiveScenarioClassification (de)": 56.41,
+ "MassiveScenarioClassification (az)": 42.58,
"NoRecClassification": 42.0,
"NordicLangClassification": 59.34,
"NorwegianParliament": 57.42,
+ "PAC": 62.64,
+ "PolEmo2.0-IN": 41.04,
+ "PolEmo2.0-OUT": 22.04,
+ "RuReviewsClassification": 41.31,
+ "RuSciBenchGRNTIClassification": 10.71,
+ "RuSciBenchOECDClassification": 9.01,
"ScalaDaClassification": 50.08,
"ScalaNbClassification": 50.18
}
]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "e5-base",
+ "AlloProfClusteringP2P": 59.49,
+ "AlloProfClusteringS2S": 36.98,
+ "BlurbsClusteringP2P": 27.05,
+ "BlurbsClusteringS2S": 11.1,
+ "GeoreviewClusteringP2P": 17.46,
+ "HALClusteringS2S": 22.25,
+ "MLSUMClusteringP2P (de)": 39.08,
+ "MLSUMClusteringP2P (fr)": 40.92,
+ "MLSUMClusteringP2P (ru)": 19.41,
+ "MLSUMClusteringP2P (es)": 42.61,
+ "MLSUMClusteringS2S (de)": 39.48,
+ "MLSUMClusteringS2S (fr)": 40.94,
+ "MLSUMClusteringS2S (ru)": 19.13,
+ "MLSUMClusteringS2S (es)": 42.12,
+ "MasakhaNEWSClusteringP2P (amh)": 41.08,
+ "MasakhaNEWSClusteringP2P (eng)": 41.62,
+ "MasakhaNEWSClusteringP2P (fra)": 53.43,
+ "MasakhaNEWSClusteringP2P (hau)": 43.06,
+ "MasakhaNEWSClusteringP2P (ibo)": 34.51,
+ "MasakhaNEWSClusteringP2P (lin)": 62.39,
+ "MasakhaNEWSClusteringP2P (lug)": 60.71,
+ "MasakhaNEWSClusteringP2P (orm)": 28.2,
+ "MasakhaNEWSClusteringP2P (pcm)": 73.86,
+ "MasakhaNEWSClusteringP2P (run)": 55.76,
+ "MasakhaNEWSClusteringP2P (sna)": 55.4,
+ "MasakhaNEWSClusteringP2P (som)": 38.72,
+ "MasakhaNEWSClusteringP2P (swa)": 25.09,
+ "MasakhaNEWSClusteringP2P (tir)": 42.6,
+ "MasakhaNEWSClusteringP2P (xho)": 29.35,
+ "MasakhaNEWSClusteringP2P (yor)": 41.38,
+ "MasakhaNEWSClusteringS2S (amh)": 44.43,
+ "MasakhaNEWSClusteringS2S (eng)": 48.11,
+ "MasakhaNEWSClusteringS2S (fra)": 35.92,
+ "MasakhaNEWSClusteringS2S (hau)": 17.05,
+ "MasakhaNEWSClusteringS2S (ibo)": 34.08,
+ "MasakhaNEWSClusteringS2S (lin)": 43.73,
+ "MasakhaNEWSClusteringS2S (lug)": 48.53,
+ "MasakhaNEWSClusteringS2S (orm)": 25.17,
+ "MasakhaNEWSClusteringS2S (pcm)": 67.45,
+ "MasakhaNEWSClusteringS2S (run)": 53.55,
+ "MasakhaNEWSClusteringS2S (sna)": 47.02,
+ "MasakhaNEWSClusteringS2S (som)": 30.17,
+ "MasakhaNEWSClusteringS2S (swa)": 18.21,
+ "MasakhaNEWSClusteringS2S (tir)": 42.48,
+ "MasakhaNEWSClusteringS2S (xho)": 24.68,
+ "MasakhaNEWSClusteringS2S (yor)": 30.24,
+ "RuSciBenchGRNTIClusteringP2P": 14.34,
+ "RuSciBenchOECDClusteringP2P": 12.36,
+ "TenKGnadClusteringP2P": 41.9,
+ "TenKGnadClusteringS2S": 19.9
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "e5-base",
+ "CDSC-E": 50.26,
+ "FalseFriendsGermanEnglish": 47.82,
+ "OpusparcusPC (de)": 92.43,
+ "OpusparcusPC (en)": 98.55,
+ "OpusparcusPC (fi)": 86.98,
+ "OpusparcusPC (fr)": 87.93,
+ "OpusparcusPC (ru)": 80.41,
+ "OpusparcusPC (sv)": 84.1,
+ "PSC": 94.63,
+ "PawsXPairClassification (de)": 51.53,
+ "PawsXPairClassification (en)": 60.53,
+ "PawsXPairClassification (es)": 54.26,
+ "PawsXPairClassification (fr)": 55.75,
+ "PawsXPairClassification (ja)": 48.41,
+ "PawsXPairClassification (ko)": 51.15,
+ "PawsXPairClassification (zh)": 53.9,
+ "SICK-E-PL": 47.25,
+ "TERRa": 46.42
+ },
+ {
+ "Model": "e5-base",
+ "CDSC-E": 50.26,
+ "FalseFriendsGermanEnglish": 47.9,
+ "OpusparcusPC (de)": 92.43,
+ "OpusparcusPC (en)": 98.55,
+ "OpusparcusPC (fi)": 86.99,
+ "OpusparcusPC (fr)": 87.93,
+ "OpusparcusPC (ru)": 80.45,
+ "OpusparcusPC (sv)": 84.12,
+ "PSC": 94.63,
+ "PawsXPairClassification (de)": 51.9,
+ "PawsXPairClassification (en)": 60.53,
+ "PawsXPairClassification (es)": 54.29,
+ "PawsXPairClassification (fr)": 55.85,
+ "PawsXPairClassification (ja)": 48.41,
+ "PawsXPairClassification (ko)": 51.33,
+ "PawsXPairClassification (zh)": 53.91,
+ "SICK-E-PL": 47.25,
+ "TERRa": 46.42
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-base",
+ "AlloprofReranking": 62.92,
+ "RuBQReranking": 21.8,
+ "SyntecReranking": 69.01,
+ "T2Reranking": 57.43
+ },
+ {
+ "Model": "e5-base",
+ "MIRACLReranking (ar)": 4.57,
+ "MIRACLReranking (bn)": 9.14,
+ "MIRACLReranking (de)": 21.47,
+ "MIRACLReranking (en)": 54.98,
+ "MIRACLReranking (es)": 37.56,
+ "MIRACLReranking (fa)": 6.97,
+ "MIRACLReranking (fi)": 39.52,
+ "MIRACLReranking (fr)": 30.93,
+ "MIRACLReranking (hi)": 5.55,
+ "MIRACLReranking (id)": 25.9,
+ "MIRACLReranking (ja)": 8.26,
+ "MIRACLReranking (ko)": 11.11,
+ "MIRACLReranking (ru)": 6.67,
+ "MIRACLReranking (sw)": 33.98,
+ "MIRACLReranking (te)": 2.08,
+ "MIRACLReranking (th)": 3.21,
+ "MIRACLReranking (yo)": 51.95,
+ "MIRACLReranking (zh)": 7.82
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "e5-base",
+ "AILACasedocs": 23.83,
+ "AILAStatutes": 17.72,
+ "ARCChallenge": 7.44,
+ "AlloprofRetrieval": 29.4,
+ "AlphaNLI": 13.03,
+ "AppsRetrieval": 9.31,
+ "BSARDRetrieval": 6.07,
+ "CmedqaRetrieval": 2.57,
+ "CodeFeedbackMT": 44.23,
+ "CodeFeedbackST": 69.49,
+ "CodeSearchNetCCRetrieval (python)": 61.67,
+ "CodeSearchNetCCRetrieval (javascript)": 58.13,
+ "CodeSearchNetCCRetrieval (go)": 39.41,
+ "CodeSearchNetCCRetrieval (ruby)": 56.05,
+ "CodeSearchNetCCRetrieval (java)": 53.01,
+ "CodeSearchNetCCRetrieval (php)": 40.64,
+ "CodeSearchNetRetrieval (python)": 82.35,
+ "CodeSearchNetRetrieval (javascript)": 64.36,
+ "CodeSearchNetRetrieval (go)": 76.52,
+ "CodeSearchNetRetrieval (ruby)": 73.49,
+ "CodeSearchNetRetrieval (java)": 69.23,
+ "CodeSearchNetRetrieval (php)": 70.34,
+ "CodeTransOceanContest": 59.1,
+ "CodeTransOceanDL": 28.57,
+ "CosQA": 31.45,
+ "CovidRetrieval": 1.64,
+ "GerDaLIR": 1.11,
+ "GerDaLIRSmall": 2.91,
+ "GermanQuAD-Retrieval": 76.71,
+ "HellaSwag": 23.88,
"LEMBNarrativeQARetrieval": 25.31,
"LEMBQMSumRetrieval": 23.83,
"LEMBSummScreenFDRetrieval": 74.67,
- "LEMBWikimQARetrieval": 55.85
+ "LEMBWikimQARetrieval": 55.85,
+ "LeCaRDv2": 12.43,
+ "LegalBenchConsumerContractsQA": 72.62,
+ "LegalBenchCorporateLobbying": 89.47,
+ "LegalQuAD": 22.76,
+ "LegalSummarization": 54.14,
+ "MIRACLRetrieval (ar)": 0.03,
+ "MIRACLRetrieval (bn)": 0.14,
+ "MIRACLRetrieval (de)": 11.93,
+ "MIRACLRetrieval (en)": 46.32,
+ "MIRACLRetrieval (es)": 26.44,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 22.27,
+ "MIRACLRetrieval (fr)": 20.84,
+ "MIRACLRetrieval (hi)": 0.35,
+ "MIRACLRetrieval (id)": 16.56,
+ "MIRACLRetrieval (ja)": 0.68,
+ "MIRACLRetrieval (ko)": 2.76,
+ "MIRACLRetrieval (ru)": 0.34,
+ "MIRACLRetrieval (sw)": 24.95,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.41,
+ "MIRACLRetrieval (yo)": 49.82,
+ "MIRACLRetrieval (zh)": 0.1,
+ "MintakaRetrieval (ar)": 2.49,
+ "MintakaRetrieval (de)": 16.63,
+ "MintakaRetrieval (es)": 18.39,
+ "MintakaRetrieval (fr)": 19.89,
+ "MintakaRetrieval (hi)": 2.33,
+ "MintakaRetrieval (it)": 16.9,
+ "MintakaRetrieval (ja)": 4.95,
+ "MintakaRetrieval (pt)": 17.27,
+ "PIQA": 22.1,
+ "Quail": 4.04,
+ "RARbCode": 42.52,
+ "RARbMath": 66.63,
+ "RiaNewsRetrieval": 2.03,
+ "RuBQRetrieval": 1.74,
+ "SIQA": 2.79,
+ "SciFact-PL": 39.16,
+ "SpartQA": 3.17,
+ "StackOverflowQA": 81.66,
+ "SyntecRetrieval": 62.09,
+ "SyntheticText2SQL": 50.99,
+ "TRECCOVID-PL": 30.17,
+ "TempReasonL1": 1.53,
+ "TempReasonL2Fact": 32.0,
+ "TempReasonL2Pure": 1.64,
+ "TempReasonL3Fact": 27.08,
+ "TempReasonL3Pure": 7.2,
+ "WinoGrande": 51.98,
+ "XMarket (de)": 16.14,
+ "XMarket (en)": 32.8,
+ "XMarket (es)": 19.03,
+ "XPQARetrieval (ara-ara)": 7.85,
+ "XPQARetrieval (eng-ara)": 4.12,
+ "XPQARetrieval (ara-eng)": 8.5,
+ "XPQARetrieval (deu-deu)": 56.34,
+ "XPQARetrieval (eng-deu)": 12.75,
+ "XPQARetrieval (deu-eng)": 27.96,
+ "XPQARetrieval (spa-spa)": 40.63,
+ "XPQARetrieval (eng-spa)": 13.49,
+ "XPQARetrieval (spa-eng)": 22.02,
+ "XPQARetrieval (fra-fra)": 47.54,
+ "XPQARetrieval (eng-fra)": 16.18,
+ "XPQARetrieval (fra-eng)": 29.25,
+ "XPQARetrieval (hin-hin)": 18.96,
+ "XPQARetrieval (eng-hin)": 5.17,
+ "XPQARetrieval (hin-eng)": 7.48,
+ "XPQARetrieval (ita-ita)": 50.51,
+ "XPQARetrieval (eng-ita)": 11.44,
+ "XPQARetrieval (ita-eng)": 22.3,
+ "XPQARetrieval (jpn-jpn)": 29.51,
+ "XPQARetrieval (eng-jpn)": 4.82,
+ "XPQARetrieval (jpn-eng)": 16.17,
+ "XPQARetrieval (kor-kor)": 10.63,
+ "XPQARetrieval (eng-kor)": 8.31,
+ "XPQARetrieval (kor-eng)": 7.63,
+ "XPQARetrieval (pol-pol)": 30.5,
+ "XPQARetrieval (eng-pol)": 11.44,
+ "XPQARetrieval (pol-eng)": 17.32,
+ "XPQARetrieval (por-por)": 34.15,
+ "XPQARetrieval (eng-por)": 10.74,
+ "XPQARetrieval (por-eng)": 20.65,
+ "XPQARetrieval (tam-tam)": 10.09,
+ "XPQARetrieval (eng-tam)": 4.81,
+ "XPQARetrieval (tam-eng)": 3.6,
+ "XPQARetrieval (cmn-cmn)": 20.81,
+ "XPQARetrieval (eng-cmn)": 6.24,
+ "XPQARetrieval (cmn-eng)": 12.33
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base",
+ "CDSC-R": 83.36,
+ "GermanSTSBenchmark": 65.51,
+ "RUParaPhraserSTS": 39.12,
+ "RuSTSBenchmarkSTS": 51.35,
+ "SICK-R-PL": 56.69,
+ "SICKFr": 69.46,
+ "STS22 (de)": 35.15,
+ "STS22 (de-en)": 49.21,
+ "STS22 (zh-en)": 32.75,
+ "STS22 (en)": 63.74,
+ "STS22 (es)": 58.71,
+ "STS22 (es-en)": 67.35,
+ "STS22 (pl-en)": 60.4,
+ "STS22 (zh)": 43.29,
+ "STS22 (de-pl)": 45.33,
+ "STS22 (pl)": 30.13,
+ "STS22 (de-fr)": 55.99,
+ "STS22 (it)": 70.51,
+ "STS22 (ru)": 8.56,
+ "STS22 (es-it)": 55.56,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (fr)": 77.27,
+ "STS22 (tr)": 51.02,
+ "STS22 (ar)": 30.46,
+ "STSB": 33.72,
+ "STSBenchmarkMultilingualSTS (zh)": 35.42,
+ "STSBenchmarkMultilingualSTS (pt)": 70.26,
+ "STSBenchmarkMultilingualSTS (nl)": 66.67,
+ "STSBenchmarkMultilingualSTS (pl)": 62.15,
+ "STSBenchmarkMultilingualSTS (es)": 72.18,
+ "STSBenchmarkMultilingualSTS (en)": 86.35,
+ "STSBenchmarkMultilingualSTS (it)": 67.99,
+ "STSBenchmarkMultilingualSTS (fr)": 71.44,
+ "STSBenchmarkMultilingualSTS (de)": 66.2,
+ "STSBenchmarkMultilingualSTS (ru)": 51.32
+ },
+ {
+ "Model": "e5-base",
+ "CDSC-R": 83.36,
+ "GermanSTSBenchmark": 65.51,
+ "RUParaPhraserSTS": 39.12,
+ "RuSTSBenchmarkSTS": 51.35,
+ "SICK-R-PL": 56.69,
+ "SICKFr": 69.46,
+ "STS22 (de)": 35.13,
+ "STS22 (de-en)": 49.21,
+ "STS22 (zh-en)": 32.75,
+ "STS22 (en)": 63.74,
+ "STS22 (es)": 58.71,
+ "STS22 (es-en)": 67.35,
+ "STS22 (pl-en)": 60.4,
+ "STS22 (zh)": 43.29,
+ "STS22 (de-pl)": 45.33,
+ "STS22 (pl)": 30.31,
+ "STS22 (de-fr)": 55.99,
+ "STS22 (it)": 70.51,
+ "STS22 (ru)": 8.56,
+ "STS22 (es-it)": 55.56,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (fr)": 77.27,
+ "STS22 (tr)": 51.02,
+ "STS22 (ar)": 30.48,
+ "STSB": 33.71,
+ "STSBenchmarkMultilingualSTS (zh)": 35.4,
+ "STSBenchmarkMultilingualSTS (pt)": 70.26,
+ "STSBenchmarkMultilingualSTS (nl)": 66.67,
+ "STSBenchmarkMultilingualSTS (pl)": 62.16,
+ "STSBenchmarkMultilingualSTS (es)": 72.18,
+ "STSBenchmarkMultilingualSTS (en)": 86.35,
+ "STSBenchmarkMultilingualSTS (it)": 67.99,
+ "STSBenchmarkMultilingualSTS (fr)": 71.44,
+ "STSBenchmarkMultilingualSTS (de)": 66.2,
+ "STSBenchmarkMultilingualSTS (ru)": 51.32
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base",
+ "SummEvalFr": 30.06
+ },
+ {
+ "Model": "e5-base",
+ "SummEvalFr": 30.06
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-base",
+ "CEDRClassification": 33.32,
+ "SensitiveTopicsClassification": 17.5
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "e5-base",
+ "Core17InstructionRetrieval": -2.41,
+ "News21InstructionRetrieval": -2.17,
+ "Robust04InstructionRetrieval": -5.89
+ }
+ ]
}
},
"intfloat__e5-base-v2": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "e5-base-v2",
+ "BornholmBitextMining": 38.49,
+ "Tatoeba (kor-eng)": 1.23,
+ "Tatoeba (kab-eng)": 1.42,
+ "Tatoeba (pes-eng)": 0.52,
+ "Tatoeba (afr-eng)": 10.45,
+ "Tatoeba (fao-eng)": 11.7,
+ "Tatoeba (cor-eng)": 3.76,
+ "Tatoeba (oci-eng)": 15.52,
+ "Tatoeba (khm-eng)": 0.47,
+ "Tatoeba (max-eng)": 13.15,
+ "Tatoeba (bul-eng)": 1.84,
+ "Tatoeba (arz-eng)": 0.53,
+ "Tatoeba (fra-eng)": 34.76,
+ "Tatoeba (nov-eng)": 37.07,
+ "Tatoeba (kaz-eng)": 1.15,
+ "Tatoeba (tha-eng)": 1.03,
+ "Tatoeba (yue-eng)": 2.48,
+ "Tatoeba (mon-eng)": 2.62,
+ "Tatoeba (lvs-eng)": 5.15,
+ "Tatoeba (slv-eng)": 8.13,
+ "Tatoeba (mar-eng)": 0.28,
+ "Tatoeba (ind-eng)": 8.38,
+ "Tatoeba (por-eng)": 30.56,
+ "Tatoeba (ron-eng)": 16.3,
+ "Tatoeba (fry-eng)": 21.29,
+ "Tatoeba (csb-eng)": 7.91,
+ "Tatoeba (glg-eng)": 26.68,
+ "Tatoeba (spa-eng)": 31.62,
+ "Tatoeba (hin-eng)": 0.0,
+ "Tatoeba (dsb-eng)": 7.33,
+ "Tatoeba (lat-eng)": 11.03,
+ "Tatoeba (gsw-eng)": 17.85,
+ "Tatoeba (amh-eng)": 0.68,
+ "Tatoeba (gle-eng)": 3.36,
+ "Tatoeba (pms-eng)": 16.13,
+ "Tatoeba (cmn-eng)": 3.15,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (ara-eng)": 0.51,
+ "Tatoeba (kur-eng)": 8.55,
+ "Tatoeba (tam-eng)": 0.01,
+ "Tatoeba (nob-eng)": 15.99,
+ "Tatoeba (ces-eng)": 6.83,
+ "Tatoeba (isl-eng)": 6.57,
+ "Tatoeba (ita-eng)": 23.44,
+ "Tatoeba (nno-eng)": 12.53,
+ "Tatoeba (lfn-eng)": 21.16,
+ "Tatoeba (tzl-eng)": 16.48,
+ "Tatoeba (ido-eng)": 21.92,
+ "Tatoeba (ast-eng)": 25.63,
+ "Tatoeba (eus-eng)": 8.28,
+ "Tatoeba (cbk-eng)": 20.37,
+ "Tatoeba (wuu-eng)": 1.92,
+ "Tatoeba (ell-eng)": 0.66,
+ "Tatoeba (xho-eng)": 3.03,
+ "Tatoeba (kzj-eng)": 5.49,
+ "Tatoeba (hrv-eng)": 9.74,
+ "Tatoeba (dtp-eng)": 3.36,
+ "Tatoeba (hye-eng)": 0.47,
+ "Tatoeba (ben-eng)": 0.13,
+ "Tatoeba (ceb-eng)": 6.11,
+ "Tatoeba (bos-eng)": 12.34,
+ "Tatoeba (sqi-eng)": 7.49,
+ "Tatoeba (tgl-eng)": 6.46,
+ "Tatoeba (ang-eng)": 22.42,
+ "Tatoeba (urd-eng)": 0.27,
+ "Tatoeba (pam-eng)": 6.15,
+ "Tatoeba (ile-eng)": 29.23,
+ "Tatoeba (arq-eng)": 0.89,
+ "Tatoeba (bel-eng)": 2.67,
+ "Tatoeba (swg-eng)": 12.1,
+ "Tatoeba (kat-eng)": 0.26,
+ "Tatoeba (swe-eng)": 13.87,
+ "Tatoeba (hsb-eng)": 6.97,
+ "Tatoeba (mhr-eng)": 0.46,
+ "Tatoeba (rus-eng)": 2.58,
+ "Tatoeba (aze-eng)": 4.82,
+ "Tatoeba (pol-eng)": 7.65,
+ "Tatoeba (tuk-eng)": 5.0,
+ "Tatoeba (lit-eng)": 3.48,
+ "Tatoeba (yid-eng)": 0.49,
+ "Tatoeba (zsm-eng)": 9.91,
+ "Tatoeba (jav-eng)": 5.96,
+ "Tatoeba (uzb-eng)": 3.94,
+ "Tatoeba (cym-eng)": 6.76,
+ "Tatoeba (bre-eng)": 5.12,
+ "Tatoeba (ber-eng)": 5.43,
+ "Tatoeba (fin-eng)": 5.15,
+ "Tatoeba (uig-eng)": 0.5,
+ "Tatoeba (nds-eng)": 18.57,
+ "Tatoeba (dan-eng)": 16.58,
+ "Tatoeba (slk-eng)": 7.19,
+ "Tatoeba (heb-eng)": 0.91,
+ "Tatoeba (jpn-eng)": 1.72,
+ "Tatoeba (mkd-eng)": 0.66,
+ "Tatoeba (orv-eng)": 0.21,
+ "Tatoeba (swh-eng)": 8.51,
+ "Tatoeba (tur-eng)": 5.49,
+ "Tatoeba (epo-eng)": 16.56,
+ "Tatoeba (hun-eng)": 6.96,
+ "Tatoeba (gla-eng)": 2.99,
+ "Tatoeba (srp-eng)": 4.86,
+ "Tatoeba (awa-eng)": 0.03,
+ "Tatoeba (tel-eng)": 0.36,
+ "Tatoeba (cha-eng)": 18.54,
+ "Tatoeba (war-eng)": 7.47,
+ "Tatoeba (est-eng)": 4.68,
+ "Tatoeba (cat-eng)": 23.42,
+ "Tatoeba (ukr-eng)": 1.4,
+ "Tatoeba (tat-eng)": 1.03,
+ "Tatoeba (deu-eng)": 33.61,
+ "Tatoeba (nld-eng)": 21.17,
+ "Tatoeba (ina-eng)": 41.66,
+ "Tatoeba (vie-eng)": 6.37
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-base-v2",
+ "AllegroReviews": 26.17,
+ "AmazonCounterfactualClassification (en-ext)": 76.15,
+ "AmazonCounterfactualClassification (en)": 75.51,
+ "AmazonCounterfactualClassification (de)": 56.61,
+ "AmazonCounterfactualClassification (ja)": 58.58,
+ "AmazonReviewsClassification (en)": 47.82,
+ "AmazonReviewsClassification (de)": 30.12,
+ "AmazonReviewsClassification (es)": 36.78,
+ "AmazonReviewsClassification (fr)": 32.32,
+ "AmazonReviewsClassification (ja)": 22.65,
+ "AmazonReviewsClassification (zh)": 23.28,
+ "AngryTweetsClassification": 46.32,
+ "CBD": 52.74,
+ "DanishPoliticalCommentsClassification": 28.84,
+ "GeoreviewClassification": 30.9,
+ "HeadlineClassification": 41.74,
+ "InappropriatenessClassification": 53.27,
+ "KinopoiskClassification": 34.67,
+ "LccSentimentClassification": 38.07,
+ "MTOPDomainClassification (en)": 92.23,
+ "MTOPDomainClassification (de)": 77.51,
+ "MTOPDomainClassification (es)": 78.32,
+ "MTOPDomainClassification (fr)": 80.17,
+ "MTOPDomainClassification (hi)": 41.05,
+ "MTOPDomainClassification (th)": 16.63,
+ "MTOPIntentClassification (en)": 62.41,
+ "MTOPIntentClassification (de)": 47.19,
+ "MTOPIntentClassification (es)": 44.66,
+ "MTOPIntentClassification (fr)": 40.64,
+ "MTOPIntentClassification (hi)": 17.59,
+ "MTOPIntentClassification (th)": 4.73,
+ "MasakhaNEWSClassification (amh)": 33.91,
+ "MasakhaNEWSClassification (eng)": 77.48,
+ "MasakhaNEWSClassification (fra)": 75.36,
+ "MasakhaNEWSClassification (hau)": 62.34,
+ "MasakhaNEWSClassification (ibo)": 60.51,
+ "MasakhaNEWSClassification (lin)": 74.57,
+ "MasakhaNEWSClassification (lug)": 57.53,
+ "MasakhaNEWSClassification (orm)": 61.17,
+ "MasakhaNEWSClassification (pcm)": 91.9,
+ "MasakhaNEWSClassification (run)": 64.84,
+ "MasakhaNEWSClassification (sna)": 75.56,
+ "MasakhaNEWSClassification (som)": 52.24,
+ "MasakhaNEWSClassification (swa)": 57.71,
+ "MasakhaNEWSClassification (tir)": 24.15,
+ "MasakhaNEWSClassification (xho)": 63.84,
+ "MasakhaNEWSClassification (yor)": 70.58,
+ "MassiveIntentClassification (ta)": 11.31,
+ "MassiveIntentClassification (ml)": 2.79,
+ "MassiveIntentClassification (fi)": 41.47,
+ "MassiveIntentClassification (zh-TW)": 20.56,
+ "MassiveIntentClassification (hy)": 11.83,
+ "MassiveIntentClassification (es)": 46.34,
+ "MassiveIntentClassification (ms)": 40.24,
+ "MassiveIntentClassification (nl)": 42.8,
+ "MassiveIntentClassification (my)": 3.73,
+ "MassiveIntentClassification (he)": 20.9,
+ "MassiveIntentClassification (mn)": 26.6,
+ "MassiveIntentClassification (lv)": 41.59,
+ "MassiveIntentClassification (hu)": 39.35,
+ "MassiveIntentClassification (en)": 67.43,
+ "MassiveIntentClassification (ar)": 22.84,
+ "MassiveIntentClassification (hi)": 19.05,
+ "MassiveIntentClassification (sw)": 37.56,
+ "MassiveIntentClassification (it)": 45.97,
+ "MassiveIntentClassification (te)": 2.32,
+ "MassiveIntentClassification (jv)": 37.57,
+ "MassiveIntentClassification (pt)": 48.51,
+ "MassiveIntentClassification (sv)": 41.41,
+ "MassiveIntentClassification (bn)": 19.72,
+ "MassiveIntentClassification (fa)": 29.84,
+ "MassiveIntentClassification (sq)": 42.6,
+ "MassiveIntentClassification (ko)": 19.32,
+ "MassiveIntentClassification (az)": 37.86,
+ "MassiveIntentClassification (is)": 35.64,
+ "MassiveIntentClassification (el)": 33.89,
+ "MassiveIntentClassification (de)": 46.68,
+ "MassiveIntentClassification (km)": 4.65,
+ "MassiveIntentClassification (af)": 39.96,
+ "MassiveIntentClassification (cy)": 36.9,
+ "MassiveIntentClassification (nb)": 42.31,
+ "MassiveIntentClassification (ur)": 21.12,
+ "MassiveIntentClassification (ro)": 44.32,
+ "MassiveIntentClassification (da)": 44.0,
+ "MassiveIntentClassification (kn)": 3.43,
+ "MassiveIntentClassification (ka)": 11.85,
+ "MassiveIntentClassification (pl)": 40.34,
+ "MassiveIntentClassification (fr)": 45.88,
+ "MassiveIntentClassification (ja)": 33.31,
+ "MassiveIntentClassification (zh-CN)": 22.87,
+ "MassiveIntentClassification (ru)": 36.82,
+ "MassiveIntentClassification (id)": 41.48,
+ "MassiveIntentClassification (vi)": 34.44,
+ "MassiveIntentClassification (sl)": 40.43,
+ "MassiveIntentClassification (th)": 11.62,
+ "MassiveIntentClassification (tr)": 42.46,
+ "MassiveIntentClassification (tl)": 41.48,
+ "MassiveIntentClassification (am)": 2.57,
+ "MassiveScenarioClassification (jv)": 45.5,
+ "MassiveScenarioClassification (fa)": 33.21,
+ "MassiveScenarioClassification (en)": 72.73,
+ "MassiveScenarioClassification (te)": 7.51,
+ "MassiveScenarioClassification (mn)": 31.27,
+ "MassiveScenarioClassification (hy)": 17.6,
+ "MassiveScenarioClassification (sv)": 51.62,
+ "MassiveScenarioClassification (el)": 42.78,
+ "MassiveScenarioClassification (fr)": 56.43,
+ "MassiveScenarioClassification (ta)": 18.76,
+ "MassiveScenarioClassification (ro)": 53.73,
+ "MassiveScenarioClassification (ko)": 24.11,
+ "MassiveScenarioClassification (it)": 55.31,
+ "MassiveScenarioClassification (bn)": 25.07,
+ "MassiveScenarioClassification (ka)": 17.99,
+ "MassiveScenarioClassification (pt)": 56.15,
+ "MassiveScenarioClassification (sl)": 46.26,
+ "MassiveScenarioClassification (km)": 9.97,
+ "MassiveScenarioClassification (sw)": 45.06,
+ "MassiveScenarioClassification (hu)": 48.21,
+ "MassiveScenarioClassification (pl)": 49.18,
+ "MassiveScenarioClassification (de)": 57.53,
+ "MassiveScenarioClassification (kn)": 8.25,
+ "MassiveScenarioClassification (lv)": 45.8,
+ "MassiveScenarioClassification (ml)": 7.41,
+ "MassiveScenarioClassification (ar)": 29.99,
+ "MassiveScenarioClassification (zh-TW)": 29.47,
+ "MassiveScenarioClassification (ru)": 42.08,
+ "MassiveScenarioClassification (hi)": 24.44,
+ "MassiveScenarioClassification (fi)": 47.17,
+ "MassiveScenarioClassification (nl)": 51.53,
+ "MassiveScenarioClassification (zh-CN)": 32.08,
+ "MassiveScenarioClassification (tr)": 48.92,
+ "MassiveScenarioClassification (vi)": 40.52,
+ "MassiveScenarioClassification (ur)": 29.73,
+ "MassiveScenarioClassification (he)": 24.27,
+ "MassiveScenarioClassification (cy)": 42.85,
+ "MassiveScenarioClassification (am)": 7.48,
+ "MassiveScenarioClassification (ms)": 50.7,
+ "MassiveScenarioClassification (az)": 46.28,
+ "MassiveScenarioClassification (id)": 49.71,
+ "MassiveScenarioClassification (is)": 45.13,
+ "MassiveScenarioClassification (da)": 52.67,
+ "MassiveScenarioClassification (th)": 20.51,
+ "MassiveScenarioClassification (nb)": 50.89,
+ "MassiveScenarioClassification (tl)": 49.67,
+ "MassiveScenarioClassification (es)": 55.92,
+ "MassiveScenarioClassification (sq)": 50.57,
+ "MassiveScenarioClassification (ja)": 41.65,
+ "MassiveScenarioClassification (my)": 10.56,
+ "MassiveScenarioClassification (af)": 49.81,
+ "NoRecClassification": 41.2,
+ "NordicLangClassification": 60.79,
+ "PAC": 68.06,
+ "PolEmo2.0-IN": 42.48,
+ "PolEmo2.0-OUT": 21.05,
+ "RuReviewsClassification": 47.13,
+ "RuSciBenchGRNTIClassification": 20.87,
+ "RuSciBenchOECDClassification": 16.49,
+ "ToxicConversationsClassification": 65.87
+ }
+ ]
},
"Clustering": {
"v_measure": [
{
"Model": "e5-base-v2",
+ "AlloProfClusteringP2P": 58.55,
+ "AlloProfClusteringS2S": 36.35,
"BiorxivClusteringP2P": 37.12,
"BiorxivClusteringS2S": 33.41,
+ "BlurbsClusteringP2P": 29.09,
+ "BlurbsClusteringS2S": 12.41,
+ "GeoreviewClusteringP2P": 23.27,
+ "HALClusteringS2S": 22.33,
+ "MLSUMClusteringP2P (de)": 39.19,
+ "MLSUMClusteringP2P (fr)": 42.38,
+ "MLSUMClusteringP2P (ru)": 24.28,
+ "MLSUMClusteringP2P (es)": 42.72,
+ "MLSUMClusteringS2S (de)": 38.13,
+ "MLSUMClusteringS2S (fr)": 42.2,
+ "MLSUMClusteringS2S (ru)": 21.66,
+ "MLSUMClusteringS2S (es)": 42.13,
+ "MasakhaNEWSClusteringP2P (amh)": 40.74,
+ "MasakhaNEWSClusteringP2P (eng)": 53.59,
+ "MasakhaNEWSClusteringP2P (fra)": 50.56,
+ "MasakhaNEWSClusteringP2P (hau)": 44.87,
+ "MasakhaNEWSClusteringP2P (ibo)": 38.96,
+ "MasakhaNEWSClusteringP2P (lin)": 62.06,
+ "MasakhaNEWSClusteringP2P (lug)": 53.37,
+ "MasakhaNEWSClusteringP2P (orm)": 30.93,
+ "MasakhaNEWSClusteringP2P (pcm)": 70.3,
+ "MasakhaNEWSClusteringP2P (run)": 51.59,
+ "MasakhaNEWSClusteringP2P (sna)": 50.4,
+ "MasakhaNEWSClusteringP2P (som)": 32.39,
+ "MasakhaNEWSClusteringP2P (swa)": 24.14,
+ "MasakhaNEWSClusteringP2P (tir)": 43.19,
+ "MasakhaNEWSClusteringP2P (xho)": 31.76,
+ "MasakhaNEWSClusteringP2P (yor)": 42.67,
+ "MasakhaNEWSClusteringS2S (amh)": 41.94,
+ "MasakhaNEWSClusteringS2S (eng)": 53.89,
+ "MasakhaNEWSClusteringS2S (fra)": 47.26,
+ "MasakhaNEWSClusteringS2S (hau)": 24.73,
+ "MasakhaNEWSClusteringS2S (ibo)": 37.06,
+ "MasakhaNEWSClusteringS2S (lin)": 64.92,
+ "MasakhaNEWSClusteringS2S (lug)": 46.85,
+ "MasakhaNEWSClusteringS2S (orm)": 29.17,
+ "MasakhaNEWSClusteringS2S (pcm)": 63.68,
+ "MasakhaNEWSClusteringS2S (run)": 54.79,
+ "MasakhaNEWSClusteringS2S (sna)": 45.56,
+ "MasakhaNEWSClusteringS2S (som)": 28.23,
+ "MasakhaNEWSClusteringS2S (swa)": 9.13,
+ "MasakhaNEWSClusteringS2S (tir)": 48.64,
+ "MasakhaNEWSClusteringS2S (xho)": 27.14,
+ "MasakhaNEWSClusteringS2S (yor)": 34.75,
"MedrxivClusteringP2P": 31.82,
"MedrxivClusteringS2S": 29.68,
"RedditClustering": 56.54,
"RedditClusteringP2P": 63.23,
+ "RuSciBenchGRNTIClusteringP2P": 18.28,
+ "RuSciBenchOECDClusteringP2P": 16.16,
"StackExchangeClustering": 64.6,
"StackExchangeClusteringP2P": 33.02,
+ "TenKGnadClusteringP2P": 42.2,
+ "TenKGnadClusteringS2S": 24.86,
"TwentyNewsgroupsClustering": 49.86
}
]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "e5-base-v2",
+ "CDSC-E": 56.82,
+ "FalseFriendsGermanEnglish": 47.34,
+ "OpusparcusPC (de)": 91.87,
+ "OpusparcusPC (en)": 98.58,
+ "OpusparcusPC (fi)": 86.94,
+ "OpusparcusPC (fr)": 88.17,
+ "OpusparcusPC (ru)": 79.74,
+ "OpusparcusPC (sv)": 84.7,
+ "PSC": 96.95,
+ "PawsXPairClassification (de)": 50.76,
+ "PawsXPairClassification (en)": 57.62,
+ "PawsXPairClassification (es)": 52.74,
+ "PawsXPairClassification (fr)": 53.73,
+ "PawsXPairClassification (ja)": 48.4,
+ "PawsXPairClassification (ko)": 50.0,
+ "PawsXPairClassification (zh)": 52.5,
+ "SICK-E-PL": 50.72,
+ "SprintDuplicateQuestions": 94.26,
+ "TERRa": 48.81,
+ "TwitterURLCorpus": 86.66
+ },
+ {
+ "Model": "e5-base-v2",
+ "CDSC-E": 56.82,
+ "FalseFriendsGermanEnglish": 47.36,
+ "OpusparcusPC (de)": 91.87,
+ "OpusparcusPC (en)": 98.59,
+ "OpusparcusPC (fi)": 86.94,
+ "OpusparcusPC (fr)": 88.17,
+ "OpusparcusPC (ru)": 79.74,
+ "OpusparcusPC (sv)": 84.72,
+ "PSC": 96.95,
+ "PawsXPairClassification (de)": 50.99,
+ "PawsXPairClassification (en)": 57.62,
+ "PawsXPairClassification (es)": 52.76,
+ "PawsXPairClassification (fr)": 53.76,
+ "PawsXPairClassification (ja)": 48.61,
+ "PawsXPairClassification (ko)": 50.03,
+ "PawsXPairClassification (zh)": 52.63,
+ "SICK-E-PL": 50.72,
+ "SprintDuplicateQuestions": 94.26,
+ "TERRa": 49.06,
+ "TwitterURLCorpus": 86.66
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-base-v2",
+ "AlloprofReranking": 65.41,
+ "RuBQReranking": 45.35,
+ "SyntecReranking": 75.91,
+ "T2Reranking": 60.5
+ },
+ {
+ "Model": "e5-base-v2",
+ "MIRACLReranking (ar)": 8.62,
+ "MIRACLReranking (bn)": 8.23,
+ "MIRACLReranking (de)": 25.63,
+ "MIRACLReranking (en)": 57.65,
+ "MIRACLReranking (es)": 38.47,
+ "MIRACLReranking (fa)": 9.28,
+ "MIRACLReranking (fi)": 38.49,
+ "MIRACLReranking (fr)": 31.33,
+ "MIRACLReranking (hi)": 11.91,
+ "MIRACLReranking (id)": 24.01,
+ "MIRACLReranking (ja)": 14.12,
+ "MIRACLReranking (ko)": 9.68,
+ "MIRACLReranking (ru)": 16.0,
+ "MIRACLReranking (sw)": 33.36,
+ "MIRACLReranking (te)": 2.83,
+ "MIRACLReranking (th)": 3.61,
+ "MIRACLReranking (yo)": 54.71,
+ "MIRACLReranking (zh)": 12.9
+ }
+ ]
},
"Retrieval": {
- "ndcg_at_10": []
+ "ndcg_at_10": [
+ {
+ "Model": "e5-base-v2",
+ "AILACasedocs": 27.17,
+ "AILAStatutes": 19.61,
+ "ARCChallenge": 10.01,
+ "AlloprofRetrieval": 31.86,
+ "AlphaNLI": 21.73,
+ "AppsRetrieval": 11.52,
+ "ArguAna": 44.57,
+ "BSARDRetrieval": 11.7,
+ "CmedqaRetrieval": 3.49,
+ "CodeFeedbackMT": 41.56,
+ "CodeFeedbackST": 74.52,
+ "CodeSearchNetCCRetrieval (python)": 64.84,
+ "CodeSearchNetCCRetrieval (javascript)": 63.02,
+ "CodeSearchNetCCRetrieval (go)": 42.29,
+ "CodeSearchNetCCRetrieval (ruby)": 61.06,
+ "CodeSearchNetCCRetrieval (java)": 61.35,
+ "CodeSearchNetCCRetrieval (php)": 48.67,
+ "CodeSearchNetRetrieval (python)": 88.66,
+ "CodeSearchNetRetrieval (javascript)": 72.32,
+ "CodeSearchNetRetrieval (go)": 93.92,
+ "CodeSearchNetRetrieval (ruby)": 79.43,
+ "CodeSearchNetRetrieval (java)": 76.68,
+ "CodeSearchNetRetrieval (php)": 82.86,
+ "CodeTransOceanContest": 62.5,
+ "CodeTransOceanDL": 21.87,
+ "CosQA": 32.59,
+ "CovidRetrieval": 14.88,
+ "GerDaLIR": 1.66,
+ "GerDaLIRSmall": 4.09,
+ "GermanQuAD-Retrieval": 82.98,
+ "HellaSwag": 25.48,
+ "LEMBNarrativeQARetrieval": 25.32,
+ "LEMBQMSumRetrieval": 23.86,
+ "LEMBSummScreenFDRetrieval": 74.66,
+ "LEMBWikimQARetrieval": 55.97,
+ "LeCaRDv2": 20.53,
+ "LegalBenchConsumerContractsQA": 71.92,
+ "LegalBenchCorporateLobbying": 91.92,
+ "LegalQuAD": 22.32,
+ "LegalSummarization": 58.72,
+ "MIRACLRetrieval (ar)": 0.34,
+ "MIRACLRetrieval (bn)": 0.2,
+ "MIRACLRetrieval (de)": 14.8,
+ "MIRACLRetrieval (en)": 49.53,
+ "MIRACLRetrieval (es)": 25.84,
+ "MIRACLRetrieval (fa)": 0.26,
+ "MIRACLRetrieval (fi)": 23.1,
+ "MIRACLRetrieval (fr)": 19.91,
+ "MIRACLRetrieval (hi)": 1.02,
+ "MIRACLRetrieval (id)": 14.33,
+ "MIRACLRetrieval (ja)": 2.7,
+ "MIRACLRetrieval (ko)": 2.98,
+ "MIRACLRetrieval (ru)": 3.39,
+ "MIRACLRetrieval (sw)": 24.52,
+ "MIRACLRetrieval (te)": 0.08,
+ "MIRACLRetrieval (th)": 0.28,
+ "MIRACLRetrieval (yo)": 48.59,
+ "MIRACLRetrieval (zh)": 0.61,
+ "MintakaRetrieval (ar)": 6.1,
+ "MintakaRetrieval (de)": 23.04,
+ "MintakaRetrieval (es)": 21.95,
+ "MintakaRetrieval (fr)": 23.51,
+ "MintakaRetrieval (hi)": 7.02,
+ "MintakaRetrieval (it)": 20.37,
+ "MintakaRetrieval (ja)": 10.84,
+ "MintakaRetrieval (pt)": 23.03,
+ "PIQA": 27.71,
+ "Quail": 4.94,
+ "RARbCode": 54.47,
+ "RARbMath": 67.76,
+ "RiaNewsRetrieval": 14.82,
+ "RuBQRetrieval": 16.23,
+ "SCIDOCS": 18.68,
+ "SIQA": 4.37,
+ "SciFact-PL": 42.16,
+ "SpartQA": 7.84,
+ "StackOverflowQA": 87.85,
+ "SyntecRetrieval": 67.34,
+ "SyntheticText2SQL": 51.88,
+ "TRECCOVID": 69.63,
+ "TRECCOVID-PL": 20.13,
+ "TempReasonL1": 1.74,
+ "TempReasonL2Fact": 37.62,
+ "TempReasonL2Pure": 2.92,
+ "TempReasonL3Fact": 32.63,
+ "TempReasonL3Pure": 10.25,
+ "WinoGrande": 46.99,
+ "XMarket (de)": 14.27,
+ "XMarket (en)": 29.85,
+ "XMarket (es)": 17.87,
+ "XPQARetrieval (ara-ara)": 13.74,
+ "XPQARetrieval (eng-ara)": 4.02,
+ "XPQARetrieval (ara-eng)": 9.62,
+ "XPQARetrieval (deu-deu)": 56.5,
+ "XPQARetrieval (eng-deu)": 15.21,
+ "XPQARetrieval (deu-eng)": 29.85,
+ "XPQARetrieval (spa-spa)": 46.27,
+ "XPQARetrieval (eng-spa)": 13.06,
+ "XPQARetrieval (spa-eng)": 24.73,
+ "XPQARetrieval (fra-fra)": 53.68,
+ "XPQARetrieval (eng-fra)": 18.07,
+ "XPQARetrieval (fra-eng)": 30.23,
+ "XPQARetrieval (hin-hin)": 34.7,
+ "XPQARetrieval (eng-hin)": 8.68,
+ "XPQARetrieval (hin-eng)": 8.87,
+ "XPQARetrieval (ita-ita)": 57.76,
+ "XPQARetrieval (eng-ita)": 12.66,
+ "XPQARetrieval (ita-eng)": 26.19,
+ "XPQARetrieval (jpn-jpn)": 44.02,
+ "XPQARetrieval (eng-jpn)": 6.34,
+ "XPQARetrieval (jpn-eng)": 18.04,
+ "XPQARetrieval (kor-kor)": 16.11,
+ "XPQARetrieval (eng-kor)": 8.23,
+ "XPQARetrieval (kor-eng)": 7.56,
+ "XPQARetrieval (pol-pol)": 34.92,
+ "XPQARetrieval (eng-pol)": 13.06,
+ "XPQARetrieval (pol-eng)": 18.95,
+ "XPQARetrieval (por-por)": 37.84,
+ "XPQARetrieval (eng-por)": 10.58,
+ "XPQARetrieval (por-eng)": 23.87,
+ "XPQARetrieval (tam-tam)": 12.57,
+ "XPQARetrieval (eng-tam)": 4.59,
+ "XPQARetrieval (tam-eng)": 4.58,
+ "XPQARetrieval (cmn-cmn)": 25.63,
+ "XPQARetrieval (eng-cmn)": 7.63,
+ "XPQARetrieval (cmn-eng)": 11.54
+ }
+ ]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base-v2",
+ "CDSC-R": 85.88,
+ "GermanSTSBenchmark": 68.61,
+ "RUParaPhraserSTS": 54.96,
+ "RuSTSBenchmarkSTS": 64.35,
+ "SICK-R": 78.4,
+ "SICK-R-PL": 58.0,
+ "SICKFr": 68.97,
+ "STS12": 73.49,
+ "STS13": 83.0,
+ "STS14": 80.45,
+ "STS15": 88.18,
+ "STS17 (en-en)": 88.89,
+ "STS17 (nl-en)": 42.58,
+ "STS17 (en-tr)": -7.22,
+ "STS17 (es-en)": 44.73,
+ "STS17 (en-de)": 41.4,
+ "STS17 (fr-en)": 51.76,
+ "STS17 (es-es)": 80.35,
+ "STS17 (it-en)": 27.04,
+ "STS17 (ar-ar)": 54.21,
+ "STS17 (en-ar)": -6.22,
+ "STS17 (ko-ko)": 44.35,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (en)": 66.51,
+ "STS22 (zh)": 49.26,
+ "STS22 (ar)": 28.74,
+ "STS22 (fr)": 76.93,
+ "STS22 (de-fr)": 58.23,
+ "STS22 (pl)": 31.46,
+ "STS22 (es-en)": 65.16,
+ "STS22 (tr)": 52.41,
+ "STS22 (de-en)": 50.28,
+ "STS22 (ru)": 21.62,
+ "STS22 (de-pl)": 26.82,
+ "STS22 (it)": 68.41,
+ "STS22 (zh-en)": 37.18,
+ "STS22 (pl-en)": 61.96,
+ "STS22 (es-it)": 64.1,
+ "STS22 (de)": 29.17,
+ "STS22 (es)": 61.48,
+ "STSB": 34.59,
+ "STSBenchmark": 85.48,
+ "STSBenchmarkMultilingualSTS (en)": 85.48,
+ "STSBenchmarkMultilingualSTS (fr)": 71.25,
+ "STSBenchmarkMultilingualSTS (zh)": 35.53,
+ "STSBenchmarkMultilingualSTS (pl)": 63.06,
+ "STSBenchmarkMultilingualSTS (es)": 72.75,
+ "STSBenchmarkMultilingualSTS (nl)": 67.07,
+ "STSBenchmarkMultilingualSTS (pt)": 69.5,
+ "STSBenchmarkMultilingualSTS (ru)": 64.43,
+ "STSBenchmarkMultilingualSTS (it)": 69.5,
+ "STSBenchmarkMultilingualSTS (de)": 68.9
+ },
+ {
+ "Model": "e5-base-v2",
+ "CDSC-R": 85.88,
+ "GermanSTSBenchmark": 68.61,
+ "RUParaPhraserSTS": 54.96,
+ "RuSTSBenchmarkSTS": 64.35,
+ "SICK-R": 78.4,
+ "SICK-R-PL": 58.0,
+ "SICKFr": 68.97,
+ "STS12": 73.49,
+ "STS13": 83.0,
+ "STS14": 80.45,
+ "STS15": 88.18,
+ "STS17 (en-en)": 88.89,
+ "STS17 (nl-en)": 42.58,
+ "STS17 (en-tr)": -7.22,
+ "STS17 (es-en)": 44.73,
+ "STS17 (en-de)": 41.4,
+ "STS17 (fr-en)": 51.76,
+ "STS17 (es-es)": 80.35,
+ "STS17 (it-en)": 27.04,
+ "STS17 (ar-ar)": 54.21,
+ "STS17 (en-ar)": -6.22,
+ "STS17 (ko-ko)": 44.35,
+ "STS22 (fr-pl)": 50.71,
+ "STS22 (en)": 66.51,
+ "STS22 (zh)": 49.26,
+ "STS22 (ar)": 28.72,
+ "STS22 (fr)": 76.93,
+ "STS22 (de-fr)": 58.23,
+ "STS22 (pl)": 31.37,
+ "STS22 (es-en)": 65.16,
+ "STS22 (tr)": 52.41,
+ "STS22 (de-en)": 50.28,
+ "STS22 (ru)": 21.62,
+ "STS22 (de-pl)": 26.82,
+ "STS22 (it)": 68.41,
+ "STS22 (zh-en)": 37.18,
+ "STS22 (pl-en)": 61.96,
+ "STS22 (es-it)": 64.1,
+ "STS22 (de)": 29.16,
+ "STS22 (es)": 61.48,
+ "STSB": 34.59,
+ "STSBenchmark": 85.48,
+ "STSBenchmarkMultilingualSTS (en)": 85.48,
+ "STSBenchmarkMultilingualSTS (fr)": 71.25,
+ "STSBenchmarkMultilingualSTS (zh)": 35.52,
+ "STSBenchmarkMultilingualSTS (pl)": 63.06,
+ "STSBenchmarkMultilingualSTS (es)": 72.75,
+ "STSBenchmarkMultilingualSTS (nl)": 67.07,
+ "STSBenchmarkMultilingualSTS (pt)": 69.5,
+ "STSBenchmarkMultilingualSTS (ru)": 64.43,
+ "STSBenchmarkMultilingualSTS (it)": 69.5,
+ "STSBenchmarkMultilingualSTS (de)": 68.9
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-base-v2",
+ "SummEvalFr": 31.87
+ },
+ {
+ "Model": "e5-base-v2",
+ "SummEvalFr": 31.87
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-base-v2",
+ "CEDRClassification": 34.11,
+ "SensitiveTopicsClassification": 18.14
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": [
{
"Model": "e5-base-v2",
- "Core17InstructionRetrieval": -2.9,
- "News21InstructionRetrieval": -2.0,
- "Robust04InstructionRetrieval": -6.73
+ "Core17InstructionRetrieval": -2.92,
+ "News21InstructionRetrieval": -2.08,
+ "Robust04InstructionRetrieval": -6.81
}
]
}
@@ -8221,7 +22017,119 @@
"f1": [
{
"Model": "e5-large",
- "BornholmBitextMining": 40.15
+ "BornholmBitextMining": 40.15,
+ "Tatoeba (pam-eng)": 6.43,
+ "Tatoeba (kab-eng)": 1.32,
+ "Tatoeba (tam-eng)": 0.42,
+ "Tatoeba (cmn-eng)": 2.83,
+ "Tatoeba (xho-eng)": 2.65,
+ "Tatoeba (ita-eng)": 29.93,
+ "Tatoeba (vie-eng)": 7.01,
+ "Tatoeba (fry-eng)": 19.73,
+ "Tatoeba (wuu-eng)": 1.46,
+ "Tatoeba (hin-eng)": 0.0,
+ "Tatoeba (bel-eng)": 1.67,
+ "Tatoeba (est-eng)": 3.57,
+ "Tatoeba (gla-eng)": 3.37,
+ "Tatoeba (kzj-eng)": 5.68,
+ "Tatoeba (srp-eng)": 4.41,
+ "Tatoeba (uzb-eng)": 3.84,
+ "Tatoeba (nds-eng)": 20.18,
+ "Tatoeba (ber-eng)": 5.52,
+ "Tatoeba (fra-eng)": 44.31,
+ "Tatoeba (slk-eng)": 8.08,
+ "Tatoeba (lvs-eng)": 5.02,
+ "Tatoeba (amh-eng)": 0.08,
+ "Tatoeba (kaz-eng)": 0.95,
+ "Tatoeba (ina-eng)": 45.6,
+ "Tatoeba (dan-eng)": 18.87,
+ "Tatoeba (ell-eng)": 0.6,
+ "Tatoeba (lat-eng)": 13.19,
+ "Tatoeba (kur-eng)": 10.07,
+ "Tatoeba (deu-eng)": 39.83,
+ "Tatoeba (zsm-eng)": 10.44,
+ "Tatoeba (ang-eng)": 15.06,
+ "Tatoeba (swh-eng)": 6.72,
+ "Tatoeba (heb-eng)": 0.47,
+ "Tatoeba (nob-eng)": 19.22,
+ "Tatoeba (hrv-eng)": 11.06,
+ "Tatoeba (aze-eng)": 5.52,
+ "Tatoeba (csb-eng)": 11.53,
+ "Tatoeba (tha-eng)": 1.4,
+ "Tatoeba (nno-eng)": 13.17,
+ "Tatoeba (rus-eng)": 0.33,
+ "Tatoeba (fin-eng)": 6.29,
+ "Tatoeba (slv-eng)": 9.23,
+ "Tatoeba (uig-eng)": 0.6,
+ "Tatoeba (gle-eng)": 3.77,
+ "Tatoeba (ces-eng)": 6.27,
+ "Tatoeba (isl-eng)": 6.28,
+ "Tatoeba (mar-eng)": 0.14,
+ "Tatoeba (swe-eng)": 15.61,
+ "Tatoeba (cor-eng)": 2.95,
+ "Tatoeba (ceb-eng)": 7.57,
+ "Tatoeba (arq-eng)": 0.61,
+ "Tatoeba (bre-eng)": 4.9,
+ "Tatoeba (tat-eng)": 1.18,
+ "Tatoeba (hun-eng)": 6.26,
+ "Tatoeba (jpn-eng)": 0.74,
+ "Tatoeba (bos-eng)": 13.26,
+ "Tatoeba (yid-eng)": 0.31,
+ "Tatoeba (kat-eng)": 0.59,
+ "Tatoeba (pms-eng)": 16.27,
+ "Tatoeba (dsb-eng)": 7.25,
+ "Tatoeba (dtp-eng)": 3.64,
+ "Tatoeba (mkd-eng)": 0.22,
+ "Tatoeba (hye-eng)": 0.4,
+ "Tatoeba (tzl-eng)": 22.2,
+ "Tatoeba (lit-eng)": 3.4,
+ "Tatoeba (ben-eng)": 0.0,
+ "Tatoeba (ile-eng)": 34.31,
+ "Tatoeba (lfn-eng)": 24.09,
+ "Tatoeba (max-eng)": 12.62,
+ "Tatoeba (tel-eng)": 0.43,
+ "Tatoeba (swg-eng)": 16.76,
+ "Tatoeba (oci-eng)": 16.23,
+ "Tatoeba (fao-eng)": 10.97,
+ "Tatoeba (ara-eng)": 0.31,
+ "Tatoeba (ind-eng)": 9.53,
+ "Tatoeba (cbk-eng)": 24.06,
+ "Tatoeba (tuk-eng)": 4.12,
+ "Tatoeba (ido-eng)": 26.18,
+ "Tatoeba (spa-eng)": 40.6,
+ "Tatoeba (afr-eng)": 11.53,
+ "Tatoeba (mhr-eng)": 0.01,
+ "Tatoeba (jav-eng)": 7.79,
+ "Tatoeba (cym-eng)": 8.04,
+ "Tatoeba (mon-eng)": 2.03,
+ "Tatoeba (bul-eng)": 0.5,
+ "Tatoeba (yue-eng)": 1.76,
+ "Tatoeba (ukr-eng)": 0.83,
+ "Tatoeba (eus-eng)": 8.28,
+ "Tatoeba (urd-eng)": 0.11,
+ "Tatoeba (tgl-eng)": 6.7,
+ "Tatoeba (ast-eng)": 25.64,
+ "Tatoeba (mal-eng)": 0.3,
+ "Tatoeba (tur-eng)": 6.06,
+ "Tatoeba (sqi-eng)": 7.94,
+ "Tatoeba (khm-eng)": 0.42,
+ "Tatoeba (gsw-eng)": 17.47,
+ "Tatoeba (por-eng)": 38.89,
+ "Tatoeba (kor-eng)": 1.44,
+ "Tatoeba (arz-eng)": 0.21,
+ "Tatoeba (epo-eng)": 17.29,
+ "Tatoeba (nld-eng)": 22.82,
+ "Tatoeba (orv-eng)": 0.1,
+ "Tatoeba (hsb-eng)": 6.93,
+ "Tatoeba (nov-eng)": 34.03,
+ "Tatoeba (glg-eng)": 29.58,
+ "Tatoeba (cha-eng)": 20.74,
+ "Tatoeba (ron-eng)": 14.73,
+ "Tatoeba (cat-eng)": 25.7,
+ "Tatoeba (pol-eng)": 8.06,
+ "Tatoeba (awa-eng)": 0.76,
+ "Tatoeba (war-eng)": 7.61,
+ "Tatoeba (pes-eng)": 0.8
}
]
},
@@ -8229,89 +22137,1248 @@
"accuracy": [
{
"Model": "e5-large",
+ "AllegroReviews": 25.75,
+ "AmazonCounterfactualClassification (en-ext)": 76.67,
+ "AmazonCounterfactualClassification (en)": 75.07,
+ "AmazonCounterfactualClassification (de)": 53.79,
+ "AmazonCounterfactualClassification (ja)": 57.11,
+ "AmazonReviewsClassification (en)": 41.52,
+ "AmazonReviewsClassification (de)": 29.28,
+ "AmazonReviewsClassification (es)": 34.19,
+ "AmazonReviewsClassification (fr)": 32.4,
+ "AmazonReviewsClassification (ja)": 22.62,
+ "AmazonReviewsClassification (zh)": 23.0,
"AngryTweetsClassification": 46.14,
+ "CBD": 47.88,
"DKHateClassification": 58.72,
"DanishPoliticalCommentsClassification": 28.67,
+ "GeoreviewClassification": 28.65,
+ "HeadlineClassification": 29.77,
+ "InappropriatenessClassification": 52.5,
+ "KinopoiskClassification": 35.86,
"LccSentimentClassification": 42.13,
- "MassiveIntentClassification (da)": 42.29,
- "MassiveIntentClassification (nb)": 40.63,
+ "MTOPDomainClassification (en)": 92.07,
+ "MTOPDomainClassification (de)": 76.63,
+ "MTOPDomainClassification (es)": 80.4,
+ "MTOPDomainClassification (fr)": 78.82,
+ "MTOPDomainClassification (hi)": 39.36,
+ "MTOPDomainClassification (th)": 15.72,
+ "MTOPIntentClassification (en)": 61.22,
+ "MTOPIntentClassification (de)": 43.21,
+ "MTOPIntentClassification (es)": 42.85,
+ "MTOPIntentClassification (fr)": 37.12,
+ "MTOPIntentClassification (hi)": 16.58,
+ "MTOPIntentClassification (th)": 4.55,
+ "MasakhaNEWSClassification (amh)": 34.97,
+ "MasakhaNEWSClassification (eng)": 79.34,
+ "MasakhaNEWSClassification (fra)": 77.49,
+ "MasakhaNEWSClassification (hau)": 67.19,
+ "MasakhaNEWSClassification (ibo)": 59.51,
+ "MasakhaNEWSClassification (lin)": 73.66,
+ "MasakhaNEWSClassification (lug)": 60.45,
+ "MasakhaNEWSClassification (orm)": 64.31,
+ "MasakhaNEWSClassification (pcm)": 90.85,
+ "MasakhaNEWSClassification (run)": 66.15,
+ "MasakhaNEWSClassification (sna)": 74.77,
+ "MasakhaNEWSClassification (som)": 51.09,
+ "MasakhaNEWSClassification (swa)": 57.31,
+ "MasakhaNEWSClassification (tir)": 24.52,
+ "MasakhaNEWSClassification (xho)": 64.65,
+ "MasakhaNEWSClassification (yor)": 70.56,
+ "MassiveIntentClassification (bn)": 19.59,
+ "MassiveIntentClassification (lv)": 41.56,
+ "MassiveIntentClassification (ko)": 19.9,
+ "MassiveIntentClassification (fr)": 47.2,
+ "MassiveIntentClassification (zh-CN)": 19.53,
+ "MassiveIntentClassification (af)": 38.87,
"MassiveIntentClassification (sv)": 40.69,
- "MassiveScenarioClassification (da)": 52.95,
+ "MassiveIntentClassification (ml)": 2.4,
+ "MassiveIntentClassification (tr)": 40.61,
+ "MassiveIntentClassification (ro)": 42.2,
+ "MassiveIntentClassification (ar)": 19.0,
+ "MassiveIntentClassification (az)": 39.86,
+ "MassiveIntentClassification (ka)": 10.95,
+ "MassiveIntentClassification (km)": 4.71,
+ "MassiveIntentClassification (ur)": 13.5,
+ "MassiveIntentClassification (kn)": 2.8,
+ "MassiveIntentClassification (mn)": 18.67,
+ "MassiveIntentClassification (es)": 45.1,
+ "MassiveIntentClassification (hu)": 38.79,
+ "MassiveIntentClassification (is)": 35.21,
+ "MassiveIntentClassification (am)": 2.55,
+ "MassiveIntentClassification (jv)": 38.55,
+ "MassiveIntentClassification (th)": 11.37,
+ "MassiveIntentClassification (pl)": 39.06,
+ "MassiveIntentClassification (hi)": 17.3,
+ "MassiveIntentClassification (nl)": 40.28,
+ "MassiveIntentClassification (he)": 19.98,
+ "MassiveIntentClassification (sl)": 40.13,
+ "MassiveIntentClassification (ta)": 10.87,
+ "MassiveIntentClassification (cy)": 36.04,
+ "MassiveIntentClassification (it)": 45.25,
+ "MassiveIntentClassification (te)": 2.58,
+ "MassiveIntentClassification (vi)": 37.57,
+ "MassiveIntentClassification (nb)": 40.63,
+ "MassiveIntentClassification (sq)": 42.14,
+ "MassiveIntentClassification (my)": 3.72,
+ "MassiveIntentClassification (de)": 44.57,
+ "MassiveIntentClassification (ms)": 38.31,
+ "MassiveIntentClassification (pt)": 46.8,
+ "MassiveIntentClassification (en)": 67.77,
+ "MassiveIntentClassification (fa)": 25.28,
+ "MassiveIntentClassification (sw)": 39.86,
+ "MassiveIntentClassification (ja)": 25.24,
+ "MassiveIntentClassification (id)": 41.65,
+ "MassiveIntentClassification (zh-TW)": 19.52,
+ "MassiveIntentClassification (ru)": 22.51,
+ "MassiveIntentClassification (tl)": 40.76,
+ "MassiveIntentClassification (el)": 25.9,
+ "MassiveIntentClassification (fi)": 40.87,
+ "MassiveIntentClassification (hy)": 9.33,
+ "MassiveIntentClassification (da)": 42.29,
+ "MassiveScenarioClassification (km)": 8.73,
+ "MassiveScenarioClassification (hu)": 46.67,
+ "MassiveScenarioClassification (lv)": 47.37,
+ "MassiveScenarioClassification (ml)": 6.8,
+ "MassiveScenarioClassification (vi)": 42.45,
+ "MassiveScenarioClassification (fr)": 57.57,
+ "MassiveScenarioClassification (tl)": 52.61,
+ "MassiveScenarioClassification (my)": 10.65,
"MassiveScenarioClassification (nb)": 51.91,
+ "MassiveScenarioClassification (fa)": 30.19,
+ "MassiveScenarioClassification (fi)": 48.25,
+ "MassiveScenarioClassification (ro)": 53.29,
+ "MassiveScenarioClassification (cy)": 44.51,
+ "MassiveScenarioClassification (is)": 45.03,
+ "MassiveScenarioClassification (ms)": 49.02,
+ "MassiveScenarioClassification (am)": 7.76,
+ "MassiveScenarioClassification (ru)": 26.94,
+ "MassiveScenarioClassification (az)": 47.25,
+ "MassiveScenarioClassification (hi)": 22.69,
+ "MassiveScenarioClassification (da)": 52.95,
+ "MassiveScenarioClassification (ja)": 30.28,
+ "MassiveScenarioClassification (en)": 71.99,
"MassiveScenarioClassification (sv)": 50.97,
+ "MassiveScenarioClassification (ur)": 20.58,
+ "MassiveScenarioClassification (el)": 35.32,
+ "MassiveScenarioClassification (es)": 55.79,
+ "MassiveScenarioClassification (pt)": 57.43,
+ "MassiveScenarioClassification (th)": 20.16,
+ "MassiveScenarioClassification (zh-TW)": 27.76,
+ "MassiveScenarioClassification (ar)": 27.2,
+ "MassiveScenarioClassification (ka)": 17.42,
+ "MassiveScenarioClassification (sw)": 46.97,
+ "MassiveScenarioClassification (ta)": 17.1,
+ "MassiveScenarioClassification (id)": 48.89,
+ "MassiveScenarioClassification (sl)": 47.11,
+ "MassiveScenarioClassification (kn)": 7.98,
+ "MassiveScenarioClassification (ko)": 27.82,
+ "MassiveScenarioClassification (af)": 49.65,
+ "MassiveScenarioClassification (hy)": 16.0,
+ "MassiveScenarioClassification (he)": 23.14,
+ "MassiveScenarioClassification (te)": 7.01,
+ "MassiveScenarioClassification (bn)": 26.41,
+ "MassiveScenarioClassification (sq)": 51.61,
+ "MassiveScenarioClassification (it)": 55.68,
+ "MassiveScenarioClassification (tr)": 48.86,
+ "MassiveScenarioClassification (zh-CN)": 28.01,
+ "MassiveScenarioClassification (de)": 59.26,
+ "MassiveScenarioClassification (nl)": 52.19,
+ "MassiveScenarioClassification (mn)": 26.77,
+ "MassiveScenarioClassification (pl)": 48.0,
+ "MassiveScenarioClassification (jv)": 47.57,
"NoRecClassification": 41.83,
"NordicLangClassification": 58.3,
"NorwegianParliament": 57.26,
+ "PAC": 67.56,
+ "PolEmo2.0-IN": 43.98,
+ "PolEmo2.0-OUT": 23.54,
+ "RuReviewsClassification": 43.2,
+ "RuSciBenchGRNTIClassification": 14.83,
+ "RuSciBenchOECDClassification": 11.64,
"ScalaDaClassification": 49.9,
"ScalaNbClassification": 50.13
}
]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "e5-large",
+ "AlloProfClusteringP2P": 58.4,
+ "AlloProfClusteringS2S": 37.22,
+ "BlurbsClusteringP2P": 31.08,
+ "BlurbsClusteringS2S": 11.45,
+ "GeoreviewClusteringP2P": 18.26,
+ "HALClusteringS2S": 23.44,
+ "MLSUMClusteringP2P (de)": 36.9,
+ "MLSUMClusteringP2P (fr)": 42.92,
+ "MLSUMClusteringP2P (ru)": 21.22,
+ "MLSUMClusteringP2P (es)": 42.73,
+ "MLSUMClusteringS2S (de)": 39.73,
+ "MLSUMClusteringS2S (fr)": 42.99,
+ "MLSUMClusteringS2S (ru)": 21.36,
+ "MLSUMClusteringS2S (es)": 43.02,
+ "MasakhaNEWSClusteringP2P (amh)": 41.03,
+ "MasakhaNEWSClusteringP2P (eng)": 56.6,
+ "MasakhaNEWSClusteringP2P (fra)": 59.73,
+ "MasakhaNEWSClusteringP2P (hau)": 32.38,
+ "MasakhaNEWSClusteringP2P (ibo)": 35.88,
+ "MasakhaNEWSClusteringP2P (lin)": 46.97,
+ "MasakhaNEWSClusteringP2P (lug)": 61.55,
+ "MasakhaNEWSClusteringP2P (orm)": 33.01,
+ "MasakhaNEWSClusteringP2P (pcm)": 74.24,
+ "MasakhaNEWSClusteringP2P (run)": 55.65,
+ "MasakhaNEWSClusteringP2P (sna)": 48.59,
+ "MasakhaNEWSClusteringP2P (som)": 38.98,
+ "MasakhaNEWSClusteringP2P (swa)": 25.93,
+ "MasakhaNEWSClusteringP2P (tir)": 43.52,
+ "MasakhaNEWSClusteringP2P (xho)": 29.18,
+ "MasakhaNEWSClusteringP2P (yor)": 30.58,
+ "MasakhaNEWSClusteringS2S (amh)": 45.44,
+ "MasakhaNEWSClusteringS2S (eng)": 51.0,
+ "MasakhaNEWSClusteringS2S (fra)": 33.53,
+ "MasakhaNEWSClusteringS2S (hau)": 16.31,
+ "MasakhaNEWSClusteringS2S (ibo)": 42.6,
+ "MasakhaNEWSClusteringS2S (lin)": 41.71,
+ "MasakhaNEWSClusteringS2S (lug)": 45.92,
+ "MasakhaNEWSClusteringS2S (orm)": 24.84,
+ "MasakhaNEWSClusteringS2S (pcm)": 65.43,
+ "MasakhaNEWSClusteringS2S (run)": 45.69,
+ "MasakhaNEWSClusteringS2S (sna)": 44.39,
+ "MasakhaNEWSClusteringS2S (som)": 29.42,
+ "MasakhaNEWSClusteringS2S (swa)": 17.13,
+ "MasakhaNEWSClusteringS2S (tir)": 44.01,
+ "MasakhaNEWSClusteringS2S (xho)": 31.38,
+ "MasakhaNEWSClusteringS2S (yor)": 40.1,
+ "RuSciBenchGRNTIClusteringP2P": 14.85,
+ "RuSciBenchOECDClusteringP2P": 13.66,
+ "TenKGnadClusteringP2P": 43.22,
+ "TenKGnadClusteringS2S": 21.2
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "e5-large",
+ "CDSC-E": 69.58,
+ "FalseFriendsGermanEnglish": 47.55,
+ "OpusparcusPC (de)": 91.79,
+ "OpusparcusPC (en)": 98.74,
+ "OpusparcusPC (fi)": 86.87,
+ "OpusparcusPC (fr)": 88.26,
+ "OpusparcusPC (ru)": 81.03,
+ "OpusparcusPC (sv)": 84.1,
+ "PSC": 96.15,
+ "PawsXPairClassification (de)": 51.97,
+ "PawsXPairClassification (en)": 64.65,
+ "PawsXPairClassification (es)": 53.65,
+ "PawsXPairClassification (fr)": 55.63,
+ "PawsXPairClassification (ja)": 48.78,
+ "PawsXPairClassification (ko)": 51.23,
+ "PawsXPairClassification (zh)": 52.84,
+ "SICK-E-PL": 56.09,
+ "TERRa": 47.37
+ },
+ {
+ "Model": "e5-large",
+ "CDSC-E": 69.58,
+ "FalseFriendsGermanEnglish": 47.55,
+ "OpusparcusPC (de)": 91.79,
+ "OpusparcusPC (en)": 98.74,
+ "OpusparcusPC (fi)": 86.89,
+ "OpusparcusPC (fr)": 88.26,
+ "OpusparcusPC (ru)": 81.05,
+ "OpusparcusPC (sv)": 84.1,
+ "PSC": 96.15,
+ "PawsXPairClassification (de)": 52.46,
+ "PawsXPairClassification (en)": 64.66,
+ "PawsXPairClassification (es)": 53.65,
+ "PawsXPairClassification (fr)": 55.72,
+ "PawsXPairClassification (ja)": 48.92,
+ "PawsXPairClassification (ko)": 51.36,
+ "PawsXPairClassification (zh)": 52.93,
+ "SICK-E-PL": 56.09,
+ "TERRa": 47.37
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-large",
+ "AlloprofReranking": 61.93,
+ "RuBQReranking": 30.83,
+ "SyntecReranking": 75.95,
+ "T2Reranking": 59.72
+ },
+ {
+ "Model": "e5-large",
+ "MIRACLReranking (ar)": 6.68,
+ "MIRACLReranking (bn)": 9.74,
+ "MIRACLReranking (de)": 27.94,
+ "MIRACLReranking (en)": 55.79,
+ "MIRACLReranking (es)": 44.12,
+ "MIRACLReranking (fa)": 8.37,
+ "MIRACLReranking (fi)": 42.68,
+ "MIRACLReranking (fr)": 37.76,
+ "MIRACLReranking (hi)": 8.17,
+ "MIRACLReranking (id)": 28.76,
+ "MIRACLReranking (ja)": 10.84,
+ "MIRACLReranking (ko)": 13.09,
+ "MIRACLReranking (ru)": 10.52,
+ "MIRACLReranking (sw)": 34.27,
+ "MIRACLReranking (te)": 1.19,
+ "MIRACLReranking (th)": 3.31,
+ "MIRACLReranking (yo)": 58.1,
+ "MIRACLReranking (zh)": 10.4
+ }
+ ]
},
"Retrieval": {
- "ndcg_at_10": []
+ "ndcg_at_10": [
+ {
+ "Model": "e5-large",
+ "AILACasedocs": 26.91,
+ "AILAStatutes": 22.9,
+ "ARCChallenge": 8.01,
+ "AlloprofRetrieval": 33.73,
+ "AlphaNLI": 13.3,
+ "AppsRetrieval": 9.18,
+ "BSARDRetrieval": 11.87,
+ "CmedqaRetrieval": 2.58,
+ "CodeFeedbackMT": 45.83,
+ "CodeFeedbackST": 73.29,
+ "CodeSearchNetCCRetrieval (python)": 63.21,
+ "CodeSearchNetCCRetrieval (javascript)": 60.51,
+ "CodeSearchNetCCRetrieval (go)": 42.42,
+ "CodeSearchNetCCRetrieval (ruby)": 57.91,
+ "CodeSearchNetCCRetrieval (java)": 55.47,
+ "CodeSearchNetCCRetrieval (php)": 44.14,
+ "CodeSearchNetRetrieval (python)": 83.25,
+ "CodeSearchNetRetrieval (javascript)": 64.28,
+ "CodeSearchNetRetrieval (go)": 80.19,
+ "CodeSearchNetRetrieval (ruby)": 74.52,
+ "CodeSearchNetRetrieval (java)": 75.38,
+ "CodeSearchNetRetrieval (php)": 72.31,
+ "CodeTransOceanContest": 63.15,
+ "CodeTransOceanDL": 31.77,
+ "CosQA": 32.86,
+ "CovidRetrieval": 4.01,
+ "GerDaLIR": 3.65,
+ "GerDaLIRSmall": 8.26,
+ "GermanQuAD-Retrieval": 85.05,
+ "HellaSwag": 26.48,
+ "LEMBNarrativeQARetrieval": 23.45,
+ "LEMBQMSumRetrieval": 22.61,
+ "LEMBSummScreenFDRetrieval": 73.65,
+ "LEMBWikimQARetrieval": 47.74,
+ "LeCaRDv2": 19.56,
+ "LegalBenchConsumerContractsQA": 75.67,
+ "LegalBenchCorporateLobbying": 91.61,
+ "LegalQuAD": 27.15,
+ "LegalSummarization": 57.9,
+ "MIRACLRetrieval (ar)": 0.15,
+ "MIRACLRetrieval (bn)": 0.34,
+ "MIRACLRetrieval (de)": 19.7,
+ "MIRACLRetrieval (en)": 47.81,
+ "MIRACLRetrieval (es)": 32.6,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 24.84,
+ "MIRACLRetrieval (fr)": 31.33,
+ "MIRACLRetrieval (hi)": 0.34,
+ "MIRACLRetrieval (id)": 18.93,
+ "MIRACLRetrieval (ja)": 0.77,
+ "MIRACLRetrieval (ko)": 4.79,
+ "MIRACLRetrieval (ru)": 1.49,
+ "MIRACLRetrieval (sw)": 27.95,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.26,
+ "MIRACLRetrieval (yo)": 56.96,
+ "MIRACLRetrieval (zh)": 0.14,
+ "MintakaRetrieval (ar)": 3.13,
+ "MintakaRetrieval (de)": 20.81,
+ "MintakaRetrieval (es)": 20.17,
+ "MintakaRetrieval (fr)": 22.39,
+ "MintakaRetrieval (hi)": 3.41,
+ "MintakaRetrieval (it)": 18.62,
+ "MintakaRetrieval (ja)": 6.35,
+ "MintakaRetrieval (pt)": 18.68,
+ "PIQA": 25.21,
+ "Quail": 4.96,
+ "RARbCode": 44.8,
+ "RARbMath": 72.81,
+ "RiaNewsRetrieval": 4.39,
+ "RuBQRetrieval": 4.69,
+ "SIQA": 2.86,
+ "SciFact-PL": 44.95,
+ "SpartQA": 1.75,
+ "StackOverflowQA": 82.85,
+ "SyntecRetrieval": 70.86,
+ "SyntheticText2SQL": 50.85,
+ "TRECCOVID-PL": 35.81,
+ "TempReasonL1": 1.74,
+ "TempReasonL2Fact": 33.63,
+ "TempReasonL2Pure": 1.25,
+ "TempReasonL3Fact": 27.44,
+ "TempReasonL3Pure": 7.58,
+ "WinoGrande": 51.0,
+ "XMarket (de)": 16.74,
+ "XMarket (en)": 32.99,
+ "XMarket (es)": 19.63,
+ "XPQARetrieval (ara-ara)": 9.6,
+ "XPQARetrieval (eng-ara)": 4.13,
+ "XPQARetrieval (ara-eng)": 8.82,
+ "XPQARetrieval (deu-deu)": 58.49,
+ "XPQARetrieval (eng-deu)": 15.55,
+ "XPQARetrieval (deu-eng)": 31.58,
+ "XPQARetrieval (spa-spa)": 44.1,
+ "XPQARetrieval (eng-spa)": 16.48,
+ "XPQARetrieval (spa-eng)": 26.66,
+ "XPQARetrieval (fra-fra)": 50.79,
+ "XPQARetrieval (eng-fra)": 19.38,
+ "XPQARetrieval (fra-eng)": 32.08,
+ "XPQARetrieval (hin-hin)": 32.52,
+ "XPQARetrieval (eng-hin)": 6.95,
+ "XPQARetrieval (hin-eng)": 6.86,
+ "XPQARetrieval (ita-ita)": 56.26,
+ "XPQARetrieval (eng-ita)": 13.54,
+ "XPQARetrieval (ita-eng)": 29.12,
+ "XPQARetrieval (jpn-jpn)": 34.82,
+ "XPQARetrieval (eng-jpn)": 6.61,
+ "XPQARetrieval (jpn-eng)": 17.54,
+ "XPQARetrieval (kor-kor)": 10.57,
+ "XPQARetrieval (eng-kor)": 10.03,
+ "XPQARetrieval (kor-eng)": 7.86,
+ "XPQARetrieval (pol-pol)": 29.41,
+ "XPQARetrieval (eng-pol)": 11.89,
+ "XPQARetrieval (pol-eng)": 17.28,
+ "XPQARetrieval (por-por)": 36.71,
+ "XPQARetrieval (eng-por)": 13.2,
+ "XPQARetrieval (por-eng)": 24.48,
+ "XPQARetrieval (tam-tam)": 6.6,
+ "XPQARetrieval (eng-tam)": 4.91,
+ "XPQARetrieval (tam-eng)": 3.67,
+ "XPQARetrieval (cmn-cmn)": 23.41,
+ "XPQARetrieval (eng-cmn)": 7.33,
+ "XPQARetrieval (cmn-eng)": 15.24
+ }
+ ]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-large",
+ "CDSC-R": 84.14,
+ "GermanSTSBenchmark": 67.39,
+ "RUParaPhraserSTS": 46.38,
+ "RuSTSBenchmarkSTS": 53.27,
+ "SICK-R-PL": 58.82,
+ "SICKFr": 70.03,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (de)": 44.63,
+ "STS22 (it)": 71.03,
+ "STS22 (es)": 61.75,
+ "STS22 (pl-en)": 64.87,
+ "STS22 (es-en)": 68.92,
+ "STS22 (de-fr)": 54.29,
+ "STS22 (de-en)": 54.9,
+ "STS22 (ar)": 37.03,
+ "STS22 (tr)": 55.9,
+ "STS22 (ru)": 21.54,
+ "STS22 (es-it)": 63.53,
+ "STS22 (pl)": 32.97,
+ "STS22 (en)": 65.65,
+ "STS22 (zh)": 44.9,
+ "STS22 (fr)": 81.45,
+ "STS22 (de-pl)": 40.53,
+ "STS22 (zh-en)": 35.44,
+ "STSB": 37.53,
+ "STSBenchmarkMultilingualSTS (pl)": 61.52,
+ "STSBenchmarkMultilingualSTS (pt)": 73.29,
+ "STSBenchmarkMultilingualSTS (it)": 69.59,
+ "STSBenchmarkMultilingualSTS (es)": 74.87,
+ "STSBenchmarkMultilingualSTS (de)": 67.25,
+ "STSBenchmarkMultilingualSTS (zh)": 39.93,
+ "STSBenchmarkMultilingualSTS (nl)": 66.31,
+ "STSBenchmarkMultilingualSTS (en)": 87.15,
+ "STSBenchmarkMultilingualSTS (fr)": 72.65,
+ "STSBenchmarkMultilingualSTS (ru)": 53.29
+ },
+ {
+ "Model": "e5-large",
+ "CDSC-R": 84.14,
+ "GermanSTSBenchmark": 67.39,
+ "RUParaPhraserSTS": 46.38,
+ "RuSTSBenchmarkSTS": 53.27,
+ "SICK-R-PL": 58.82,
+ "SICKFr": 70.03,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (de)": 44.63,
+ "STS22 (it)": 71.03,
+ "STS22 (es)": 61.75,
+ "STS22 (pl-en)": 64.87,
+ "STS22 (es-en)": 68.92,
+ "STS22 (de-fr)": 54.29,
+ "STS22 (de-en)": 54.9,
+ "STS22 (ar)": 37.01,
+ "STS22 (tr)": 55.9,
+ "STS22 (ru)": 21.54,
+ "STS22 (es-it)": 63.53,
+ "STS22 (pl)": 32.96,
+ "STS22 (en)": 65.65,
+ "STS22 (zh)": 44.9,
+ "STS22 (fr)": 81.45,
+ "STS22 (de-pl)": 40.53,
+ "STS22 (zh-en)": 35.44,
+ "STSB": 37.53,
+ "STSBenchmarkMultilingualSTS (pl)": 61.52,
+ "STSBenchmarkMultilingualSTS (pt)": 73.29,
+ "STSBenchmarkMultilingualSTS (it)": 69.59,
+ "STSBenchmarkMultilingualSTS (es)": 74.87,
+ "STSBenchmarkMultilingualSTS (de)": 67.25,
+ "STSBenchmarkMultilingualSTS (zh)": 39.93,
+ "STSBenchmarkMultilingualSTS (nl)": 66.31,
+ "STSBenchmarkMultilingualSTS (en)": 87.15,
+ "STSBenchmarkMultilingualSTS (fr)": 72.65,
+ "STSBenchmarkMultilingualSTS (ru)": 53.29
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-large",
+ "SummEvalFr": 31.82
+ },
+ {
+ "Model": "e5-large",
+ "SummEvalFr": 31.82
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-large",
+ "CEDRClassification": 33.15,
+ "SensitiveTopicsClassification": 17.69
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "e5-large",
+ "Core17InstructionRetrieval": 0.21,
+ "News21InstructionRetrieval": -0.94,
+ "Robust04InstructionRetrieval": -6.25
+ }
+ ]
}
},
"intfloat__e5-large-v2": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "e5-large-v2",
+ "BornholmBitextMining": 40.86,
+ "Tatoeba (nld-eng)": 28.79,
+ "Tatoeba (cor-eng)": 3.96,
+ "Tatoeba (ita-eng)": 31.16,
+ "Tatoeba (orv-eng)": 0.51,
+ "Tatoeba (swe-eng)": 21.42,
+ "Tatoeba (isl-eng)": 10.27,
+ "Tatoeba (ang-eng)": 20.52,
+ "Tatoeba (awa-eng)": 0.07,
+ "Tatoeba (swh-eng)": 8.5,
+ "Tatoeba (fin-eng)": 5.89,
+ "Tatoeba (kur-eng)": 10.32,
+ "Tatoeba (afr-eng)": 14.06,
+ "Tatoeba (sqi-eng)": 10.53,
+ "Tatoeba (csb-eng)": 12.24,
+ "Tatoeba (aze-eng)": 5.82,
+ "Tatoeba (ces-eng)": 7.5,
+ "Tatoeba (hun-eng)": 6.66,
+ "Tatoeba (lvs-eng)": 6.81,
+ "Tatoeba (tam-eng)": 0.04,
+ "Tatoeba (por-eng)": 49.24,
+ "Tatoeba (mon-eng)": 2.73,
+ "Tatoeba (pes-eng)": 0.42,
+ "Tatoeba (gla-eng)": 3.45,
+ "Tatoeba (war-eng)": 8.24,
+ "Tatoeba (mar-eng)": 0.25,
+ "Tatoeba (gsw-eng)": 20.97,
+ "Tatoeba (hye-eng)": 0.81,
+ "Tatoeba (kor-eng)": 1.7,
+ "Tatoeba (max-eng)": 14.68,
+ "Tatoeba (est-eng)": 4.5,
+ "Tatoeba (ceb-eng)": 8.33,
+ "Tatoeba (lfn-eng)": 25.13,
+ "Tatoeba (ara-eng)": 0.54,
+ "Tatoeba (nob-eng)": 27.97,
+ "Tatoeba (lat-eng)": 14.13,
+ "Tatoeba (pam-eng)": 6.46,
+ "Tatoeba (mkd-eng)": 1.64,
+ "Tatoeba (hrv-eng)": 14.76,
+ "Tatoeba (vie-eng)": 7.3,
+ "Tatoeba (ukr-eng)": 2.09,
+ "Tatoeba (fao-eng)": 15.09,
+ "Tatoeba (fry-eng)": 25.25,
+ "Tatoeba (dsb-eng)": 10.42,
+ "Tatoeba (yid-eng)": 0.49,
+ "Tatoeba (kab-eng)": 1.64,
+ "Tatoeba (ind-eng)": 9.85,
+ "Tatoeba (ell-eng)": 1.44,
+ "Tatoeba (gle-eng)": 4.29,
+ "Tatoeba (zsm-eng)": 12.54,
+ "Tatoeba (ile-eng)": 33.88,
+ "Tatoeba (tzl-eng)": 21.14,
+ "Tatoeba (tha-eng)": 1.4,
+ "Tatoeba (bul-eng)": 4.13,
+ "Tatoeba (ast-eng)": 34.17,
+ "Tatoeba (mal-eng)": 0.32,
+ "Tatoeba (bre-eng)": 5.13,
+ "Tatoeba (tur-eng)": 5.49,
+ "Tatoeba (cbk-eng)": 29.76,
+ "Tatoeba (tuk-eng)": 4.64,
+ "Tatoeba (hin-eng)": 0.3,
+ "Tatoeba (tat-eng)": 1.03,
+ "Tatoeba (hsb-eng)": 9.41,
+ "Tatoeba (kat-eng)": 1.27,
+ "Tatoeba (jav-eng)": 7.58,
+ "Tatoeba (kaz-eng)": 1.44,
+ "Tatoeba (fra-eng)": 51.44,
+ "Tatoeba (dtp-eng)": 4.58,
+ "Tatoeba (yue-eng)": 3.74,
+ "Tatoeba (rus-eng)": 4.8,
+ "Tatoeba (spa-eng)": 49.84,
+ "Tatoeba (dan-eng)": 29.27,
+ "Tatoeba (uig-eng)": 0.61,
+ "Tatoeba (bos-eng)": 14.46,
+ "Tatoeba (swg-eng)": 20.81,
+ "Tatoeba (ina-eng)": 49.41,
+ "Tatoeba (xho-eng)": 4.91,
+ "Tatoeba (nov-eng)": 37.2,
+ "Tatoeba (ido-eng)": 22.58,
+ "Tatoeba (cym-eng)": 7.79,
+ "Tatoeba (lit-eng)": 4.44,
+ "Tatoeba (jpn-eng)": 2.29,
+ "Tatoeba (cha-eng)": 16.95,
+ "Tatoeba (srp-eng)": 7.67,
+ "Tatoeba (epo-eng)": 18.77,
+ "Tatoeba (uzb-eng)": 3.43,
+ "Tatoeba (bel-eng)": 2.84,
+ "Tatoeba (urd-eng)": 0.1,
+ "Tatoeba (tgl-eng)": 8.73,
+ "Tatoeba (tel-eng)": 0.38,
+ "Tatoeba (nds-eng)": 25.08,
+ "Tatoeba (kzj-eng)": 6.54,
+ "Tatoeba (glg-eng)": 36.15,
+ "Tatoeba (arz-eng)": 0.21,
+ "Tatoeba (khm-eng)": 0.32,
+ "Tatoeba (slk-eng)": 9.2,
+ "Tatoeba (deu-eng)": 57.44,
+ "Tatoeba (ber-eng)": 5.31,
+ "Tatoeba (slv-eng)": 11.91,
+ "Tatoeba (pms-eng)": 16.64,
+ "Tatoeba (oci-eng)": 17.76,
+ "Tatoeba (nno-eng)": 18.76,
+ "Tatoeba (cat-eng)": 29.27,
+ "Tatoeba (eus-eng)": 9.2,
+ "Tatoeba (heb-eng)": 0.87,
+ "Tatoeba (ron-eng)": 17.06,
+ "Tatoeba (amh-eng)": 0.05,
+ "Tatoeba (cmn-eng)": 2.63,
+ "Tatoeba (mhr-eng)": 0.67,
+ "Tatoeba (arq-eng)": 0.93,
+ "Tatoeba (wuu-eng)": 2.59,
+ "Tatoeba (pol-eng)": 10.63,
+ "Tatoeba (ben-eng)": 0.0
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-large-v2",
+ "AllegroReviews": 26.1,
+ "AmazonCounterfactualClassification (en-ext)": 77.78,
+ "AmazonCounterfactualClassification (en)": 78.34,
+ "AmazonCounterfactualClassification (de)": 58.14,
+ "AmazonCounterfactualClassification (ja)": 59.74,
+ "AmazonReviewsClassification (en)": 49.45,
+ "AmazonReviewsClassification (de)": 34.5,
+ "AmazonReviewsClassification (es)": 40.02,
+ "AmazonReviewsClassification (fr)": 37.46,
+ "AmazonReviewsClassification (ja)": 23.15,
+ "AmazonReviewsClassification (zh)": 24.05,
+ "AngryTweetsClassification": 47.98,
+ "CBD": 51.48,
+ "DanishPoliticalCommentsClassification": 29.08,
+ "GeoreviewClassification": 32.09,
+ "HeadlineClassification": 44.38,
+ "InappropriatenessClassification": 53.71,
+ "KinopoiskClassification": 37.27,
+ "LccSentimentClassification": 45.87,
+ "MTOPDomainClassification (en)": 93.08,
+ "MTOPDomainClassification (de)": 81.84,
+ "MTOPDomainClassification (es)": 82.67,
+ "MTOPDomainClassification (fr)": 83.05,
+ "MTOPDomainClassification (hi)": 41.93,
+ "MTOPDomainClassification (th)": 15.39,
+ "MTOPIntentClassification (en)": 64.58,
+ "MTOPIntentClassification (de)": 53.16,
+ "MTOPIntentClassification (es)": 49.57,
+ "MTOPIntentClassification (fr)": 44.56,
+ "MTOPIntentClassification (hi)": 19.16,
+ "MTOPIntentClassification (th)": 5.33,
+ "MasakhaNEWSClassification (amh)": 34.65,
+ "MasakhaNEWSClassification (eng)": 77.31,
+ "MasakhaNEWSClassification (fra)": 75.47,
+ "MasakhaNEWSClassification (hau)": 68.08,
+ "MasakhaNEWSClassification (ibo)": 61.36,
+ "MasakhaNEWSClassification (lin)": 73.66,
+ "MasakhaNEWSClassification (lug)": 62.87,
+ "MasakhaNEWSClassification (orm)": 65.08,
+ "MasakhaNEWSClassification (pcm)": 90.46,
+ "MasakhaNEWSClassification (run)": 65.78,
+ "MasakhaNEWSClassification (sna)": 77.29,
+ "MasakhaNEWSClassification (som)": 51.33,
+ "MasakhaNEWSClassification (swa)": 59.94,
+ "MasakhaNEWSClassification (tir)": 24.38,
+ "MasakhaNEWSClassification (xho)": 64.71,
+ "MasakhaNEWSClassification (yor)": 72.0,
+ "MassiveIntentClassification (am)": 2.69,
+ "MassiveIntentClassification (ko)": 24.94,
+ "MassiveIntentClassification (sl)": 41.54,
+ "MassiveIntentClassification (th)": 13.15,
+ "MassiveIntentClassification (hu)": 40.23,
+ "MassiveIntentClassification (tl)": 42.84,
+ "MassiveIntentClassification (my)": 3.88,
+ "MassiveIntentClassification (id)": 43.12,
+ "MassiveIntentClassification (ur)": 23.97,
+ "MassiveIntentClassification (sv)": 44.13,
+ "MassiveIntentClassification (nl)": 44.37,
+ "MassiveIntentClassification (jv)": 38.7,
+ "MassiveIntentClassification (ml)": 3.0,
+ "MassiveIntentClassification (zh-CN)": 22.35,
+ "MassiveIntentClassification (af)": 41.84,
+ "MassiveIntentClassification (es)": 49.84,
+ "MassiveIntentClassification (ja)": 34.0,
+ "MassiveIntentClassification (ru)": 40.62,
+ "MassiveIntentClassification (te)": 2.35,
+ "MassiveIntentClassification (lv)": 42.13,
+ "MassiveIntentClassification (zh-TW)": 20.97,
+ "MassiveIntentClassification (en)": 68.14,
+ "MassiveIntentClassification (vi)": 37.67,
+ "MassiveIntentClassification (mn)": 28.0,
+ "MassiveIntentClassification (is)": 35.18,
+ "MassiveIntentClassification (el)": 37.8,
+ "MassiveIntentClassification (kn)": 3.3,
+ "MassiveIntentClassification (pl)": 41.75,
+ "MassiveIntentClassification (nb)": 43.32,
+ "MassiveIntentClassification (fa)": 35.75,
+ "MassiveIntentClassification (bn)": 24.23,
+ "MassiveIntentClassification (hi)": 22.04,
+ "MassiveIntentClassification (hy)": 13.79,
+ "MassiveIntentClassification (pt)": 51.74,
+ "MassiveIntentClassification (da)": 45.7,
+ "MassiveIntentClassification (sq)": 43.45,
+ "MassiveIntentClassification (ar)": 27.84,
+ "MassiveIntentClassification (he)": 28.2,
+ "MassiveIntentClassification (ms)": 40.45,
+ "MassiveIntentClassification (cy)": 38.39,
+ "MassiveIntentClassification (tr)": 43.02,
+ "MassiveIntentClassification (fi)": 41.93,
+ "MassiveIntentClassification (it)": 47.73,
+ "MassiveIntentClassification (de)": 49.47,
+ "MassiveIntentClassification (sw)": 38.6,
+ "MassiveIntentClassification (az)": 41.8,
+ "MassiveIntentClassification (ta)": 14.95,
+ "MassiveIntentClassification (ro)": 44.27,
+ "MassiveIntentClassification (ka)": 14.56,
+ "MassiveIntentClassification (km)": 4.57,
+ "MassiveIntentClassification (fr)": 50.66,
+ "MassiveScenarioClassification (ja)": 42.73,
+ "MassiveScenarioClassification (pt)": 57.88,
+ "MassiveScenarioClassification (ar)": 34.93,
+ "MassiveScenarioClassification (ta)": 20.23,
+ "MassiveScenarioClassification (fr)": 57.97,
+ "MassiveScenarioClassification (am)": 7.54,
+ "MassiveScenarioClassification (sv)": 53.47,
+ "MassiveScenarioClassification (id)": 49.05,
+ "MassiveScenarioClassification (vi)": 42.88,
+ "MassiveScenarioClassification (bn)": 31.56,
+ "MassiveScenarioClassification (da)": 55.41,
+ "MassiveScenarioClassification (hi)": 25.87,
+ "MassiveScenarioClassification (ml)": 7.22,
+ "MassiveScenarioClassification (sq)": 51.31,
+ "MassiveScenarioClassification (tr)": 49.95,
+ "MassiveScenarioClassification (en)": 71.5,
+ "MassiveScenarioClassification (az)": 48.4,
+ "MassiveScenarioClassification (ka)": 20.37,
+ "MassiveScenarioClassification (ko)": 30.68,
+ "MassiveScenarioClassification (ro)": 53.72,
+ "MassiveScenarioClassification (el)": 46.24,
+ "MassiveScenarioClassification (ru)": 45.16,
+ "MassiveScenarioClassification (sl)": 47.66,
+ "MassiveScenarioClassification (fa)": 38.06,
+ "MassiveScenarioClassification (nb)": 53.32,
+ "MassiveScenarioClassification (te)": 7.09,
+ "MassiveScenarioClassification (fi)": 48.37,
+ "MassiveScenarioClassification (he)": 30.38,
+ "MassiveScenarioClassification (nl)": 55.05,
+ "MassiveScenarioClassification (is)": 44.26,
+ "MassiveScenarioClassification (th)": 22.92,
+ "MassiveScenarioClassification (hu)": 48.6,
+ "MassiveScenarioClassification (km)": 9.84,
+ "MassiveScenarioClassification (sw)": 45.69,
+ "MassiveScenarioClassification (ur)": 32.19,
+ "MassiveScenarioClassification (ms)": 49.14,
+ "MassiveScenarioClassification (jv)": 47.09,
+ "MassiveScenarioClassification (mn)": 32.73,
+ "MassiveScenarioClassification (cy)": 45.72,
+ "MassiveScenarioClassification (af)": 51.77,
+ "MassiveScenarioClassification (hy)": 19.82,
+ "MassiveScenarioClassification (kn)": 8.32,
+ "MassiveScenarioClassification (zh-CN)": 31.82,
+ "MassiveScenarioClassification (it)": 56.57,
+ "MassiveScenarioClassification (zh-TW)": 30.42,
+ "MassiveScenarioClassification (my)": 10.49,
+ "MassiveScenarioClassification (pl)": 49.17,
+ "MassiveScenarioClassification (es)": 58.39,
+ "MassiveScenarioClassification (tl)": 51.52,
+ "MassiveScenarioClassification (lv)": 47.14,
+ "MassiveScenarioClassification (de)": 60.64,
+ "NoRecClassification": 43.18,
+ "NordicLangClassification": 59.82,
+ "PAC": 69.79,
+ "PolEmo2.0-IN": 46.99,
+ "PolEmo2.0-OUT": 23.28,
+ "RuReviewsClassification": 47.99,
+ "RuSciBenchGRNTIClassification": 24.33,
+ "RuSciBenchOECDClassification": 18.45,
+ "ToxicConversationsClassification": 63.29
+ }
+ ]
},
"Clustering": {
"v_measure": [
{
"Model": "e5-large-v2",
+ "AlloProfClusteringP2P": 58.75,
+ "AlloProfClusteringS2S": 34.92,
"BiorxivClusteringP2P": 36.72,
"BiorxivClusteringS2S": 35.47,
+ "BlurbsClusteringP2P": 31.19,
+ "BlurbsClusteringS2S": 12.4,
+ "GeoreviewClusteringP2P": 27.6,
+ "HALClusteringS2S": 23.39,
+ "MLSUMClusteringP2P (de)": 38.35,
+ "MLSUMClusteringP2P (fr)": 42.34,
+ "MLSUMClusteringP2P (ru)": 26.59,
+ "MLSUMClusteringP2P (es)": 43.65,
+ "MLSUMClusteringS2S (de)": 38.99,
+ "MLSUMClusteringS2S (fr)": 42.95,
+ "MLSUMClusteringS2S (ru)": 24.89,
+ "MLSUMClusteringS2S (es)": 43.69,
+ "MasakhaNEWSClusteringP2P (amh)": 41.67,
+ "MasakhaNEWSClusteringP2P (eng)": 53.4,
+ "MasakhaNEWSClusteringP2P (fra)": 36.7,
+ "MasakhaNEWSClusteringP2P (hau)": 47.77,
+ "MasakhaNEWSClusteringP2P (ibo)": 39.78,
+ "MasakhaNEWSClusteringP2P (lin)": 46.47,
+ "MasakhaNEWSClusteringP2P (lug)": 60.71,
+ "MasakhaNEWSClusteringP2P (orm)": 30.18,
+ "MasakhaNEWSClusteringP2P (pcm)": 72.23,
+ "MasakhaNEWSClusteringP2P (run)": 53.93,
+ "MasakhaNEWSClusteringP2P (sna)": 49.14,
+ "MasakhaNEWSClusteringP2P (som)": 33.73,
+ "MasakhaNEWSClusteringP2P (swa)": 26.38,
+ "MasakhaNEWSClusteringP2P (tir)": 45.28,
+ "MasakhaNEWSClusteringP2P (xho)": 33.98,
+ "MasakhaNEWSClusteringP2P (yor)": 38.67,
+ "MasakhaNEWSClusteringS2S (amh)": 41.49,
+ "MasakhaNEWSClusteringS2S (eng)": 33.38,
+ "MasakhaNEWSClusteringS2S (fra)": 45.31,
+ "MasakhaNEWSClusteringS2S (hau)": 18.89,
+ "MasakhaNEWSClusteringS2S (ibo)": 42.67,
+ "MasakhaNEWSClusteringS2S (lin)": 47.62,
+ "MasakhaNEWSClusteringS2S (lug)": 44.12,
+ "MasakhaNEWSClusteringS2S (orm)": 23.86,
+ "MasakhaNEWSClusteringS2S (pcm)": 55.76,
+ "MasakhaNEWSClusteringS2S (run)": 46.23,
+ "MasakhaNEWSClusteringS2S (sna)": 40.38,
+ "MasakhaNEWSClusteringS2S (som)": 24.86,
+ "MasakhaNEWSClusteringS2S (swa)": 17.28,
+ "MasakhaNEWSClusteringS2S (tir)": 42.63,
+ "MasakhaNEWSClusteringS2S (xho)": 24.27,
+ "MasakhaNEWSClusteringS2S (yor)": 35.68,
"MedrxivClusteringP2P": 31.45,
"MedrxivClusteringS2S": 29.91,
"RedditClustering": 55.5,
"RedditClusteringP2P": 63.71,
+ "RuSciBenchGRNTIClusteringP2P": 20.36,
+ "RuSciBenchOECDClusteringP2P": 18.0,
"StackExchangeClustering": 65.23,
"StackExchangeClusteringP2P": 33.62,
+ "TenKGnadClusteringP2P": 44.85,
+ "TenKGnadClusteringS2S": 24.61,
"TwentyNewsgroupsClustering": 48.73
}
]
},
- "PairClassification": {
- "max_ap": []
- },
+ "PairClassification": {
+ "max_ap": [
+ {
+ "Model": "e5-large-v2",
+ "CDSC-E": 65.96,
+ "FalseFriendsGermanEnglish": 47.9,
+ "OpusparcusPC (de)": 92.28,
+ "OpusparcusPC (en)": 98.02,
+ "OpusparcusPC (fi)": 86.16,
+ "OpusparcusPC (fr)": 88.99,
+ "OpusparcusPC (ru)": 79.79,
+ "OpusparcusPC (sv)": 85.77,
+ "PSC": 96.59,
+ "PawsXPairClassification (de)": 50.96,
+ "PawsXPairClassification (en)": 60.33,
+ "PawsXPairClassification (es)": 52.65,
+ "PawsXPairClassification (fr)": 56.07,
+ "PawsXPairClassification (ja)": 49.23,
+ "PawsXPairClassification (ko)": 50.21,
+ "PawsXPairClassification (zh)": 52.84,
+ "SICK-E-PL": 58.41,
+ "SprintDuplicateQuestions": 94.83,
+ "TERRa": 47.24,
+ "TwitterURLCorpus": 86.44
+ },
+ {
+ "Model": "e5-large-v2",
+ "CDSC-E": 65.96,
+ "FalseFriendsGermanEnglish": 47.9,
+ "OpusparcusPC (de)": 92.28,
+ "OpusparcusPC (en)": 98.02,
+ "OpusparcusPC (fi)": 86.16,
+ "OpusparcusPC (fr)": 89.0,
+ "OpusparcusPC (ru)": 79.79,
+ "OpusparcusPC (sv)": 85.77,
+ "PSC": 96.64,
+ "PawsXPairClassification (de)": 51.15,
+ "PawsXPairClassification (en)": 60.36,
+ "PawsXPairClassification (es)": 52.65,
+ "PawsXPairClassification (fr)": 56.08,
+ "PawsXPairClassification (ja)": 49.25,
+ "PawsXPairClassification (ko)": 50.21,
+ "PawsXPairClassification (zh)": 53.0,
+ "SICK-E-PL": 58.41,
+ "SprintDuplicateQuestions": 94.84,
+ "TERRa": 47.27,
+ "TwitterURLCorpus": 86.44
+ }
+ ]
+ },
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-large-v2",
+ "AlloprofReranking": 65.12,
+ "RuBQReranking": 47.84,
+ "SyntecReranking": 75.92,
+ "T2Reranking": 59.61
+ },
+ {
+ "Model": "e5-large-v2",
+ "MIRACLReranking (ar)": 12.57,
+ "MIRACLReranking (bn)": 12.42,
+ "MIRACLReranking (de)": 34.86,
+ "MIRACLReranking (en)": 57.3,
+ "MIRACLReranking (es)": 45.36,
+ "MIRACLReranking (fa)": 12.44,
+ "MIRACLReranking (fi)": 44.32,
+ "MIRACLReranking (fr)": 39.0,
+ "MIRACLReranking (hi)": 11.72,
+ "MIRACLReranking (id)": 27.98,
+ "MIRACLReranking (ja)": 14.96,
+ "MIRACLReranking (ko)": 13.9,
+ "MIRACLReranking (ru)": 21.61,
+ "MIRACLReranking (sw)": 30.43,
+ "MIRACLReranking (te)": 1.79,
+ "MIRACLReranking (th)": 4.38,
+ "MIRACLReranking (yo)": 51.77,
+ "MIRACLReranking (zh)": 12.59
+ }
+ ]
},
"Retrieval": {
- "ndcg_at_10": []
+ "ndcg_at_10": [
+ {
+ "Model": "e5-large-v2",
+ "AILACasedocs": 31.23,
+ "AILAStatutes": 17.63,
+ "ARCChallenge": 11.29,
+ "AlloprofRetrieval": 34.13,
+ "AlphaNLI": 15.16,
+ "AppsRetrieval": 14.18,
+ "ArguAna": 46.43,
+ "BSARDRetrieval": 11.5,
+ "CmedqaRetrieval": 3.27,
+ "CodeFeedbackMT": 47.83,
+ "CodeFeedbackST": 76.16,
+ "CodeSearchNetCCRetrieval (python)": 67.59,
+ "CodeSearchNetCCRetrieval (javascript)": 65.18,
+ "CodeSearchNetCCRetrieval (go)": 47.64,
+ "CodeSearchNetCCRetrieval (ruby)": 62.31,
+ "CodeSearchNetCCRetrieval (java)": 63.92,
+ "CodeSearchNetCCRetrieval (php)": 53.38,
+ "CodeSearchNetRetrieval (python)": 88.15,
+ "CodeSearchNetRetrieval (javascript)": 71.79,
+ "CodeSearchNetRetrieval (go)": 91.6,
+ "CodeSearchNetRetrieval (ruby)": 80.25,
+ "CodeSearchNetRetrieval (java)": 83.2,
+ "CodeSearchNetRetrieval (php)": 84.16,
+ "CodeTransOceanContest": 65.14,
+ "CodeTransOceanDL": 32.4,
+ "CosQA": 32.09,
+ "CovidRetrieval": 19.59,
+ "GerDaLIR": 2.66,
+ "GerDaLIRSmall": 6.47,
+ "GermanQuAD-Retrieval": 87.74,
+ "HellaSwag": 27.86,
+ "LEMBNarrativeQARetrieval": 26.38,
+ "LEMBQMSumRetrieval": 25.08,
+ "LEMBSummScreenFDRetrieval": 77.36,
+ "LEMBWikimQARetrieval": 58.43,
+ "LeCaRDv2": 22.94,
+ "LegalBenchConsumerContractsQA": 77.32,
+ "LegalBenchCorporateLobbying": 91.5,
+ "LegalQuAD": 27.18,
+ "LegalSummarization": 59.53,
+ "MIRACLRetrieval (ar)": 0.44,
+ "MIRACLRetrieval (bn)": 0.94,
+ "MIRACLRetrieval (de)": 21.89,
+ "MIRACLRetrieval (en)": 50.42,
+ "MIRACLRetrieval (es)": 29.08,
+ "MIRACLRetrieval (fa)": 0.16,
+ "MIRACLRetrieval (fi)": 28.1,
+ "MIRACLRetrieval (fr)": 26.84,
+ "MIRACLRetrieval (hi)": 1.45,
+ "MIRACLRetrieval (id)": 17.92,
+ "MIRACLRetrieval (ja)": 2.38,
+ "MIRACLRetrieval (ko)": 4.12,
+ "MIRACLRetrieval (ru)": 6.14,
+ "MIRACLRetrieval (sw)": 21.81,
+ "MIRACLRetrieval (te)": 0.04,
+ "MIRACLRetrieval (th)": 0.21,
+ "MIRACLRetrieval (yo)": 47.36,
+ "MIRACLRetrieval (zh)": 0.58,
+ "MintakaRetrieval (ar)": 5.94,
+ "MintakaRetrieval (de)": 26.31,
+ "MintakaRetrieval (es)": 23.76,
+ "MintakaRetrieval (fr)": 25.45,
+ "MintakaRetrieval (hi)": 6.51,
+ "MintakaRetrieval (it)": 19.44,
+ "MintakaRetrieval (ja)": 10.25,
+ "MintakaRetrieval (pt)": 23.69,
+ "PIQA": 29.55,
+ "Quail": 5.28,
+ "RARbCode": 58.67,
+ "RARbMath": 75.51,
+ "RiaNewsRetrieval": 17.66,
+ "RuBQRetrieval": 21.29,
+ "SCIDOCS": 20.5,
+ "SIQA": 4.07,
+ "SciFact-PL": 39.68,
+ "SpartQA": 10.37,
+ "StackOverflowQA": 89.93,
+ "SyntecRetrieval": 71.14,
+ "SyntheticText2SQL": 49.66,
+ "TRECCOVID": 66.64,
+ "TRECCOVID-PL": 26.57,
+ "TempReasonL1": 1.77,
+ "TempReasonL2Fact": 49.91,
+ "TempReasonL2Pure": 3.29,
+ "TempReasonL3Fact": 41.83,
+ "TempReasonL3Pure": 9.7,
+ "WinoGrande": 51.52,
+ "XMarket (de)": 15.44,
+ "XMarket (en)": 31.88,
+ "XMarket (es)": 17.15,
+ "XPQARetrieval (ara-ara)": 14.05,
+ "XPQARetrieval (eng-ara)": 4.24,
+ "XPQARetrieval (ara-eng)": 10.86,
+ "XPQARetrieval (deu-deu)": 57.02,
+ "XPQARetrieval (eng-deu)": 16.48,
+ "XPQARetrieval (deu-eng)": 33.15,
+ "XPQARetrieval (spa-spa)": 45.31,
+ "XPQARetrieval (eng-spa)": 15.12,
+ "XPQARetrieval (spa-eng)": 29.12,
+ "XPQARetrieval (fra-fra)": 53.52,
+ "XPQARetrieval (eng-fra)": 18.82,
+ "XPQARetrieval (fra-eng)": 37.01,
+ "XPQARetrieval (hin-hin)": 39.8,
+ "XPQARetrieval (eng-hin)": 8.58,
+ "XPQARetrieval (hin-eng)": 10.09,
+ "XPQARetrieval (ita-ita)": 57.54,
+ "XPQARetrieval (eng-ita)": 11.68,
+ "XPQARetrieval (ita-eng)": 31.21,
+ "XPQARetrieval (jpn-jpn)": 45.76,
+ "XPQARetrieval (eng-jpn)": 6.83,
+ "XPQARetrieval (jpn-eng)": 20.11,
+ "XPQARetrieval (kor-kor)": 15.97,
+ "XPQARetrieval (eng-kor)": 8.65,
+ "XPQARetrieval (kor-eng)": 9.01,
+ "XPQARetrieval (pol-pol)": 33.84,
+ "XPQARetrieval (eng-pol)": 12.23,
+ "XPQARetrieval (pol-eng)": 19.21,
+ "XPQARetrieval (por-por)": 36.56,
+ "XPQARetrieval (eng-por)": 11.36,
+ "XPQARetrieval (por-eng)": 26.54,
+ "XPQARetrieval (tam-tam)": 10.17,
+ "XPQARetrieval (eng-tam)": 4.15,
+ "XPQARetrieval (tam-eng)": 4.9,
+ "XPQARetrieval (cmn-cmn)": 24.68,
+ "XPQARetrieval (eng-cmn)": 8.97,
+ "XPQARetrieval (cmn-eng)": 14.98
+ }
+ ]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-large-v2",
+ "CDSC-R": 83.06,
+ "GermanSTSBenchmark": 69.37,
+ "RUParaPhraserSTS": 54.89,
+ "RuSTSBenchmarkSTS": 63.79,
+ "SICK-R": 79.16,
+ "SICK-R-PL": 60.7,
+ "SICKFr": 71.54,
+ "STS12": 73.55,
+ "STS13": 80.98,
+ "STS14": 79.16,
+ "STS15": 88.29,
+ "STS17 (it-en)": 38.07,
+ "STS17 (en-tr)": 1.19,
+ "STS17 (en-ar)": -1.96,
+ "STS17 (nl-en)": 39.39,
+ "STS17 (ko-ko)": 51.73,
+ "STS17 (en-de)": 61.58,
+ "STS17 (ar-ar)": 59.75,
+ "STS17 (es-en)": 50.83,
+ "STS17 (en-en)": 89.55,
+ "STS17 (es-es)": 82.16,
+ "STS17 (fr-en)": 56.29,
+ "STS22 (es)": 61.36,
+ "STS22 (es-en)": 71.31,
+ "STS22 (zh-en)": 39.49,
+ "STS22 (tr)": 53.57,
+ "STS22 (ar)": 43.95,
+ "STS22 (ru)": 34.37,
+ "STS22 (en)": 67.2,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh)": 50.33,
+ "STS22 (pl)": 31.98,
+ "STS22 (es-it)": 65.36,
+ "STS22 (de)": 38.45,
+ "STS22 (it)": 71.04,
+ "STS22 (pl-en)": 48.81,
+ "STS22 (de-fr)": 60.02,
+ "STS22 (de-en)": 49.8,
+ "STS22 (fr)": 78.75,
+ "STS22 (de-pl)": 40.19,
+ "STSB": 36.95,
+ "STSBenchmark": 84.55,
+ "STSBenchmarkMultilingualSTS (pl)": 62.31,
+ "STSBenchmarkMultilingualSTS (fr)": 71.98,
+ "STSBenchmarkMultilingualSTS (de)": 69.73,
+ "STSBenchmarkMultilingualSTS (en)": 84.55,
+ "STSBenchmarkMultilingualSTS (nl)": 66.44,
+ "STSBenchmarkMultilingualSTS (zh)": 37.55,
+ "STSBenchmarkMultilingualSTS (pt)": 70.38,
+ "STSBenchmarkMultilingualSTS (es)": 74.51,
+ "STSBenchmarkMultilingualSTS (it)": 70.29,
+ "STSBenchmarkMultilingualSTS (ru)": 64.21
+ },
+ {
+ "Model": "e5-large-v2",
+ "CDSC-R": 83.06,
+ "GermanSTSBenchmark": 69.37,
+ "RUParaPhraserSTS": 54.89,
+ "RuSTSBenchmarkSTS": 63.79,
+ "SICK-R": 79.16,
+ "SICK-R-PL": 60.7,
+ "SICKFr": 71.54,
+ "STS12": 73.55,
+ "STS13": 80.98,
+ "STS14": 79.16,
+ "STS15": 88.29,
+ "STS17 (it-en)": 38.07,
+ "STS17 (en-tr)": 1.19,
+ "STS17 (en-ar)": -1.96,
+ "STS17 (nl-en)": 39.39,
+ "STS17 (ko-ko)": 51.73,
+ "STS17 (en-de)": 61.58,
+ "STS17 (ar-ar)": 59.75,
+ "STS17 (es-en)": 50.83,
+ "STS17 (en-en)": 89.55,
+ "STS17 (es-es)": 82.16,
+ "STS17 (fr-en)": 56.29,
+ "STS22 (es)": 61.36,
+ "STS22 (es-en)": 71.31,
+ "STS22 (zh-en)": 39.49,
+ "STS22 (tr)": 53.57,
+ "STS22 (ar)": 43.98,
+ "STS22 (ru)": 34.37,
+ "STS22 (en)": 67.2,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (zh)": 50.33,
+ "STS22 (pl)": 31.98,
+ "STS22 (es-it)": 65.36,
+ "STS22 (de)": 38.45,
+ "STS22 (it)": 71.04,
+ "STS22 (pl-en)": 48.81,
+ "STS22 (de-fr)": 60.02,
+ "STS22 (de-en)": 49.8,
+ "STS22 (fr)": 78.75,
+ "STS22 (de-pl)": 40.19,
+ "STSB": 36.95,
+ "STSBenchmark": 84.55,
+ "STSBenchmarkMultilingualSTS (pl)": 62.31,
+ "STSBenchmarkMultilingualSTS (fr)": 71.97,
+ "STSBenchmarkMultilingualSTS (de)": 69.73,
+ "STSBenchmarkMultilingualSTS (en)": 84.55,
+ "STSBenchmarkMultilingualSTS (nl)": 66.44,
+ "STSBenchmarkMultilingualSTS (zh)": 37.57,
+ "STSBenchmarkMultilingualSTS (pt)": 70.38,
+ "STSBenchmarkMultilingualSTS (es)": 74.51,
+ "STSBenchmarkMultilingualSTS (it)": 70.29,
+ "STSBenchmarkMultilingualSTS (ru)": 64.21
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-large-v2",
+ "SummEvalFr": 30.39
+ },
+ {
+ "Model": "e5-large-v2",
+ "SummEvalFr": 30.39
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-large-v2",
+ "CEDRClassification": 35.55,
+ "SensitiveTopicsClassification": 18.85
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": [
@@ -8627,9 +23694,19 @@
"Model": "e5-mistral-7b-instruct",
"AlloProfClusteringP2P": 61.06,
"AlloProfClusteringS2S": 28.12,
+ "BlurbsClusteringP2P": 47.07,
+ "BlurbsClusteringS2S": 21.35,
"GeoreviewClusteringP2P": 76.32,
"HALClusteringS2S": 19.69,
+ "MLSUMClusteringP2P (de)": 52.88,
+ "MLSUMClusteringP2P (fr)": 48.33,
+ "MLSUMClusteringP2P (ru)": 58.06,
+ "MLSUMClusteringP2P (es)": 50.23,
"MLSUMClusteringP2P": 45.59,
+ "MLSUMClusteringS2S (de)": 52.23,
+ "MLSUMClusteringS2S (fr)": 47.79,
+ "MLSUMClusteringS2S (ru)": 56.58,
+ "MLSUMClusteringS2S (es)": 49.27,
"MLSUMClusteringS2S": 32.0,
"MasakhaNEWSClusteringP2P (amh)": 47.57,
"MasakhaNEWSClusteringP2P (eng)": 71.97,
@@ -8664,7 +23741,9 @@
"MasakhaNEWSClusteringS2S (xho)": 26.61,
"MasakhaNEWSClusteringS2S (yor)": 63.18,
"RuSciBenchGRNTIClusteringP2P": 62.27,
- "RuSciBenchOECDClusteringP2P": 54.13
+ "RuSciBenchOECDClusteringP2P": 54.13,
+ "TenKGnadClusteringP2P": 54.92,
+ "TenKGnadClusteringS2S": 40.21
}
]
},
@@ -8673,6 +23752,7 @@
{
"Model": "e5-mistral-7b-instruct",
"CDSC-E": 75.86,
+ "FalseFriendsGermanEnglish": 54.05,
"OpusparcusPC (de)": 97.63,
"OpusparcusPC (en)": 99.1,
"OpusparcusPC (fi)": 92.76,
@@ -8696,6 +23776,7 @@
{
"Model": "e5-mistral-7b-instruct",
"CDSC-E": 75.86,
+ "FalseFriendsGermanEnglish": 54.05,
"OpusparcusPC (de)": 97.63,
"OpusparcusPC (en)": 99.1,
"OpusparcusPC (fi)": 92.76,
@@ -8739,7 +23820,24 @@
},
{
"Model": "e5-mistral-7b-instruct",
- "MIRACLReranking (ru)": 63.61
+ "MIRACLReranking (ru)": 63.61,
+ "MIRACLReranking (ar)": 73.64,
+ "MIRACLReranking (bn)": 69.72,
+ "MIRACLReranking (de)": 52.52,
+ "MIRACLReranking (en)": 62.21,
+ "MIRACLReranking (es)": 61.1,
+ "MIRACLReranking (fa)": 54.85,
+ "MIRACLReranking (fi)": 76.94,
+ "MIRACLReranking (fr)": 53.64,
+ "MIRACLReranking (hi)": 58.2,
+ "MIRACLReranking (id)": 56.08,
+ "MIRACLReranking (ja)": 62.67,
+ "MIRACLReranking (ko)": 56.25,
+ "MIRACLReranking (sw)": 61.5,
+ "MIRACLReranking (te)": 74.42,
+ "MIRACLReranking (th)": 70.8,
+ "MIRACLReranking (yo)": 66.72,
+ "MIRACLReranking (zh)": 49.74
}
]
},
@@ -8791,7 +23889,10 @@
"EcomRetrieval": 45.94,
"FiQA-PL": 35.34,
"FiQA2018": 56.81,
+ "GerDaLIR": 7.22,
"GerDaLIRSmall": 37.18,
+ "GermanDPR": 84.25,
+ "GermanQuAD-Retrieval": 95.21,
"HellaSwag": 35.37,
"LEMBNarrativeQARetrieval": 44.62,
"LEMBQMSumRetrieval": 43.63,
@@ -8841,6 +23942,9 @@
"Touche2020": 26.27,
"VideoRetrieval": 45.34,
"WinoGrande": 39.51,
+ "XMarket (de)": 26.33,
+ "XMarket (en)": 32.82,
+ "XMarket (es)": 27.14,
"XPQARetrieval (ara-ara)": 45.94,
"XPQARetrieval (eng-ara)": 30.38,
"XPQARetrieval (ara-eng)": 41.53,
@@ -8903,6 +24007,7 @@
"BIOSSES": 85.5,
"BQ": 50.63,
"CDSC-R": 92.19,
+ "GermanSTSBenchmark": 85.11,
"LCQMC": 75.48,
"PAWSX": 16.81,
"RUParaPhraserSTS": 76.17,
@@ -8927,6 +24032,23 @@
"STS17 (fr-en)": 88.08,
"STS17 (it-en)": 89.69,
"STS22 (ru)": 60.83,
+ "STS22 (de)": 49.12,
+ "STS22 (tr)": 68.72,
+ "STS22 (en)": 67.3,
+ "STS22 (it)": 75.88,
+ "STS22 (pl-en)": 73.18,
+ "STS22 (es)": 66.98,
+ "STS22 (es-it)": 75.05,
+ "STS22 (de-fr)": 61.39,
+ "STS22 (ar)": 54.12,
+ "STS22 (pl)": 39.19,
+ "STS22 (zh-en)": 71.9,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (fr)": 79.83,
+ "STS22 (de-en)": 60.92,
+ "STS22 (es-en)": 75.85,
+ "STS22 (de-pl)": 54.47,
+ "STS22 (zh)": 63.38,
"STSB": 81.81,
"STSBenchmark": 88.6,
"STSBenchmarkMultilingualSTS (pl)": 83.62,
@@ -8947,6 +24069,7 @@
"BIOSSES": 85.5,
"BQ": 50.63,
"CDSC-R": 92.19,
+ "GermanSTSBenchmark": 85.11,
"LCQMC": 75.48,
"PAWSX": 16.81,
"RUParaPhraserSTS": 76.17,
@@ -8971,6 +24094,23 @@
"STS17 (fr-en)": 88.08,
"STS17 (it-en)": 89.69,
"STS22 (ru)": 60.83,
+ "STS22 (de)": 49.12,
+ "STS22 (tr)": 68.72,
+ "STS22 (en)": 67.3,
+ "STS22 (it)": 75.88,
+ "STS22 (pl-en)": 73.18,
+ "STS22 (es)": 66.98,
+ "STS22 (es-it)": 75.05,
+ "STS22 (de-fr)": 61.39,
+ "STS22 (ar)": 54.12,
+ "STS22 (pl)": 39.19,
+ "STS22 (zh-en)": 71.9,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (fr)": 79.83,
+ "STS22 (de-en)": 60.92,
+ "STS22 (es-en)": 75.85,
+ "STS22 (de-pl)": 54.47,
+ "STS22 (zh)": 63.38,
"STSB": 81.81,
"STSBenchmark": 88.6,
"STSBenchmarkMultilingualSTS (pl)": 83.62,
@@ -9086,7 +24226,119 @@
"f1": [
{
"Model": "e5-small",
- "BornholmBitextMining": 40.27
+ "BornholmBitextMining": 40.27,
+ "Tatoeba (dsb-eng)": 6.51,
+ "Tatoeba (spa-eng)": 35.33,
+ "Tatoeba (kat-eng)": 0.42,
+ "Tatoeba (lvs-eng)": 4.56,
+ "Tatoeba (afr-eng)": 8.44,
+ "Tatoeba (vie-eng)": 5.09,
+ "Tatoeba (pes-eng)": 0.6,
+ "Tatoeba (epo-eng)": 15.45,
+ "Tatoeba (kzj-eng)": 5.09,
+ "Tatoeba (ido-eng)": 17.98,
+ "Tatoeba (nob-eng)": 13.56,
+ "Tatoeba (dan-eng)": 13.05,
+ "Tatoeba (yid-eng)": 0.31,
+ "Tatoeba (slv-eng)": 6.94,
+ "Tatoeba (bul-eng)": 0.42,
+ "Tatoeba (jpn-eng)": 0.3,
+ "Tatoeba (yue-eng)": 1.1,
+ "Tatoeba (pms-eng)": 14.19,
+ "Tatoeba (tuk-eng)": 4.07,
+ "Tatoeba (pol-eng)": 6.56,
+ "Tatoeba (srp-eng)": 3.72,
+ "Tatoeba (glg-eng)": 24.9,
+ "Tatoeba (tzl-eng)": 17.55,
+ "Tatoeba (aze-eng)": 4.19,
+ "Tatoeba (csb-eng)": 6.78,
+ "Tatoeba (lfn-eng)": 15.24,
+ "Tatoeba (uzb-eng)": 3.58,
+ "Tatoeba (gla-eng)": 2.72,
+ "Tatoeba (tgl-eng)": 6.37,
+ "Tatoeba (ceb-eng)": 5.59,
+ "Tatoeba (hye-eng)": 0.4,
+ "Tatoeba (hin-eng)": 0.01,
+ "Tatoeba (mon-eng)": 1.82,
+ "Tatoeba (swh-eng)": 6.44,
+ "Tatoeba (hrv-eng)": 8.26,
+ "Tatoeba (kaz-eng)": 0.54,
+ "Tatoeba (awa-eng)": 0.01,
+ "Tatoeba (cmn-eng)": 1.62,
+ "Tatoeba (fao-eng)": 8.66,
+ "Tatoeba (fin-eng)": 3.82,
+ "Tatoeba (zsm-eng)": 7.91,
+ "Tatoeba (mhr-eng)": 0.17,
+ "Tatoeba (ita-eng)": 20.04,
+ "Tatoeba (fra-eng)": 30.19,
+ "Tatoeba (nno-eng)": 10.77,
+ "Tatoeba (isl-eng)": 5.86,
+ "Tatoeba (lat-eng)": 10.38,
+ "Tatoeba (dtp-eng)": 3.59,
+ "Tatoeba (tha-eng)": 0.85,
+ "Tatoeba (wuu-eng)": 1.32,
+ "Tatoeba (orv-eng)": 0.09,
+ "Tatoeba (gsw-eng)": 14.93,
+ "Tatoeba (mar-eng)": 0.0,
+ "Tatoeba (nov-eng)": 30.82,
+ "Tatoeba (uig-eng)": 0.54,
+ "Tatoeba (est-eng)": 3.51,
+ "Tatoeba (ben-eng)": 0.0,
+ "Tatoeba (urd-eng)": 0.01,
+ "Tatoeba (kur-eng)": 8.01,
+ "Tatoeba (swe-eng)": 9.28,
+ "Tatoeba (ber-eng)": 5.33,
+ "Tatoeba (hsb-eng)": 6.14,
+ "Tatoeba (tat-eng)": 0.85,
+ "Tatoeba (mal-eng)": 0.15,
+ "Tatoeba (khm-eng)": 0.14,
+ "Tatoeba (slk-eng)": 6.65,
+ "Tatoeba (swg-eng)": 10.97,
+ "Tatoeba (cbk-eng)": 19.13,
+ "Tatoeba (ina-eng)": 38.44,
+ "Tatoeba (ara-eng)": 0.38,
+ "Tatoeba (ron-eng)": 13.08,
+ "Tatoeba (max-eng)": 10.19,
+ "Tatoeba (oci-eng)": 13.94,
+ "Tatoeba (eus-eng)": 7.24,
+ "Tatoeba (tam-eng)": 0.7,
+ "Tatoeba (ell-eng)": 0.5,
+ "Tatoeba (bos-eng)": 7.77,
+ "Tatoeba (heb-eng)": 0.3,
+ "Tatoeba (arq-eng)": 0.44,
+ "Tatoeba (kab-eng)": 1.36,
+ "Tatoeba (ile-eng)": 25.58,
+ "Tatoeba (cym-eng)": 6.71,
+ "Tatoeba (tur-eng)": 3.97,
+ "Tatoeba (bel-eng)": 1.47,
+ "Tatoeba (ukr-eng)": 0.86,
+ "Tatoeba (hun-eng)": 4.62,
+ "Tatoeba (cat-eng)": 22.05,
+ "Tatoeba (pam-eng)": 4.7,
+ "Tatoeba (cha-eng)": 17.69,
+ "Tatoeba (sqi-eng)": 7.01,
+ "Tatoeba (mkd-eng)": 0.11,
+ "Tatoeba (lit-eng)": 3.47,
+ "Tatoeba (jav-eng)": 4.78,
+ "Tatoeba (ces-eng)": 4.36,
+ "Tatoeba (rus-eng)": 0.17,
+ "Tatoeba (deu-eng)": 20.28,
+ "Tatoeba (xho-eng)": 3.71,
+ "Tatoeba (amh-eng)": 0.01,
+ "Tatoeba (ind-eng)": 6.89,
+ "Tatoeba (kor-eng)": 1.18,
+ "Tatoeba (arz-eng)": 0.06,
+ "Tatoeba (por-eng)": 32.48,
+ "Tatoeba (gle-eng)": 3.99,
+ "Tatoeba (nld-eng)": 17.28,
+ "Tatoeba (cor-eng)": 2.93,
+ "Tatoeba (fry-eng)": 15.8,
+ "Tatoeba (bre-eng)": 3.76,
+ "Tatoeba (tel-eng)": 0.43,
+ "Tatoeba (nds-eng)": 14.38,
+ "Tatoeba (ast-eng)": 26.43,
+ "Tatoeba (ang-eng)": 19.24,
+ "Tatoeba (war-eng)": 6.15
}
]
},
@@ -9094,19 +24346,165 @@
"accuracy": [
{
"Model": "e5-small",
+ "AllegroReviews": 25.72,
+ "AmazonCounterfactualClassification (en-ext)": 76.9,
+ "AmazonCounterfactualClassification (en)": 73.42,
+ "AmazonCounterfactualClassification (de)": 56.98,
+ "AmazonCounterfactualClassification (ja)": 56.66,
+ "AmazonReviewsClassification (en)": 41.5,
+ "AmazonReviewsClassification (de)": 29.2,
+ "AmazonReviewsClassification (es)": 35.79,
+ "AmazonReviewsClassification (fr)": 32.31,
+ "AmazonReviewsClassification (ja)": 21.99,
+ "AmazonReviewsClassification (zh)": 22.37,
"AngryTweetsClassification": 43.6,
+ "CBD": 51.9,
"DKHateClassification": 57.57,
"DanishPoliticalCommentsClassification": 28.37,
+ "GeoreviewClassification": 27.15,
+ "HeadlineClassification": 28.01,
+ "InappropriatenessClassification": 51.46,
+ "KinopoiskClassification": 33.59,
"LccSentimentClassification": 40.27,
+ "MTOPDomainClassification (en)": 87.88,
+ "MTOPDomainClassification (de)": 70.66,
+ "MTOPDomainClassification (es)": 80.16,
+ "MTOPDomainClassification (fr)": 74.2,
+ "MTOPDomainClassification (hi)": 33.51,
+ "MTOPDomainClassification (th)": 16.14,
+ "MTOPIntentClassification (en)": 52.97,
+ "MTOPIntentClassification (de)": 41.29,
+ "MTOPIntentClassification (es)": 41.87,
+ "MTOPIntentClassification (fr)": 37.43,
+ "MTOPIntentClassification (hi)": 12.14,
+ "MTOPIntentClassification (th)": 3.94,
+ "MasakhaNEWSClassification (amh)": 30.96,
+ "MasakhaNEWSClassification (eng)": 74.17,
+ "MasakhaNEWSClassification (fra)": 73.36,
+ "MasakhaNEWSClassification (hau)": 58.1,
+ "MasakhaNEWSClassification (ibo)": 52.49,
+ "MasakhaNEWSClassification (lin)": 65.71,
+ "MasakhaNEWSClassification (lug)": 56.23,
+ "MasakhaNEWSClassification (orm)": 57.88,
+ "MasakhaNEWSClassification (pcm)": 91.25,
+ "MasakhaNEWSClassification (run)": 59.1,
+ "MasakhaNEWSClassification (sna)": 73.41,
+ "MasakhaNEWSClassification (som)": 50.07,
+ "MasakhaNEWSClassification (swa)": 49.58,
+ "MasakhaNEWSClassification (tir)": 26.58,
+ "MasakhaNEWSClassification (xho)": 58.99,
+ "MasakhaNEWSClassification (yor)": 63.94,
+ "MassiveIntentClassification (fr)": 43.86,
"MassiveIntentClassification (da)": 41.89,
- "MassiveIntentClassification (nb)": 40.25,
+ "MassiveIntentClassification (zh-CN)": 15.28,
+ "MassiveIntentClassification (pt)": 43.79,
+ "MassiveIntentClassification (ml)": 2.43,
+ "MassiveIntentClassification (tr)": 40.61,
+ "MassiveIntentClassification (jv)": 37.53,
+ "MassiveIntentClassification (ka)": 5.84,
+ "MassiveIntentClassification (hu)": 38.95,
+ "MassiveIntentClassification (am)": 2.33,
+ "MassiveIntentClassification (sq)": 40.53,
+ "MassiveIntentClassification (sl)": 38.93,
+ "MassiveIntentClassification (ko)": 14.25,
+ "MassiveIntentClassification (hi)": 12.03,
+ "MassiveIntentClassification (vi)": 34.35,
+ "MassiveIntentClassification (af)": 37.7,
+ "MassiveIntentClassification (he)": 14.19,
+ "MassiveIntentClassification (pl)": 37.54,
+ "MassiveIntentClassification (km)": 5.02,
+ "MassiveIntentClassification (de)": 39.86,
+ "MassiveIntentClassification (ru)": 13.65,
+ "MassiveIntentClassification (ta)": 9.05,
+ "MassiveIntentClassification (my)": 3.72,
+ "MassiveIntentClassification (hy)": 5.85,
+ "MassiveIntentClassification (zh-TW)": 15.31,
+ "MassiveIntentClassification (id)": 41.2,
+ "MassiveIntentClassification (fa)": 16.92,
+ "MassiveIntentClassification (az)": 36.22,
+ "MassiveIntentClassification (it)": 42.07,
"MassiveIntentClassification (sv)": 40.07,
- "MassiveScenarioClassification (da)": 49.93,
- "MassiveScenarioClassification (nb)": 48.58,
+ "MassiveIntentClassification (es)": 42.88,
+ "MassiveIntentClassification (lv)": 39.91,
+ "MassiveIntentClassification (ro)": 41.5,
+ "MassiveIntentClassification (nl)": 40.9,
+ "MassiveIntentClassification (en)": 64.47,
+ "MassiveIntentClassification (ja)": 17.46,
+ "MassiveIntentClassification (mn)": 13.5,
+ "MassiveIntentClassification (is)": 35.37,
+ "MassiveIntentClassification (tl)": 38.77,
+ "MassiveIntentClassification (fi)": 39.24,
+ "MassiveIntentClassification (ur)": 7.04,
+ "MassiveIntentClassification (th)": 8.9,
+ "MassiveIntentClassification (kn)": 2.98,
+ "MassiveIntentClassification (ms)": 37.52,
+ "MassiveIntentClassification (ar)": 10.6,
+ "MassiveIntentClassification (cy)": 35.74,
+ "MassiveIntentClassification (nb)": 40.25,
+ "MassiveIntentClassification (el)": 17.22,
+ "MassiveIntentClassification (te)": 2.06,
+ "MassiveIntentClassification (sw)": 37.87,
+ "MassiveIntentClassification (bn)": 8.06,
+ "MassiveScenarioClassification (af)": 45.65,
+ "MassiveScenarioClassification (lv)": 45.66,
+ "MassiveScenarioClassification (bn)": 13.57,
+ "MassiveScenarioClassification (hu)": 45.63,
+ "MassiveScenarioClassification (sq)": 49.52,
+ "MassiveScenarioClassification (de)": 51.82,
+ "MassiveScenarioClassification (kn)": 7.95,
+ "MassiveScenarioClassification (az)": 44.79,
"MassiveScenarioClassification (sv)": 47.06,
+ "MassiveScenarioClassification (ru)": 18.39,
+ "MassiveScenarioClassification (vi)": 40.19,
+ "MassiveScenarioClassification (ur)": 14.5,
+ "MassiveScenarioClassification (ta)": 14.99,
+ "MassiveScenarioClassification (fi)": 45.24,
+ "MassiveScenarioClassification (ka)": 10.16,
+ "MassiveScenarioClassification (sl)": 44.26,
+ "MassiveScenarioClassification (cy)": 42.34,
+ "MassiveScenarioClassification (km)": 9.27,
+ "MassiveScenarioClassification (sw)": 45.98,
+ "MassiveScenarioClassification (tr)": 45.9,
+ "MassiveScenarioClassification (zh-TW)": 23.24,
+ "MassiveScenarioClassification (he)": 16.06,
+ "MassiveScenarioClassification (ms)": 45.47,
+ "MassiveScenarioClassification (en)": 68.78,
+ "MassiveScenarioClassification (is)": 42.96,
+ "MassiveScenarioClassification (tl)": 48.34,
+ "MassiveScenarioClassification (mn)": 19.67,
+ "MassiveScenarioClassification (el)": 26.04,
+ "MassiveScenarioClassification (my)": 10.53,
+ "MassiveScenarioClassification (nl)": 49.09,
+ "MassiveScenarioClassification (th)": 17.88,
+ "MassiveScenarioClassification (it)": 50.68,
+ "MassiveScenarioClassification (ko)": 19.59,
+ "MassiveScenarioClassification (id)": 45.81,
+ "MassiveScenarioClassification (es)": 52.79,
+ "MassiveScenarioClassification (nb)": 48.58,
+ "MassiveScenarioClassification (ro)": 51.69,
+ "MassiveScenarioClassification (hy)": 13.0,
+ "MassiveScenarioClassification (fa)": 21.99,
+ "MassiveScenarioClassification (da)": 49.93,
+ "MassiveScenarioClassification (pt)": 52.65,
+ "MassiveScenarioClassification (jv)": 45.73,
+ "MassiveScenarioClassification (ml)": 6.53,
+ "MassiveScenarioClassification (te)": 7.26,
+ "MassiveScenarioClassification (fr)": 55.22,
+ "MassiveScenarioClassification (hi)": 18.9,
+ "MassiveScenarioClassification (pl)": 45.06,
+ "MassiveScenarioClassification (am)": 7.25,
+ "MassiveScenarioClassification (zh-CN)": 22.16,
+ "MassiveScenarioClassification (ar)": 17.93,
+ "MassiveScenarioClassification (ja)": 23.45,
"NoRecClassification": 41.84,
"NordicLangClassification": 53.47,
"NorwegianParliament": 56.57,
+ "PAC": 64.07,
+ "PolEmo2.0-IN": 41.3,
+ "PolEmo2.0-OUT": 26.8,
+ "RuReviewsClassification": 39.68,
+ "RuSciBenchGRNTIClassification": 8.39,
+ "RuSciBenchOECDClassification": 7.24,
"ScalaDaClassification": 50.15,
"ScalaNbClassification": 50.03
}
@@ -9116,38 +24514,386 @@
"v_measure": [
{
"Model": "e5-small",
+ "AlloProfClusteringP2P": 59.37,
+ "AlloProfClusteringS2S": 31.27,
"BiorxivClusteringP2P": 36.1,
"BiorxivClusteringS2S": 31.51,
+ "BlurbsClusteringP2P": 24.39,
+ "BlurbsClusteringS2S": 9.82,
+ "GeoreviewClusteringP2P": 17.95,
+ "HALClusteringS2S": 20.02,
+ "MLSUMClusteringP2P (de)": 34.5,
+ "MLSUMClusteringP2P (fr)": 39.55,
+ "MLSUMClusteringP2P (ru)": 20.5,
+ "MLSUMClusteringP2P (es)": 41.18,
+ "MLSUMClusteringS2S (de)": 37.15,
+ "MLSUMClusteringS2S (fr)": 39.48,
+ "MLSUMClusteringS2S (ru)": 21.72,
+ "MLSUMClusteringS2S (es)": 40.83,
+ "MasakhaNEWSClusteringP2P (amh)": 40.12,
+ "MasakhaNEWSClusteringP2P (eng)": 48.25,
+ "MasakhaNEWSClusteringP2P (fra)": 37.37,
+ "MasakhaNEWSClusteringP2P (hau)": 36.18,
+ "MasakhaNEWSClusteringP2P (ibo)": 32.4,
+ "MasakhaNEWSClusteringP2P (lin)": 61.22,
+ "MasakhaNEWSClusteringP2P (lug)": 49.59,
+ "MasakhaNEWSClusteringP2P (orm)": 34.6,
+ "MasakhaNEWSClusteringP2P (pcm)": 81.16,
+ "MasakhaNEWSClusteringP2P (run)": 51.16,
+ "MasakhaNEWSClusteringP2P (sna)": 46.57,
+ "MasakhaNEWSClusteringP2P (som)": 36.83,
+ "MasakhaNEWSClusteringP2P (swa)": 20.68,
+ "MasakhaNEWSClusteringP2P (tir)": 42.93,
+ "MasakhaNEWSClusteringP2P (xho)": 35.15,
+ "MasakhaNEWSClusteringP2P (yor)": 36.69,
+ "MasakhaNEWSClusteringS2S (amh)": 45.0,
+ "MasakhaNEWSClusteringS2S (eng)": 45.62,
+ "MasakhaNEWSClusteringS2S (fra)": 32.27,
+ "MasakhaNEWSClusteringS2S (hau)": 16.48,
+ "MasakhaNEWSClusteringS2S (ibo)": 38.94,
+ "MasakhaNEWSClusteringS2S (lin)": 54.36,
+ "MasakhaNEWSClusteringS2S (lug)": 46.99,
+ "MasakhaNEWSClusteringS2S (orm)": 25.01,
+ "MasakhaNEWSClusteringS2S (pcm)": 55.79,
+ "MasakhaNEWSClusteringS2S (run)": 50.72,
+ "MasakhaNEWSClusteringS2S (sna)": 41.33,
+ "MasakhaNEWSClusteringS2S (som)": 28.2,
+ "MasakhaNEWSClusteringS2S (swa)": 16.31,
+ "MasakhaNEWSClusteringS2S (tir)": 43.0,
+ "MasakhaNEWSClusteringS2S (xho)": 23.47,
+ "MasakhaNEWSClusteringS2S (yor)": 34.42,
"MedrxivClusteringP2P": 31.31,
"MedrxivClusteringS2S": 28.32,
"RedditClustering": 43.27,
"RedditClusteringP2P": 57.22,
+ "RuSciBenchGRNTIClusteringP2P": 12.29,
+ "RuSciBenchOECDClusteringP2P": 11.19,
"StackExchangeClustering": 59.6,
"StackExchangeClusteringP2P": 30.82,
+ "TenKGnadClusteringP2P": 37.23,
+ "TenKGnadClusteringS2S": 16.54,
"TwentyNewsgroupsClustering": 37.65
}
]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "e5-small",
+ "CDSC-E": 66.65,
+ "FalseFriendsGermanEnglish": 47.65,
+ "OpusparcusPC (de)": 91.65,
+ "OpusparcusPC (en)": 98.34,
+ "OpusparcusPC (fi)": 86.59,
+ "OpusparcusPC (fr)": 87.26,
+ "OpusparcusPC (ru)": 78.63,
+ "OpusparcusPC (sv)": 85.48,
+ "PSC": 90.03,
+ "PawsXPairClassification (de)": 50.72,
+ "PawsXPairClassification (en)": 60.62,
+ "PawsXPairClassification (es)": 53.6,
+ "PawsXPairClassification (fr)": 55.22,
+ "PawsXPairClassification (ja)": 48.14,
+ "PawsXPairClassification (ko)": 52.35,
+ "PawsXPairClassification (zh)": 53.36,
+ "SICK-E-PL": 52.6,
+ "TERRa": 46.91
+ },
+ {
+ "Model": "e5-small",
+ "CDSC-E": 66.65,
+ "FalseFriendsGermanEnglish": 47.65,
+ "OpusparcusPC (de)": 91.65,
+ "OpusparcusPC (en)": 98.34,
+ "OpusparcusPC (fi)": 86.59,
+ "OpusparcusPC (fr)": 87.26,
+ "OpusparcusPC (ru)": 78.63,
+ "OpusparcusPC (sv)": 85.48,
+ "PSC": 90.03,
+ "PawsXPairClassification (de)": 51.15,
+ "PawsXPairClassification (en)": 60.82,
+ "PawsXPairClassification (es)": 53.6,
+ "PawsXPairClassification (fr)": 55.29,
+ "PawsXPairClassification (ja)": 48.31,
+ "PawsXPairClassification (ko)": 52.57,
+ "PawsXPairClassification (zh)": 53.47,
+ "SICK-E-PL": 52.68,
+ "TERRa": 46.91
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "e5-small",
+ "AlloprofReranking": 59.86,
+ "RuBQReranking": 17.63,
+ "SyntecReranking": 75.29,
+ "T2Reranking": 58.81
+ },
+ {
+ "Model": "e5-small",
+ "MIRACLReranking (ar)": 2.85,
+ "MIRACLReranking (bn)": 2.19,
+ "MIRACLReranking (de)": 21.97,
+ "MIRACLReranking (en)": 51.23,
+ "MIRACLReranking (es)": 39.23,
+ "MIRACLReranking (fa)": 4.9,
+ "MIRACLReranking (fi)": 38.89,
+ "MIRACLReranking (fr)": 30.5,
+ "MIRACLReranking (hi)": 4.77,
+ "MIRACLReranking (id)": 23.31,
+ "MIRACLReranking (ja)": 5.82,
+ "MIRACLReranking (ko)": 6.92,
+ "MIRACLReranking (ru)": 3.22,
+ "MIRACLReranking (sw)": 33.97,
+ "MIRACLReranking (te)": 5.39,
+ "MIRACLReranking (th)": 3.67,
+ "MIRACLReranking (yo)": 56.21,
+ "MIRACLReranking (zh)": 6.9
+ }
+ ]
},
"Retrieval": {
- "ndcg_at_10": []
+ "ndcg_at_10": [
+ {
+ "Model": "e5-small",
+ "AILACasedocs": 22.47,
+ "AILAStatutes": 20.78,
+ "ARCChallenge": 6.21,
+ "AlloprofRetrieval": 27.41,
+ "AlphaNLI": 15.1,
+ "AppsRetrieval": 4.32,
+ "BSARDRetrieval": 9.94,
+ "CmedqaRetrieval": 2.09,
+ "CodeFeedbackMT": 36.47,
+ "CodeFeedbackST": 66.82,
+ "CodeSearchNetCCRetrieval (python)": 55.19,
+ "CodeSearchNetCCRetrieval (javascript)": 50.9,
+ "CodeSearchNetCCRetrieval (go)": 35.3,
+ "CodeSearchNetCCRetrieval (ruby)": 51.14,
+ "CodeSearchNetCCRetrieval (java)": 47.64,
+ "CodeSearchNetCCRetrieval (php)": 35.44,
+ "CodeSearchNetRetrieval (python)": 78.07,
+ "CodeSearchNetRetrieval (javascript)": 60.72,
+ "CodeSearchNetRetrieval (go)": 75.26,
+ "CodeSearchNetRetrieval (ruby)": 70.33,
+ "CodeSearchNetRetrieval (java)": 57.51,
+ "CodeSearchNetRetrieval (php)": 68.6,
+ "CodeTransOceanContest": 45.87,
+ "CodeTransOceanDL": 27.67,
+ "CosQA": 29.9,
+ "CovidRetrieval": 1.51,
+ "GerDaLIR": 2.15,
+ "GerDaLIRSmall": 5.14,
+ "GermanQuAD-Retrieval": 74.27,
+ "HellaSwag": 22.97,
+ "LEMBNarrativeQARetrieval": 18.34,
+ "LEMBQMSumRetrieval": 20.99,
+ "LEMBSummScreenFDRetrieval": 66.38,
+ "LEMBWikimQARetrieval": 46.75,
+ "LeCaRDv2": 15.35,
+ "LegalBenchConsumerContractsQA": 73.83,
+ "LegalBenchCorporateLobbying": 90.21,
+ "LegalQuAD": 20.49,
+ "LegalSummarization": 54.25,
+ "MIRACLRetrieval (ar)": 0.02,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 12.21,
+ "MIRACLRetrieval (en)": 43.04,
+ "MIRACLRetrieval (es)": 26.61,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 23.91,
+ "MIRACLRetrieval (fr)": 21.55,
+ "MIRACLRetrieval (hi)": 0.18,
+ "MIRACLRetrieval (id)": 13.54,
+ "MIRACLRetrieval (ja)": 0.6,
+ "MIRACLRetrieval (ko)": 2.78,
+ "MIRACLRetrieval (ru)": 0.21,
+ "MIRACLRetrieval (sw)": 25.66,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.36,
+ "MIRACLRetrieval (yo)": 52.12,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MintakaRetrieval (ar)": 0.64,
+ "MintakaRetrieval (de)": 17.45,
+ "MintakaRetrieval (es)": 19.14,
+ "MintakaRetrieval (fr)": 20.03,
+ "MintakaRetrieval (hi)": 2.28,
+ "MintakaRetrieval (it)": 16.7,
+ "MintakaRetrieval (ja)": 3.39,
+ "MintakaRetrieval (pt)": 18.38,
+ "PIQA": 20.91,
+ "Quail": 4.1,
+ "RARbCode": 34.44,
+ "RARbMath": 61.73,
+ "RiaNewsRetrieval": 1.35,
+ "RuBQRetrieval": 1.48,
+ "SIQA": 2.65,
+ "SciFact-PL": 34.48,
+ "SpartQA": 4.32,
+ "StackOverflowQA": 71.5,
+ "SyntecRetrieval": 66.58,
+ "SyntheticText2SQL": 50.95,
+ "TRECCOVID-PL": 23.8,
+ "TempReasonL1": 1.7,
+ "TempReasonL2Fact": 28.06,
+ "TempReasonL2Pure": 0.47,
+ "TempReasonL3Fact": 24.34,
+ "TempReasonL3Pure": 3.4,
+ "WinoGrande": 46.73,
+ "XMarket (de)": 15.22,
+ "XMarket (en)": 31.36,
+ "XMarket (es)": 17.83,
+ "XPQARetrieval (ara-ara)": 5.91,
+ "XPQARetrieval (eng-ara)": 3.32,
+ "XPQARetrieval (ara-eng)": 7.1,
+ "XPQARetrieval (deu-deu)": 56.84,
+ "XPQARetrieval (eng-deu)": 13.47,
+ "XPQARetrieval (deu-eng)": 25.24,
+ "XPQARetrieval (spa-spa)": 38.23,
+ "XPQARetrieval (eng-spa)": 13.27,
+ "XPQARetrieval (spa-eng)": 19.29,
+ "XPQARetrieval (fra-fra)": 47.45,
+ "XPQARetrieval (eng-fra)": 15.06,
+ "XPQARetrieval (fra-eng)": 24.79,
+ "XPQARetrieval (hin-hin)": 22.81,
+ "XPQARetrieval (eng-hin)": 5.31,
+ "XPQARetrieval (hin-eng)": 5.66,
+ "XPQARetrieval (ita-ita)": 55.62,
+ "XPQARetrieval (eng-ita)": 10.4,
+ "XPQARetrieval (ita-eng)": 20.19,
+ "XPQARetrieval (jpn-jpn)": 26.1,
+ "XPQARetrieval (eng-jpn)": 4.86,
+ "XPQARetrieval (jpn-eng)": 15.59,
+ "XPQARetrieval (kor-kor)": 6.89,
+ "XPQARetrieval (eng-kor)": 8.11,
+ "XPQARetrieval (kor-eng)": 7.33,
+ "XPQARetrieval (pol-pol)": 31.37,
+ "XPQARetrieval (eng-pol)": 10.7,
+ "XPQARetrieval (pol-eng)": 15.43,
+ "XPQARetrieval (por-por)": 31.21,
+ "XPQARetrieval (eng-por)": 10.95,
+ "XPQARetrieval (por-eng)": 19.74,
+ "XPQARetrieval (tam-tam)": 7.42,
+ "XPQARetrieval (eng-tam)": 4.98,
+ "XPQARetrieval (tam-eng)": 4.79,
+ "XPQARetrieval (cmn-cmn)": 19.09,
+ "XPQARetrieval (eng-cmn)": 5.85,
+ "XPQARetrieval (cmn-eng)": 9.93
+ }
+ ]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-small",
+ "CDSC-R": 84.87,
+ "GermanSTSBenchmark": 65.29,
+ "RUParaPhraserSTS": 31.91,
+ "RuSTSBenchmarkSTS": 40.23,
+ "SICK-R-PL": 57.14,
+ "SICKFr": 68.03,
+ "STS22 (de-en)": 43.7,
+ "STS22 (tr)": 54.58,
+ "STS22 (es-it)": 57.13,
+ "STS22 (es-en)": 66.1,
+ "STS22 (de-fr)": 50.67,
+ "STS22 (es)": 60.03,
+ "STS22 (ru)": 18.1,
+ "STS22 (fr)": 77.03,
+ "STS22 (pl)": 26.18,
+ "STS22 (de-pl)": 32.33,
+ "STS22 (en)": 64.77,
+ "STS22 (de)": 41.84,
+ "STS22 (ar)": 28.26,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (pl-en)": 49.23,
+ "STS22 (zh)": 36.76,
+ "STS22 (zh-en)": 21.97,
+ "STS22 (it)": 71.58,
+ "STSB": 30.14,
+ "STSBenchmarkMultilingualSTS (es)": 73.64,
+ "STSBenchmarkMultilingualSTS (fr)": 70.47,
+ "STSBenchmarkMultilingualSTS (zh)": 35.3,
+ "STSBenchmarkMultilingualSTS (ru)": 40.24,
+ "STSBenchmarkMultilingualSTS (pt)": 71.46,
+ "STSBenchmarkMultilingualSTS (nl)": 65.35,
+ "STSBenchmarkMultilingualSTS (de)": 65.97,
+ "STSBenchmarkMultilingualSTS (pl)": 61.07,
+ "STSBenchmarkMultilingualSTS (it)": 68.17,
+ "STSBenchmarkMultilingualSTS (en)": 86.63
+ },
+ {
+ "Model": "e5-small",
+ "CDSC-R": 84.87,
+ "GermanSTSBenchmark": 65.29,
+ "RUParaPhraserSTS": 31.9,
+ "RuSTSBenchmarkSTS": 40.23,
+ "SICK-R-PL": 57.14,
+ "SICKFr": 68.03,
+ "STS22 (de-en)": 43.7,
+ "STS22 (tr)": 54.58,
+ "STS22 (es-it)": 57.13,
+ "STS22 (es-en)": 66.1,
+ "STS22 (de-fr)": 50.67,
+ "STS22 (es)": 60.03,
+ "STS22 (ru)": 18.1,
+ "STS22 (fr)": 77.03,
+ "STS22 (pl)": 26.4,
+ "STS22 (de-pl)": 32.33,
+ "STS22 (en)": 64.77,
+ "STS22 (de)": 41.84,
+ "STS22 (ar)": 28.24,
+ "STS22 (fr-pl)": 84.52,
+ "STS22 (pl-en)": 49.23,
+ "STS22 (zh)": 36.76,
+ "STS22 (zh-en)": 21.97,
+ "STS22 (it)": 71.58,
+ "STSB": 30.15,
+ "STSBenchmarkMultilingualSTS (es)": 73.64,
+ "STSBenchmarkMultilingualSTS (fr)": 70.47,
+ "STSBenchmarkMultilingualSTS (zh)": 35.3,
+ "STSBenchmarkMultilingualSTS (ru)": 40.24,
+ "STSBenchmarkMultilingualSTS (pt)": 71.46,
+ "STSBenchmarkMultilingualSTS (nl)": 65.35,
+ "STSBenchmarkMultilingualSTS (de)": 65.97,
+ "STSBenchmarkMultilingualSTS (pl)": 61.07,
+ "STSBenchmarkMultilingualSTS (it)": 68.17,
+ "STSBenchmarkMultilingualSTS (en)": 86.63
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "e5-small",
+ "SummEvalFr": 30.79
+ },
+ {
+ "Model": "e5-small",
+ "SummEvalFr": 30.79
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "e5-small",
+ "CEDRClassification": 31.09,
+ "SensitiveTopicsClassification": 17.42
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "e5-small",
+ "Core17InstructionRetrieval": -0.54,
+ "News21InstructionRetrieval": 0.87,
+ "Robust04InstructionRetrieval": -4.69
+ }
+ ]
}
},
"intfloat__multilingual-e5-base": {
@@ -9462,13 +25208,21 @@
"ArxivClusteringS2S": 36.0,
"BiorxivClusteringP2P": 37.55,
"BiorxivClusteringS2S": 30.33,
+ "BlurbsClusteringP2P": 38.35,
+ "BlurbsClusteringS2S": 16.25,
"CLSClusteringP2P": 32.41,
"CLSClusteringS2S": 36.99,
"GeoreviewClusteringP2P": 54.46,
"HALClusteringS2S": 22.48,
"MLSUMClusteringP2P (ru)": 43.47,
+ "MLSUMClusteringP2P (de)": 38.86,
+ "MLSUMClusteringP2P (fr)": 43.53,
+ "MLSUMClusteringP2P (es)": 47.58,
"MLSUMClusteringP2P": 43.48,
"MLSUMClusteringS2S (ru)": 40.87,
+ "MLSUMClusteringS2S (de)": 37.63,
+ "MLSUMClusteringS2S (fr)": 44.01,
+ "MLSUMClusteringS2S (es)": 47.19,
"MLSUMClusteringS2S": 38.53,
"MasakhaNEWSClusteringP2P (amh)": 58.05,
"MasakhaNEWSClusteringP2P (eng)": 43.8,
@@ -9510,6 +25264,8 @@
"RuSciBenchOECDClusteringP2P": 44.79,
"StackExchangeClustering": 55.31,
"StackExchangeClusteringP2P": 33.51,
+ "TenKGnadClusteringP2P": 41.92,
+ "TenKGnadClusteringS2S": 33.01,
"ThuNewsClusteringP2P": 40.98,
"ThuNewsClusteringS2S": 52.36,
"TwentyNewsgroupsClustering": 35.55
@@ -9521,6 +25277,7 @@
{
"Model": "multilingual-e5-base",
"CDSC-E": 72.7,
+ "FalseFriendsGermanEnglish": 51.62,
"OpusparcusPC (de)": 95.83,
"OpusparcusPC (en)": 98.71,
"OpusparcusPC (fi)": 90.3,
@@ -9545,6 +25302,7 @@
"Model": "multilingual-e5-base",
"CDSC-E": 72.67,
"Cmnli": 74.98,
+ "FalseFriendsGermanEnglish": 51.64,
"Ocnli": 60.47,
"OpusparcusPC (de)": 95.83,
"OpusparcusPC (en)": 98.71,
@@ -9598,7 +25356,24 @@
},
{
"Model": "multilingual-e5-base",
- "MIRACLReranking (ru)": 60.47
+ "MIRACLReranking (ru)": 60.47,
+ "MIRACLReranking (ar)": 73.8,
+ "MIRACLReranking (bn)": 70.78,
+ "MIRACLReranking (de)": 50.26,
+ "MIRACLReranking (en)": 56.97,
+ "MIRACLReranking (es)": 60.25,
+ "MIRACLReranking (fa)": 57.36,
+ "MIRACLReranking (fi)": 77.01,
+ "MIRACLReranking (fr)": 50.27,
+ "MIRACLReranking (hi)": 61.78,
+ "MIRACLReranking (id)": 56.25,
+ "MIRACLReranking (ja)": 63.27,
+ "MIRACLReranking (ko)": 53.53,
+ "MIRACLReranking (sw)": 63.77,
+ "MIRACLReranking (te)": 73.38,
+ "MIRACLReranking (th)": 73.81,
+ "MIRACLReranking (yo)": 64.37,
+ "MIRACLReranking (zh)": 49.11
}
]
},
@@ -9639,7 +25414,10 @@
"EcomRetrieval": 54.17,
"FiQA-PL": 25.52,
"FiQA2018": 38.15,
+ "GerDaLIR": 6.89,
"GerDaLIRSmall": 15.3,
+ "GermanDPR": 79.51,
+ "GermanQuAD-Retrieval": 93.93,
"HellaSwag": 24.79,
"HotpotQA-PL": 63.52,
"LEMBNarrativeQARetrieval": 23.6,
@@ -9652,6 +25430,23 @@
"LegalQuAD": 47.85,
"LegalSummarization": 61.69,
"MIRACLRetrieval (ru)": 61.6,
+ "MIRACLRetrieval (ar)": 71.62,
+ "MIRACLRetrieval (bn)": 70.23,
+ "MIRACLRetrieval (de)": 52.05,
+ "MIRACLRetrieval (en)": 51.17,
+ "MIRACLRetrieval (es)": 51.54,
+ "MIRACLRetrieval (fa)": 57.48,
+ "MIRACLRetrieval (fi)": 74.35,
+ "MIRACLRetrieval (fr)": 49.63,
+ "MIRACLRetrieval (hi)": 58.33,
+ "MIRACLRetrieval (id)": 51.04,
+ "MIRACLRetrieval (ja)": 64.74,
+ "MIRACLRetrieval (ko)": 62.27,
+ "MIRACLRetrieval (sw)": 71.06,
+ "MIRACLRetrieval (te)": 75.07,
+ "MIRACLRetrieval (th)": 75.27,
+ "MIRACLRetrieval (yo)": 70.56,
+ "MIRACLRetrieval (zh)": 51.52,
"MMarcoRetrieval": 76.04,
"MSMARCO-PL": 29.52,
"MedicalRetrieval": 48.35,
@@ -9663,7 +25458,7 @@
"MintakaRetrieval (it)": 29.77,
"MintakaRetrieval (ja)": 22.98,
"MintakaRetrieval (pt)": 30.62,
- "NFCorpus": 32.49,
+ "NFCorpus": 32.45,
"NFCorpus-PL": 25.98,
"NQ-PL": 44.8,
"PIQA": 25.09,
@@ -9676,14 +25471,14 @@
"SCIDOCS": 17.17,
"SCIDOCS-PL": 12.35,
"SIQA": 3.72,
- "SciFact": 69.39,
+ "SciFact": 69.65,
"SciFact-PL": 62.11,
"SpartQA": 7.91,
"StackOverflowQA": 85.11,
"SyntecRetrieval": 80.49,
"SyntheticText2SQL": 53.61,
"T2Retrieval": 70.86,
- "TRECCOVID": 69.5,
+ "TRECCOVID": 69.49,
"TRECCOVID-PL": 66.06,
"TempReasonL1": 0.72,
"TempReasonL2Fact": 38.76,
@@ -9693,6 +25488,9 @@
"Touche2020": 21.5,
"VideoRetrieval": 61.3,
"WinoGrande": 56.18,
+ "XMarket (de)": 16.27,
+ "XMarket (en)": 22.04,
+ "XMarket (es)": 11.89,
"XPQARetrieval (ara-ara)": 39.97,
"XPQARetrieval (eng-ara)": 17.23,
"XPQARetrieval (ara-eng)": 34.35,
@@ -9742,6 +25540,7 @@
"BIOSSES": 85.05,
"BQ": 45.45,
"CDSC-R": 90.09,
+ "GermanSTSBenchmark": 78.86,
"LCQMC": 74.15,
"PAWSX": 12.13,
"RUParaPhraserSTS": 70.17,
@@ -9803,6 +25602,7 @@
"BIOSSES": 85.05,
"BQ": 45.45,
"CDSC-R": 90.09,
+ "GermanSTSBenchmark": 78.86,
"LCQMC": 74.15,
"PAWSX": 12.13,
"RUParaPhraserSTS": 70.17,
@@ -10223,14 +26023,22 @@
"AlloProfClusteringS2S": 32.26,
"BiorxivClusteringP2P": 35.5,
"BiorxivClusteringS2S": 33.3,
+ "BlurbsClusteringP2P": 41.52,
+ "BlurbsClusteringS2S": 16.8,
"CLSClusteringP2P": 40.68,
"CLSClusteringS2S": 38.59,
"GeoreviewClusteringP2P": 59.59,
"HALClusteringS2S": 22.44,
- "MLSUMClusteringP2P (ru)": 42.79,
+ "MLSUMClusteringP2P (ru)": 42.04,
"MLSUMClusteringP2P": 44.04,
- "MLSUMClusteringS2S (ru)": 44.32,
+ "MLSUMClusteringP2P (de)": 39.43,
+ "MLSUMClusteringP2P (fr)": 44.04,
+ "MLSUMClusteringP2P (es)": 47.42,
+ "MLSUMClusteringS2S (ru)": 44.84,
"MLSUMClusteringS2S": 37.65,
+ "MLSUMClusteringS2S (de)": 39.14,
+ "MLSUMClusteringS2S (fr)": 45.24,
+ "MLSUMClusteringS2S (es)": 48.17,
"MasakhaNEWSClusteringP2P (amh)": 67.16,
"MasakhaNEWSClusteringP2P (eng)": 61.1,
"MasakhaNEWSClusteringP2P (fra)": 40.94,
@@ -10271,6 +26079,8 @@
"RuSciBenchOECDClusteringP2P": 45.12,
"StackExchangeClustering": 58.37,
"StackExchangeClusteringP2P": 32.9,
+ "TenKGnadClusteringP2P": 44.83,
+ "TenKGnadClusteringS2S": 30.83,
"ThuNewsClusteringP2P": 58.05,
"ThuNewsClusteringS2S": 55.59,
"TwentyNewsgroupsClustering": 39.4
@@ -10282,6 +26092,7 @@
{
"Model": "multilingual-e5-large",
"CDSC-E": 74.47,
+ "FalseFriendsGermanEnglish": 53.45,
"OpusparcusPC (de)": 97.27,
"OpusparcusPC (en)": 98.74,
"OpusparcusPC (fi)": 94.26,
@@ -10306,6 +26117,7 @@
"Model": "multilingual-e5-large",
"CDSC-E": 74.47,
"Cmnli": 78.18,
+ "FalseFriendsGermanEnglish": 53.53,
"Ocnli": 61.6,
"OpusparcusPC (de)": 97.27,
"OpusparcusPC (en)": 98.74,
@@ -10359,7 +26171,24 @@
},
{
"Model": "multilingual-e5-large",
- "MIRACLReranking (ru)": 63.71
+ "MIRACLReranking (ru)": 63.71,
+ "MIRACLReranking (ar)": 77.24,
+ "MIRACLReranking (bn)": 75.31,
+ "MIRACLReranking (de)": 53.61,
+ "MIRACLReranking (en)": 58.11,
+ "MIRACLReranking (es)": 61.65,
+ "MIRACLReranking (fa)": 59.36,
+ "MIRACLReranking (fi)": 79.22,
+ "MIRACLReranking (fr)": 53.96,
+ "MIRACLReranking (hi)": 65.84,
+ "MIRACLReranking (id)": 58.56,
+ "MIRACLReranking (ja)": 66.7,
+ "MIRACLReranking (ko)": 55.31,
+ "MIRACLReranking (sw)": 65.06,
+ "MIRACLReranking (te)": 81.64,
+ "MIRACLReranking (th)": 77.06,
+ "MIRACLReranking (yo)": 65.32,
+ "MIRACLReranking (zh)": 51.5
}
]
},
@@ -10376,7 +26205,8 @@
"ArguAna": 54.36,
"ArguAna-PL": 53.02,
"BSARDRetrieval": 0.27,
- "CmedqaRetrieval": 28.67,
+ "ClimateFEVER": 25.73,
+ "CmedqaRetrieval": 28.66,
"CodeFeedbackMT": 42.78,
"CodeFeedbackST": 74.26,
"CodeSearchNetCCRetrieval (python)": 84.45,
@@ -10395,13 +26225,18 @@
"CodeTransOceanDL": 31.28,
"CosQA": 34.8,
"CovidRetrieval": 75.51,
+ "DBPedia": 41.3,
"DBPedia-PL": 35.82,
"DuRetrieval": 85.32,
"EcomRetrieval": 54.75,
"FiQA-PL": 33.0,
"FiQA2018": 43.81,
+ "GerDaLIR": 6.53,
"GerDaLIRSmall": 15.72,
+ "GermanDPR": 82.89,
+ "GermanQuAD-Retrieval": 94.66,
"HellaSwag": 27.35,
+ "HotpotQA": 71.22,
"HotpotQA-PL": 67.41,
"LEMBNarrativeQARetrieval": 24.22,
"LEMBQMSumRetrieval": 24.26,
@@ -10424,12 +26259,14 @@
"MintakaRetrieval (it)": 33.84,
"MintakaRetrieval (ja)": 26.45,
"MintakaRetrieval (pt)": 35.9,
- "NFCorpus": 33.95,
+ "NFCorpus": 33.98,
"NFCorpus-PL": 30.24,
+ "NQ": 64.03,
"NQ-PL": 52.79,
"PIQA": 28.82,
"Quail": 4.85,
"Quora-PL": 83.65,
+ "QuoraRetrieval": 89.26,
"RARbCode": 58.92,
"RARbMath": 67.32,
"RiaNewsRetrieval": 80.67,
@@ -10437,15 +26274,15 @@
"SCIDOCS": 17.45,
"SCIDOCS-PL": 13.81,
"SIQA": 5.36,
- "SciFact": 70.42,
- "SciFact-PL": 65.66,
+ "SciFact": 70.2,
+ "SciFact-PL": 65.8,
"SpartQA": 5.64,
"StackOverflowQA": 88.89,
"SyntecRetrieval": 81.07,
"SyntheticText2SQL": 53.07,
"T2Retrieval": 76.11,
- "TRECCOVID": 71.21,
- "TRECCOVID-PL": 70.03,
+ "TRECCOVID": 71.15,
+ "TRECCOVID-PL": 69.91,
"TempReasonL1": 1.14,
"TempReasonL2Fact": 42.96,
"TempReasonL2Pure": 2.05,
@@ -10454,6 +26291,9 @@
"Touche2020": 23.13,
"VideoRetrieval": 58.25,
"WinoGrande": 54.99,
+ "XMarket (de)": 17.46,
+ "XMarket (en)": 20.59,
+ "XMarket (es)": 13.48,
"XPQARetrieval (ara-ara)": 43.69,
"XPQARetrieval (eng-ara)": 30.86,
"XPQARetrieval (ara-eng)": 39.11,
@@ -10503,6 +26343,7 @@
"BIOSSES": 82.49,
"BQ": 46.44,
"CDSC-R": 91.0,
+ "GermanSTSBenchmark": 83.64,
"LCQMC": 75.95,
"PAWSX": 14.63,
"RUParaPhraserSTS": 71.82,
@@ -10564,6 +26405,7 @@
"BIOSSES": 82.49,
"BQ": 46.44,
"CDSC-R": 91.0,
+ "GermanSTSBenchmark": 83.64,
"LCQMC": 75.95,
"PAWSX": 14.63,
"RUParaPhraserSTS": 71.82,
@@ -10975,9 +26817,20 @@
"v_measure": [
{
"Model": "multilingual-e5-large-instruct",
+ "AlloProfClusteringP2P": 70.39,
+ "AlloProfClusteringS2S": 60.95,
+ "BlurbsClusteringP2P": 44.35,
+ "BlurbsClusteringS2S": 20.81,
"GeoreviewClusteringP2P": 74.34,
+ "HALClusteringS2S": 28.53,
"MLSUMClusteringP2P (ru)": 57.77,
+ "MLSUMClusteringP2P (de)": 49.88,
+ "MLSUMClusteringP2P (fr)": 47.09,
+ "MLSUMClusteringP2P (es)": 49.74,
"MLSUMClusteringS2S (ru)": 57.5,
+ "MLSUMClusteringS2S (de)": 49.23,
+ "MLSUMClusteringS2S (fr)": 46.51,
+ "MLSUMClusteringS2S (es)": 48.93,
"MasakhaNEWSClusteringP2P (amh)": 74.82,
"MasakhaNEWSClusteringP2P (eng)": 70.12,
"MasakhaNEWSClusteringP2P (fra)": 70.48,
@@ -11011,7 +26864,9 @@
"MasakhaNEWSClusteringS2S (xho)": 43.19,
"MasakhaNEWSClusteringS2S (yor)": 54.23,
"RuSciBenchGRNTIClusteringP2P": 62.21,
- "RuSciBenchOECDClusteringP2P": 53.09,
+ "RuSciBenchOECDClusteringP2P": 52.83,
+ "TenKGnadClusteringP2P": 54.02,
+ "TenKGnadClusteringS2S": 41.71,
"TwentyNewsgroupsClustering": 51.03
}
]
@@ -11021,6 +26876,7 @@
{
"Model": "multilingual-e5-large-instruct",
"CDSC-E": 76.17,
+ "FalseFriendsGermanEnglish": 52.3,
"OpusparcusPC (de)": 97.56,
"OpusparcusPC (en)": 98.91,
"OpusparcusPC (fi)": 94.74,
@@ -11044,6 +26900,7 @@
{
"Model": "multilingual-e5-large-instruct",
"CDSC-E": 76.17,
+ "FalseFriendsGermanEnglish": 52.3,
"OpusparcusPC (de)": 97.56,
"OpusparcusPC (en)": 98.92,
"OpusparcusPC (fi)": 94.74,
@@ -11074,7 +26931,7 @@
"AskUbuntuDupQuestions": 64.41,
"MMarcoReranking": 23.6,
"MindSmallReranking": 33.07,
- "RuBQReranking": 75.84,
+ "RuBQReranking": 71.66,
"SciDocsRR": 85.75,
"StackOverflowDupQuestions": 52.45,
"SyntecReranking": 89.95,
@@ -11082,7 +26939,24 @@
},
{
"Model": "multilingual-e5-large-instruct",
- "MIRACLReranking (ru)": 62.49
+ "MIRACLReranking (ru)": 57.03,
+ "MIRACLReranking (ar)": 68.84,
+ "MIRACLReranking (bn)": 68.46,
+ "MIRACLReranking (de)": 46.65,
+ "MIRACLReranking (en)": 52.2,
+ "MIRACLReranking (es)": 53.13,
+ "MIRACLReranking (fa)": 53.97,
+ "MIRACLReranking (fi)": 73.31,
+ "MIRACLReranking (fr)": 45.44,
+ "MIRACLReranking (hi)": 59.44,
+ "MIRACLReranking (id)": 53.13,
+ "MIRACLReranking (ja)": 57.35,
+ "MIRACLReranking (ko)": 52.34,
+ "MIRACLReranking (sw)": 59.61,
+ "MIRACLReranking (te)": 72.85,
+ "MIRACLReranking (th)": 68.61,
+ "MIRACLReranking (yo)": 64.19,
+ "MIRACLReranking (zh)": 46.74
}
]
},
@@ -11122,7 +26996,10 @@
"EcomRetrieval": 53.92,
"FiQA-PL": 32.01,
"FiQA2018": 48.42,
+ "GerDaLIR": 9.31,
"GerDaLIRSmall": 21.34,
+ "GermanDPR": 80.84,
+ "GermanQuAD-Retrieval": 94.67,
"HellaSwag": 32.02,
"LEMBNarrativeQARetrieval": 26.71,
"LEMBQMSumRetrieval": 26.08,
@@ -11133,7 +27010,24 @@
"LegalBenchCorporateLobbying": 94.25,
"LegalQuAD": 51.25,
"LegalSummarization": 68.07,
- "MIRACLRetrieval (ru)": 66.08,
+ "MIRACLRetrieval (ru)": 53.08,
+ "MIRACLRetrieval (ar)": 63.3,
+ "MIRACLRetrieval (bn)": 65.27,
+ "MIRACLRetrieval (de)": 43.92,
+ "MIRACLRetrieval (en)": 43.09,
+ "MIRACLRetrieval (es)": 39.67,
+ "MIRACLRetrieval (fa)": 50.97,
+ "MIRACLRetrieval (fi)": 67.97,
+ "MIRACLRetrieval (fr)": 38.88,
+ "MIRACLRetrieval (hi)": 51.57,
+ "MIRACLRetrieval (id)": 45.73,
+ "MIRACLRetrieval (ja)": 55.26,
+ "MIRACLRetrieval (ko)": 59.95,
+ "MIRACLRetrieval (sw)": 63.4,
+ "MIRACLRetrieval (te)": 74.51,
+ "MIRACLRetrieval (th)": 67.95,
+ "MIRACLRetrieval (yo)": 78.29,
+ "MIRACLRetrieval (zh)": 46.05,
"MMarcoRetrieval": 78.81,
"MedicalRetrieval": 56.55,
"MintakaRetrieval (ar)": 26.13,
@@ -11150,8 +27044,8 @@
"Quail": 8.63,
"RARbCode": 71.22,
"RARbMath": 71.95,
- "RiaNewsRetrieval": 83.26,
- "RuBQRetrieval": 73.9,
+ "RiaNewsRetrieval": 82.44,
+ "RuBQRetrieval": 69.18,
"SCIDOCS": 19.24,
"SCIDOCS-PL": 17.15,
"SIQA": 7.33,
@@ -11172,6 +27066,9 @@
"Touche2020": 27.4,
"VideoRetrieval": 52.24,
"WinoGrande": 54.27,
+ "XMarket (de)": 24.4,
+ "XMarket (en)": 27.51,
+ "XMarket (es)": 25.01,
"XPQARetrieval (ara-ara)": 48.56,
"XPQARetrieval (eng-ara)": 34.01,
"XPQARetrieval (ara-eng)": 45.13,
@@ -11220,6 +27117,7 @@
"BIOSSES": 87.46,
"BQ": 48.8,
"CDSC-R": 92.35,
+ "GermanSTSBenchmark": 84.84,
"LCQMC": 76.06,
"PAWSX": 15.06,
"RUParaPhraserSTS": 75.4,
@@ -11249,7 +27147,7 @@
"STS22 (es)": 68.45,
"STS22 (de-fr)": 65.52,
"STS22 (pl)": 40.97,
- "STS22 (ru)": 65.17,
+ "STS22 (ru)": 70.06,
"STS22 (en)": 68.67,
"STS22 (fr)": 82.25,
"STS22 (es-it)": 75.25,
@@ -11281,6 +27179,7 @@
"BIOSSES": 87.46,
"BQ": 48.8,
"CDSC-R": 92.35,
+ "GermanSTSBenchmark": 84.84,
"LCQMC": 76.06,
"PAWSX": 15.06,
"RUParaPhraserSTS": 75.4,
@@ -11310,7 +27209,7 @@
"STS22 (es)": 68.45,
"STS22 (de-fr)": 65.52,
"STS22 (pl)": 40.97,
- "STS22 (ru)": 65.17,
+ "STS22 (ru)": 70.06,
"STS22 (en)": 68.67,
"STS22 (fr)": 82.25,
"STS22 (es-it)": 75.25,
@@ -11497,13 +27396,13 @@
{
"Model": "multilingual-e5-small",
"AllegroReviews": 37.42,
+ "AmazonCounterfactualClassification (de)": 71.72,
"AmazonCounterfactualClassification (en-ext)": 73.07,
"AmazonCounterfactualClassification (en)": 71.87,
- "AmazonCounterfactualClassification (de)": 71.72,
"AmazonCounterfactualClassification (ja)": 61.46,
"AmazonPolarityClassification": 88.61,
- "AmazonReviewsClassification (en)": 45.75,
"AmazonReviewsClassification (de)": 41.07,
+ "AmazonReviewsClassification (en)": 45.75,
"AmazonReviewsClassification (es)": 41.37,
"AmazonReviewsClassification (fr)": 39.68,
"AmazonReviewsClassification (ja)": 38.55,
@@ -11522,14 +27421,14 @@
"JDReview": 79.34,
"KinopoiskClassification": 49.96,
"LccSentimentClassification": 57.87,
- "MTOPDomainClassification (en)": 88.99,
"MTOPDomainClassification (de)": 86.15,
+ "MTOPDomainClassification (en)": 88.99,
"MTOPDomainClassification (es)": 85.53,
"MTOPDomainClassification (fr)": 81.2,
"MTOPDomainClassification (hi)": 84.07,
"MTOPDomainClassification (th)": 83.16,
- "MTOPIntentClassification (en)": 56.69,
"MTOPIntentClassification (de)": 55.88,
+ "MTOPIntentClassification (en)": 56.69,
"MTOPIntentClassification (es)": 53.15,
"MTOPIntentClassification (fr)": 46.01,
"MTOPIntentClassification (hi)": 52.26,
@@ -11550,108 +27449,108 @@
"MasakhaNEWSClassification (tir)": 68.01,
"MasakhaNEWSClassification (xho)": 72.22,
"MasakhaNEWSClassification (yor)": 73.84,
- "MassiveIntentClassification (is)": 41.53,
- "MassiveIntentClassification (tl)": 48.7,
- "MassiveIntentClassification (he)": 51.11,
- "MassiveIntentClassification (ta)": 47.65,
- "MassiveIntentClassification (ar)": 47.78,
- "MassiveIntentClassification (my)": 45.64,
- "MassiveIntentClassification (sl)": 47.71,
- "MassiveIntentClassification (af)": 48.74,
+ "MassiveIntentClassification (ru)": 58.43,
+ "MassiveIntentClassification (sq)": 48.68,
+ "MassiveIntentClassification (ms)": 50.8,
+ "MassiveIntentClassification (hi)": 55.69,
+ "MassiveIntentClassification (pt)": 60.12,
+ "MassiveIntentClassification (zh-TW)": 53.75,
+ "MassiveIntentClassification (id)": 56.2,
+ "MassiveIntentClassification (nl)": 59.27,
+ "MassiveIntentClassification (ur)": 50.51,
+ "MassiveIntentClassification (te)": 48.85,
"MassiveIntentClassification (de)": 55.52,
- "MassiveIntentClassification (pl)": 57.4,
+ "MassiveIntentClassification (ro)": 52.82,
+ "MassiveIntentClassification (da)": 54.63,
+ "MassiveIntentClassification (am)": 43.52,
"MassiveIntentClassification (en)": 63.87,
- "MassiveIntentClassification (fi)": 55.14,
- "MassiveIntentClassification (lv)": 44.93,
"MassiveIntentClassification (fr)": 57.9,
- "MassiveIntentClassification (ur)": 50.51,
+ "MassiveIntentClassification (sw)": 44.84,
+ "MassiveIntentClassification (bn)": 50.68,
"MassiveIntentClassification (mn)": 47.38,
- "MassiveIntentClassification (it)": 58.8,
+ "MassiveIntentClassification (kn)": 47.85,
+ "MassiveIntentClassification (ja)": 61.58,
+ "MassiveIntentClassification (hy)": 47.89,
"MassiveIntentClassification (ko)": 57.12,
- "MassiveIntentClassification (nb)": 53.96,
"MassiveIntentClassification (es)": 59.19,
- "MassiveIntentClassification (ja)": 61.58,
- "MassiveIntentClassification (da)": 54.63,
- "MassiveIntentClassification (zh-TW)": 53.75,
- "MassiveIntentClassification (id)": 56.2,
- "MassiveIntentClassification (ka)": 39.52,
- "MassiveIntentClassification (hi)": 55.69,
- "MassiveIntentClassification (cy)": 36.62,
- "MassiveIntentClassification (kn)": 47.85,
- "MassiveIntentClassification (pt)": 60.12,
- "MassiveIntentClassification (th)": 56.26,
- "MassiveIntentClassification (fa)": 57.73,
- "MassiveIntentClassification (bn)": 50.68,
- "MassiveIntentClassification (ml)": 52.81,
- "MassiveIntentClassification (ro)": 52.82,
- "MassiveIntentClassification (am)": 43.52,
- "MassiveIntentClassification (hu)": 53.21,
- "MassiveIntentClassification (sw)": 44.84,
- "MassiveIntentClassification (ms)": 50.8,
- "MassiveIntentClassification (tr)": 56.88,
+ "MassiveIntentClassification (is)": 41.53,
"MassiveIntentClassification (km)": 33.45,
- "MassiveIntentClassification (ru)": 58.43,
- "MassiveIntentClassification (az)": 49.32,
- "MassiveIntentClassification (te)": 48.85,
- "MassiveIntentClassification (nl)": 59.27,
+ "MassiveIntentClassification (af)": 48.74,
"MassiveIntentClassification (zh-CN)": 62.04,
- "MassiveIntentClassification (sq)": 48.68,
+ "MassiveIntentClassification (he)": 51.11,
+ "MassiveIntentClassification (fa)": 57.73,
+ "MassiveIntentClassification (nb)": 53.96,
+ "MassiveIntentClassification (sv)": 56.6,
+ "MassiveIntentClassification (ta)": 47.65,
+ "MassiveIntentClassification (tr)": 56.88,
+ "MassiveIntentClassification (sl)": 47.71,
+ "MassiveIntentClassification (it)": 58.8,
+ "MassiveIntentClassification (tl)": 48.7,
"MassiveIntentClassification (vi)": 56.19,
+ "MassiveIntentClassification (lv)": 44.93,
+ "MassiveIntentClassification (fi)": 55.14,
+ "MassiveIntentClassification (pl)": 57.4,
"MassiveIntentClassification (jv)": 42.96,
- "MassiveIntentClassification (sv)": 56.6,
- "MassiveIntentClassification (hy)": 47.89,
+ "MassiveIntentClassification (my)": 45.64,
+ "MassiveIntentClassification (ka)": 39.52,
+ "MassiveIntentClassification (th)": 56.26,
"MassiveIntentClassification (el)": 54.14,
+ "MassiveIntentClassification (ar)": 47.78,
+ "MassiveIntentClassification (az)": 49.32,
+ "MassiveIntentClassification (cy)": 36.62,
+ "MassiveIntentClassification (hu)": 53.21,
+ "MassiveIntentClassification (ml)": 52.81,
+ "MassiveScenarioClassification (ru)": 63.89,
+ "MassiveScenarioClassification (fr)": 63.9,
+ "MassiveScenarioClassification (my)": 51.07,
+ "MassiveScenarioClassification (sq)": 56.15,
+ "MassiveScenarioClassification (af)": 58.0,
"MassiveScenarioClassification (de)": 65.88,
- "MassiveScenarioClassification (nb)": 59.9,
- "MassiveScenarioClassification (th)": 65.72,
"MassiveScenarioClassification (ka)": 44.96,
- "MassiveScenarioClassification (jv)": 51.39,
- "MassiveScenarioClassification (sv)": 65.54,
- "MassiveScenarioClassification (fr)": 63.9,
"MassiveScenarioClassification (tl)": 55.3,
- "MassiveScenarioClassification (hu)": 61.93,
- "MassiveScenarioClassification (ur)": 55.91,
+ "MassiveScenarioClassification (zh-CN)": 68.96,
+ "MassiveScenarioClassification (pl)": 64.25,
+ "MassiveScenarioClassification (mn)": 52.41,
"MassiveScenarioClassification (ms)": 59.18,
- "MassiveScenarioClassification (az)": 53.27,
- "MassiveScenarioClassification (af)": 58.0,
- "MassiveScenarioClassification (zh-TW)": 61.15,
- "MassiveScenarioClassification (lv)": 51.0,
- "MassiveScenarioClassification (km)": 39.01,
- "MassiveScenarioClassification (el)": 62.29,
- "MassiveScenarioClassification (bn)": 57.38,
"MassiveScenarioClassification (da)": 62.34,
- "MassiveScenarioClassification (ml)": 60.31,
- "MassiveScenarioClassification (ro)": 60.0,
- "MassiveScenarioClassification (ru)": 63.89,
- "MassiveScenarioClassification (it)": 64.03,
"MassiveScenarioClassification (am)": 50.53,
+ "MassiveScenarioClassification (en)": 69.28,
"MassiveScenarioClassification (is)": 49.66,
- "MassiveScenarioClassification (ja)": 67.75,
- "MassiveScenarioClassification (zh-CN)": 68.96,
- "MassiveScenarioClassification (id)": 62.0,
- "MassiveScenarioClassification (tr)": 62.14,
- "MassiveScenarioClassification (fa)": 63.32,
- "MassiveScenarioClassification (ta)": 52.74,
- "MassiveScenarioClassification (kn)": 52.73,
- "MassiveScenarioClassification (pt)": 62.75,
- "MassiveScenarioClassification (cy)": 44.63,
- "MassiveScenarioClassification (my)": 51.07,
+ "MassiveScenarioClassification (it)": 64.03,
+ "MassiveScenarioClassification (jv)": 51.39,
+ "MassiveScenarioClassification (nl)": 67.01,
+ "MassiveScenarioClassification (ro)": 60.0,
+ "MassiveScenarioClassification (te)": 54.86,
+ "MassiveScenarioClassification (bn)": 57.38,
"MassiveScenarioClassification (es)": 64.43,
+ "MassiveScenarioClassification (el)": 62.29,
+ "MassiveScenarioClassification (lv)": 51.0,
+ "MassiveScenarioClassification (he)": 59.22,
"MassiveScenarioClassification (hi)": 62.22,
- "MassiveScenarioClassification (te)": 54.86,
- "MassiveScenarioClassification (mn)": 52.41,
+ "MassiveScenarioClassification (ar)": 54.56,
+ "MassiveScenarioClassification (pt)": 62.75,
+ "MassiveScenarioClassification (sv)": 65.54,
"MassiveScenarioClassification (ko)": 65.7,
- "MassiveScenarioClassification (sl)": 54.05,
+ "MassiveScenarioClassification (ta)": 52.74,
+ "MassiveScenarioClassification (vi)": 62.67,
+ "MassiveScenarioClassification (fa)": 63.32,
"MassiveScenarioClassification (sw)": 52.42,
- "MassiveScenarioClassification (hy)": 52.93,
- "MassiveScenarioClassification (nl)": 67.01,
- "MassiveScenarioClassification (sq)": 56.15,
+ "MassiveScenarioClassification (ml)": 60.31,
+ "MassiveScenarioClassification (sl)": 54.05,
+ "MassiveScenarioClassification (ja)": 67.75,
"MassiveScenarioClassification (fi)": 61.89,
- "MassiveScenarioClassification (en)": 69.28,
- "MassiveScenarioClassification (vi)": 62.67,
- "MassiveScenarioClassification (he)": 59.22,
- "MassiveScenarioClassification (ar)": 54.56,
- "MassiveScenarioClassification (pl)": 64.25,
+ "MassiveScenarioClassification (zh-TW)": 61.15,
+ "MassiveScenarioClassification (id)": 62.0,
+ "MassiveScenarioClassification (tr)": 62.14,
+ "MassiveScenarioClassification (km)": 39.01,
+ "MassiveScenarioClassification (nb)": 59.9,
+ "MassiveScenarioClassification (az)": 53.27,
+ "MassiveScenarioClassification (hy)": 52.93,
+ "MassiveScenarioClassification (cy)": 44.63,
+ "MassiveScenarioClassification (th)": 65.72,
+ "MassiveScenarioClassification (hu)": 61.93,
+ "MassiveScenarioClassification (kn)": 52.73,
+ "MassiveScenarioClassification (ur)": 55.91,
"MultilingualSentiment": 64.74,
"NoRecClassification": 53.96,
"NordicLangClassification": 75.15,
@@ -11681,13 +27580,21 @@
"AlloProfClusteringS2S": 32.52,
"BiorxivClusteringP2P": 35.84,
"BiorxivClusteringS2S": 27.35,
+ "BlurbsClusteringP2P": 37.05,
+ "BlurbsClusteringS2S": 15.24,
"CLSClusteringP2P": 39.14,
"CLSClusteringS2S": 37.79,
"GeoreviewClusteringP2P": 58.57,
"HALClusteringS2S": 18.95,
+ "MLSUMClusteringP2P (de)": 40.19,
+ "MLSUMClusteringP2P (fr)": 43.01,
"MLSUMClusteringP2P (ru)": 39.69,
+ "MLSUMClusteringP2P (es)": 46.38,
"MLSUMClusteringP2P": 43.2,
+ "MLSUMClusteringS2S (de)": 39.39,
+ "MLSUMClusteringS2S (fr)": 42.84,
"MLSUMClusteringS2S (ru)": 39.9,
+ "MLSUMClusteringS2S (es)": 45.78,
"MLSUMClusteringS2S": 37.61,
"MasakhaNEWSClusteringP2P (amh)": 66.2,
"MasakhaNEWSClusteringP2P (eng)": 50.08,
@@ -11729,6 +27636,8 @@
"RuSciBenchOECDClusteringP2P": 44.33,
"StackExchangeClustering": 53.32,
"StackExchangeClusteringP2P": 31.87,
+ "TenKGnadClusteringP2P": 43.77,
+ "TenKGnadClusteringS2S": 30.52,
"ThuNewsClusteringP2P": 55.18,
"ThuNewsClusteringS2S": 48.93,
"TwentyNewsgroupsClustering": 33.67
@@ -11740,6 +27649,7 @@
{
"Model": "multilingual-e5-small",
"CDSC-E": 69.69,
+ "FalseFriendsGermanEnglish": 49.46,
"OpusparcusPC (de)": 94.9,
"OpusparcusPC (en)": 98.42,
"OpusparcusPC (fi)": 88.29,
@@ -11764,6 +27674,7 @@
"Model": "multilingual-e5-small",
"CDSC-E": 69.84,
"Cmnli": 72.12,
+ "FalseFriendsGermanEnglish": 49.49,
"Ocnli": 60.77,
"OpusparcusPC (de)": 94.9,
"OpusparcusPC (en)": 98.42,
@@ -11817,7 +27728,24 @@
},
{
"Model": "multilingual-e5-small",
- "MIRACLReranking (ru)": 59.12
+ "MIRACLReranking (ru)": 59.12,
+ "MIRACLReranking (ar)": 73.56,
+ "MIRACLReranking (bn)": 69.43,
+ "MIRACLReranking (de)": 48.16,
+ "MIRACLReranking (en)": 55.99,
+ "MIRACLReranking (es)": 60.46,
+ "MIRACLReranking (fa)": 54.65,
+ "MIRACLReranking (fi)": 76.06,
+ "MIRACLReranking (fr)": 48.53,
+ "MIRACLReranking (hi)": 59.93,
+ "MIRACLReranking (id)": 56.22,
+ "MIRACLReranking (ja)": 62.58,
+ "MIRACLReranking (ko)": 54.56,
+ "MIRACLReranking (sw)": 62.33,
+ "MIRACLReranking (te)": 77.27,
+ "MIRACLReranking (th)": 73.44,
+ "MIRACLReranking (yo)": 58.12,
+ "MIRACLReranking (zh)": 45.45
}
]
},
@@ -11858,7 +27786,10 @@
"EcomRetrieval": 53.56,
"FiQA-PL": 22.03,
"FiQA2018": 33.13,
+ "GerDaLIR": 6.87,
"GerDaLIRSmall": 14.81,
+ "GermanDPR": 78.94,
+ "GermanQuAD-Retrieval": 93.14,
"HellaSwag": 23.73,
"HotpotQA-PL": 60.15,
"LEMBNarrativeQARetrieval": 22.6,
@@ -11871,6 +27802,23 @@
"LegalQuAD": 47.8,
"LegalSummarization": 55.76,
"MIRACLRetrieval (ru)": 59.01,
+ "MIRACLRetrieval (ar)": 71.35,
+ "MIRACLRetrieval (bn)": 68.27,
+ "MIRACLRetrieval (de)": 48.75,
+ "MIRACLRetrieval (en)": 47.98,
+ "MIRACLRetrieval (es)": 51.23,
+ "MIRACLRetrieval (fa)": 53.34,
+ "MIRACLRetrieval (fi)": 73.35,
+ "MIRACLRetrieval (fr)": 47.61,
+ "MIRACLRetrieval (hi)": 55.13,
+ "MIRACLRetrieval (id)": 50.67,
+ "MIRACLRetrieval (ja)": 63.61,
+ "MIRACLRetrieval (ko)": 61.24,
+ "MIRACLRetrieval (sw)": 68.48,
+ "MIRACLRetrieval (te)": 81.31,
+ "MIRACLRetrieval (th)": 74.9,
+ "MIRACLRetrieval (yo)": 45.28,
+ "MIRACLRetrieval (zh)": 45.95,
"MMarcoRetrieval": 73.17,
"MSMARCO-PL": 26.94,
"MedicalRetrieval": 44.84,
@@ -11912,6 +27860,9 @@
"Touche2020": 21.16,
"VideoRetrieval": 58.09,
"WinoGrande": 37.46,
+ "XMarket (de)": 15.55,
+ "XMarket (en)": 18.11,
+ "XMarket (es)": 11.55,
"XPQARetrieval (ara-ara)": 39.93,
"XPQARetrieval (eng-ara)": 18.09,
"XPQARetrieval (ara-eng)": 31.64,
@@ -11961,6 +27912,7 @@
"BIOSSES": 82.46,
"BQ": 43.27,
"CDSC-R": 90.27,
+ "GermanSTSBenchmark": 78.42,
"LCQMC": 72.7,
"PAWSX": 11.0,
"RUParaPhraserSTS": 70.46,
@@ -11984,36 +27936,36 @@
"STS17 (es-es)": 84.83,
"STS17 (en-de)": 76.82,
"STS17 (es-en)": 72.43,
- "STS22 (pl-en)": 72.69,
+ "STS22 (ru)": 59.9,
+ "STS22 (de-fr)": 60.62,
+ "STS22 (es-en)": 74.2,
+ "STS22 (it)": 76.53,
+ "STS22 (zh)": 66.85,
+ "STS22 (zh-en)": 65.32,
"STS22 (de-en)": 56.07,
+ "STS22 (de-pl)": 28.24,
"STS22 (es)": 66.86,
+ "STS22 (tr)": 63.69,
"STS22 (ar)": 56.65,
- "STS22 (ru)": 59.9,
+ "STS22 (fr)": 76.58,
+ "STS22 (fr-pl)": 84.52,
"STS22 (de)": 53.45,
- "STS22 (zh-en)": 65.32,
+ "STS22 (pl)": 35.78,
+ "STS22 (pl-en)": 72.69,
"STS22 (en)": 61.25,
- "STS22 (fr)": 76.58,
- "STS22 (it)": 76.53,
"STS22 (es-it)": 71.74,
- "STS22 (es-en)": 74.2,
- "STS22 (de-fr)": 60.62,
- "STS22 (tr)": 63.69,
- "STS22 (pl)": 35.78,
- "STS22 (fr-pl)": 84.52,
- "STS22 (zh)": 66.85,
- "STS22 (de-pl)": 28.24,
"STSB": 77.73,
"STSBenchmark": 84.11,
- "STSBenchmarkMultilingualSTS (zh)": 78.49,
- "STSBenchmarkMultilingualSTS (pl)": 72.61,
- "STSBenchmarkMultilingualSTS (ru)": 78.24,
+ "STSBenchmarkMultilingualSTS (es)": 80.31,
"STSBenchmarkMultilingualSTS (en)": 84.11,
- "STSBenchmarkMultilingualSTS (pt)": 77.39,
- "STSBenchmarkMultilingualSTS (it)": 78.21,
- "STSBenchmarkMultilingualSTS (fr)": 79.2,
"STSBenchmarkMultilingualSTS (de)": 79.17,
"STSBenchmarkMultilingualSTS (nl)": 76.04,
- "STSBenchmarkMultilingualSTS (es)": 80.31
+ "STSBenchmarkMultilingualSTS (it)": 78.21,
+ "STSBenchmarkMultilingualSTS (zh)": 78.49,
+ "STSBenchmarkMultilingualSTS (ru)": 78.24,
+ "STSBenchmarkMultilingualSTS (fr)": 79.2,
+ "STSBenchmarkMultilingualSTS (pt)": 77.39,
+ "STSBenchmarkMultilingualSTS (pl)": 72.61
},
{
"Model": "multilingual-e5-small",
@@ -12022,6 +27974,7 @@
"BIOSSES": 82.46,
"BQ": 43.27,
"CDSC-R": 90.27,
+ "GermanSTSBenchmark": 78.42,
"LCQMC": 72.7,
"PAWSX": 11.0,
"RUParaPhraserSTS": 70.46,
@@ -12045,36 +27998,36 @@
"STS17 (es-es)": 84.83,
"STS17 (en-de)": 76.82,
"STS17 (es-en)": 72.43,
- "STS22 (pl-en)": 72.69,
+ "STS22 (ru)": 59.9,
+ "STS22 (de-fr)": 60.62,
+ "STS22 (es-en)": 74.2,
+ "STS22 (it)": 76.53,
+ "STS22 (zh)": 66.85,
+ "STS22 (zh-en)": 65.32,
"STS22 (de-en)": 56.07,
+ "STS22 (de-pl)": 28.24,
"STS22 (es)": 66.86,
+ "STS22 (tr)": 63.69,
"STS22 (ar)": 56.65,
- "STS22 (ru)": 59.9,
+ "STS22 (fr)": 76.58,
+ "STS22 (fr-pl)": 84.52,
"STS22 (de)": 53.45,
- "STS22 (zh-en)": 65.32,
+ "STS22 (pl)": 35.78,
+ "STS22 (pl-en)": 72.69,
"STS22 (en)": 61.25,
- "STS22 (fr)": 76.58,
- "STS22 (it)": 76.53,
"STS22 (es-it)": 71.74,
- "STS22 (es-en)": 74.2,
- "STS22 (de-fr)": 60.62,
- "STS22 (tr)": 63.69,
- "STS22 (pl)": 35.78,
- "STS22 (fr-pl)": 84.52,
- "STS22 (zh)": 66.85,
- "STS22 (de-pl)": 28.24,
"STSB": 77.73,
"STSBenchmark": 84.11,
- "STSBenchmarkMultilingualSTS (zh)": 78.49,
- "STSBenchmarkMultilingualSTS (pl)": 72.61,
- "STSBenchmarkMultilingualSTS (ru)": 78.24,
+ "STSBenchmarkMultilingualSTS (es)": 80.31,
"STSBenchmarkMultilingualSTS (en)": 84.11,
- "STSBenchmarkMultilingualSTS (pt)": 77.39,
- "STSBenchmarkMultilingualSTS (it)": 78.21,
- "STSBenchmarkMultilingualSTS (fr)": 79.2,
"STSBenchmarkMultilingualSTS (de)": 79.17,
"STSBenchmarkMultilingualSTS (nl)": 76.04,
- "STSBenchmarkMultilingualSTS (es)": 80.31
+ "STSBenchmarkMultilingualSTS (it)": 78.21,
+ "STSBenchmarkMultilingualSTS (zh)": 78.49,
+ "STSBenchmarkMultilingualSTS (ru)": 78.24,
+ "STSBenchmarkMultilingualSTS (fr)": 79.2,
+ "STSBenchmarkMultilingualSTS (pt)": 77.39,
+ "STSBenchmarkMultilingualSTS (pl)": 72.61
},
{
"Model": "multilingual-e5-small",
@@ -12303,18 +28256,40 @@
},
"izhx__udever-bloom-1b1": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "udever-bloom-1b1",
+ "BornholmBitextMining": 21.91
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "udever-bloom-1b1",
+ "AllegroReviews": 23.37,
"AmazonReviewsClassification (fr)": 35.12,
+ "AngryTweetsClassification": 44.39,
+ "CBD": 51.92,
+ "DanishPoliticalCommentsClassification": 27.83,
+ "GeoreviewClassification": 30.5,
+ "HeadlineClassification": 36.09,
+ "InappropriatenessClassification": 54.64,
+ "KinopoiskClassification": 41.23,
+ "LccSentimentClassification": 36.87,
"MTOPDomainClassification (fr)": 69.24,
"MTOPIntentClassification (fr)": 51.25,
"MasakhaNEWSClassification (fra)": 80.83,
"MassiveIntentClassification (fr)": 43.21,
- "MassiveScenarioClassification (fr)": 49.78
+ "MassiveScenarioClassification (fr)": 49.78,
+ "NoRecClassification": 38.75,
+ "NordicLangClassification": 59.31,
+ "PAC": 64.44,
+ "PolEmo2.0-IN": 41.79,
+ "PolEmo2.0-OUT": 32.67,
+ "RuReviewsClassification": 45.26,
+ "RuSciBenchGRNTIClassification": 30.37,
+ "RuSciBenchOECDClassification": 22.65
}
]
},
@@ -12322,13 +28297,35 @@
"v_measure": [
{
"Model": "udever-bloom-1b1",
- "AlloProfClusteringP2P": 62.22,
- "AlloProfClusteringS2S": 27.06,
- "HALClusteringS2S": 13.86,
+ "AlloProfClusteringP2P": 61.84,
+ "AlloProfClusteringS2S": 25.56,
+ "BlurbsClusteringP2P": 24.28,
+ "BlurbsClusteringS2S": 8.08,
+ "GeoreviewClusteringP2P": 26.01,
+ "HALClusteringS2S": 14.15,
"MLSUMClusteringP2P": 44.11,
"MLSUMClusteringS2S": 30.47,
"MasakhaNEWSClusteringP2P (fra)": 40.2,
- "MasakhaNEWSClusteringS2S (fra)": 27.35
+ "MasakhaNEWSClusteringS2S (fra)": 28.75,
+ "MasakhaNEWSClusteringS2S (amh)": 44.47,
+ "MasakhaNEWSClusteringS2S (eng)": 12.52,
+ "MasakhaNEWSClusteringS2S (hau)": 5.12,
+ "MasakhaNEWSClusteringS2S (ibo)": 30.74,
+ "MasakhaNEWSClusteringS2S (lin)": 45.75,
+ "MasakhaNEWSClusteringS2S (lug)": 44.31,
+ "MasakhaNEWSClusteringS2S (orm)": 21.26,
+ "MasakhaNEWSClusteringS2S (pcm)": 50.75,
+ "MasakhaNEWSClusteringS2S (run)": 47.24,
+ "MasakhaNEWSClusteringS2S (sna)": 43.2,
+ "MasakhaNEWSClusteringS2S (som)": 25.55,
+ "MasakhaNEWSClusteringS2S (swa)": 7.31,
+ "MasakhaNEWSClusteringS2S (tir)": 42.49,
+ "MasakhaNEWSClusteringS2S (xho)": 22.07,
+ "MasakhaNEWSClusteringS2S (yor)": 29.83,
+ "RuSciBenchGRNTIClusteringP2P": 22.15,
+ "RuSciBenchOECDClusteringP2P": 19.23,
+ "TenKGnadClusteringP2P": 36.81,
+ "TenKGnadClusteringS2S": 7.05
}
]
},
@@ -12336,13 +28333,50 @@
"max_ap": [
{
"Model": "udever-bloom-1b1",
+ "CDSC-E": 45.12,
+ "FalseFriendsGermanEnglish": 48.72,
+ "OpusparcusPC (de)": 88.82,
+ "OpusparcusPC (en)": 94.61,
+ "OpusparcusPC (fi)": 79.58,
"OpusparcusPC (fr)": 85.54,
- "PawsXPairClassification (fr)": 61.99
+ "OpusparcusPC (ru)": 79.54,
+ "OpusparcusPC (sv)": 78.74,
+ "PSC": 80.86,
+ "PawsXPairClassification (de)": 57.54,
+ "PawsXPairClassification (en)": 64.77,
+ "PawsXPairClassification (es)": 60.22,
+ "PawsXPairClassification (fr)": 61.99,
+ "PawsXPairClassification (ja)": 52.85,
+ "PawsXPairClassification (ko)": 55.63,
+ "PawsXPairClassification (zh)": 59.83,
+ "SICK-E-PL": 43.86,
+ "TERRa": 48.11
},
{
"Model": "udever-bloom-1b1",
+ "CDSC-E": 46.68,
+ "FalseFriendsGermanEnglish": 48.89,
"OpusparcusPC (fr)": 90.15,
- "PawsXPairClassification (fr)": 63.95
+ "OpusparcusPC (de)": 90.29,
+ "OpusparcusPC (en)": 95.92,
+ "OpusparcusPC (fi)": 81.01,
+ "OpusparcusPC (ru)": 80.51,
+ "OpusparcusPC (sv)": 79.73,
+ "PSC": 85.54,
+ "PawsXPairClassification (fr)": 64.15,
+ "PawsXPairClassification (de)": 58.58,
+ "PawsXPairClassification (en)": 66.19,
+ "PawsXPairClassification (es)": 62.39,
+ "PawsXPairClassification (ja)": 53.39,
+ "PawsXPairClassification (ko)": 55.7,
+ "PawsXPairClassification (zh)": 60.76,
+ "SICK-E-PL": 44.32,
+ "TERRa": 50.86
+ },
+ {
+ "Model": "udever-bloom-1b1",
+ "OpusparcusPC (fr)": 85.54,
+ "PawsXPairClassification (fr)": 61.99
}
]
},
@@ -12350,8 +28384,30 @@
"map": [
{
"Model": "udever-bloom-1b1",
- "AlloprofReranking": 39.13,
- "SyntecReranking": 62.58
+ "AlloprofReranking": 38.6,
+ "RuBQReranking": 23.18,
+ "SyntecReranking": 47.99
+ },
+ {
+ "Model": "udever-bloom-1b1",
+ "MIRACLReranking (ar)": 17.32,
+ "MIRACLReranking (bn)": 33.92,
+ "MIRACLReranking (de)": 7.29,
+ "MIRACLReranking (en)": 25.91,
+ "MIRACLReranking (es)": 20.2,
+ "MIRACLReranking (fa)": 11.09,
+ "MIRACLReranking (fi)": 11.8,
+ "MIRACLReranking (fr)": 13.63,
+ "MIRACLReranking (hi)": 32.18,
+ "MIRACLReranking (id)": 12.28,
+ "MIRACLReranking (ja)": 13.57,
+ "MIRACLReranking (ko)": 21.09,
+ "MIRACLReranking (ru)": 7.12,
+ "MIRACLReranking (sw)": 17.07,
+ "MIRACLReranking (te)": 17.5,
+ "MIRACLReranking (th)": 8.98,
+ "MIRACLReranking (yo)": 6.34,
+ "MIRACLReranking (zh)": 18.61
}
]
},
@@ -12359,16 +28415,120 @@
"ndcg_at_10": [
{
"Model": "udever-bloom-1b1",
- "AlloprofRetrieval": 12.37,
- "BSARDRetrieval": 0.0,
+ "AILAStatutes": 20.96,
+ "ARCChallenge": 4.32,
+ "AlloprofRetrieval": 12.25,
+ "AlphaNLI": 2.95,
+ "BSARDRetrieval": 6.61,
+ "GermanQuAD-Retrieval": 18.27,
+ "HellaSwag": 13.79,
+ "LegalBenchConsumerContractsQA": 43.75,
+ "LegalBenchCorporateLobbying": 75.83,
+ "LegalSummarization": 46.2,
"MintakaRetrieval (fr)": 2.78,
- "SyntecRetrieval": 40.57,
- "XPQARetrieval (fr)": 33.82
+ "MintakaRetrieval (ar)": 3.36,
+ "MintakaRetrieval (de)": 1.01,
+ "MintakaRetrieval (es)": 2.35,
+ "MintakaRetrieval (hi)": 6.76,
+ "MintakaRetrieval (it)": 2.16,
+ "MintakaRetrieval (ja)": 3.72,
+ "MintakaRetrieval (pt)": 1.67,
+ "PIQA": 5.79,
+ "Quail": 1.65,
+ "RARbCode": 10.53,
+ "RARbMath": 26.48,
+ "RiaNewsRetrieval": 1.3,
+ "RuBQRetrieval": 0.91,
+ "SIQA": 0.78,
+ "SciFact-PL": 8.27,
+ "SpartQA": 0.11,
+ "SyntecRetrieval": 43.25,
+ "TempReasonL1": 0.2,
+ "TempReasonL2Fact": 4.22,
+ "TempReasonL2Pure": 0.24,
+ "TempReasonL3Fact": 5.01,
+ "TempReasonL3Pure": 2.13,
+ "WinoGrande": 2.56,
+ "XPQARetrieval (fr)": 33.82,
+ "XPQARetrieval (ara-ara)": 16.5,
+ "XPQARetrieval (eng-ara)": 1.9,
+ "XPQARetrieval (ara-eng)": 5.89,
+ "XPQARetrieval (deu-deu)": 18.76,
+ "XPQARetrieval (eng-deu)": 0.64,
+ "XPQARetrieval (deu-eng)": 10.08,
+ "XPQARetrieval (spa-spa)": 28.65,
+ "XPQARetrieval (eng-spa)": 2.55,
+ "XPQARetrieval (spa-eng)": 14.99,
+ "XPQARetrieval (fra-fra)": 33.82,
+ "XPQARetrieval (eng-fra)": 3.6,
+ "XPQARetrieval (fra-eng)": 13.52,
+ "XPQARetrieval (hin-hin)": 49.89,
+ "XPQARetrieval (eng-hin)": 9.46,
+ "XPQARetrieval (hin-eng)": 17.68,
+ "XPQARetrieval (ita-ita)": 22.47,
+ "XPQARetrieval (eng-ita)": 2.05,
+ "XPQARetrieval (ita-eng)": 14.23,
+ "XPQARetrieval (jpn-jpn)": 32.34,
+ "XPQARetrieval (eng-jpn)": 1.32,
+ "XPQARetrieval (jpn-eng)": 3.18,
+ "XPQARetrieval (kor-kor)": 12.19,
+ "XPQARetrieval (eng-kor)": 3.13,
+ "XPQARetrieval (kor-eng)": 2.06,
+ "XPQARetrieval (pol-pol)": 6.88,
+ "XPQARetrieval (eng-pol)": 0.74,
+ "XPQARetrieval (pol-eng)": 5.26,
+ "XPQARetrieval (por-por)": 21.11,
+ "XPQARetrieval (eng-por)": 1.57,
+ "XPQARetrieval (por-eng)": 7.35,
+ "XPQARetrieval (tam-tam)": 12.77,
+ "XPQARetrieval (eng-tam)": 3.73,
+ "XPQARetrieval (tam-eng)": 1.54,
+ "XPQARetrieval (cmn-cmn)": 36.29,
+ "XPQARetrieval (eng-cmn)": 2.16,
+ "XPQARetrieval (cmn-eng)": 8.26
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "udever-bloom-1b1",
+ "CDSC-R": 65.58,
+ "GermanSTSBenchmark": 35.78,
+ "RUParaPhraserSTS": 39.08,
+ "RuSTSBenchmarkSTS": 44.22,
+ "SICK-R-PL": 33.35,
+ "SICKFr": 59.94,
+ "STSBenchmarkMultilingualSTS (zh)": 49.47,
+ "STSBenchmarkMultilingualSTS (de)": 37.51,
+ "STSBenchmarkMultilingualSTS (en)": 48.85,
+ "STSBenchmarkMultilingualSTS (ru)": 45.06,
+ "STSBenchmarkMultilingualSTS (pl)": 35.54,
+ "STSBenchmarkMultilingualSTS (it)": 42.62,
+ "STSBenchmarkMultilingualSTS (fr)": 49.97,
+ "STSBenchmarkMultilingualSTS (nl)": 32.41,
+ "STSBenchmarkMultilingualSTS (es)": 49.08,
+ "STSBenchmarkMultilingualSTS (pt)": 35.28
+ },
+ {
+ "Model": "udever-bloom-1b1",
+ "CDSC-R": 65.58,
+ "GermanSTSBenchmark": 35.78,
+ "RUParaPhraserSTS": 39.08,
+ "RuSTSBenchmarkSTS": 44.22,
+ "SICK-R-PL": 33.35,
+ "SICKFr": 59.94,
+ "STSBenchmarkMultilingualSTS (zh)": 49.47,
+ "STSBenchmarkMultilingualSTS (de)": 37.51,
+ "STSBenchmarkMultilingualSTS (en)": 48.85,
+ "STSBenchmarkMultilingualSTS (ru)": 45.06,
+ "STSBenchmarkMultilingualSTS (pl)": 35.54,
+ "STSBenchmarkMultilingualSTS (it)": 42.62,
+ "STSBenchmarkMultilingualSTS (fr)": 49.97,
+ "STSBenchmarkMultilingualSTS (nl)": 32.41,
+ "STSBenchmarkMultilingualSTS (es)": 49.08,
+ "STSBenchmarkMultilingualSTS (pt)": 35.28
+ },
{
"Model": "udever-bloom-1b1",
"SICKFr": 59.94,
@@ -12379,6 +28539,14 @@
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "udever-bloom-1b1",
+ "SummEvalFr": 29.48
+ },
+ {
+ "Model": "udever-bloom-1b1",
+ "SummEvalFr": 29.48
+ },
{
"Model": "udever-bloom-1b1",
"SummEvalFr": 29.48
@@ -12386,26 +28554,61 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "udever-bloom-1b1",
+ "CEDRClassification": 31.77,
+ "SensitiveTopicsClassification": 17.36
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "udever-bloom-1b1",
+ "Core17InstructionRetrieval": 0.29,
+ "News21InstructionRetrieval": -0.24,
+ "Robust04InstructionRetrieval": -3.35
+ }
+ ]
}
},
"izhx__udever-bloom-560m": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "udever-bloom-560m",
+ "BornholmBitextMining": 11.73
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "udever-bloom-560m",
+ "AllegroReviews": 24.59,
"AmazonReviewsClassification (fr)": 26.85,
+ "AngryTweetsClassification": 40.61,
+ "CBD": 51.69,
+ "DanishPoliticalCommentsClassification": 24.87,
+ "GeoreviewClassification": 28.08,
+ "HeadlineClassification": 28.25,
+ "InappropriatenessClassification": 52.74,
+ "KinopoiskClassification": 39.85,
+ "LccSentimentClassification": 33.27,
"MTOPDomainClassification (fr)": 34.99,
"MTOPIntentClassification (fr)": 15.76,
"MasakhaNEWSClassification (fra)": 67.94,
"MassiveIntentClassification (fr)": 15.09,
- "MassiveScenarioClassification (fr)": 21.67
+ "MassiveScenarioClassification (fr)": 21.67,
+ "NoRecClassification": 35.14,
+ "NordicLangClassification": 49.08,
+ "PAC": 62.49,
+ "PolEmo2.0-IN": 33.59,
+ "PolEmo2.0-OUT": 30.18,
+ "RuReviewsClassification": 41.19,
+ "RuSciBenchGRNTIClassification": 16.08,
+ "RuSciBenchOECDClassification": 12.1
}
]
},
@@ -12413,13 +28616,35 @@
"v_measure": [
{
"Model": "udever-bloom-560m",
- "AlloProfClusteringP2P": 53.57,
- "AlloProfClusteringS2S": 22.13,
- "HALClusteringS2S": 7.68,
+ "AlloProfClusteringP2P": 35.43,
+ "AlloProfClusteringS2S": 18.33,
+ "BlurbsClusteringP2P": 7.37,
+ "BlurbsClusteringS2S": 6.68,
+ "GeoreviewClusteringP2P": 18.84,
+ "HALClusteringS2S": 3.92,
"MLSUMClusteringP2P": 36.43,
"MLSUMClusteringS2S": 25.26,
"MasakhaNEWSClusteringP2P (fra)": 37.57,
- "MasakhaNEWSClusteringS2S (fra)": 20.58
+ "MasakhaNEWSClusteringS2S (fra)": 21.0,
+ "MasakhaNEWSClusteringS2S (amh)": 41.35,
+ "MasakhaNEWSClusteringS2S (eng)": 1.71,
+ "MasakhaNEWSClusteringS2S (hau)": 1.76,
+ "MasakhaNEWSClusteringS2S (ibo)": 20.8,
+ "MasakhaNEWSClusteringS2S (lin)": 46.14,
+ "MasakhaNEWSClusteringS2S (lug)": 40.37,
+ "MasakhaNEWSClusteringS2S (orm)": 21.45,
+ "MasakhaNEWSClusteringS2S (pcm)": 22.96,
+ "MasakhaNEWSClusteringS2S (run)": 41.53,
+ "MasakhaNEWSClusteringS2S (sna)": 40.61,
+ "MasakhaNEWSClusteringS2S (som)": 20.85,
+ "MasakhaNEWSClusteringS2S (swa)": 7.08,
+ "MasakhaNEWSClusteringS2S (tir)": 44.0,
+ "MasakhaNEWSClusteringS2S (xho)": 22.49,
+ "MasakhaNEWSClusteringS2S (yor)": 20.56,
+ "RuSciBenchGRNTIClusteringP2P": 9.23,
+ "RuSciBenchOECDClusteringP2P": 9.31,
+ "TenKGnadClusteringP2P": 3.47,
+ "TenKGnadClusteringS2S": 3.19
}
]
},
@@ -12427,13 +28652,50 @@
"max_ap": [
{
"Model": "udever-bloom-560m",
+ "CDSC-E": 37.82,
+ "FalseFriendsGermanEnglish": 48.65,
+ "OpusparcusPC (de)": 85.58,
+ "OpusparcusPC (en)": 89.75,
+ "OpusparcusPC (fi)": 73.74,
"OpusparcusPC (fr)": 82.1,
- "PawsXPairClassification (fr)": 59.69
+ "OpusparcusPC (ru)": 75.19,
+ "OpusparcusPC (sv)": 74.82,
+ "PSC": 52.97,
+ "PawsXPairClassification (de)": 54.05,
+ "PawsXPairClassification (en)": 59.57,
+ "PawsXPairClassification (es)": 57.1,
+ "PawsXPairClassification (fr)": 59.69,
+ "PawsXPairClassification (ja)": 50.78,
+ "PawsXPairClassification (ko)": 54.88,
+ "PawsXPairClassification (zh)": 58.85,
+ "SICK-E-PL": 40.96,
+ "TERRa": 49.05
},
{
"Model": "udever-bloom-560m",
+ "CDSC-E": 39.2,
+ "FalseFriendsGermanEnglish": 49.26,
"OpusparcusPC (fr)": 85.87,
- "PawsXPairClassification (fr)": 61.99
+ "OpusparcusPC (de)": 87.15,
+ "OpusparcusPC (en)": 93.01,
+ "OpusparcusPC (fi)": 78.29,
+ "OpusparcusPC (ru)": 77.35,
+ "OpusparcusPC (sv)": 77.29,
+ "PSC": 59.59,
+ "PawsXPairClassification (fr)": 62.02,
+ "PawsXPairClassification (de)": 55.9,
+ "PawsXPairClassification (en)": 63.29,
+ "PawsXPairClassification (es)": 59.6,
+ "PawsXPairClassification (ja)": 51.72,
+ "PawsXPairClassification (ko)": 55.07,
+ "PawsXPairClassification (zh)": 59.72,
+ "SICK-E-PL": 42.39,
+ "TERRa": 49.05
+ },
+ {
+ "Model": "udever-bloom-560m",
+ "OpusparcusPC (fr)": 82.1,
+ "PawsXPairClassification (fr)": 59.69
}
]
},
@@ -12441,8 +28703,30 @@
"map": [
{
"Model": "udever-bloom-560m",
- "AlloprofReranking": 28.75,
- "SyntecReranking": 50.88
+ "AlloprofReranking": 30.7,
+ "RuBQReranking": 15.98,
+ "SyntecReranking": 47.21
+ },
+ {
+ "Model": "udever-bloom-560m",
+ "MIRACLReranking (ar)": 17.46,
+ "MIRACLReranking (bn)": 18.26,
+ "MIRACLReranking (de)": 3.33,
+ "MIRACLReranking (en)": 9.5,
+ "MIRACLReranking (es)": 11.12,
+ "MIRACLReranking (fa)": 8.22,
+ "MIRACLReranking (fi)": 6.01,
+ "MIRACLReranking (fr)": 6.85,
+ "MIRACLReranking (hi)": 20.97,
+ "MIRACLReranking (id)": 8.65,
+ "MIRACLReranking (ja)": 10.92,
+ "MIRACLReranking (ko)": 11.1,
+ "MIRACLReranking (ru)": 2.65,
+ "MIRACLReranking (sw)": 6.05,
+ "MIRACLReranking (te)": 11.42,
+ "MIRACLReranking (th)": 4.03,
+ "MIRACLReranking (yo)": 2.41,
+ "MIRACLReranking (zh)": 12.44
}
]
},
@@ -12450,16 +28734,120 @@
"ndcg_at_10": [
{
"Model": "udever-bloom-560m",
- "AlloprofRetrieval": 1.98,
- "BSARDRetrieval": 0.0,
+ "AILAStatutes": 15.61,
+ "ARCChallenge": 1.82,
+ "AlloprofRetrieval": 1.93,
+ "AlphaNLI": 0.79,
+ "BSARDRetrieval": 1.54,
+ "GermanQuAD-Retrieval": 3.72,
+ "HellaSwag": 4.43,
+ "LegalBenchConsumerContractsQA": 14.08,
+ "LegalBenchCorporateLobbying": 45.49,
+ "LegalSummarization": 19.72,
"MintakaRetrieval (fr)": 0.48,
- "SyntecRetrieval": 24.45,
- "XPQARetrieval (fr)": 12.98
+ "MintakaRetrieval (ar)": 1.54,
+ "MintakaRetrieval (de)": 0.48,
+ "MintakaRetrieval (es)": 0.75,
+ "MintakaRetrieval (hi)": 3.03,
+ "MintakaRetrieval (it)": 0.71,
+ "MintakaRetrieval (ja)": 2.11,
+ "MintakaRetrieval (pt)": 0.68,
+ "PIQA": 2.45,
+ "Quail": 0.28,
+ "RARbCode": 0.71,
+ "RARbMath": 3.05,
+ "RiaNewsRetrieval": 0.16,
+ "RuBQRetrieval": 0.19,
+ "SIQA": 0.08,
+ "SciFact-PL": 1.19,
+ "SpartQA": 0.04,
+ "SyntecRetrieval": 25.01,
+ "TempReasonL1": 0.02,
+ "TempReasonL2Fact": 0.81,
+ "TempReasonL2Pure": 0.13,
+ "TempReasonL3Fact": 1.14,
+ "TempReasonL3Pure": 0.55,
+ "WinoGrande": 0.03,
+ "XPQARetrieval (fr)": 12.98,
+ "XPQARetrieval (ara-ara)": 8.23,
+ "XPQARetrieval (eng-ara)": 0.43,
+ "XPQARetrieval (ara-eng)": 0.65,
+ "XPQARetrieval (deu-deu)": 2.14,
+ "XPQARetrieval (eng-deu)": 0.56,
+ "XPQARetrieval (deu-eng)": 1.09,
+ "XPQARetrieval (spa-spa)": 6.89,
+ "XPQARetrieval (eng-spa)": 2.03,
+ "XPQARetrieval (spa-eng)": 2.04,
+ "XPQARetrieval (fra-fra)": 12.98,
+ "XPQARetrieval (eng-fra)": 2.55,
+ "XPQARetrieval (fra-eng)": 1.84,
+ "XPQARetrieval (hin-hin)": 33.22,
+ "XPQARetrieval (eng-hin)": 7.69,
+ "XPQARetrieval (hin-eng)": 0.42,
+ "XPQARetrieval (ita-ita)": 6.4,
+ "XPQARetrieval (eng-ita)": 0.84,
+ "XPQARetrieval (ita-eng)": 2.84,
+ "XPQARetrieval (jpn-jpn)": 12.26,
+ "XPQARetrieval (eng-jpn)": 0.49,
+ "XPQARetrieval (jpn-eng)": 0.6,
+ "XPQARetrieval (kor-kor)": 3.69,
+ "XPQARetrieval (eng-kor)": 0.61,
+ "XPQARetrieval (kor-eng)": 0.69,
+ "XPQARetrieval (pol-pol)": 1.62,
+ "XPQARetrieval (eng-pol)": 0.72,
+ "XPQARetrieval (pol-eng)": 0.62,
+ "XPQARetrieval (por-por)": 6.58,
+ "XPQARetrieval (eng-por)": 1.99,
+ "XPQARetrieval (por-eng)": 2.64,
+ "XPQARetrieval (tam-tam)": 5.82,
+ "XPQARetrieval (eng-tam)": 0.79,
+ "XPQARetrieval (tam-eng)": 0.47,
+ "XPQARetrieval (cmn-cmn)": 19.15,
+ "XPQARetrieval (eng-cmn)": 0.41,
+ "XPQARetrieval (cmn-eng)": 1.01
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "udever-bloom-560m",
+ "CDSC-R": 48.51,
+ "GermanSTSBenchmark": 23.42,
+ "RUParaPhraserSTS": 29.73,
+ "RuSTSBenchmarkSTS": 32.6,
+ "SICK-R-PL": 28.02,
+ "SICKFr": 54.54,
+ "STSBenchmarkMultilingualSTS (fr)": 36.78,
+ "STSBenchmarkMultilingualSTS (nl)": 22.06,
+ "STSBenchmarkMultilingualSTS (de)": 25.43,
+ "STSBenchmarkMultilingualSTS (pt)": 29.27,
+ "STSBenchmarkMultilingualSTS (it)": 22.74,
+ "STSBenchmarkMultilingualSTS (zh)": 35.7,
+ "STSBenchmarkMultilingualSTS (en)": 30.66,
+ "STSBenchmarkMultilingualSTS (pl)": 29.04,
+ "STSBenchmarkMultilingualSTS (ru)": 33.44,
+ "STSBenchmarkMultilingualSTS (es)": 36.23
+ },
+ {
+ "Model": "udever-bloom-560m",
+ "CDSC-R": 48.51,
+ "GermanSTSBenchmark": 23.42,
+ "RUParaPhraserSTS": 29.73,
+ "RuSTSBenchmarkSTS": 32.6,
+ "SICK-R-PL": 28.02,
+ "SICKFr": 54.54,
+ "STSBenchmarkMultilingualSTS (fr)": 36.78,
+ "STSBenchmarkMultilingualSTS (nl)": 22.06,
+ "STSBenchmarkMultilingualSTS (de)": 25.43,
+ "STSBenchmarkMultilingualSTS (pt)": 29.27,
+ "STSBenchmarkMultilingualSTS (it)": 22.74,
+ "STSBenchmarkMultilingualSTS (zh)": 35.7,
+ "STSBenchmarkMultilingualSTS (en)": 30.66,
+ "STSBenchmarkMultilingualSTS (pl)": 29.04,
+ "STSBenchmarkMultilingualSTS (ru)": 33.44,
+ "STSBenchmarkMultilingualSTS (es)": 36.23
+ },
{
"Model": "udever-bloom-560m",
"SICKFr": 54.54,
@@ -12470,6 +28858,14 @@
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "udever-bloom-560m",
+ "SummEvalFr": 23.63
+ },
+ {
+ "Model": "udever-bloom-560m",
+ "SummEvalFr": 23.64
+ },
{
"Model": "udever-bloom-560m",
"SummEvalFr": 23.63
@@ -12477,10 +28873,23 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "udever-bloom-560m",
+ "CEDRClassification": 32.34,
+ "SensitiveTopicsClassification": 17.7
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "udever-bloom-560m",
+ "Core17InstructionRetrieval": -0.94,
+ "News21InstructionRetrieval": -0.31,
+ "Robust04InstructionRetrieval": -1.17
+ }
+ ]
}
},
"jhu-clsp__FollowIR-7B": {
@@ -12524,42 +28933,552 @@
},
"jinaai__jina-embeddings-v2-base-en": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "BornholmBitextMining": 0.62,
+ "Tatoeba (tel-eng)": 0.0,
+ "Tatoeba (por-eng)": 0.18,
+ "Tatoeba (heb-eng)": 0.0,
+ "Tatoeba (fra-eng)": 0.1,
+ "Tatoeba (uig-eng)": 0.0,
+ "Tatoeba (swg-eng)": 0.0,
+ "Tatoeba (ukr-eng)": 0.04,
+ "Tatoeba (kat-eng)": 0.02,
+ "Tatoeba (pam-eng)": 0.1,
+ "Tatoeba (lit-eng)": 0.05,
+ "Tatoeba (ind-eng)": 0.1,
+ "Tatoeba (vie-eng)": 0.13,
+ "Tatoeba (slk-eng)": 0.06,
+ "Tatoeba (mal-eng)": 0.03,
+ "Tatoeba (mon-eng)": 0.0,
+ "Tatoeba (zsm-eng)": 0.0,
+ "Tatoeba (arq-eng)": 0.11,
+ "Tatoeba (xho-eng)": 0.08,
+ "Tatoeba (bos-eng)": 0.0,
+ "Tatoeba (max-eng)": 0.0,
+ "Tatoeba (khm-eng)": 0.0,
+ "Tatoeba (yue-eng)": 0.0,
+ "Tatoeba (swh-eng)": 0.13,
+ "Tatoeba (tgl-eng)": 0.1,
+ "Tatoeba (eus-eng)": 0.15,
+ "Tatoeba (gla-eng)": 0.27,
+ "Tatoeba (cym-eng)": 0.11,
+ "Tatoeba (hye-eng)": 0.14,
+ "Tatoeba (bre-eng)": 0.01,
+ "Tatoeba (amh-eng)": 0.48,
+ "Tatoeba (hun-eng)": 0.1,
+ "Tatoeba (nno-eng)": 0.01,
+ "Tatoeba (ido-eng)": 0.0,
+ "Tatoeba (oci-eng)": 0.01,
+ "Tatoeba (wuu-eng)": 0.0,
+ "Tatoeba (uzb-eng)": 0.25,
+ "Tatoeba (deu-eng)": 0.12,
+ "Tatoeba (kor-eng)": 0.07,
+ "Tatoeba (ceb-eng)": 0.09,
+ "Tatoeba (mkd-eng)": 0.02,
+ "Tatoeba (ber-eng)": 0.04,
+ "Tatoeba (aze-eng)": 0.1,
+ "Tatoeba (csb-eng)": 0.0,
+ "Tatoeba (dan-eng)": 0.21,
+ "Tatoeba (hin-eng)": 0.1,
+ "Tatoeba (jav-eng)": 0.0,
+ "Tatoeba (cmn-eng)": 0.0,
+ "Tatoeba (arz-eng)": 0.09,
+ "Tatoeba (rus-eng)": 0.0,
+ "Tatoeba (orv-eng)": 0.09,
+ "Tatoeba (sqi-eng)": 0.18,
+ "Tatoeba (bel-eng)": 0.01,
+ "Tatoeba (dtp-eng)": 0.03,
+ "Tatoeba (ron-eng)": 0.16,
+ "Tatoeba (tuk-eng)": 0.0,
+ "Tatoeba (isl-eng)": 0.04,
+ "Tatoeba (cha-eng)": 0.66,
+ "Tatoeba (ara-eng)": 0.02,
+ "Tatoeba (mhr-eng)": 0.25,
+ "Tatoeba (cor-eng)": 0.05,
+ "Tatoeba (pms-eng)": 0.13,
+ "Tatoeba (lat-eng)": 0.19,
+ "Tatoeba (spa-eng)": 0.0,
+ "Tatoeba (ang-eng)": 1.11,
+ "Tatoeba (tzl-eng)": 0.64,
+ "Tatoeba (bul-eng)": 0.0,
+ "Tatoeba (ben-eng)": 0.04,
+ "Tatoeba (nld-eng)": 0.19,
+ "Tatoeba (fao-eng)": 0.15,
+ "Tatoeba (nds-eng)": 0.06,
+ "Tatoeba (tha-eng)": 0.0,
+ "Tatoeba (cat-eng)": 0.19,
+ "Tatoeba (swe-eng)": 0.05,
+ "Tatoeba (kur-eng)": 0.24,
+ "Tatoeba (dsb-eng)": 0.06,
+ "Tatoeba (kab-eng)": 0.05,
+ "Tatoeba (war-eng)": 0.03,
+ "Tatoeba (fin-eng)": 0.08,
+ "Tatoeba (tat-eng)": 0.07,
+ "Tatoeba (slv-eng)": 0.0,
+ "Tatoeba (epo-eng)": 0.05,
+ "Tatoeba (hrv-eng)": 0.0,
+ "Tatoeba (kaz-eng)": 0.0,
+ "Tatoeba (gle-eng)": 0.12,
+ "Tatoeba (pol-eng)": 0.16,
+ "Tatoeba (mar-eng)": 0.0,
+ "Tatoeba (jpn-eng)": 0.0,
+ "Tatoeba (nov-eng)": 1.47,
+ "Tatoeba (kzj-eng)": 0.1,
+ "Tatoeba (ell-eng)": 0.0,
+ "Tatoeba (lfn-eng)": 0.12,
+ "Tatoeba (hsb-eng)": 0.08,
+ "Tatoeba (tam-eng)": 0.03,
+ "Tatoeba (glg-eng)": 0.04,
+ "Tatoeba (pes-eng)": 0.07,
+ "Tatoeba (fry-eng)": 0.0,
+ "Tatoeba (nob-eng)": 0.19,
+ "Tatoeba (tur-eng)": 0.0,
+ "Tatoeba (ina-eng)": 0.07,
+ "Tatoeba (awa-eng)": 0.0,
+ "Tatoeba (ast-eng)": 0.98,
+ "Tatoeba (urd-eng)": 0.11,
+ "Tatoeba (yid-eng)": 0.0,
+ "Tatoeba (gsw-eng)": 0.43,
+ "Tatoeba (est-eng)": 0.0,
+ "Tatoeba (ita-eng)": 0.2,
+ "Tatoeba (lvs-eng)": 0.04,
+ "Tatoeba (afr-eng)": 0.01,
+ "Tatoeba (srp-eng)": 0.2,
+ "Tatoeba (ile-eng)": 0.14,
+ "Tatoeba (cbk-eng)": 0.02,
+ "Tatoeba (ces-eng)": 0.03
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "AllegroReviews": 20.51,
+ "AmazonCounterfactualClassification (en-ext)": 52.46,
+ "AmazonCounterfactualClassification (en)": 52.79,
+ "AmazonCounterfactualClassification (de)": 50.26,
+ "AmazonCounterfactualClassification (ja)": 51.52,
+ "AmazonReviewsClassification (en)": 21.39,
+ "AmazonReviewsClassification (de)": 21.09,
+ "AmazonReviewsClassification (es)": 21.59,
+ "AmazonReviewsClassification (fr)": 21.01,
+ "AmazonReviewsClassification (ja)": 20.43,
+ "AmazonReviewsClassification (zh)": 20.77,
+ "AngryTweetsClassification": 36.78,
+ "CBD": 51.45,
+ "DanishPoliticalCommentsClassification": 22.89,
+ "GeoreviewClassification": 21.25,
+ "HeadlineClassification": 19.82,
+ "InappropriatenessClassification": 51.48,
+ "LccSentimentClassification": 32.27,
+ "MTOPDomainClassification (en)": 20.7,
+ "MTOPDomainClassification (de)": 21.39,
+ "MTOPDomainClassification (es)": 19.5,
+ "MTOPDomainClassification (fr)": 17.71,
+ "MTOPDomainClassification (hi)": 21.09,
+ "MTOPDomainClassification (th)": 15.02,
+ "MTOPIntentClassification (en)": 6.2,
+ "MTOPIntentClassification (de)": 5.88,
+ "MTOPIntentClassification (es)": 6.74,
+ "MTOPIntentClassification (fr)": 4.33,
+ "MTOPIntentClassification (hi)": 5.96,
+ "MTOPIntentClassification (th)": 4.51,
+ "MasakhaNEWSClassification (amh)": 28.56,
+ "MasakhaNEWSClassification (eng)": 21.54,
+ "MasakhaNEWSClassification (fra)": 23.53,
+ "MasakhaNEWSClassification (hau)": 17.96,
+ "MasakhaNEWSClassification (ibo)": 19.85,
+ "MasakhaNEWSClassification (lin)": 25.43,
+ "MasakhaNEWSClassification (lug)": 24.57,
+ "MasakhaNEWSClassification (orm)": 21.72,
+ "MasakhaNEWSClassification (pcm)": 28.26,
+ "MasakhaNEWSClassification (run)": 20.31,
+ "MasakhaNEWSClassification (sna)": 33.63,
+ "MasakhaNEWSClassification (som)": 16.77,
+ "MasakhaNEWSClassification (swa)": 19.39,
+ "MasakhaNEWSClassification (tir)": 19.15,
+ "MasakhaNEWSClassification (xho)": 21.82,
+ "MasakhaNEWSClassification (yor)": 24.14,
+ "MassiveIntentClassification (fr)": 6.07,
+ "MassiveIntentClassification (kn)": 3.19,
+ "MassiveIntentClassification (sq)": 7.31,
+ "MassiveIntentClassification (ja)": 6.83,
+ "MassiveIntentClassification (am)": 2.38,
+ "MassiveIntentClassification (es)": 6.03,
+ "MassiveIntentClassification (ro)": 6.29,
+ "MassiveIntentClassification (ta)": 7.24,
+ "MassiveIntentClassification (hu)": 6.78,
+ "MassiveIntentClassification (az)": 6.46,
+ "MassiveIntentClassification (af)": 5.23,
+ "MassiveIntentClassification (ar)": 8.97,
+ "MassiveIntentClassification (mn)": 7.65,
+ "MassiveIntentClassification (pl)": 5.77,
+ "MassiveIntentClassification (tl)": 7.32,
+ "MassiveIntentClassification (it)": 7.34,
+ "MassiveIntentClassification (id)": 7.61,
+ "MassiveIntentClassification (cy)": 4.53,
+ "MassiveIntentClassification (sv)": 5.81,
+ "MassiveIntentClassification (el)": 10.05,
+ "MassiveIntentClassification (tr)": 5.59,
+ "MassiveIntentClassification (ko)": 6.61,
+ "MassiveIntentClassification (zh-TW)": 6.45,
+ "MassiveIntentClassification (zh-CN)": 7.23,
+ "MassiveIntentClassification (vi)": 6.18,
+ "MassiveIntentClassification (he)": 6.89,
+ "MassiveIntentClassification (my)": 3.51,
+ "MassiveIntentClassification (hy)": 5.72,
+ "MassiveIntentClassification (th)": 9.23,
+ "MassiveIntentClassification (sw)": 8.14,
+ "MassiveIntentClassification (pt)": 6.96,
+ "MassiveIntentClassification (ka)": 7.24,
+ "MassiveIntentClassification (ur)": 8.07,
+ "MassiveIntentClassification (sl)": 6.62,
+ "MassiveIntentClassification (nb)": 5.47,
+ "MassiveIntentClassification (hi)": 6.49,
+ "MassiveIntentClassification (bn)": 6.27,
+ "MassiveIntentClassification (ml)": 3.17,
+ "MassiveIntentClassification (km)": 4.02,
+ "MassiveIntentClassification (nl)": 6.17,
+ "MassiveIntentClassification (de)": 5.96,
+ "MassiveIntentClassification (ms)": 6.56,
+ "MassiveIntentClassification (fa)": 9.5,
+ "MassiveIntentClassification (da)": 6.46,
+ "MassiveIntentClassification (jv)": 6.8,
+ "MassiveIntentClassification (fi)": 6.58,
+ "MassiveIntentClassification (te)": 2.64,
+ "MassiveIntentClassification (lv)": 6.12,
+ "MassiveIntentClassification (is)": 6.69,
+ "MassiveIntentClassification (en)": 7.27,
+ "MassiveIntentClassification (ru)": 7.62,
+ "MassiveScenarioClassification (mn)": 12.47,
+ "MassiveScenarioClassification (af)": 8.85,
+ "MassiveScenarioClassification (jv)": 11.21,
+ "MassiveScenarioClassification (id)": 11.66,
+ "MassiveScenarioClassification (en)": 12.97,
+ "MassiveScenarioClassification (my)": 9.66,
+ "MassiveScenarioClassification (hy)": 10.41,
+ "MassiveScenarioClassification (sv)": 9.95,
+ "MassiveScenarioClassification (hi)": 10.14,
+ "MassiveScenarioClassification (ka)": 11.04,
+ "MassiveScenarioClassification (cy)": 9.59,
+ "MassiveScenarioClassification (is)": 12.42,
+ "MassiveScenarioClassification (az)": 11.18,
+ "MassiveScenarioClassification (tl)": 12.21,
+ "MassiveScenarioClassification (fi)": 10.56,
+ "MassiveScenarioClassification (km)": 8.93,
+ "MassiveScenarioClassification (ml)": 6.64,
+ "MassiveScenarioClassification (sw)": 12.76,
+ "MassiveScenarioClassification (lv)": 9.75,
+ "MassiveScenarioClassification (he)": 10.68,
+ "MassiveScenarioClassification (ko)": 11.67,
+ "MassiveScenarioClassification (hu)": 10.74,
+ "MassiveScenarioClassification (ar)": 14.67,
+ "MassiveScenarioClassification (nb)": 10.52,
+ "MassiveScenarioClassification (th)": 17.38,
+ "MassiveScenarioClassification (sl)": 11.11,
+ "MassiveScenarioClassification (da)": 10.65,
+ "MassiveScenarioClassification (vi)": 11.1,
+ "MassiveScenarioClassification (kn)": 8.12,
+ "MassiveScenarioClassification (ms)": 11.84,
+ "MassiveScenarioClassification (fr)": 10.69,
+ "MassiveScenarioClassification (ja)": 10.74,
+ "MassiveScenarioClassification (pt)": 10.5,
+ "MassiveScenarioClassification (am)": 7.81,
+ "MassiveScenarioClassification (zh-CN)": 11.7,
+ "MassiveScenarioClassification (pl)": 9.47,
+ "MassiveScenarioClassification (nl)": 9.88,
+ "MassiveScenarioClassification (es)": 10.31,
+ "MassiveScenarioClassification (te)": 6.59,
+ "MassiveScenarioClassification (it)": 10.59,
+ "MassiveScenarioClassification (de)": 10.65,
+ "MassiveScenarioClassification (ur)": 12.93,
+ "MassiveScenarioClassification (tr)": 10.63,
+ "MassiveScenarioClassification (zh-TW)": 10.76,
+ "MassiveScenarioClassification (ta)": 11.39,
+ "MassiveScenarioClassification (ro)": 10.48,
+ "MassiveScenarioClassification (sq)": 11.16,
+ "MassiveScenarioClassification (el)": 16.72,
+ "MassiveScenarioClassification (ru)": 11.82,
+ "MassiveScenarioClassification (bn)": 10.95,
+ "MassiveScenarioClassification (fa)": 14.71,
+ "NoRecClassification": 35.31,
+ "NordicLangClassification": 26.91,
+ "PAC": 50.97,
+ "PolEmo2.0-IN": 26.34,
+ "PolEmo2.0-OUT": 26.68,
+ "RuReviewsClassification": 33.7
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "AlloProfClusteringS2S": 12.62,
+ "BlurbsClusteringP2P": 4.57,
+ "BlurbsClusteringS2S": 4.9,
+ "GeoreviewClusteringP2P": 14.71,
+ "HALClusteringS2S": 0.94,
+ "MasakhaNEWSClusteringS2S (amh)": 40.28,
+ "MasakhaNEWSClusteringS2S (eng)": 0.84,
+ "MasakhaNEWSClusteringS2S (fra)": 20.99,
+ "MasakhaNEWSClusteringS2S (hau)": 1.12,
+ "MasakhaNEWSClusteringS2S (ibo)": 23.32,
+ "MasakhaNEWSClusteringS2S (lin)": 41.56,
+ "MasakhaNEWSClusteringS2S (lug)": 41.7,
+ "MasakhaNEWSClusteringS2S (orm)": 20.93,
+ "MasakhaNEWSClusteringS2S (pcm)": 22.24,
+ "MasakhaNEWSClusteringS2S (run)": 42.22,
+ "MasakhaNEWSClusteringS2S (sna)": 40.57,
+ "MasakhaNEWSClusteringS2S (som)": 23.45,
+ "MasakhaNEWSClusteringS2S (swa)": 1.75,
+ "MasakhaNEWSClusteringS2S (tir)": 42.63,
+ "MasakhaNEWSClusteringS2S (xho)": 20.26,
+ "MasakhaNEWSClusteringS2S (yor)": 21.53,
+ "RuSciBenchGRNTIClusteringP2P": 7.13,
+ "TenKGnadClusteringS2S": 1.45
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "CDSC-E": 23.16,
+ "FalseFriendsGermanEnglish": 48.17,
+ "OpusparcusPC (de)": 78.43,
+ "OpusparcusPC (en)": 81.21,
+ "OpusparcusPC (fi)": 72.33,
+ "OpusparcusPC (fr)": 73.14,
+ "OpusparcusPC (ru)": 72.46,
+ "OpusparcusPC (sv)": 65.72,
+ "PSC": 34.13,
+ "PawsXPairClassification (de)": 46.44,
+ "PawsXPairClassification (en)": 42.97,
+ "PawsXPairClassification (es)": 46.48,
+ "PawsXPairClassification (fr)": 48.6,
+ "PawsXPairClassification (ja)": 46.7,
+ "PawsXPairClassification (ko)": 46.28,
+ "PawsXPairClassification (zh)": 48.42,
+ "SICK-E-PL": 40.41,
+ "TERRa": 44.8
+ },
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "CDSC-E": 23.17,
+ "FalseFriendsGermanEnglish": 48.17,
+ "OpusparcusPC (de)": 78.61,
+ "OpusparcusPC (en)": 81.21,
+ "OpusparcusPC (fi)": 72.33,
+ "OpusparcusPC (fr)": 73.24,
+ "OpusparcusPC (ru)": 72.57,
+ "OpusparcusPC (sv)": 65.72,
+ "PSC": 34.13,
+ "PawsXPairClassification (de)": 46.79,
+ "PawsXPairClassification (en)": 43.17,
+ "PawsXPairClassification (es)": 47.31,
+ "PawsXPairClassification (fr)": 49.4,
+ "PawsXPairClassification (ja)": 46.98,
+ "PawsXPairClassification (ko)": 46.79,
+ "PawsXPairClassification (zh)": 51.23,
+ "SICK-E-PL": 40.46,
+ "TERRa": 44.8
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "AlloprofReranking": 28.82,
+ "RuBQReranking": 18.6,
+ "SyntecReranking": 30.07,
+ "T2Reranking": 54.47
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "jina-embeddings-v2-base-en",
+ "AILACasedocs": 3.49,
+ "AILAStatutes": 10.48,
+ "ARCChallenge": 0.0,
+ "AlloprofRetrieval": 0.16,
+ "AlphaNLI": 0.0,
+ "AppsRetrieval": 0.05,
+ "CmedqaRetrieval": 0.09,
+ "CodeFeedbackMT": 0.03,
+ "CodeFeedbackST": 0.02,
+ "CodeSearchNetCCRetrieval (python)": 0.0,
+ "CodeSearchNetCCRetrieval (javascript)": 0.08,
+ "CodeSearchNetCCRetrieval (go)": 0.03,
+ "CodeSearchNetCCRetrieval (ruby)": 0.11,
+ "CodeSearchNetCCRetrieval (java)": 0.03,
+ "CodeSearchNetCCRetrieval (php)": 0.0,
+ "CodeSearchNetRetrieval (python)": 0.83,
+ "CodeSearchNetRetrieval (javascript)": 0.36,
+ "CodeSearchNetRetrieval (go)": 0.81,
+ "CodeSearchNetRetrieval (ruby)": 0.66,
+ "CodeSearchNetRetrieval (java)": 0.73,
+ "CodeSearchNetRetrieval (php)": 0.41,
+ "CodeTransOceanContest": 1.3,
+ "CodeTransOceanDL": 1.92,
+ "CosQA": 0.0,
+ "CovidRetrieval": 0.0,
+ "GerDaLIRSmall": 0.07,
+ "HellaSwag": 0.0,
"LEMBNarrativeQARetrieval": 37.89,
"LEMBQMSumRetrieval": 38.87,
"LEMBSummScreenFDRetrieval": 93.48,
- "LEMBWikimQARetrieval": 73.99
+ "LEMBWikimQARetrieval": 73.99,
+ "LeCaRDv2": 2.09,
+ "LegalBenchConsumerContractsQA": 4.23,
+ "LegalBenchCorporateLobbying": 1.27,
+ "LegalQuAD": 2.6,
+ "LegalSummarization": 1.35,
+ "MintakaRetrieval (ar)": 0.19,
+ "MintakaRetrieval (de)": 0.53,
+ "MintakaRetrieval (es)": 0.35,
+ "MintakaRetrieval (fr)": 0.32,
+ "MintakaRetrieval (hi)": 0.44,
+ "MintakaRetrieval (it)": 0.54,
+ "MintakaRetrieval (ja)": 0.32,
+ "MintakaRetrieval (pt)": 0.45,
+ "PIQA": 0.0,
+ "Quail": 0.0,
+ "RARbCode": 0.0,
+ "RARbMath": 0.01,
+ "RiaNewsRetrieval": 0.01,
+ "SIQA": 0.03,
+ "SciFact-PL": 13.78,
+ "SpartQA": 0.56,
+ "StackOverflowQA": 0.24,
+ "SyntecRetrieval": 0.79,
+ "SyntheticText2SQL": 0.0,
+ "TRECCOVID-PL": 0.14,
+ "TempReasonL1": 0.05,
+ "TempReasonL2Fact": 0.11,
+ "TempReasonL2Pure": 0.04,
+ "TempReasonL3Fact": 0.05,
+ "TempReasonL3Pure": 0.09,
+ "WinoGrande": 0.12,
+ "XPQARetrieval (ara-ara)": 0.66,
+ "XPQARetrieval (eng-ara)": 0.29,
+ "XPQARetrieval (ara-eng)": 1.02,
+ "XPQARetrieval (deu-deu)": 1.28,
+ "XPQARetrieval (eng-deu)": 0.38,
+ "XPQARetrieval (deu-eng)": 0.64,
+ "XPQARetrieval (spa-spa)": 0.43,
+ "XPQARetrieval (eng-spa)": 0.3,
+ "XPQARetrieval (spa-eng)": 0.62,
+ "XPQARetrieval (fra-fra)": 0.47,
+ "XPQARetrieval (eng-fra)": 0.4,
+ "XPQARetrieval (fra-eng)": 0.46,
+ "XPQARetrieval (hin-hin)": 4.04,
+ "XPQARetrieval (eng-hin)": 0.95,
+ "XPQARetrieval (hin-eng)": 0.35,
+ "XPQARetrieval (ita-ita)": 0.98,
+ "XPQARetrieval (eng-ita)": 0.32,
+ "XPQARetrieval (ita-eng)": 0.68,
+ "XPQARetrieval (jpn-jpn)": 1.85,
+ "XPQARetrieval (eng-jpn)": 0.83,
+ "XPQARetrieval (jpn-eng)": 0.41,
+ "XPQARetrieval (kor-kor)": 0.36,
+ "XPQARetrieval (eng-kor)": 0.52,
+ "XPQARetrieval (kor-eng)": 0.5,
+ "XPQARetrieval (pol-pol)": 0.6,
+ "XPQARetrieval (eng-pol)": 0.3,
+ "XPQARetrieval (pol-eng)": 0.3,
+ "XPQARetrieval (por-por)": 0.69,
+ "XPQARetrieval (eng-por)": 0.45,
+ "XPQARetrieval (por-eng)": 0.42,
+ "XPQARetrieval (tam-tam)": 0.77,
+ "XPQARetrieval (eng-tam)": 0.35,
+ "XPQARetrieval (tam-eng)": 0.46,
+ "XPQARetrieval (cmn-cmn)": 1.19,
+ "XPQARetrieval (eng-cmn)": 1.2,
+ "XPQARetrieval (cmn-eng)": 0.44
+ }
+ ]
+ },
+ "STS": {
+ "cosine_spearman": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "CDSC-R": 26.0,
+ "GermanSTSBenchmark": 20.08,
+ "RUParaPhraserSTS": 9.13,
+ "RuSTSBenchmarkSTS": 16.85,
+ "SICK-R-PL": 20.96,
+ "SICKFr": 26.02,
+ "STSB": 1.21,
+ "STSBenchmarkMultilingualSTS (fr)": 19.61,
+ "STSBenchmarkMultilingualSTS (ru)": 16.52,
+ "STSBenchmarkMultilingualSTS (es)": 16.52,
+ "STSBenchmarkMultilingualSTS (de)": 22.57,
+ "STSBenchmarkMultilingualSTS (nl)": 15.75,
+ "STSBenchmarkMultilingualSTS (pl)": 19.86,
+ "STSBenchmarkMultilingualSTS (it)": 14.09,
+ "STSBenchmarkMultilingualSTS (pt)": 19.29,
+ "STSBenchmarkMultilingualSTS (zh)": 2.67,
+ "STSBenchmarkMultilingualSTS (en)": 12.86
+ },
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "CDSC-R": 26.0,
+ "GermanSTSBenchmark": 20.08,
+ "RUParaPhraserSTS": 9.13,
+ "RuSTSBenchmarkSTS": 16.85,
+ "SICK-R-PL": 20.96,
+ "SICKFr": 26.01,
+ "STSB": 1.21,
+ "STSBenchmarkMultilingualSTS (fr)": 19.61,
+ "STSBenchmarkMultilingualSTS (ru)": 16.52,
+ "STSBenchmarkMultilingualSTS (es)": 16.51,
+ "STSBenchmarkMultilingualSTS (de)": 22.57,
+ "STSBenchmarkMultilingualSTS (nl)": 15.75,
+ "STSBenchmarkMultilingualSTS (pl)": 19.86,
+ "STSBenchmarkMultilingualSTS (it)": 14.09,
+ "STSBenchmarkMultilingualSTS (pt)": 19.29,
+ "STSBenchmarkMultilingualSTS (zh)": 2.68,
+ "STSBenchmarkMultilingualSTS (en)": 12.86
+ }
+ ]
+ },
+ "Summarization": {
+ "cosine_spearman": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "SummEvalFr": 27.71
+ },
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "SummEvalFr": 27.71
}
]
},
- "STS": {
- "cosine_spearman": []
- },
- "Summarization": {
- "cosine_spearman": []
- },
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "CEDRClassification": 29.49,
+ "SensitiveTopicsClassification": 17.63
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "jina-embeddings-v2-base-en",
+ "Core17InstructionRetrieval": -1.15,
+ "News21InstructionRetrieval": 0.38,
+ "Robust04InstructionRetrieval": -4.16
+ }
+ ]
}
},
"jonfd__electra-small-nordic": {
@@ -13101,42 +30020,664 @@
},
"nomic-ai__nomic-embed-text-v1": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "BornholmBitextMining": 48.41,
+ "Tatoeba (swh-eng)": 7.41,
+ "Tatoeba (wuu-eng)": 1.25,
+ "Tatoeba (ind-eng)": 5.42,
+ "Tatoeba (cbk-eng)": 10.21,
+ "Tatoeba (mon-eng)": 1.05,
+ "Tatoeba (ast-eng)": 12.01,
+ "Tatoeba (dtp-eng)": 2.55,
+ "Tatoeba (bre-eng)": 3.47,
+ "Tatoeba (kat-eng)": 0.57,
+ "Tatoeba (nld-eng)": 13.62,
+ "Tatoeba (hin-eng)": 0.14,
+ "Tatoeba (cym-eng)": 5.81,
+ "Tatoeba (ces-eng)": 3.77,
+ "Tatoeba (swg-eng)": 12.84,
+ "Tatoeba (aze-eng)": 3.13,
+ "Tatoeba (vie-eng)": 4.41,
+ "Tatoeba (tur-eng)": 3.56,
+ "Tatoeba (slv-eng)": 4.47,
+ "Tatoeba (urd-eng)": 0.0,
+ "Tatoeba (ben-eng)": 0.03,
+ "Tatoeba (gla-eng)": 1.82,
+ "Tatoeba (epo-eng)": 8.91,
+ "Tatoeba (pam-eng)": 4.62,
+ "Tatoeba (kur-eng)": 6.43,
+ "Tatoeba (rus-eng)": 0.11,
+ "Tatoeba (bel-eng)": 0.72,
+ "Tatoeba (pms-eng)": 8.55,
+ "Tatoeba (ell-eng)": 0.27,
+ "Tatoeba (gle-eng)": 3.46,
+ "Tatoeba (fin-eng)": 2.13,
+ "Tatoeba (nob-eng)": 11.05,
+ "Tatoeba (bos-eng)": 10.44,
+ "Tatoeba (tat-eng)": 0.76,
+ "Tatoeba (zsm-eng)": 7.62,
+ "Tatoeba (tam-eng)": 0.33,
+ "Tatoeba (ron-eng)": 8.6,
+ "Tatoeba (arz-eng)": 0.0,
+ "Tatoeba (ara-eng)": 0.07,
+ "Tatoeba (hun-eng)": 4.09,
+ "Tatoeba (tel-eng)": 0.5,
+ "Tatoeba (glg-eng)": 15.6,
+ "Tatoeba (mhr-eng)": 0.05,
+ "Tatoeba (dsb-eng)": 4.29,
+ "Tatoeba (kab-eng)": 0.73,
+ "Tatoeba (cha-eng)": 15.67,
+ "Tatoeba (yid-eng)": 0.45,
+ "Tatoeba (est-eng)": 2.58,
+ "Tatoeba (fao-eng)": 8.1,
+ "Tatoeba (afr-eng)": 7.44,
+ "Tatoeba (ile-eng)": 20.76,
+ "Tatoeba (yue-eng)": 1.61,
+ "Tatoeba (spa-eng)": 15.79,
+ "Tatoeba (eus-eng)": 6.37,
+ "Tatoeba (heb-eng)": 0.1,
+ "Tatoeba (slk-eng)": 4.36,
+ "Tatoeba (isl-eng)": 4.2,
+ "Tatoeba (kaz-eng)": 0.13,
+ "Tatoeba (ita-eng)": 16.28,
+ "Tatoeba (hsb-eng)": 4.34,
+ "Tatoeba (ina-eng)": 28.51,
+ "Tatoeba (khm-eng)": 0.14,
+ "Tatoeba (mal-eng)": 0.0,
+ "Tatoeba (nno-eng)": 9.33,
+ "Tatoeba (cat-eng)": 12.95,
+ "Tatoeba (dan-eng)": 10.9,
+ "Tatoeba (bul-eng)": 0.25,
+ "Tatoeba (mar-eng)": 0.12,
+ "Tatoeba (por-eng)": 13.64,
+ "Tatoeba (hrv-eng)": 6.28,
+ "Tatoeba (nds-eng)": 12.62,
+ "Tatoeba (fra-eng)": 19.4,
+ "Tatoeba (tgl-eng)": 3.61,
+ "Tatoeba (nov-eng)": 26.96,
+ "Tatoeba (swe-eng)": 9.4,
+ "Tatoeba (cor-eng)": 2.82,
+ "Tatoeba (ido-eng)": 13.67,
+ "Tatoeba (cmn-eng)": 2.32,
+ "Tatoeba (ceb-eng)": 3.68,
+ "Tatoeba (ang-eng)": 20.19,
+ "Tatoeba (srp-eng)": 1.71,
+ "Tatoeba (sqi-eng)": 6.88,
+ "Tatoeba (max-eng)": 9.71,
+ "Tatoeba (jav-eng)": 5.45,
+ "Tatoeba (pes-eng)": 0.01,
+ "Tatoeba (lvs-eng)": 3.57,
+ "Tatoeba (awa-eng)": 0.61,
+ "Tatoeba (gsw-eng)": 13.28,
+ "Tatoeba (oci-eng)": 9.03,
+ "Tatoeba (orv-eng)": 0.09,
+ "Tatoeba (csb-eng)": 5.85,
+ "Tatoeba (kor-eng)": 0.9,
+ "Tatoeba (lit-eng)": 2.04,
+ "Tatoeba (pol-eng)": 4.34,
+ "Tatoeba (deu-eng)": 14.13,
+ "Tatoeba (war-eng)": 5.65,
+ "Tatoeba (hye-eng)": 0.44,
+ "Tatoeba (xho-eng)": 1.5,
+ "Tatoeba (ber-eng)": 4.51,
+ "Tatoeba (uzb-eng)": 2.89,
+ "Tatoeba (arq-eng)": 0.18,
+ "Tatoeba (tuk-eng)": 3.91,
+ "Tatoeba (uig-eng)": 0.2,
+ "Tatoeba (mkd-eng)": 0.07,
+ "Tatoeba (kzj-eng)": 4.02,
+ "Tatoeba (tzl-eng)": 6.54,
+ "Tatoeba (tha-eng)": 0.73,
+ "Tatoeba (lfn-eng)": 10.78,
+ "Tatoeba (ukr-eng)": 0.48,
+ "Tatoeba (amh-eng)": 0.02,
+ "Tatoeba (fry-eng)": 21.97,
+ "Tatoeba (lat-eng)": 8.81,
+ "Tatoeba (jpn-eng)": 0.82
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "AllegroReviews": 24.94,
+ "AmazonCounterfactualClassification (en-ext)": 76.94,
+ "AmazonCounterfactualClassification (en)": 74.1,
+ "AmazonCounterfactualClassification (de)": 55.48,
+ "AmazonCounterfactualClassification (ja)": 57.85,
+ "AmazonReviewsClassification (en)": 49.47,
+ "AmazonReviewsClassification (de)": 27.58,
+ "AmazonReviewsClassification (es)": 33.09,
+ "AmazonReviewsClassification (fr)": 29.65,
+ "AmazonReviewsClassification (ja)": 23.47,
+ "AmazonReviewsClassification (zh)": 24.22,
+ "AngryTweetsClassification": 46.23,
+ "CBD": 51.88,
+ "DanishPoliticalCommentsClassification": 29.23,
+ "GeoreviewClassification": 28.69,
+ "HeadlineClassification": 31.06,
+ "InappropriatenessClassification": 51.92,
+ "KinopoiskClassification": 37.39,
+ "LccSentimentClassification": 38.53,
+ "MTOPDomainClassification (en)": 92.83,
+ "MTOPDomainClassification (de)": 71.23,
+ "MTOPDomainClassification (es)": 74.85,
+ "MTOPDomainClassification (fr)": 77.32,
+ "MTOPDomainClassification (hi)": 43.76,
+ "MTOPDomainClassification (th)": 15.77,
+ "MTOPIntentClassification (en)": 65.84,
+ "MTOPIntentClassification (de)": 40.67,
+ "MTOPIntentClassification (es)": 43.26,
+ "MTOPIntentClassification (fr)": 37.39,
+ "MTOPIntentClassification (hi)": 20.74,
+ "MTOPIntentClassification (th)": 4.6,
+ "MasakhaNEWSClassification (amh)": 35.45,
+ "MasakhaNEWSClassification (eng)": 79.83,
+ "MasakhaNEWSClassification (fra)": 79.91,
+ "MasakhaNEWSClassification (hau)": 65.78,
+ "MasakhaNEWSClassification (ibo)": 61.41,
+ "MasakhaNEWSClassification (lin)": 75.2,
+ "MasakhaNEWSClassification (lug)": 64.17,
+ "MasakhaNEWSClassification (orm)": 66.49,
+ "MasakhaNEWSClassification (pcm)": 92.39,
+ "MasakhaNEWSClassification (run)": 67.17,
+ "MasakhaNEWSClassification (sna)": 80.54,
+ "MasakhaNEWSClassification (som)": 57.69,
+ "MasakhaNEWSClassification (swa)": 59.98,
+ "MasakhaNEWSClassification (tir)": 23.93,
+ "MasakhaNEWSClassification (xho)": 67.21,
+ "MasakhaNEWSClassification (yor)": 70.22,
+ "MassiveIntentClassification (fa)": 31.13,
+ "MassiveIntentClassification (pl)": 38.6,
+ "MassiveIntentClassification (hu)": 37.7,
+ "MassiveIntentClassification (ms)": 36.19,
+ "MassiveIntentClassification (en)": 69.46,
+ "MassiveIntentClassification (sv)": 39.05,
+ "MassiveIntentClassification (cy)": 36.05,
+ "MassiveIntentClassification (th)": 11.62,
+ "MassiveIntentClassification (hy)": 12.26,
+ "MassiveIntentClassification (lv)": 38.53,
+ "MassiveIntentClassification (zh-TW)": 21.9,
+ "MassiveIntentClassification (fi)": 40.19,
+ "MassiveIntentClassification (ml)": 2.71,
+ "MassiveIntentClassification (ja)": 31.36,
+ "MassiveIntentClassification (sl)": 38.71,
+ "MassiveIntentClassification (mn)": 25.23,
+ "MassiveIntentClassification (de)": 42.62,
+ "MassiveIntentClassification (ru)": 29.22,
+ "MassiveIntentClassification (it)": 43.77,
+ "MassiveIntentClassification (hi)": 19.66,
+ "MassiveIntentClassification (az)": 38.52,
+ "MassiveIntentClassification (af)": 36.66,
+ "MassiveIntentClassification (da)": 42.12,
+ "MassiveIntentClassification (id)": 39.14,
+ "MassiveIntentClassification (ta)": 11.37,
+ "MassiveIntentClassification (km)": 5.47,
+ "MassiveIntentClassification (is)": 34.59,
+ "MassiveIntentClassification (sq)": 38.33,
+ "MassiveIntentClassification (ar)": 25.17,
+ "MassiveIntentClassification (tl)": 40.36,
+ "MassiveIntentClassification (es)": 43.09,
+ "MassiveIntentClassification (te)": 2.35,
+ "MassiveIntentClassification (tr)": 40.1,
+ "MassiveIntentClassification (my)": 3.75,
+ "MassiveIntentClassification (el)": 30.08,
+ "MassiveIntentClassification (nl)": 39.26,
+ "MassiveIntentClassification (ro)": 39.47,
+ "MassiveIntentClassification (sw)": 37.86,
+ "MassiveIntentClassification (ur)": 21.46,
+ "MassiveIntentClassification (he)": 25.09,
+ "MassiveIntentClassification (am)": 2.58,
+ "MassiveIntentClassification (jv)": 34.78,
+ "MassiveIntentClassification (kn)": 3.05,
+ "MassiveIntentClassification (pt)": 43.64,
+ "MassiveIntentClassification (zh-CN)": 23.66,
+ "MassiveIntentClassification (fr)": 44.42,
+ "MassiveIntentClassification (nb)": 39.21,
+ "MassiveIntentClassification (vi)": 35.8,
+ "MassiveIntentClassification (ka)": 11.31,
+ "MassiveIntentClassification (bn)": 19.27,
+ "MassiveIntentClassification (ko)": 20.66,
+ "MassiveScenarioClassification (am)": 7.4,
+ "MassiveScenarioClassification (sv)": 46.07,
+ "MassiveScenarioClassification (sl)": 43.56,
+ "MassiveScenarioClassification (lv)": 42.58,
+ "MassiveScenarioClassification (fa)": 34.37,
+ "MassiveScenarioClassification (kn)": 8.16,
+ "MassiveScenarioClassification (ur)": 28.77,
+ "MassiveScenarioClassification (cy)": 40.67,
+ "MassiveScenarioClassification (es)": 54.3,
+ "MassiveScenarioClassification (nl)": 49.39,
+ "MassiveScenarioClassification (ta)": 16.94,
+ "MassiveScenarioClassification (bn)": 24.28,
+ "MassiveScenarioClassification (jv)": 43.09,
+ "MassiveScenarioClassification (sq)": 47.46,
+ "MassiveScenarioClassification (is)": 43.54,
+ "MassiveScenarioClassification (el)": 39.9,
+ "MassiveScenarioClassification (ar)": 31.38,
+ "MassiveScenarioClassification (hi)": 25.2,
+ "MassiveScenarioClassification (en)": 74.64,
+ "MassiveScenarioClassification (ko)": 26.73,
+ "MassiveScenarioClassification (pl)": 46.63,
+ "MassiveScenarioClassification (zh-TW)": 31.89,
+ "MassiveScenarioClassification (az)": 46.1,
+ "MassiveScenarioClassification (he)": 28.28,
+ "MassiveScenarioClassification (ro)": 50.87,
+ "MassiveScenarioClassification (id)": 47.36,
+ "MassiveScenarioClassification (th)": 21.64,
+ "MassiveScenarioClassification (nb)": 49.28,
+ "MassiveScenarioClassification (mn)": 31.36,
+ "MassiveScenarioClassification (pt)": 54.03,
+ "MassiveScenarioClassification (vi)": 41.13,
+ "MassiveScenarioClassification (ms)": 46.51,
+ "MassiveScenarioClassification (fr)": 55.41,
+ "MassiveScenarioClassification (ka)": 17.61,
+ "MassiveScenarioClassification (da)": 49.98,
+ "MassiveScenarioClassification (hy)": 18.61,
+ "MassiveScenarioClassification (ml)": 7.32,
+ "MassiveScenarioClassification (hu)": 45.44,
+ "MassiveScenarioClassification (te)": 7.16,
+ "MassiveScenarioClassification (ru)": 33.56,
+ "MassiveScenarioClassification (de)": 54.59,
+ "MassiveScenarioClassification (ja)": 38.77,
+ "MassiveScenarioClassification (sw)": 44.78,
+ "MassiveScenarioClassification (af)": 45.97,
+ "MassiveScenarioClassification (it)": 53.73,
+ "MassiveScenarioClassification (fi)": 45.07,
+ "MassiveScenarioClassification (my)": 10.72,
+ "MassiveScenarioClassification (tl)": 50.41,
+ "MassiveScenarioClassification (zh-CN)": 34.02,
+ "MassiveScenarioClassification (tr)": 46.56,
+ "MassiveScenarioClassification (km)": 9.29,
+ "NoRecClassification": 41.05,
+ "NordicLangClassification": 52.66,
+ "PAC": 69.14,
+ "PolEmo2.0-IN": 41.22,
+ "PolEmo2.0-OUT": 22.83,
+ "RuReviewsClassification": 44.25,
+ "RuSciBenchGRNTIClassification": 19.29,
+ "RuSciBenchOECDClassification": 14.8
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "AlloProfClusteringP2P": 63.95,
+ "AlloProfClusteringS2S": 35.03,
+ "BlurbsClusteringP2P": 26.46,
+ "BlurbsClusteringS2S": 9.73,
+ "GeoreviewClusteringP2P": 22.34,
+ "HALClusteringS2S": 20.6,
+ "MLSUMClusteringP2P (de)": 42.92,
+ "MLSUMClusteringP2P (fr)": 44.41,
+ "MLSUMClusteringP2P (ru)": 24.98,
+ "MLSUMClusteringP2P (es)": 47.0,
+ "MLSUMClusteringS2S (de)": 42.96,
+ "MLSUMClusteringS2S (fr)": 44.16,
+ "MLSUMClusteringS2S (ru)": 26.26,
+ "MLSUMClusteringS2S (es)": 46.52,
+ "MasakhaNEWSClusteringP2P (amh)": 40.07,
+ "MasakhaNEWSClusteringP2P (eng)": 48.62,
+ "MasakhaNEWSClusteringP2P (fra)": 64.41,
+ "MasakhaNEWSClusteringP2P (hau)": 34.91,
+ "MasakhaNEWSClusteringP2P (ibo)": 44.15,
+ "MasakhaNEWSClusteringP2P (lin)": 65.38,
+ "MasakhaNEWSClusteringP2P (lug)": 46.46,
+ "MasakhaNEWSClusteringP2P (orm)": 29.2,
+ "MasakhaNEWSClusteringP2P (pcm)": 77.04,
+ "MasakhaNEWSClusteringP2P (run)": 50.6,
+ "MasakhaNEWSClusteringP2P (sna)": 56.66,
+ "MasakhaNEWSClusteringP2P (som)": 37.87,
+ "MasakhaNEWSClusteringP2P (swa)": 22.09,
+ "MasakhaNEWSClusteringP2P (tir)": 44.62,
+ "MasakhaNEWSClusteringP2P (xho)": 34.04,
+ "MasakhaNEWSClusteringP2P (yor)": 42.36,
+ "MasakhaNEWSClusteringS2S (amh)": 44.61,
+ "MasakhaNEWSClusteringS2S (eng)": 47.44,
+ "MasakhaNEWSClusteringS2S (fra)": 28.87,
+ "MasakhaNEWSClusteringS2S (hau)": 13.5,
+ "MasakhaNEWSClusteringS2S (ibo)": 39.01,
+ "MasakhaNEWSClusteringS2S (lin)": 55.75,
+ "MasakhaNEWSClusteringS2S (lug)": 43.28,
+ "MasakhaNEWSClusteringS2S (orm)": 27.05,
+ "MasakhaNEWSClusteringS2S (pcm)": 79.9,
+ "MasakhaNEWSClusteringS2S (run)": 53.17,
+ "MasakhaNEWSClusteringS2S (sna)": 50.67,
+ "MasakhaNEWSClusteringS2S (som)": 25.86,
+ "MasakhaNEWSClusteringS2S (swa)": 18.28,
+ "MasakhaNEWSClusteringS2S (tir)": 46.22,
+ "MasakhaNEWSClusteringS2S (xho)": 21.01,
+ "MasakhaNEWSClusteringS2S (yor)": 27.62,
+ "RuSciBenchGRNTIClusteringP2P": 16.46,
+ "RuSciBenchOECDClusteringP2P": 14.67,
+ "TenKGnadClusteringP2P": 45.55,
+ "TenKGnadClusteringS2S": 20.66
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "CDSC-E": 46.69,
+ "FalseFriendsGermanEnglish": 47.77,
+ "OpusparcusPC (de)": 91.07,
+ "OpusparcusPC (en)": 97.95,
+ "OpusparcusPC (fi)": 86.78,
+ "OpusparcusPC (fr)": 87.81,
+ "OpusparcusPC (ru)": 80.53,
+ "OpusparcusPC (sv)": 84.72,
+ "PSC": 95.9,
+ "PawsXPairClassification (de)": 51.49,
+ "PawsXPairClassification (en)": 61.67,
+ "PawsXPairClassification (es)": 53.23,
+ "PawsXPairClassification (fr)": 54.96,
+ "PawsXPairClassification (ja)": 49.26,
+ "PawsXPairClassification (ko)": 51.36,
+ "PawsXPairClassification (zh)": 53.29,
+ "SICK-E-PL": 47.37,
+ "TERRa": 46.32
+ },
+ {
+ "Model": "nomic-embed-text-v1",
+ "CDSC-E": 46.69,
+ "FalseFriendsGermanEnglish": 47.77,
+ "OpusparcusPC (de)": 91.07,
+ "OpusparcusPC (en)": 97.95,
+ "OpusparcusPC (fi)": 86.78,
+ "OpusparcusPC (fr)": 87.89,
+ "OpusparcusPC (ru)": 80.53,
+ "OpusparcusPC (sv)": 84.77,
+ "PSC": 95.96,
+ "PawsXPairClassification (de)": 51.51,
+ "PawsXPairClassification (en)": 61.68,
+ "PawsXPairClassification (es)": 53.23,
+ "PawsXPairClassification (fr)": 54.96,
+ "PawsXPairClassification (ja)": 49.41,
+ "PawsXPairClassification (ko)": 51.36,
+ "PawsXPairClassification (zh)": 53.32,
+ "SICK-E-PL": 47.38,
+ "TERRa": 46.32
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "AlloprofReranking": 62.47,
+ "RuBQReranking": 34.13,
+ "SyntecReranking": 71.41,
+ "T2Reranking": 61.43
+ },
+ {
+ "Model": "nomic-embed-text-v1",
+ "MIRACLReranking (ar)": 15.17,
+ "MIRACLReranking (bn)": 20.85,
+ "MIRACLReranking (de)": 21.85,
+ "MIRACLReranking (en)": 51.92,
+ "MIRACLReranking (es)": 32.35,
+ "MIRACLReranking (fa)": 18.46,
+ "MIRACLReranking (fi)": 35.54,
+ "MIRACLReranking (fr)": 18.42,
+ "MIRACLReranking (hi)": 8.7,
+ "MIRACLReranking (id)": 22.39,
+ "MIRACLReranking (ja)": 11.45,
+ "MIRACLReranking (ko)": 9.77,
+ "MIRACLReranking (ru)": 14.74,
+ "MIRACLReranking (sw)": 30.3,
+ "MIRACLReranking (te)": 1.51,
+ "MIRACLReranking (th)": 3.48,
+ "MIRACLReranking (yo)": 51.97,
+ "MIRACLReranking (zh)": 11.67
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
{
"Model": "nomic-embed-text-v1",
+ "AILACasedocs": 28.87,
+ "AILAStatutes": 16.25,
+ "ARCChallenge": 10.06,
+ "AlloprofRetrieval": 33.93,
+ "AlphaNLI": 29.55,
+ "AppsRetrieval": 5.3,
+ "BSARDRetrieval": 5.83,
+ "CmedqaRetrieval": 2.23,
+ "CodeFeedbackMT": 45.4,
+ "CodeFeedbackST": 66.32,
+ "CodeSearchNetCCRetrieval (python)": 56.7,
+ "CodeSearchNetCCRetrieval (javascript)": 57.78,
+ "CodeSearchNetCCRetrieval (go)": 42.45,
+ "CodeSearchNetCCRetrieval (ruby)": 59.09,
+ "CodeSearchNetCCRetrieval (java)": 54.08,
+ "CodeSearchNetCCRetrieval (php)": 45.3,
+ "CodeSearchNetRetrieval (python)": 86.99,
+ "CodeSearchNetRetrieval (javascript)": 70.2,
+ "CodeSearchNetRetrieval (go)": 91.5,
+ "CodeSearchNetRetrieval (ruby)": 76.44,
+ "CodeSearchNetRetrieval (java)": 82.0,
+ "CodeSearchNetRetrieval (php)": 83.86,
+ "CodeTransOceanContest": 49.87,
+ "CodeTransOceanDL": 20.66,
+ "CosQA": 27.23,
+ "CovidRetrieval": 18.66,
+ "GerDaLIR": 5.68,
+ "GerDaLIRSmall": 13.57,
+ "GermanQuAD-Retrieval": 81.22,
+ "HellaSwag": 24.75,
"LEMBNarrativeQARetrieval": 41.23,
"LEMBQMSumRetrieval": 36.65,
"LEMBSummScreenFDRetrieval": 92.97,
- "LEMBWikimQARetrieval": 73.75
+ "LEMBWikimQARetrieval": 73.75,
+ "LeCaRDv2": 22.76,
+ "LegalBenchConsumerContractsQA": 71.37,
+ "LegalBenchCorporateLobbying": 92.73,
+ "LegalQuAD": 25.97,
+ "LegalSummarization": 62.72,
+ "MIRACLRetrieval (ar)": 2.38,
+ "MIRACLRetrieval (bn)": 6.1,
+ "MIRACLRetrieval (de)": 15.68,
+ "MIRACLRetrieval (en)": 47.1,
+ "MIRACLRetrieval (es)": 24.39,
+ "MIRACLRetrieval (fa)": 2.51,
+ "MIRACLRetrieval (fi)": 25.45,
+ "MIRACLRetrieval (fr)": 15.24,
+ "MIRACLRetrieval (hi)": 1.35,
+ "MIRACLRetrieval (id)": 17.19,
+ "MIRACLRetrieval (ja)": 4.75,
+ "MIRACLRetrieval (ko)": 4.27,
+ "MIRACLRetrieval (ru)": 3.73,
+ "MIRACLRetrieval (sw)": 30.75,
+ "MIRACLRetrieval (te)": 0.06,
+ "MIRACLRetrieval (th)": 0.62,
+ "MIRACLRetrieval (yo)": 57.75,
+ "MIRACLRetrieval (zh)": 1.08,
+ "MintakaRetrieval (ar)": 5.85,
+ "MintakaRetrieval (de)": 16.46,
+ "MintakaRetrieval (es)": 15.44,
+ "MintakaRetrieval (fr)": 17.28,
+ "MintakaRetrieval (hi)": 6.02,
+ "MintakaRetrieval (it)": 12.58,
+ "MintakaRetrieval (ja)": 9.73,
+ "MintakaRetrieval (pt)": 15.43,
+ "PIQA": 25.33,
+ "Quail": 4.61,
+ "RARbCode": 29.77,
+ "RARbMath": 36.49,
+ "RiaNewsRetrieval": 12.92,
+ "RuBQRetrieval": 6.7,
+ "SIQA": 2.57,
+ "SciFact-PL": 37.77,
+ "SpartQA": 6.27,
+ "StackOverflowQA": 70.96,
+ "SyntecRetrieval": 51.91,
+ "SyntheticText2SQL": 51.02,
+ "TRECCOVID-PL": 30.44,
+ "TempReasonL1": 1.59,
+ "TempReasonL2Fact": 11.43,
+ "TempReasonL2Pure": 1.33,
+ "TempReasonL3Fact": 11.89,
+ "TempReasonL3Pure": 8.53,
+ "WinoGrande": 44.56,
+ "XMarket (de)": 12.76,
+ "XMarket (en)": 31.24,
+ "XMarket (es)": 15.04,
+ "XPQARetrieval (ara-ara)": 14.14,
+ "XPQARetrieval (eng-ara)": 4.51,
+ "XPQARetrieval (ara-eng)": 9.51,
+ "XPQARetrieval (deu-deu)": 59.09,
+ "XPQARetrieval (eng-deu)": 11.22,
+ "XPQARetrieval (deu-eng)": 33.02,
+ "XPQARetrieval (spa-spa)": 44.32,
+ "XPQARetrieval (eng-spa)": 7.2,
+ "XPQARetrieval (spa-eng)": 25.13,
+ "XPQARetrieval (fra-fra)": 55.47,
+ "XPQARetrieval (eng-fra)": 12.08,
+ "XPQARetrieval (fra-eng)": 34.68,
+ "XPQARetrieval (hin-hin)": 40.12,
+ "XPQARetrieval (eng-hin)": 8.2,
+ "XPQARetrieval (hin-eng)": 8.09,
+ "XPQARetrieval (ita-ita)": 61.93,
+ "XPQARetrieval (eng-ita)": 7.39,
+ "XPQARetrieval (ita-eng)": 30.87,
+ "XPQARetrieval (jpn-jpn)": 39.23,
+ "XPQARetrieval (eng-jpn)": 8.36,
+ "XPQARetrieval (jpn-eng)": 17.16,
+ "XPQARetrieval (kor-kor)": 12.58,
+ "XPQARetrieval (eng-kor)": 9.23,
+ "XPQARetrieval (kor-eng)": 7.14,
+ "XPQARetrieval (pol-pol)": 31.17,
+ "XPQARetrieval (eng-pol)": 11.38,
+ "XPQARetrieval (pol-eng)": 16.32,
+ "XPQARetrieval (por-por)": 37.72,
+ "XPQARetrieval (eng-por)": 8.47,
+ "XPQARetrieval (por-eng)": 25.62,
+ "XPQARetrieval (tam-tam)": 7.28,
+ "XPQARetrieval (eng-tam)": 4.82,
+ "XPQARetrieval (tam-eng)": 4.31,
+ "XPQARetrieval (cmn-cmn)": 26.41,
+ "XPQARetrieval (eng-cmn)": 8.91,
+ "XPQARetrieval (cmn-eng)": 15.94
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "CDSC-R": 82.61,
+ "GermanSTSBenchmark": 62.7,
+ "RUParaPhraserSTS": 51.29,
+ "RuSTSBenchmarkSTS": 57.12,
+ "SICK-R-PL": 58.22,
+ "SICKFr": 65.29,
+ "STS22 (de-en)": 50.97,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (pl-en)": 64.12,
+ "STS22 (pl)": 30.72,
+ "STS22 (de-fr)": 50.64,
+ "STS22 (es-en)": 65.53,
+ "STS22 (es)": 65.18,
+ "STS22 (zh-en)": 46.19,
+ "STS22 (ru)": 32.23,
+ "STS22 (ar)": 36.31,
+ "STS22 (it)": 74.8,
+ "STS22 (es-it)": 61.2,
+ "STS22 (tr)": 54.48,
+ "STS22 (en)": 64.77,
+ "STS22 (zh)": 58.38,
+ "STS22 (de-pl)": 30.76,
+ "STS22 (de)": 38.66,
+ "STS22 (fr)": 79.58,
+ "STSB": 36.95,
+ "STSBenchmarkMultilingualSTS (it)": 64.94,
+ "STSBenchmarkMultilingualSTS (fr)": 65.88,
+ "STSBenchmarkMultilingualSTS (pl)": 61.0,
+ "STSBenchmarkMultilingualSTS (ru)": 56.56,
+ "STSBenchmarkMultilingualSTS (es)": 67.53,
+ "STSBenchmarkMultilingualSTS (de)": 63.17,
+ "STSBenchmarkMultilingualSTS (nl)": 60.97,
+ "STSBenchmarkMultilingualSTS (zh)": 39.96,
+ "STSBenchmarkMultilingualSTS (pt)": 64.39,
+ "STSBenchmarkMultilingualSTS (en)": 79.75
+ },
+ {
+ "Model": "nomic-embed-text-v1",
+ "CDSC-R": 82.61,
+ "GermanSTSBenchmark": 62.7,
+ "RUParaPhraserSTS": 51.29,
+ "RuSTSBenchmarkSTS": 57.12,
+ "SICK-R-PL": 58.22,
+ "SICKFr": 65.29,
+ "STS22 (de-en)": 50.97,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (pl-en)": 64.12,
+ "STS22 (pl)": 30.72,
+ "STS22 (de-fr)": 50.64,
+ "STS22 (es-en)": 65.53,
+ "STS22 (es)": 65.18,
+ "STS22 (zh-en)": 46.19,
+ "STS22 (ru)": 32.23,
+ "STS22 (ar)": 36.31,
+ "STS22 (it)": 74.8,
+ "STS22 (es-it)": 61.2,
+ "STS22 (tr)": 54.48,
+ "STS22 (en)": 64.77,
+ "STS22 (zh)": 58.38,
+ "STS22 (de-pl)": 30.76,
+ "STS22 (de)": 38.66,
+ "STS22 (fr)": 79.58,
+ "STSB": 36.95,
+ "STSBenchmarkMultilingualSTS (it)": 64.94,
+ "STSBenchmarkMultilingualSTS (fr)": 65.88,
+ "STSBenchmarkMultilingualSTS (pl)": 61.0,
+ "STSBenchmarkMultilingualSTS (ru)": 56.56,
+ "STSBenchmarkMultilingualSTS (es)": 67.53,
+ "STSBenchmarkMultilingualSTS (de)": 63.17,
+ "STSBenchmarkMultilingualSTS (nl)": 60.97,
+ "STSBenchmarkMultilingualSTS (zh)": 39.96,
+ "STSBenchmarkMultilingualSTS (pt)": 64.39,
+ "STSBenchmarkMultilingualSTS (en)": 79.75
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "SummEvalFr": 31.11
+ },
+ {
+ "Model": "nomic-embed-text-v1",
+ "SummEvalFr": 31.11
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "CEDRClassification": 34.96,
+ "SensitiveTopicsClassification": 17.91
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "nomic-embed-text-v1",
+ "Core17InstructionRetrieval": -2.11,
+ "News21InstructionRetrieval": 3.56,
+ "Robust04InstructionRetrieval": -11.19
+ }
+ ]
}
},
"nomic-ai__nomic-embed-text-v1.5-128": {
@@ -13758,23 +31299,291 @@
},
"openai__text-embedding-3-large": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "text-embedding-3-large",
+ "BornholmBitextMining": 43.85,
+ "Tatoeba (ron-eng)": 96.98,
+ "Tatoeba (slk-eng)": 95.33,
+ "Tatoeba (kab-eng)": 2.79,
+ "Tatoeba (tam-eng)": 39.2,
+ "Tatoeba (vie-eng)": 96.22,
+ "Tatoeba (uzb-eng)": 20.71,
+ "Tatoeba (war-eng)": 47.28,
+ "Tatoeba (bel-eng)": 82.25,
+ "Tatoeba (gla-eng)": 35.82,
+ "Tatoeba (swh-eng)": 65.95,
+ "Tatoeba (heb-eng)": 83.69,
+ "Tatoeba (orv-eng)": 53.49,
+ "Tatoeba (tgl-eng)": 95.18,
+ "Tatoeba (cym-eng)": 64.53,
+ "Tatoeba (kat-eng)": 2.84,
+ "Tatoeba (hrv-eng)": 96.22,
+ "Tatoeba (cha-eng)": 41.66,
+ "Tatoeba (tha-eng)": 89.4,
+ "Tatoeba (est-eng)": 93.91,
+ "Tatoeba (lvs-eng)": 74.8,
+ "Tatoeba (hye-eng)": 4.19,
+ "Tatoeba (pms-eng)": 60.53,
+ "Tatoeba (ast-eng)": 82.93,
+ "Tatoeba (nld-eng)": 97.27,
+ "Tatoeba (glg-eng)": 92.93,
+ "Tatoeba (gle-eng)": 46.47,
+ "Tatoeba (lfn-eng)": 66.2,
+ "Tatoeba (swe-eng)": 96.0,
+ "Tatoeba (zsm-eng)": 95.24,
+ "Tatoeba (ile-eng)": 78.1,
+ "Tatoeba (max-eng)": 54.53,
+ "Tatoeba (cat-eng)": 92.16,
+ "Tatoeba (nno-eng)": 93.82,
+ "Tatoeba (bos-eng)": 93.27,
+ "Tatoeba (cor-eng)": 7.54,
+ "Tatoeba (swg-eng)": 71.74,
+ "Tatoeba (pol-eng)": 97.73,
+ "Tatoeba (ara-eng)": 92.4,
+ "Tatoeba (ina-eng)": 94.98,
+ "Tatoeba (csb-eng)": 60.18,
+ "Tatoeba (pes-eng)": 89.77,
+ "Tatoeba (mar-eng)": 48.29,
+ "Tatoeba (khm-eng)": 1.87,
+ "Tatoeba (deu-eng)": 99.37,
+ "Tatoeba (nds-eng)": 81.35,
+ "Tatoeba (arz-eng)": 80.65,
+ "Tatoeba (gsw-eng)": 63.08,
+ "Tatoeba (arq-eng)": 42.73,
+ "Tatoeba (dan-eng)": 96.42,
+ "Tatoeba (epo-eng)": 88.32,
+ "Tatoeba (nov-eng)": 68.88,
+ "Tatoeba (nob-eng)": 98.07,
+ "Tatoeba (amh-eng)": 0.84,
+ "Tatoeba (xho-eng)": 13.86,
+ "Tatoeba (hun-eng)": 94.47,
+ "Tatoeba (wuu-eng)": 86.08,
+ "Tatoeba (hsb-eng)": 72.4,
+ "Tatoeba (afr-eng)": 93.93,
+ "Tatoeba (aze-eng)": 78.63,
+ "Tatoeba (spa-eng)": 98.03,
+ "Tatoeba (ell-eng)": 94.86,
+ "Tatoeba (dtp-eng)": 7.82,
+ "Tatoeba (ang-eng)": 77.64,
+ "Tatoeba (ita-eng)": 94.02,
+ "Tatoeba (ceb-eng)": 41.74,
+ "Tatoeba (kor-eng)": 88.92,
+ "Tatoeba (jav-eng)": 36.65,
+ "Tatoeba (cmn-eng)": 96.02,
+ "Tatoeba (tzl-eng)": 51.83,
+ "Tatoeba (yue-eng)": 88.84,
+ "Tatoeba (bre-eng)": 9.53,
+ "Tatoeba (rus-eng)": 93.4,
+ "Tatoeba (slv-eng)": 92.39,
+ "Tatoeba (uig-eng)": 4.24,
+ "Tatoeba (ukr-eng)": 93.22,
+ "Tatoeba (lat-eng)": 72.55,
+ "Tatoeba (mal-eng)": 50.26,
+ "Tatoeba (ber-eng)": 6.77,
+ "Tatoeba (bul-eng)": 93.3,
+ "Tatoeba (kzj-eng)": 9.08,
+ "Tatoeba (mhr-eng)": 11.87,
+ "Tatoeba (fry-eng)": 69.52,
+ "Tatoeba (por-eng)": 94.75,
+ "Tatoeba (tuk-eng)": 27.35,
+ "Tatoeba (fra-eng)": 96.0,
+ "Tatoeba (urd-eng)": 68.27,
+ "Tatoeba (pam-eng)": 15.43,
+ "Tatoeba (srp-eng)": 94.22,
+ "Tatoeba (lit-eng)": 87.59,
+ "Tatoeba (mon-eng)": 15.74,
+ "Tatoeba (tat-eng)": 17.64,
+ "Tatoeba (ind-eng)": 94.75,
+ "Tatoeba (ido-eng)": 74.09,
+ "Tatoeba (ces-eng)": 96.67,
+ "Tatoeba (sqi-eng)": 76.63,
+ "Tatoeba (kaz-eng)": 18.5,
+ "Tatoeba (cbk-eng)": 81.09,
+ "Tatoeba (isl-eng)": 89.9,
+ "Tatoeba (eus-eng)": 40.27,
+ "Tatoeba (oci-eng)": 67.46,
+ "Tatoeba (ben-eng)": 56.3,
+ "Tatoeba (hin-eng)": 91.49,
+ "Tatoeba (awa-eng)": 50.66,
+ "Tatoeba (dsb-eng)": 54.56,
+ "Tatoeba (kur-eng)": 24.66,
+ "Tatoeba (tel-eng)": 19.63,
+ "Tatoeba (fin-eng)": 97.83,
+ "Tatoeba (yid-eng)": 6.05,
+ "Tatoeba (fao-eng)": 74.77,
+ "Tatoeba (jpn-eng)": 95.78,
+ "Tatoeba (mkd-eng)": 88.11,
+ "Tatoeba (tur-eng)": 95.88
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "text-embedding-3-large",
- "AmazonCounterfactualClassification (en)": 78.93,
+ "AllegroReviews": 48.85,
+ "AmazonCounterfactualClassification (en-ext)": 80.09,
+ "AmazonCounterfactualClassification (en)": 78.99,
+ "AmazonCounterfactualClassification (de)": 71.7,
+ "AmazonCounterfactualClassification (ja)": 79.45,
"AmazonPolarityClassification": 92.85,
"AmazonReviewsClassification (en)": 48.7,
+ "AmazonReviewsClassification (de)": 48.18,
+ "AmazonReviewsClassification (es)": 45.87,
+ "AmazonReviewsClassification (fr)": 46.41,
+ "AmazonReviewsClassification (ja)": 44.0,
+ "AmazonReviewsClassification (zh)": 41.31,
+ "AngryTweetsClassification": 57.84,
"Banking77Classification": 85.69,
+ "CBD": 74.55,
+ "DanishPoliticalCommentsClassification": 43.41,
"EmotionClassification": 51.58,
+ "GeoreviewClassification": 47.06,
+ "HeadlineClassification": 77.19,
"ImdbClassification": 87.67,
+ "InappropriatenessClassification": 61.5,
+ "KinopoiskClassification": 60.21,
+ "LccSentimentClassification": 58.07,
"MTOPDomainClassification (en)": 95.36,
+ "MTOPDomainClassification (de)": 94.14,
+ "MTOPDomainClassification (es)": 95.07,
+ "MTOPDomainClassification (fr)": 93.39,
+ "MTOPDomainClassification (hi)": 88.28,
+ "MTOPDomainClassification (th)": 82.0,
"MTOPIntentClassification (en)": 75.07,
- "MassiveIntentClassification (en)": 74.64,
+ "MTOPIntentClassification (de)": 77.08,
+ "MTOPIntentClassification (es)": 78.24,
+ "MTOPIntentClassification (fr)": 73.68,
+ "MTOPIntentClassification (hi)": 64.64,
+ "MTOPIntentClassification (th)": 63.92,
+ "MasakhaNEWSClassification (amh)": 51.86,
+ "MasakhaNEWSClassification (eng)": 81.14,
+ "MasakhaNEWSClassification (fra)": 77.25,
+ "MasakhaNEWSClassification (hau)": 76.22,
+ "MasakhaNEWSClassification (ibo)": 65.33,
+ "MasakhaNEWSClassification (lin)": 78.29,
+ "MasakhaNEWSClassification (lug)": 70.31,
+ "MasakhaNEWSClassification (orm)": 77.2,
+ "MasakhaNEWSClassification (pcm)": 93.05,
+ "MasakhaNEWSClassification (run)": 78.66,
+ "MasakhaNEWSClassification (sna)": 85.64,
+ "MasakhaNEWSClassification (som)": 64.86,
+ "MasakhaNEWSClassification (swa)": 74.08,
+ "MasakhaNEWSClassification (tir)": 35.44,
+ "MasakhaNEWSClassification (xho)": 81.41,
+ "MasakhaNEWSClassification (yor)": 82.92,
+ "MassiveIntentClassification (nb)": 69.13,
+ "MassiveIntentClassification (vi)": 65.6,
+ "MassiveIntentClassification (sq)": 57.38,
+ "MassiveIntentClassification (fr)": 71.29,
+ "MassiveIntentClassification (ur)": 49.97,
+ "MassiveIntentClassification (zh-CN)": 69.44,
+ "MassiveIntentClassification (da)": 69.19,
+ "MassiveIntentClassification (pl)": 70.0,
+ "MassiveIntentClassification (am)": 34.46,
+ "MassiveIntentClassification (kn)": 43.85,
+ "MassiveIntentClassification (jv)": 52.81,
+ "MassiveIntentClassification (az)": 58.97,
+ "MassiveIntentClassification (ro)": 66.05,
+ "MassiveIntentClassification (pt)": 70.57,
+ "MassiveIntentClassification (el)": 67.62,
+ "MassiveIntentClassification (it)": 71.74,
+ "MassiveIntentClassification (hi)": 61.22,
+ "MassiveIntentClassification (te)": 41.49,
+ "MassiveIntentClassification (ja)": 71.12,
+ "MassiveIntentClassification (tl)": 61.1,
+ "MassiveIntentClassification (tr)": 67.96,
+ "MassiveIntentClassification (ar)": 62.79,
+ "MassiveIntentClassification (es)": 70.62,
+ "MassiveIntentClassification (zh-TW)": 65.52,
+ "MassiveIntentClassification (af)": 62.56,
+ "MassiveIntentClassification (ka)": 29.48,
+ "MassiveIntentClassification (is)": 54.4,
+ "MassiveIntentClassification (ko)": 63.93,
+ "MassiveIntentClassification (bn)": 50.02,
+ "MassiveIntentClassification (de)": 71.07,
+ "MassiveIntentClassification (mn)": 34.05,
+ "MassiveIntentClassification (en)": 74.63,
+ "MassiveIntentClassification (hu)": 67.59,
+ "MassiveIntentClassification (lv)": 59.58,
+ "MassiveIntentClassification (nl)": 71.56,
+ "MassiveIntentClassification (sw)": 55.94,
+ "MassiveIntentClassification (fa)": 64.77,
+ "MassiveIntentClassification (my)": 22.95,
+ "MassiveIntentClassification (sv)": 70.27,
+ "MassiveIntentClassification (sl)": 67.13,
+ "MassiveIntentClassification (km)": 32.55,
+ "MassiveIntentClassification (ms)": 65.4,
+ "MassiveIntentClassification (fi)": 69.9,
+ "MassiveIntentClassification (hy)": 33.21,
+ "MassiveIntentClassification (ml)": 43.95,
+ "MassiveIntentClassification (ru)": 70.11,
+ "MassiveIntentClassification (he)": 59.97,
+ "MassiveIntentClassification (cy)": 48.21,
+ "MassiveIntentClassification (th)": 57.86,
+ "MassiveIntentClassification (id)": 67.89,
+ "MassiveIntentClassification (ta)": 37.86,
+ "MassiveScenarioClassification (mn)": 40.58,
+ "MassiveScenarioClassification (hi)": 67.03,
+ "MassiveScenarioClassification (ar)": 69.99,
+ "MassiveScenarioClassification (cy)": 57.2,
+ "MassiveScenarioClassification (ms)": 71.7,
+ "MassiveScenarioClassification (it)": 76.95,
+ "MassiveScenarioClassification (ru)": 75.69,
+ "MassiveScenarioClassification (nb)": 75.27,
+ "MassiveScenarioClassification (zh-CN)": 76.96,
+ "MassiveScenarioClassification (bn)": 56.68,
+ "MassiveScenarioClassification (tl)": 69.52,
+ "MassiveScenarioClassification (kn)": 49.17,
+ "MassiveScenarioClassification (el)": 73.79,
+ "MassiveScenarioClassification (ko)": 71.49,
+ "MassiveScenarioClassification (da)": 75.91,
+ "MassiveScenarioClassification (nl)": 77.52,
+ "MassiveScenarioClassification (de)": 77.18,
+ "MassiveScenarioClassification (th)": 65.15,
+ "MassiveScenarioClassification (tr)": 72.26,
+ "MassiveScenarioClassification (jv)": 61.9,
+ "MassiveScenarioClassification (zh-TW)": 73.43,
+ "MassiveScenarioClassification (lv)": 66.96,
+ "MassiveScenarioClassification (ur)": 55.83,
+ "MassiveScenarioClassification (fr)": 76.46,
+ "MassiveScenarioClassification (id)": 74.18,
+ "MassiveScenarioClassification (sl)": 74.63,
+ "MassiveScenarioClassification (he)": 66.42,
+ "MassiveScenarioClassification (az)": 62.69,
+ "MassiveScenarioClassification (af)": 71.67,
+ "MassiveScenarioClassification (sw)": 65.09,
+ "MassiveScenarioClassification (te)": 48.1,
+ "MassiveScenarioClassification (km)": 38.68,
+ "MassiveScenarioClassification (hy)": 39.05,
+ "MassiveScenarioClassification (vi)": 70.48,
+ "MassiveScenarioClassification (ro)": 72.37,
+ "MassiveScenarioClassification (pl)": 74.98,
"MassiveScenarioClassification (en)": 79.79,
- "ToxicConversationsClassification": 72.92,
+ "MassiveScenarioClassification (sq)": 65.73,
+ "MassiveScenarioClassification (my)": 30.49,
+ "MassiveScenarioClassification (es)": 76.09,
+ "MassiveScenarioClassification (am)": 39.63,
+ "MassiveScenarioClassification (ml)": 48.25,
+ "MassiveScenarioClassification (is)": 64.11,
+ "MassiveScenarioClassification (ja)": 76.25,
+ "MassiveScenarioClassification (ka)": 33.8,
+ "MassiveScenarioClassification (sv)": 77.29,
+ "MassiveScenarioClassification (ta)": 43.66,
+ "MassiveScenarioClassification (fa)": 70.27,
+ "MassiveScenarioClassification (hu)": 73.14,
+ "MassiveScenarioClassification (fi)": 74.44,
+ "MassiveScenarioClassification (pt)": 74.66,
+ "NoRecClassification": 53.94,
+ "NordicLangClassification": 79.75,
+ "PAC": 70.33,
+ "PolEmo2.0-IN": 74.72,
+ "PolEmo2.0-OUT": 50.06,
+ "RuReviewsClassification": 66.11,
+ "RuSciBenchGRNTIClassification": 61.04,
+ "RuSciBenchOECDClassification": 47.07,
+ "ToxicConversationsClassification": 68.82,
"TweetSentimentExtractionClassification": 62.22
}
]
@@ -13783,22 +31592,94 @@
"v_measure": [
{
"Model": "text-embedding-3-large",
+ "AlloProfClusteringS2S": 52.89,
"ArxivClusteringP2P": 49.01,
"ArxivClusteringS2S": 44.45,
"BiorxivClusteringP2P": 38.03,
"BiorxivClusteringS2S": 36.53,
+ "BlurbsClusteringP2P": 44.1,
+ "BlurbsClusteringS2S": 21.85,
+ "GeoreviewClusteringP2P": 72.6,
+ "HALClusteringS2S": 27.68,
+ "MasakhaNEWSClusteringS2S (amh)": 44.44,
+ "MasakhaNEWSClusteringS2S (eng)": 53.54,
+ "MasakhaNEWSClusteringS2S (fra)": 39.71,
+ "MasakhaNEWSClusteringS2S (hau)": 28.77,
+ "MasakhaNEWSClusteringS2S (ibo)": 56.75,
+ "MasakhaNEWSClusteringS2S (lin)": 55.37,
+ "MasakhaNEWSClusteringS2S (lug)": 47.22,
+ "MasakhaNEWSClusteringS2S (orm)": 37.09,
+ "MasakhaNEWSClusteringS2S (pcm)": 68.2,
+ "MasakhaNEWSClusteringS2S (run)": 53.4,
+ "MasakhaNEWSClusteringS2S (sna)": 61.4,
+ "MasakhaNEWSClusteringS2S (som)": 32.0,
+ "MasakhaNEWSClusteringS2S (swa)": 29.03,
+ "MasakhaNEWSClusteringS2S (tir)": 46.06,
+ "MasakhaNEWSClusteringS2S (xho)": 35.97,
+ "MasakhaNEWSClusteringS2S (yor)": 41.0,
"MedrxivClusteringP2P": 32.7,
"MedrxivClusteringS2S": 31.27,
"RedditClustering": 67.84,
"RedditClusteringP2P": 67.96,
+ "RuSciBenchGRNTIClusteringP2P": 54.17,
+ "RuSciBenchOECDClusteringP2P": 46.73,
"StackExchangeClustering": 76.26,
"StackExchangeClusteringP2P": 36.88,
+ "TenKGnadClusteringS2S": 37.78,
"TwentyNewsgroupsClustering": 58.14
}
]
},
"PairClassification": {
"max_ap": [
+ {
+ "Model": "text-embedding-3-large",
+ "CDSC-E": 70.46,
+ "FalseFriendsGermanEnglish": 56.04,
+ "OpusparcusPC (de)": 98.55,
+ "OpusparcusPC (en)": 98.79,
+ "OpusparcusPC (fi)": 95.88,
+ "OpusparcusPC (fr)": 95.64,
+ "OpusparcusPC (ru)": 91.16,
+ "OpusparcusPC (sv)": 96.74,
+ "PSC": 99.55,
+ "PawsXPairClassification (de)": 61.83,
+ "PawsXPairClassification (en)": 66.41,
+ "PawsXPairClassification (es)": 63.25,
+ "PawsXPairClassification (fr)": 64.9,
+ "PawsXPairClassification (ja)": 53.92,
+ "PawsXPairClassification (ko)": 53.64,
+ "PawsXPairClassification (zh)": 59.71,
+ "SICK-E-PL": 70.11,
+ "SprintDuplicateQuestions": 92.24,
+ "TERRa": 57.99,
+ "TwitterSemEval2015": 77.15,
+ "TwitterURLCorpus": 87.79
+ },
+ {
+ "Model": "text-embedding-3-large",
+ "CDSC-E": 70.48,
+ "FalseFriendsGermanEnglish": 56.04,
+ "OpusparcusPC (de)": 98.55,
+ "OpusparcusPC (en)": 98.79,
+ "OpusparcusPC (fi)": 95.88,
+ "OpusparcusPC (fr)": 95.64,
+ "OpusparcusPC (ru)": 91.16,
+ "OpusparcusPC (sv)": 96.74,
+ "PSC": 99.57,
+ "PawsXPairClassification (de)": 62.29,
+ "PawsXPairClassification (en)": 66.41,
+ "PawsXPairClassification (es)": 63.25,
+ "PawsXPairClassification (fr)": 64.99,
+ "PawsXPairClassification (ja)": 54.25,
+ "PawsXPairClassification (ko)": 53.88,
+ "PawsXPairClassification (zh)": 59.97,
+ "SICK-E-PL": 70.11,
+ "SprintDuplicateQuestions": 92.51,
+ "TERRa": 58.08,
+ "TwitterSemEval2015": 77.15,
+ "TwitterURLCorpus": 87.79
+ },
{
"Model": "text-embedding-3-large",
"SprintDuplicateQuestions": 92.25,
@@ -13811,10 +31692,14 @@
"map": [
{
"Model": "text-embedding-3-large",
+ "AlloprofReranking": 80.3,
"AskUbuntuDupQuestions": 65.03,
"MindSmallReranking": 29.86,
+ "RuBQReranking": 75.19,
"SciDocsRR": 86.66,
- "StackOverflowDupQuestions": 55.08
+ "StackOverflowDupQuestions": 55.08,
+ "SyntecReranking": 90.38,
+ "T2Reranking": 67.05
}
]
},
@@ -13823,10 +31708,12 @@
{
"Model": "text-embedding-3-large",
"AILACasedocs": 39.0,
- "AILAStatutes": 41.31,
+ "AILAStatutes": 41.99,
"ARCChallenge": 23.98,
+ "AlloprofRetrieval": 60.28,
"AlphaNLI": 37.27,
- "ArguAna": 58.05,
+ "AppsRetrieval": 28.37,
+ "ArguAna": 57.99,
"BrightRetrieval (theoremqa_questions)": 22.22,
"BrightRetrieval (leetcode)": 23.65,
"BrightRetrieval (earth_science)": 26.27,
@@ -13841,6 +31728,25 @@
"BrightRetrieval (aops)": 8.45,
"CQADupstackRetrieval": 47.54,
"ClimateFEVER": 30.27,
+ "CmedqaRetrieval": 27.43,
+ "CodeFeedbackMT": 68.92,
+ "CodeFeedbackST": 80.42,
+ "CodeSearchNetCCRetrieval (python)": 76.01,
+ "CodeSearchNetCCRetrieval (javascript)": 77.36,
+ "CodeSearchNetCCRetrieval (go)": 67.19,
+ "CodeSearchNetCCRetrieval (ruby)": 75.43,
+ "CodeSearchNetCCRetrieval (java)": 77.23,
+ "CodeSearchNetCCRetrieval (php)": 65.83,
+ "CodeSearchNetRetrieval (python)": 92.36,
+ "CodeSearchNetRetrieval (javascript)": 83.14,
+ "CodeSearchNetRetrieval (go)": 96.48,
+ "CodeSearchNetRetrieval (ruby)": 87.96,
+ "CodeSearchNetRetrieval (java)": 93.18,
+ "CodeSearchNetRetrieval (php)": 89.89,
+ "CodeTransOceanContest": 84.25,
+ "CodeTransOceanDL": 34.23,
+ "CosQA": 31.0,
+ "CovidRetrieval": 68.43,
"DBPedia": 44.76,
"FEVER": 87.94,
"FiQA2018": 55.0,
@@ -13853,10 +31759,18 @@
"LEMBWikimQARetrieval": 54.16,
"LeCaRDv2": 57.2,
"LegalBenchConsumerContractsQA": 79.39,
- "LegalBenchCorporateLobbying": 95.09,
+ "LegalBenchCorporateLobbying": 95.22,
"LegalQuAD": 57.47,
"LegalSummarization": 71.55,
"MSMARCO": 40.24,
+ "MintakaRetrieval (ar)": 28.07,
+ "MintakaRetrieval (de)": 61.51,
+ "MintakaRetrieval (es)": 60.26,
+ "MintakaRetrieval (fr)": 62.88,
+ "MintakaRetrieval (hi)": 22.55,
+ "MintakaRetrieval (it)": 59.89,
+ "MintakaRetrieval (ja)": 39.29,
+ "MintakaRetrieval (pt)": 59.99,
"NFCorpus": 42.07,
"NQ": 61.27,
"PIQA": 41.96,
@@ -13864,18 +31778,60 @@
"QuoraRetrieval": 89.05,
"RARbCode": 89.64,
"RARbMath": 90.08,
- "SCIDOCS": 23.11,
+ "RuBQRetrieval": 72.32,
+ "SCIDOCS": 23.07,
"SIQA": 3.44,
"SciFact": 77.77,
- "SpartQA": 7.51,
+ "SciFact-PL": 71.04,
+ "SpartQA": 7.44,
+ "StackOverflowQA": 92.44,
+ "SyntecRetrieval": 87.36,
+ "SyntheticText2SQL": 68.45,
"TRECCOVID": 79.56,
+ "TRECCOVID-PL": 62.88,
"TempReasonL1": 2.13,
"TempReasonL2Fact": 28.65,
"TempReasonL2Pure": 10.34,
"TempReasonL3Fact": 25.52,
"TempReasonL3Pure": 15.28,
"Touche2020": 23.35,
- "WinoGrande": 29.11
+ "WinoGrande": 29.11,
+ "XPQARetrieval (ara-ara)": 50.83,
+ "XPQARetrieval (eng-ara)": 43.19,
+ "XPQARetrieval (ara-eng)": 50.85,
+ "XPQARetrieval (deu-deu)": 83.97,
+ "XPQARetrieval (eng-deu)": 62.93,
+ "XPQARetrieval (deu-eng)": 82.24,
+ "XPQARetrieval (spa-spa)": 70.56,
+ "XPQARetrieval (eng-spa)": 55.51,
+ "XPQARetrieval (spa-eng)": 68.74,
+ "XPQARetrieval (fra-fra)": 76.53,
+ "XPQARetrieval (eng-fra)": 60.93,
+ "XPQARetrieval (fra-eng)": 74.02,
+ "XPQARetrieval (hin-hin)": 74.81,
+ "XPQARetrieval (eng-hin)": 29.67,
+ "XPQARetrieval (hin-eng)": 65.92,
+ "XPQARetrieval (ita-ita)": 80.95,
+ "XPQARetrieval (eng-ita)": 54.83,
+ "XPQARetrieval (ita-eng)": 79.25,
+ "XPQARetrieval (jpn-jpn)": 76.92,
+ "XPQARetrieval (eng-jpn)": 54.56,
+ "XPQARetrieval (jpn-eng)": 75.09,
+ "XPQARetrieval (kor-kor)": 37.47,
+ "XPQARetrieval (eng-kor)": 36.63,
+ "XPQARetrieval (kor-eng)": 36.82,
+ "XPQARetrieval (pol-pol)": 53.28,
+ "XPQARetrieval (eng-pol)": 40.5,
+ "XPQARetrieval (pol-eng)": 51.6,
+ "XPQARetrieval (por-por)": 54.28,
+ "XPQARetrieval (eng-por)": 37.79,
+ "XPQARetrieval (por-eng)": 53.49,
+ "XPQARetrieval (tam-tam)": 32.85,
+ "XPQARetrieval (eng-tam)": 11.39,
+ "XPQARetrieval (tam-eng)": 14.02,
+ "XPQARetrieval (cmn-cmn)": 66.56,
+ "XPQARetrieval (eng-cmn)": 41.22,
+ "XPQARetrieval (cmn-eng)": 63.92
}
],
"recall_at_1": [
@@ -13906,11 +31862,59 @@
"STS17 (en-en)": 90.22,
"STS22 (en)": 66.14,
"STSBenchmark": 83.56
+ },
+ {
+ "Model": "text-embedding-3-large",
+ "CDSC-R": 92.19,
+ "GermanSTSBenchmark": 82.32,
+ "RUParaPhraserSTS": 72.97,
+ "RuSTSBenchmarkSTS": 80.81,
+ "SICK-R-PL": 72.68,
+ "SICKFr": 78.74,
+ "STSB": 78.0,
+ "STSBenchmarkMultilingualSTS (it)": 80.97,
+ "STSBenchmarkMultilingualSTS (es)": 82.99,
+ "STSBenchmarkMultilingualSTS (nl)": 81.41,
+ "STSBenchmarkMultilingualSTS (en)": 83.6,
+ "STSBenchmarkMultilingualSTS (fr)": 82.51,
+ "STSBenchmarkMultilingualSTS (pl)": 79.85,
+ "STSBenchmarkMultilingualSTS (pt)": 80.34,
+ "STSBenchmarkMultilingualSTS (de)": 82.73,
+ "STSBenchmarkMultilingualSTS (zh)": 79.32,
+ "STSBenchmarkMultilingualSTS (ru)": 80.85
+ },
+ {
+ "Model": "text-embedding-3-large",
+ "CDSC-R": 92.19,
+ "GermanSTSBenchmark": 82.32,
+ "RUParaPhraserSTS": 72.97,
+ "RuSTSBenchmarkSTS": 80.81,
+ "SICK-R-PL": 72.68,
+ "SICKFr": 78.74,
+ "STSB": 78.0,
+ "STSBenchmarkMultilingualSTS (it)": 80.97,
+ "STSBenchmarkMultilingualSTS (es)": 82.99,
+ "STSBenchmarkMultilingualSTS (nl)": 81.41,
+ "STSBenchmarkMultilingualSTS (en)": 83.6,
+ "STSBenchmarkMultilingualSTS (fr)": 82.51,
+ "STSBenchmarkMultilingualSTS (pl)": 79.85,
+ "STSBenchmarkMultilingualSTS (pt)": 80.34,
+ "STSBenchmarkMultilingualSTS (de)": 82.73,
+ "STSBenchmarkMultilingualSTS (zh)": 79.32,
+ "STSBenchmarkMultilingualSTS (ru)": 80.85
}
]
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "text-embedding-3-large",
+ "SummEval": 30.05
+ },
+ {
+ "Model": "text-embedding-3-large",
+ "SummEval": 30.05
+ },
{
"Model": "text-embedding-3-large",
"SummEval": 29.92
@@ -13918,7 +31922,13 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "text-embedding-3-large",
+ "CEDRClassification": 46.77,
+ "SensitiveTopicsClassification": 31.97
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": [
@@ -14100,22 +32110,290 @@
},
"openai__text-embedding-3-small": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "text-embedding-3-small",
+ "BornholmBitextMining": 40.88,
+ "Tatoeba (afr-eng)": 79.77,
+ "Tatoeba (dtp-eng)": 6.86,
+ "Tatoeba (tel-eng)": 0.68,
+ "Tatoeba (glg-eng)": 88.25,
+ "Tatoeba (kor-eng)": 61.61,
+ "Tatoeba (dsb-eng)": 38.66,
+ "Tatoeba (est-eng)": 67.96,
+ "Tatoeba (oci-eng)": 48.97,
+ "Tatoeba (mar-eng)": 9.51,
+ "Tatoeba (awa-eng)": 15.07,
+ "Tatoeba (zsm-eng)": 88.16,
+ "Tatoeba (fao-eng)": 42.44,
+ "Tatoeba (lat-eng)": 43.22,
+ "Tatoeba (tuk-eng)": 17.76,
+ "Tatoeba (ile-eng)": 75.59,
+ "Tatoeba (bul-eng)": 83.54,
+ "Tatoeba (heb-eng)": 44.33,
+ "Tatoeba (rus-eng)": 91.1,
+ "Tatoeba (urd-eng)": 12.54,
+ "Tatoeba (pes-eng)": 54.92,
+ "Tatoeba (pam-eng)": 8.23,
+ "Tatoeba (ang-eng)": 48.75,
+ "Tatoeba (tur-eng)": 83.92,
+ "Tatoeba (ara-eng)": 75.08,
+ "Tatoeba (kzj-eng)": 8.19,
+ "Tatoeba (war-eng)": 17.98,
+ "Tatoeba (sqi-eng)": 35.63,
+ "Tatoeba (ron-eng)": 90.68,
+ "Tatoeba (slv-eng)": 83.81,
+ "Tatoeba (fra-eng)": 93.75,
+ "Tatoeba (kat-eng)": 0.74,
+ "Tatoeba (ast-eng)": 78.26,
+ "Tatoeba (jpn-eng)": 89.22,
+ "Tatoeba (tam-eng)": 4.31,
+ "Tatoeba (yue-eng)": 71.79,
+ "Tatoeba (lfn-eng)": 56.5,
+ "Tatoeba (fry-eng)": 46.68,
+ "Tatoeba (nob-eng)": 95.91,
+ "Tatoeba (xho-eng)": 10.93,
+ "Tatoeba (mon-eng)": 11.71,
+ "Tatoeba (hin-eng)": 43.69,
+ "Tatoeba (orv-eng)": 28.28,
+ "Tatoeba (cmn-eng)": 93.33,
+ "Tatoeba (ell-eng)": 76.42,
+ "Tatoeba (cor-eng)": 4.18,
+ "Tatoeba (max-eng)": 43.93,
+ "Tatoeba (lit-eng)": 43.09,
+ "Tatoeba (fin-eng)": 89.08,
+ "Tatoeba (dan-eng)": 92.71,
+ "Tatoeba (uzb-eng)": 12.23,
+ "Tatoeba (tat-eng)": 11.31,
+ "Tatoeba (nld-eng)": 96.3,
+ "Tatoeba (hrv-eng)": 90.64,
+ "Tatoeba (bos-eng)": 86.53,
+ "Tatoeba (nds-eng)": 61.73,
+ "Tatoeba (kaz-eng)": 9.81,
+ "Tatoeba (spa-eng)": 97.04,
+ "Tatoeba (swe-eng)": 91.98,
+ "Tatoeba (ukr-eng)": 81.09,
+ "Tatoeba (kur-eng)": 17.64,
+ "Tatoeba (gla-eng)": 4.26,
+ "Tatoeba (ido-eng)": 62.7,
+ "Tatoeba (nov-eng)": 63.74,
+ "Tatoeba (vie-eng)": 85.38,
+ "Tatoeba (csb-eng)": 38.6,
+ "Tatoeba (ina-eng)": 90.77,
+ "Tatoeba (gsw-eng)": 37.93,
+ "Tatoeba (khm-eng)": 0.7,
+ "Tatoeba (lvs-eng)": 31.88,
+ "Tatoeba (cym-eng)": 8.58,
+ "Tatoeba (isl-eng)": 35.53,
+ "Tatoeba (ita-eng)": 92.32,
+ "Tatoeba (uig-eng)": 1.67,
+ "Tatoeba (wuu-eng)": 76.54,
+ "Tatoeba (yid-eng)": 1.18,
+ "Tatoeba (eus-eng)": 17.25,
+ "Tatoeba (tzl-eng)": 48.13,
+ "Tatoeba (bel-eng)": 47.19,
+ "Tatoeba (tha-eng)": 36.24,
+ "Tatoeba (cat-eng)": 84.23,
+ "Tatoeba (hye-eng)": 0.86,
+ "Tatoeba (gle-eng)": 4.65,
+ "Tatoeba (jav-eng)": 21.76,
+ "Tatoeba (arz-eng)": 46.12,
+ "Tatoeba (mkd-eng)": 65.77,
+ "Tatoeba (epo-eng)": 60.93,
+ "Tatoeba (ceb-eng)": 15.93,
+ "Tatoeba (por-eng)": 93.69,
+ "Tatoeba (pms-eng)": 47.24,
+ "Tatoeba (deu-eng)": 98.6,
+ "Tatoeba (ces-eng)": 88.56,
+ "Tatoeba (nno-eng)": 86.34,
+ "Tatoeba (ind-eng)": 88.64,
+ "Tatoeba (hsb-eng)": 53.23,
+ "Tatoeba (cha-eng)": 36.96,
+ "Tatoeba (cbk-eng)": 71.54,
+ "Tatoeba (aze-eng)": 36.72,
+ "Tatoeba (mal-eng)": 3.09,
+ "Tatoeba (arq-eng)": 17.45,
+ "Tatoeba (bre-eng)": 7.35,
+ "Tatoeba (ben-eng)": 13.3,
+ "Tatoeba (pol-eng)": 94.93,
+ "Tatoeba (mhr-eng)": 9.01,
+ "Tatoeba (tgl-eng)": 58.15,
+ "Tatoeba (srp-eng)": 85.3,
+ "Tatoeba (kab-eng)": 2.39,
+ "Tatoeba (amh-eng)": 0.01,
+ "Tatoeba (swg-eng)": 50.73,
+ "Tatoeba (hun-eng)": 81.65,
+ "Tatoeba (ber-eng)": 6.13,
+ "Tatoeba (swh-eng)": 27.61,
+ "Tatoeba (slk-eng)": 82.56
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "text-embedding-3-small",
+ "AllegroReviews": 38.71,
+ "AmazonCounterfactualClassification (en-ext)": 77.29,
"AmazonCounterfactualClassification (en)": 76.42,
+ "AmazonCounterfactualClassification (de)": 72.25,
+ "AmazonCounterfactualClassification (ja)": 71.86,
"AmazonPolarityClassification": 90.84,
"AmazonReviewsClassification (en)": 45.73,
+ "AmazonReviewsClassification (de)": 44.2,
+ "AmazonReviewsClassification (es)": 43.33,
+ "AmazonReviewsClassification (fr)": 43.64,
+ "AmazonReviewsClassification (ja)": 37.48,
+ "AmazonReviewsClassification (zh)": 37.42,
+ "AngryTweetsClassification": 55.68,
"Banking77Classification": 83.01,
+ "CBD": 65.23,
+ "DanishPoliticalCommentsClassification": 39.83,
"EmotionClassification": 50.63,
+ "GeoreviewClassification": 42.96,
+ "HeadlineClassification": 74.06,
"ImdbClassification": 83.66,
+ "InappropriatenessClassification": 58.86,
+ "KinopoiskClassification": 55.03,
+ "LccSentimentClassification": 59.33,
"MTOPDomainClassification (en)": 93.91,
+ "MTOPDomainClassification (de)": 92.77,
+ "MTOPDomainClassification (es)": 93.11,
+ "MTOPDomainClassification (fr)": 90.9,
+ "MTOPDomainClassification (hi)": 71.85,
+ "MTOPDomainClassification (th)": 70.32,
"MTOPIntentClassification (en)": 70.98,
+ "MTOPIntentClassification (de)": 71.6,
+ "MTOPIntentClassification (es)": 73.4,
+ "MTOPIntentClassification (fr)": 67.83,
+ "MTOPIntentClassification (hi)": 42.3,
+ "MTOPIntentClassification (th)": 48.04,
+ "MasakhaNEWSClassification (amh)": 47.85,
+ "MasakhaNEWSClassification (eng)": 80.57,
+ "MasakhaNEWSClassification (fra)": 76.49,
+ "MasakhaNEWSClassification (hau)": 69.91,
+ "MasakhaNEWSClassification (ibo)": 60.85,
+ "MasakhaNEWSClassification (lin)": 76.46,
+ "MasakhaNEWSClassification (lug)": 66.55,
+ "MasakhaNEWSClassification (orm)": 70.71,
+ "MasakhaNEWSClassification (pcm)": 92.16,
+ "MasakhaNEWSClassification (run)": 75.28,
+ "MasakhaNEWSClassification (sna)": 83.41,
+ "MasakhaNEWSClassification (som)": 61.39,
+ "MasakhaNEWSClassification (swa)": 75.06,
+ "MasakhaNEWSClassification (tir)": 29.89,
+ "MasakhaNEWSClassification (xho)": 79.53,
+ "MasakhaNEWSClassification (yor)": 76.42,
+ "MassiveIntentClassification (cy)": 40.7,
+ "MassiveIntentClassification (af)": 55.03,
+ "MassiveIntentClassification (pt)": 67.6,
+ "MassiveIntentClassification (ar)": 51.72,
+ "MassiveIntentClassification (zh-CN)": 64.38,
+ "MassiveIntentClassification (sq)": 46.1,
+ "MassiveIntentClassification (da)": 63.51,
+ "MassiveIntentClassification (is)": 41.21,
+ "MassiveIntentClassification (ka)": 28.86,
+ "MassiveIntentClassification (ml)": 40.9,
+ "MassiveIntentClassification (ta)": 37.65,
+ "MassiveIntentClassification (el)": 54.11,
+ "MassiveIntentClassification (fa)": 52.0,
+ "MassiveIntentClassification (hy)": 31.93,
+ "MassiveIntentClassification (km)": 33.33,
+ "MassiveIntentClassification (fr)": 67.8,
+ "MassiveIntentClassification (hu)": 55.75,
+ "MassiveIntentClassification (ru)": 63.87,
+ "MassiveIntentClassification (hi)": 44.77,
+ "MassiveIntentClassification (pl)": 64.59,
+ "MassiveIntentClassification (sw)": 45.12,
+ "MassiveIntentClassification (jv)": 48.29,
+ "MassiveIntentClassification (zh-TW)": 59.91,
+ "MassiveIntentClassification (it)": 66.6,
"MassiveIntentClassification (en)": 72.86,
+ "MassiveIntentClassification (am)": 31.68,
+ "MassiveIntentClassification (ja)": 64.14,
+ "MassiveIntentClassification (kn)": 37.15,
+ "MassiveIntentClassification (mn)": 40.05,
+ "MassiveIntentClassification (sv)": 64.6,
+ "MassiveIntentClassification (tl)": 50.93,
+ "MassiveIntentClassification (ms)": 57.19,
+ "MassiveIntentClassification (de)": 66.56,
+ "MassiveIntentClassification (tr)": 56.52,
+ "MassiveIntentClassification (nl)": 67.82,
+ "MassiveIntentClassification (id)": 60.52,
+ "MassiveIntentClassification (he)": 45.11,
+ "MassiveIntentClassification (ko)": 52.12,
+ "MassiveIntentClassification (th)": 45.14,
+ "MassiveIntentClassification (es)": 66.91,
+ "MassiveIntentClassification (te)": 37.19,
+ "MassiveIntentClassification (az)": 45.36,
+ "MassiveIntentClassification (ro)": 60.05,
+ "MassiveIntentClassification (nb)": 63.31,
+ "MassiveIntentClassification (fi)": 60.27,
+ "MassiveIntentClassification (sl)": 58.77,
+ "MassiveIntentClassification (lv)": 47.21,
+ "MassiveIntentClassification (bn)": 42.02,
+ "MassiveIntentClassification (vi)": 53.28,
+ "MassiveIntentClassification (ur)": 39.51,
+ "MassiveIntentClassification (my)": 28.57,
+ "MassiveScenarioClassification (nl)": 74.25,
+ "MassiveScenarioClassification (el)": 60.87,
+ "MassiveScenarioClassification (de)": 74.88,
+ "MassiveScenarioClassification (ar)": 58.95,
+ "MassiveScenarioClassification (af)": 66.43,
+ "MassiveScenarioClassification (km)": 39.58,
+ "MassiveScenarioClassification (sq)": 55.13,
+ "MassiveScenarioClassification (cy)": 47.54,
+ "MassiveScenarioClassification (ro)": 65.97,
+ "MassiveScenarioClassification (vi)": 61.03,
+ "MassiveScenarioClassification (pl)": 70.0,
+ "MassiveScenarioClassification (fi)": 66.29,
+ "MassiveScenarioClassification (pt)": 71.06,
+ "MassiveScenarioClassification (ja)": 71.02,
+ "MassiveScenarioClassification (sl)": 66.65,
+ "MassiveScenarioClassification (hy)": 38.01,
+ "MassiveScenarioClassification (sv)": 72.82,
+ "MassiveScenarioClassification (sw)": 54.09,
+ "MassiveScenarioClassification (ka)": 32.87,
+ "MassiveScenarioClassification (fr)": 73.3,
+ "MassiveScenarioClassification (zh-CN)": 73.14,
+ "MassiveScenarioClassification (ms)": 65.35,
+ "MassiveScenarioClassification (ur)": 47.76,
+ "MassiveScenarioClassification (da)": 71.31,
+ "MassiveScenarioClassification (am)": 36.06,
+ "MassiveScenarioClassification (nb)": 71.57,
+ "MassiveScenarioClassification (is)": 50.91,
+ "MassiveScenarioClassification (my)": 35.19,
+ "MassiveScenarioClassification (ru)": 69.58,
+ "MassiveScenarioClassification (es)": 72.19,
+ "MassiveScenarioClassification (th)": 54.2,
+ "MassiveScenarioClassification (lv)": 52.16,
+ "MassiveScenarioClassification (id)": 69.04,
+ "MassiveScenarioClassification (te)": 42.89,
+ "MassiveScenarioClassification (it)": 72.59,
+ "MassiveScenarioClassification (zh-TW)": 67.61,
+ "MassiveScenarioClassification (kn)": 41.2,
"MassiveScenarioClassification (en)": 76.84,
+ "MassiveScenarioClassification (ta)": 44.43,
+ "MassiveScenarioClassification (ml)": 44.68,
+ "MassiveScenarioClassification (fa)": 57.04,
+ "MassiveScenarioClassification (hi)": 51.29,
+ "MassiveScenarioClassification (hu)": 64.06,
+ "MassiveScenarioClassification (mn)": 41.77,
+ "MassiveScenarioClassification (jv)": 55.21,
+ "MassiveScenarioClassification (bn)": 46.02,
+ "MassiveScenarioClassification (he)": 51.68,
+ "MassiveScenarioClassification (az)": 52.6,
+ "MassiveScenarioClassification (tr)": 63.72,
+ "MassiveScenarioClassification (tl)": 60.12,
+ "MassiveScenarioClassification (ko)": 57.88,
+ "NoRecClassification": 49.41,
+ "NordicLangClassification": 67.85,
+ "PAC": 66.83,
+ "PolEmo2.0-IN": 66.73,
+ "PolEmo2.0-OUT": 31.46,
+ "RuReviewsClassification": 61.07,
+ "RuSciBenchGRNTIClassification": 55.56,
+ "RuSciBenchOECDClassification": 43.35,
"ToxicConversationsClassification": 71.91,
"TweetSentimentExtractionClassification": 61.72
}
@@ -14125,16 +32403,40 @@
"v_measure": [
{
"Model": "text-embedding-3-small",
+ "AlloProfClusteringS2S": 51.23,
"ArxivClusteringP2P": 46.57,
"ArxivClusteringS2S": 39.35,
"BiorxivClusteringP2P": 37.77,
"BiorxivClusteringS2S": 34.68,
+ "BlurbsClusteringP2P": 41.0,
+ "BlurbsClusteringS2S": 18.51,
+ "GeoreviewClusteringP2P": 63.89,
+ "HALClusteringS2S": 27.05,
+ "MasakhaNEWSClusteringS2S (amh)": 46.58,
+ "MasakhaNEWSClusteringS2S (eng)": 47.96,
+ "MasakhaNEWSClusteringS2S (fra)": 54.76,
+ "MasakhaNEWSClusteringS2S (hau)": 27.33,
+ "MasakhaNEWSClusteringS2S (ibo)": 49.27,
+ "MasakhaNEWSClusteringS2S (lin)": 57.97,
+ "MasakhaNEWSClusteringS2S (lug)": 48.93,
+ "MasakhaNEWSClusteringS2S (orm)": 28.39,
+ "MasakhaNEWSClusteringS2S (pcm)": 66.55,
+ "MasakhaNEWSClusteringS2S (run)": 51.66,
+ "MasakhaNEWSClusteringS2S (sna)": 49.83,
+ "MasakhaNEWSClusteringS2S (som)": 30.26,
+ "MasakhaNEWSClusteringS2S (swa)": 20.16,
+ "MasakhaNEWSClusteringS2S (tir)": 44.26,
+ "MasakhaNEWSClusteringS2S (xho)": 32.96,
+ "MasakhaNEWSClusteringS2S (yor)": 43.33,
"MedrxivClusteringP2P": 32.77,
"MedrxivClusteringS2S": 31.85,
"RedditClustering": 64.09,
"RedditClusteringP2P": 65.12,
+ "RuSciBenchGRNTIClusteringP2P": 50.18,
+ "RuSciBenchOECDClusteringP2P": 44.16,
"StackExchangeClustering": 72.05,
"StackExchangeClusteringP2P": 34.04,
+ "TenKGnadClusteringS2S": 34.51,
"TwentyNewsgroupsClustering": 54.81
}
]
@@ -14143,14 +32445,58 @@
"max_ap": [
{
"Model": "text-embedding-3-small",
+ "CDSC-E": 66.28,
+ "FalseFriendsGermanEnglish": 53.14,
+ "OpusparcusPC (de)": 97.13,
+ "OpusparcusPC (en)": 98.76,
+ "OpusparcusPC (fi)": 91.51,
"OpusparcusPC (fr)": 94.45,
- "SprintDuplicateQuestions": 94.58,
- "TwitterSemEval2015": 73.33,
- "TwitterURLCorpus": 87.21
+ "OpusparcusPC (ru)": 86.04,
+ "OpusparcusPC (sv)": 93.92,
+ "PSC": 99.22,
+ "PawsXPairClassification (de)": 58.5,
+ "PawsXPairClassification (en)": 64.2,
+ "PawsXPairClassification (es)": 59.62,
+ "PawsXPairClassification (fr)": 61.36,
+ "PawsXPairClassification (ja)": 52.67,
+ "PawsXPairClassification (ko)": 52.15,
+ "PawsXPairClassification (zh)": 58.59,
+ "SICK-E-PL": 60.78,
+ "SprintDuplicateQuestions": 94.6,
+ "TERRa": 53.73,
+ "TwitterSemEval2015": 73.38,
+ "TwitterURLCorpus": 87.18
+ },
+ {
+ "Model": "text-embedding-3-small",
+ "CDSC-E": 66.28,
+ "FalseFriendsGermanEnglish": 53.14,
+ "OpusparcusPC (de)": 97.14,
+ "OpusparcusPC (en)": 98.76,
+ "OpusparcusPC (fi)": 91.51,
+ "OpusparcusPC (fr)": 94.45,
+ "OpusparcusPC (ru)": 86.04,
+ "OpusparcusPC (sv)": 93.92,
+ "PSC": 99.23,
+ "PawsXPairClassification (de)": 58.72,
+ "PawsXPairClassification (en)": 64.24,
+ "PawsXPairClassification (es)": 59.65,
+ "PawsXPairClassification (fr)": 61.37,
+ "PawsXPairClassification (ja)": 52.71,
+ "PawsXPairClassification (ko)": 52.31,
+ "PawsXPairClassification (zh)": 59.01,
+ "SICK-E-PL": 60.78,
+ "SprintDuplicateQuestions": 94.61,
+ "TERRa": 53.73,
+ "TwitterSemEval2015": 73.38,
+ "TwitterURLCorpus": 87.18
},
{
"Model": "text-embedding-3-small",
- "OpusparcusPC (fr)": 94.45
+ "OpusparcusPC (fr)": 94.45,
+ "SprintDuplicateQuestions": 94.58,
+ "TwitterSemEval2015": 73.33,
+ "TwitterURLCorpus": 87.21
}
]
},
@@ -14158,10 +32504,14 @@
"map": [
{
"Model": "text-embedding-3-small",
+ "AlloprofReranking": 74.84,
"AskUbuntuDupQuestions": 62.18,
"MindSmallReranking": 29.93,
+ "RuBQReranking": 66.0,
"SciDocsRR": 83.25,
- "StackOverflowDupQuestions": 51.53
+ "StackOverflowDupQuestions": 51.53,
+ "SyntecReranking": 85.45,
+ "T2Reranking": 65.71
}
]
},
@@ -14169,17 +32519,58 @@
"ndcg_at_10": [
{
"Model": "text-embedding-3-small",
+ "AILACasedocs": 34.9,
+ "AILAStatutes": 28.21,
"ARCChallenge": 14.63,
+ "AlloprofRetrieval": 52.09,
"AlphaNLI": 30.61,
+ "AppsRetrieval": 17.01,
"ArguAna": 55.49,
"CQADupstackRetrieval": 42.58,
"ClimateFEVER": 26.86,
+ "CmedqaRetrieval": 21.84,
+ "CodeFeedbackMT": 60.48,
+ "CodeFeedbackST": 73.89,
+ "CodeSearchNetCCRetrieval (python)": 59.4,
+ "CodeSearchNetCCRetrieval (javascript)": 67.6,
+ "CodeSearchNetCCRetrieval (go)": 49.21,
+ "CodeSearchNetCCRetrieval (ruby)": 65.89,
+ "CodeSearchNetCCRetrieval (java)": 68.63,
+ "CodeSearchNetCCRetrieval (php)": 54.06,
+ "CodeSearchNetRetrieval (python)": 88.52,
+ "CodeSearchNetRetrieval (javascript)": 78.92,
+ "CodeSearchNetRetrieval (go)": 95.24,
+ "CodeSearchNetRetrieval (ruby)": 83.76,
+ "CodeSearchNetRetrieval (java)": 90.71,
+ "CodeSearchNetRetrieval (php)": 87.39,
+ "CodeTransOceanContest": 72.05,
+ "CodeTransOceanDL": 27.39,
+ "CosQA": 28.93,
+ "CovidRetrieval": 61.07,
"DBPedia": 39.97,
"FEVER": 79.42,
"FiQA2018": 44.91,
+ "GerDaLIRSmall": 34.74,
"HellaSwag": 30.94,
"HotpotQA": 63.63,
+ "LEMBNarrativeQARetrieval": 47.23,
+ "LEMBQMSumRetrieval": 38.15,
+ "LEMBSummScreenFDRetrieval": 92.12,
+ "LEMBWikimQARetrieval": 68.55,
+ "LeCaRDv2": 55.0,
+ "LegalBenchConsumerContractsQA": 79.15,
+ "LegalBenchCorporateLobbying": 93.9,
+ "LegalQuAD": 54.7,
+ "LegalSummarization": 69.34,
"MSMARCO": 37.02,
+ "MintakaRetrieval (ar)": 16.78,
+ "MintakaRetrieval (de)": 42.76,
+ "MintakaRetrieval (es)": 41.79,
+ "MintakaRetrieval (fr)": 41.69,
+ "MintakaRetrieval (hi)": 14.78,
+ "MintakaRetrieval (it)": 41.84,
+ "MintakaRetrieval (ja)": 21.16,
+ "MintakaRetrieval (pt)": 43.9,
"NFCorpus": 38.33,
"NQ": 52.86,
"PIQA": 33.69,
@@ -14187,18 +32578,60 @@
"QuoraRetrieval": 88.83,
"RARbCode": 72.03,
"RARbMath": 71.07,
+ "RuBQRetrieval": 60.1,
"SCIDOCS": 20.8,
"SIQA": 3.03,
"SciFact": 73.37,
+ "SciFact-PL": 59.37,
"SpartQA": 6.63,
+ "StackOverflowQA": 88.67,
+ "SyntecRetrieval": 86.42,
+ "SyntheticText2SQL": 58.33,
"TRECCOVID": 77.9,
+ "TRECCOVID-PL": 65.36,
"TempReasonL1": 2.35,
"TempReasonL2Fact": 25.68,
"TempReasonL2Pure": 2.76,
"TempReasonL3Fact": 22.09,
"TempReasonL3Pure": 9.79,
"Touche2020": 24.28,
- "WinoGrande": 31.53
+ "WinoGrande": 31.53,
+ "XPQARetrieval (ara-ara)": 42.5,
+ "XPQARetrieval (eng-ara)": 24.35,
+ "XPQARetrieval (ara-eng)": 35.1,
+ "XPQARetrieval (deu-deu)": 81.28,
+ "XPQARetrieval (eng-deu)": 55.73,
+ "XPQARetrieval (deu-eng)": 77.68,
+ "XPQARetrieval (spa-spa)": 66.82,
+ "XPQARetrieval (eng-spa)": 51.75,
+ "XPQARetrieval (spa-eng)": 65.02,
+ "XPQARetrieval (fra-fra)": 74.87,
+ "XPQARetrieval (eng-fra)": 55.75,
+ "XPQARetrieval (fra-eng)": 70.01,
+ "XPQARetrieval (hin-hin)": 66.94,
+ "XPQARetrieval (eng-hin)": 16.95,
+ "XPQARetrieval (hin-eng)": 26.17,
+ "XPQARetrieval (ita-ita)": 76.15,
+ "XPQARetrieval (eng-ita)": 48.92,
+ "XPQARetrieval (ita-eng)": 71.54,
+ "XPQARetrieval (jpn-jpn)": 70.79,
+ "XPQARetrieval (eng-jpn)": 43.54,
+ "XPQARetrieval (jpn-eng)": 66.14,
+ "XPQARetrieval (kor-kor)": 32.07,
+ "XPQARetrieval (eng-kor)": 25.15,
+ "XPQARetrieval (kor-eng)": 22.43,
+ "XPQARetrieval (pol-pol)": 47.44,
+ "XPQARetrieval (eng-pol)": 32.81,
+ "XPQARetrieval (pol-eng)": 43.7,
+ "XPQARetrieval (por-por)": 52.24,
+ "XPQARetrieval (eng-por)": 35.7,
+ "XPQARetrieval (por-eng)": 51.92,
+ "XPQARetrieval (tam-tam)": 32.97,
+ "XPQARetrieval (eng-tam)": 6.47,
+ "XPQARetrieval (tam-eng)": 4.98,
+ "XPQARetrieval (cmn-cmn)": 62.98,
+ "XPQARetrieval (eng-cmn)": 34.62,
+ "XPQARetrieval (cmn-eng)": 56.64
}
]
},
@@ -14216,11 +32649,59 @@
"STS17 (en-en)": 90.94,
"STS22 (en)": 64.96,
"STSBenchmark": 84.24
+ },
+ {
+ "Model": "text-embedding-3-small",
+ "CDSC-R": 89.48,
+ "GermanSTSBenchmark": 80.32,
+ "RUParaPhraserSTS": 65.18,
+ "RuSTSBenchmarkSTS": 74.52,
+ "SICK-R-PL": 64.77,
+ "SICKFr": 75.69,
+ "STSB": 75.24,
+ "STSBenchmarkMultilingualSTS (fr)": 81.11,
+ "STSBenchmarkMultilingualSTS (es)": 81.59,
+ "STSBenchmarkMultilingualSTS (it)": 79.0,
+ "STSBenchmarkMultilingualSTS (nl)": 79.4,
+ "STSBenchmarkMultilingualSTS (pt)": 80.64,
+ "STSBenchmarkMultilingualSTS (ru)": 74.58,
+ "STSBenchmarkMultilingualSTS (en)": 84.22,
+ "STSBenchmarkMultilingualSTS (zh)": 76.55,
+ "STSBenchmarkMultilingualSTS (de)": 80.57,
+ "STSBenchmarkMultilingualSTS (pl)": 74.06
+ },
+ {
+ "Model": "text-embedding-3-small",
+ "CDSC-R": 89.48,
+ "GermanSTSBenchmark": 80.32,
+ "RUParaPhraserSTS": 65.18,
+ "RuSTSBenchmarkSTS": 74.52,
+ "SICK-R-PL": 64.77,
+ "SICKFr": 75.69,
+ "STSB": 75.24,
+ "STSBenchmarkMultilingualSTS (fr)": 81.11,
+ "STSBenchmarkMultilingualSTS (es)": 81.59,
+ "STSBenchmarkMultilingualSTS (it)": 79.0,
+ "STSBenchmarkMultilingualSTS (nl)": 79.4,
+ "STSBenchmarkMultilingualSTS (pt)": 80.64,
+ "STSBenchmarkMultilingualSTS (ru)": 74.58,
+ "STSBenchmarkMultilingualSTS (en)": 84.22,
+ "STSBenchmarkMultilingualSTS (zh)": 76.55,
+ "STSBenchmarkMultilingualSTS (de)": 80.57,
+ "STSBenchmarkMultilingualSTS (pl)": 74.06
}
]
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "text-embedding-3-small",
+ "SummEval": 31.18
+ },
+ {
+ "Model": "text-embedding-3-small",
+ "SummEval": 31.18
+ },
{
"Model": "text-embedding-3-small",
"SummEval": 31.12
@@ -14228,10 +32709,23 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "text-embedding-3-small",
+ "CEDRClassification": 39.38,
+ "SensitiveTopicsClassification": 27.2
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "text-embedding-3-small",
+ "Core17InstructionRetrieval": 1.15,
+ "News21InstructionRetrieval": -1.81,
+ "Robust04InstructionRetrieval": -6.49
+ }
+ ]
}
},
"openai__text-embedding-3-small-instruct": {
@@ -15847,11 +34341,19 @@
"ArxivClusteringS2S": 22.05,
"BiorxivClusteringP2P": 29.84,
"BiorxivClusteringS2S": 20.57,
+ "BlurbsClusteringP2P": 36.52,
+ "BlurbsClusteringS2S": 14.27,
"GeoreviewClusteringP2P": 52.19,
"HALClusteringS2S": 20.62,
"MLSUMClusteringP2P (ru)": 39.45,
+ "MLSUMClusteringP2P (de)": 37.06,
+ "MLSUMClusteringP2P (fr)": 42.09,
+ "MLSUMClusteringP2P (es)": 43.19,
"MLSUMClusteringP2P": 42.09,
"MLSUMClusteringS2S (ru)": 35.77,
+ "MLSUMClusteringS2S (de)": 38.85,
+ "MLSUMClusteringS2S (fr)": 41.69,
+ "MLSUMClusteringS2S (es)": 42.85,
"MLSUMClusteringS2S": 34.84,
"MasakhaNEWSClusteringP2P (amh)": 67.78,
"MasakhaNEWSClusteringP2P (eng)": 48.16,
@@ -15893,6 +34395,8 @@
"RuSciBenchOECDClusteringP2P": 41.97,
"StackExchangeClustering": 35.43,
"StackExchangeClusteringP2P": 28.83,
+ "TenKGnadClusteringP2P": 40.64,
+ "TenKGnadClusteringS2S": 21.67,
"TwentyNewsgroupsClustering": 23.28
}
]
@@ -15902,6 +34406,7 @@
{
"Model": "LaBSE",
"CDSC-E": 68.92,
+ "FalseFriendsGermanEnglish": 51.15,
"OpusparcusPC (de)": 96.58,
"OpusparcusPC (en)": 98.12,
"OpusparcusPC (fi)": 94.44,
@@ -15925,6 +34430,7 @@
{
"Model": "LaBSE",
"CDSC-E": 68.92,
+ "FalseFriendsGermanEnglish": 51.28,
"OpusparcusPC (de)": 96.58,
"OpusparcusPC (en)": 98.12,
"OpusparcusPC (fi)": 94.44,
@@ -15973,6 +34479,27 @@
"StackOverflowDupQuestions": 42.42,
"SyntecReranking": 73.28,
"T2Reranking": 63.29
+ },
+ {
+ "Model": "LaBSE",
+ "MIRACLReranking (ar)": 30.84,
+ "MIRACLReranking (bn)": 39.77,
+ "MIRACLReranking (de)": 24.42,
+ "MIRACLReranking (en)": 24.96,
+ "MIRACLReranking (es)": 28.37,
+ "MIRACLReranking (fa)": 29.05,
+ "MIRACLReranking (fi)": 48.16,
+ "MIRACLReranking (fr)": 21.36,
+ "MIRACLReranking (hi)": 32.92,
+ "MIRACLReranking (id)": 22.95,
+ "MIRACLReranking (ja)": 29.55,
+ "MIRACLReranking (ko)": 30.38,
+ "MIRACLReranking (ru)": 28.92,
+ "MIRACLReranking (sw)": 33.09,
+ "MIRACLReranking (te)": 33.39,
+ "MIRACLReranking (th)": 38.0,
+ "MIRACLReranking (yo)": 47.58,
+ "MIRACLReranking (zh)": 20.23
}
]
},
@@ -16017,7 +34544,10 @@
"FEVER": 12.18,
"FiQA-PL": 7.63,
"FiQA2018": 7.0,
+ "GerDaLIR": 2.02,
"GerDaLIRSmall": 4.59,
+ "GermanDPR": 60.82,
+ "GermanQuAD-Retrieval": 78.87,
"HellaSwag": 5.59,
"HotpotQA": 18.75,
"HotpotQA-PL": 19.72,
@@ -16030,6 +34560,24 @@
"LegalBenchCorporateLobbying": 69.39,
"LegalQuAD": 16.64,
"LegalSummarization": 53.89,
+ "MIRACLRetrieval (ar)": 8.82,
+ "MIRACLRetrieval (bn)": 19.85,
+ "MIRACLRetrieval (de)": 7.84,
+ "MIRACLRetrieval (en)": 5.0,
+ "MIRACLRetrieval (es)": 8.19,
+ "MIRACLRetrieval (fa)": 10.53,
+ "MIRACLRetrieval (fi)": 28.1,
+ "MIRACLRetrieval (fr)": 7.93,
+ "MIRACLRetrieval (hi)": 13.97,
+ "MIRACLRetrieval (id)": 8.2,
+ "MIRACLRetrieval (ja)": 7.93,
+ "MIRACLRetrieval (ko)": 17.25,
+ "MIRACLRetrieval (ru)": 10.86,
+ "MIRACLRetrieval (sw)": 23.07,
+ "MIRACLRetrieval (te)": 12.79,
+ "MIRACLRetrieval (th)": 17.29,
+ "MIRACLRetrieval (yo)": 30.69,
+ "MIRACLRetrieval (zh)": 4.95,
"MMarcoRetrieval": 34.78,
"MSMARCO": 7.6,
"MSMARCO-PL": 7.22,
@@ -16074,6 +34622,9 @@
"Touche2020": 4.88,
"VideoRetrieval": 22.04,
"WinoGrande": 54.3,
+ "XMarket (de)": 4.27,
+ "XMarket (en)": 2.95,
+ "XMarket (es)": 6.44,
"XPQARetrieval (ara-ara)": 35.19,
"XPQARetrieval (eng-ara)": 20.64,
"XPQARetrieval (ara-eng)": 32.47,
@@ -16123,6 +34674,7 @@
"BIOSSES": 78.7,
"BQ": 42.6,
"CDSC-R": 85.53,
+ "GermanSTSBenchmark": 71.91,
"LCQMC": 52.19,
"PAWSX": 10.23,
"RUParaPhraserSTS": 65.74,
@@ -16569,9 +35121,19 @@
"ArxivClusteringS2S": 37.5,
"BiorxivClusteringP2P": 36.99,
"BiorxivClusteringS2S": 33.21,
+ "BlurbsClusteringP2P": 17.44,
+ "BlurbsClusteringS2S": 10.09,
"GeoreviewClusteringP2P": 20.76,
"HALClusteringS2S": 19.58,
+ "MLSUMClusteringP2P (de)": 20.86,
+ "MLSUMClusteringP2P (fr)": 34.35,
+ "MLSUMClusteringP2P (ru)": 22.69,
+ "MLSUMClusteringP2P (es)": 33.17,
"MLSUMClusteringP2P": 34.35,
+ "MLSUMClusteringS2S (de)": 20.69,
+ "MLSUMClusteringS2S (fr)": 32.64,
+ "MLSUMClusteringS2S (ru)": 20.82,
+ "MLSUMClusteringS2S (es)": 32.28,
"MLSUMClusteringS2S": 29.3,
"MasakhaNEWSClusteringP2P (amh)": 40.5,
"MasakhaNEWSClusteringP2P (eng)": 55.86,
@@ -16613,6 +35175,8 @@
"RuSciBenchOECDClusteringP2P": 10.19,
"StackExchangeClustering": 53.05,
"StackExchangeClusteringP2P": 33.13,
+ "TenKGnadClusteringP2P": 30.45,
+ "TenKGnadClusteringS2S": 15.81,
"TwentyNewsgroupsClustering": 47.47
}
]
@@ -16622,6 +35186,7 @@
{
"Model": "all-MiniLM-L12-v2",
"CDSC-E": 49.04,
+ "FalseFriendsGermanEnglish": 47.96,
"OpusparcusPC (de)": 91.2,
"OpusparcusPC (en)": 97.41,
"OpusparcusPC (fi)": 85.99,
@@ -16645,6 +35210,7 @@
{
"Model": "all-MiniLM-L12-v2",
"CDSC-E": 49.04,
+ "FalseFriendsGermanEnglish": 48.02,
"OpusparcusPC (de)": 91.2,
"OpusparcusPC (en)": 97.41,
"OpusparcusPC (fi)": 85.99,
@@ -16680,6 +35246,27 @@
"StackOverflowDupQuestions": 51.47,
"SyntecReranking": 69.17,
"T2Reranking": 60.32
+ },
+ {
+ "Model": "all-MiniLM-L12-v2",
+ "MIRACLReranking (ar)": 7.49,
+ "MIRACLReranking (bn)": 9.23,
+ "MIRACLReranking (de)": 23.33,
+ "MIRACLReranking (en)": 42.01,
+ "MIRACLReranking (es)": 28.07,
+ "MIRACLReranking (fa)": 7.11,
+ "MIRACLReranking (fi)": 37.13,
+ "MIRACLReranking (fr)": 24.1,
+ "MIRACLReranking (hi)": 7.8,
+ "MIRACLReranking (id)": 21.86,
+ "MIRACLReranking (ja)": 10.64,
+ "MIRACLReranking (ko)": 8.82,
+ "MIRACLReranking (ru)": 18.42,
+ "MIRACLReranking (sw)": 28.34,
+ "MIRACLReranking (te)": 2.28,
+ "MIRACLReranking (th)": 4.22,
+ "MIRACLReranking (yo)": 50.47,
+ "MIRACLReranking (zh)": 6.96
}
]
},
@@ -16723,7 +35310,10 @@
"FEVER": 55.9,
"FiQA-PL": 5.82,
"FiQA2018": 37.27,
+ "GerDaLIR": 0.55,
"GerDaLIRSmall": 1.35,
+ "GermanDPR": 50.27,
+ "GermanQuAD-Retrieval": 71.43,
"HellaSwag": 24.08,
"HotpotQA": 44.59,
"LEMBNarrativeQARetrieval": 19.64,
@@ -16735,6 +35325,24 @@
"LegalBenchCorporateLobbying": 88.69,
"LegalQuAD": 7.44,
"LegalSummarization": 57.43,
+ "MIRACLRetrieval (ar)": 0.01,
+ "MIRACLRetrieval (bn)": 0.06,
+ "MIRACLRetrieval (de)": 10.85,
+ "MIRACLRetrieval (en)": 26.85,
+ "MIRACLRetrieval (es)": 11.15,
+ "MIRACLRetrieval (fa)": 0.05,
+ "MIRACLRetrieval (fi)": 18.24,
+ "MIRACLRetrieval (fr)": 10.89,
+ "MIRACLRetrieval (hi)": 0.21,
+ "MIRACLRetrieval (id)": 10.12,
+ "MIRACLRetrieval (ja)": 0.73,
+ "MIRACLRetrieval (ko)": 4.57,
+ "MIRACLRetrieval (ru)": 3.04,
+ "MIRACLRetrieval (sw)": 19.38,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.32,
+ "MIRACLRetrieval (yo)": 47.02,
+ "MIRACLRetrieval (zh)": 0.05,
"MMarcoRetrieval": 7.46,
"MSMARCO": 39.03,
"MedicalRetrieval": 2.3,
@@ -16754,6 +35362,7 @@
"QuoraRetrieval": 87.75,
"RARbCode": 42.44,
"RARbMath": 66.36,
+ "RiaNewsRetrieval": 15.65,
"RuBQRetrieval": 8.84,
"SCIDOCS": 21.82,
"SCIDOCS-PL": 5.34,
@@ -16775,6 +35384,9 @@
"Touche2020": 17.22,
"VideoRetrieval": 9.38,
"WinoGrande": 27.2,
+ "XMarket (de)": 10.4,
+ "XMarket (en)": 26.67,
+ "XMarket (es)": 11.29,
"XPQARetrieval (ara-ara)": 7.83,
"XPQARetrieval (eng-ara)": 2.55,
"XPQARetrieval (ara-eng)": 8.88,
@@ -16824,6 +35436,7 @@
"BIOSSES": 83.57,
"BQ": 23.31,
"CDSC-R": 82.5,
+ "GermanSTSBenchmark": 62.88,
"LCQMC": 21.04,
"PAWSX": 7.31,
"RUParaPhraserSTS": 45.47,
@@ -17253,16 +35866,24 @@
"Model": "all-MiniLM-L6-v2",
"AlloProfClusteringP2P": 51.83,
"AlloProfClusteringS2S": 32.07,
- "ArxivClusteringP2P": 46.55,
- "ArxivClusteringS2S": 37.86,
+ "ArxivClusteringP2P": 46.47,
+ "ArxivClusteringS2S": 37.67,
"BiorxivClusteringP2P": 38.37,
"BiorxivClusteringS2S": 32.88,
+ "BlurbsClusteringP2P": 19.94,
+ "BlurbsClusteringS2S": 9.38,
"GeoreviewClusteringP2P": 20.25,
"HALClusteringS2S": 18.84,
"MLSUMClusteringP2P": 36.74,
"MLSUMClusteringP2P (ru)": 23.91,
+ "MLSUMClusteringP2P (de)": 26.72,
+ "MLSUMClusteringP2P (fr)": 36.74,
+ "MLSUMClusteringP2P (es)": 35.55,
"MLSUMClusteringS2S": 28.12,
"MLSUMClusteringS2S (ru)": 19.07,
+ "MLSUMClusteringS2S (de)": 26.44,
+ "MLSUMClusteringS2S (fr)": 35.66,
+ "MLSUMClusteringS2S (es)": 34.81,
"MasakhaNEWSClusteringP2P (fra)": 34.92,
"MasakhaNEWSClusteringP2P (amh)": 43.85,
"MasakhaNEWSClusteringP2P (eng)": 48.88,
@@ -17303,6 +35924,8 @@
"RuSciBenchOECDClusteringP2P": 9.44,
"StackExchangeClustering": 53.14,
"StackExchangeClusteringP2P": 34.26,
+ "TenKGnadClusteringP2P": 30.3,
+ "TenKGnadClusteringS2S": 13.32,
"TwentyNewsgroupsClustering": 46.49
}
]
@@ -17312,6 +35935,7 @@
{
"Model": "all-MiniLM-L6-v2",
"CDSC-E": 47.27,
+ "FalseFriendsGermanEnglish": 47.96,
"OpusparcusPC (de)": 89.91,
"OpusparcusPC (en)": 97.46,
"OpusparcusPC (fi)": 85.44,
@@ -17335,6 +35959,7 @@
{
"Model": "all-MiniLM-L6-v2",
"CDSC-E": 47.27,
+ "FalseFriendsGermanEnglish": 47.97,
"OpusparcusPC (fr)": 86.53,
"OpusparcusPC (de)": 89.91,
"OpusparcusPC (en)": 97.46,
@@ -17378,6 +36003,27 @@
"StackOverflowDupQuestions": 50.77,
"SyntecReranking": 67.31,
"T2Reranking": 56.26
+ },
+ {
+ "Model": "all-MiniLM-L6-v2",
+ "MIRACLReranking (ar)": 3.26,
+ "MIRACLReranking (bn)": 2.55,
+ "MIRACLReranking (de)": 16.26,
+ "MIRACLReranking (en)": 44.48,
+ "MIRACLReranking (es)": 21.34,
+ "MIRACLReranking (fa)": 4.63,
+ "MIRACLReranking (fi)": 30.04,
+ "MIRACLReranking (fr)": 19.04,
+ "MIRACLReranking (hi)": 6.02,
+ "MIRACLReranking (id)": 18.33,
+ "MIRACLReranking (ja)": 5.01,
+ "MIRACLReranking (ko)": 8.18,
+ "MIRACLReranking (ru)": 4.67,
+ "MIRACLReranking (sw)": 27.93,
+ "MIRACLReranking (te)": 2.94,
+ "MIRACLReranking (th)": 3.62,
+ "MIRACLReranking (yo)": 46.62,
+ "MIRACLReranking (zh)": 6.08
}
]
},
@@ -17421,7 +36067,10 @@
"FEVER": 51.93,
"FiQA-PL": 2.29,
"FiQA2018": 36.87,
+ "GerDaLIR": 1.07,
"GerDaLIRSmall": 2.41,
+ "GermanDPR": 47.83,
+ "GermanQuAD-Retrieval": 68.82,
"HellaSwag": 24.21,
"HotpotQA": 46.51,
"LEMBNarrativeQARetrieval": 18.27,
@@ -17434,6 +36083,23 @@
"LegalQuAD": 11.81,
"LegalSummarization": 59.0,
"MIRACLRetrieval (ru)": 0.39,
+ "MIRACLRetrieval (ar)": 0.01,
+ "MIRACLRetrieval (bn)": 0.05,
+ "MIRACLRetrieval (de)": 5.91,
+ "MIRACLRetrieval (en)": 29.26,
+ "MIRACLRetrieval (es)": 7.0,
+ "MIRACLRetrieval (fa)": 0.13,
+ "MIRACLRetrieval (fi)": 12.45,
+ "MIRACLRetrieval (fr)": 6.94,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 7.16,
+ "MIRACLRetrieval (ja)": 0.35,
+ "MIRACLRetrieval (ko)": 1.35,
+ "MIRACLRetrieval (sw)": 17.11,
+ "MIRACLRetrieval (te)": 0.04,
+ "MIRACLRetrieval (th)": 0.33,
+ "MIRACLRetrieval (yo)": 38.05,
+ "MIRACLRetrieval (zh)": 0.0,
"MMarcoRetrieval": 6.21,
"MSMARCO": 36.54,
"MedicalRetrieval": 1.76,
@@ -17450,7 +36116,7 @@
"NQ": 43.87,
"PIQA": 25.28,
"Quail": 3.92,
- "QuoraRetrieval": 87.56,
+ "QuoraRetrieval": 87.55,
"RARbCode": 44.27,
"RARbMath": 68.19,
"RiaNewsRetrieval": 0.67,
@@ -17475,6 +36141,9 @@
"Touche2020": 16.9,
"VideoRetrieval": 9.79,
"WinoGrande": 47.35,
+ "XMarket (de)": 9.3,
+ "XMarket (en)": 24.92,
+ "XMarket (es)": 10.11,
"XPQARetrieval (fr)": 51.79,
"XPQARetrieval (ara-ara)": 8.05,
"XPQARetrieval (eng-ara)": 1.9,
@@ -17524,6 +36193,7 @@
"BIOSSES": 81.64,
"BQ": 23.84,
"CDSC-R": 79.45,
+ "GermanSTSBenchmark": 60.56,
"LCQMC": 23.85,
"PAWSX": 7.21,
"RUParaPhraserSTS": 43.93,
@@ -18013,11 +36683,24 @@
"v_measure": [
{
"Model": "all-mpnet-base-v2",
+ "AlloProfClusteringP2P": 56.41,
+ "AlloProfClusteringS2S": 36.59,
"ArxivClusteringP2P": 48.38,
"ArxivClusteringS2S": 39.72,
"BiorxivClusteringP2P": 39.62,
"BiorxivClusteringS2S": 35.02,
+ "BlurbsClusteringP2P": 25.03,
+ "BlurbsClusteringS2S": 10.62,
"GeoreviewClusteringP2P": 20.33,
+ "HALClusteringS2S": 21.21,
+ "MLSUMClusteringP2P (de)": 36.78,
+ "MLSUMClusteringP2P (fr)": 40.62,
+ "MLSUMClusteringP2P (ru)": 22.35,
+ "MLSUMClusteringP2P (es)": 39.83,
+ "MLSUMClusteringS2S (de)": 36.21,
+ "MLSUMClusteringS2S (fr)": 39.91,
+ "MLSUMClusteringS2S (ru)": 22.55,
+ "MLSUMClusteringS2S (es)": 39.32,
"MasakhaNEWSClusteringP2P (amh)": 42.49,
"MasakhaNEWSClusteringP2P (eng)": 67.24,
"MasakhaNEWSClusteringP2P (fra)": 61.99,
@@ -18058,6 +36741,8 @@
"RuSciBenchOECDClusteringP2P": 12.49,
"StackExchangeClustering": 53.8,
"StackExchangeClusteringP2P": 34.28,
+ "TenKGnadClusteringP2P": 39.41,
+ "TenKGnadClusteringS2S": 19.98,
"TwentyNewsgroupsClustering": 49.74
}
]
@@ -18067,6 +36752,7 @@
{
"Model": "all-mpnet-base-v2",
"CDSC-E": 45.37,
+ "FalseFriendsGermanEnglish": 47.44,
"OpusparcusPC (de)": 89.78,
"OpusparcusPC (en)": 97.75,
"OpusparcusPC (fi)": 85.82,
@@ -18090,6 +36776,7 @@
{
"Model": "all-mpnet-base-v2",
"CDSC-E": 45.37,
+ "FalseFriendsGermanEnglish": 47.48,
"OpusparcusPC (de)": 89.78,
"OpusparcusPC (en)": 97.75,
"OpusparcusPC (fi)": 85.82,
@@ -18125,6 +36812,27 @@
"StackOverflowDupQuestions": 51.98,
"SyntecReranking": 66.12,
"T2Reranking": 58.3
+ },
+ {
+ "Model": "all-mpnet-base-v2",
+ "MIRACLReranking (ar)": 2.97,
+ "MIRACLReranking (bn)": 5.28,
+ "MIRACLReranking (de)": 23.43,
+ "MIRACLReranking (en)": 43.05,
+ "MIRACLReranking (es)": 26.46,
+ "MIRACLReranking (fa)": 4.86,
+ "MIRACLReranking (fi)": 25.32,
+ "MIRACLReranking (fr)": 22.26,
+ "MIRACLReranking (hi)": 6.99,
+ "MIRACLReranking (id)": 16.1,
+ "MIRACLReranking (ja)": 7.01,
+ "MIRACLReranking (ko)": 5.94,
+ "MIRACLReranking (ru)": 9.85,
+ "MIRACLReranking (sw)": 23.06,
+ "MIRACLReranking (te)": 2.53,
+ "MIRACLReranking (th)": 3.55,
+ "MIRACLReranking (yo)": 45.07,
+ "MIRACLReranking (zh)": 7.23
}
]
},
@@ -18180,7 +36888,10 @@
"FEVER": 50.86,
"FiQA-PL": 3.6,
"FiQA2018": 49.96,
+ "GerDaLIR": 1.53,
"GerDaLIRSmall": 3.78,
+ "GermanDPR": 56.96,
+ "GermanQuAD-Retrieval": 75.63,
"HellaSwag": 26.27,
"HotpotQA": 39.29,
"LEMBNarrativeQARetrieval": 19.34,
@@ -18192,6 +36903,24 @@
"LegalBenchCorporateLobbying": 89.04,
"LegalQuAD": 10.67,
"LegalSummarization": 58.55,
+ "MIRACLRetrieval (ar)": 0.01,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 9.69,
+ "MIRACLRetrieval (en)": 25.2,
+ "MIRACLRetrieval (es)": 7.45,
+ "MIRACLRetrieval (fa)": 0.04,
+ "MIRACLRetrieval (fi)": 10.14,
+ "MIRACLRetrieval (fr)": 7.92,
+ "MIRACLRetrieval (hi)": 0.14,
+ "MIRACLRetrieval (id)": 5.78,
+ "MIRACLRetrieval (ja)": 0.82,
+ "MIRACLRetrieval (ko)": 3.15,
+ "MIRACLRetrieval (ru)": 0.77,
+ "MIRACLRetrieval (sw)": 13.53,
+ "MIRACLRetrieval (te)": 0.04,
+ "MIRACLRetrieval (th)": 0.2,
+ "MIRACLRetrieval (yo)": 35.67,
+ "MIRACLRetrieval (zh)": 0.0,
"MMarcoRetrieval": 7.13,
"MSMARCO": 39.75,
"MedicalRetrieval": 1.71,
@@ -18211,6 +36940,7 @@
"QuoraRetrieval": 87.46,
"RARbCode": 53.21,
"RARbMath": 71.85,
+ "RiaNewsRetrieval": 3.32,
"RuBQRetrieval": 4.75,
"SCIDOCS": 23.76,
"SCIDOCS-PL": 4.02,
@@ -18232,6 +36962,9 @@
"Touche2020": 19.93,
"VideoRetrieval": 8.48,
"WinoGrande": 20.77,
+ "XMarket (de)": 8.23,
+ "XMarket (en)": 27.37,
+ "XMarket (es)": 8.05,
"XPQARetrieval (ara-ara)": 9.42,
"XPQARetrieval (eng-ara)": 2.36,
"XPQARetrieval (ara-eng)": 8.98,
@@ -18293,6 +37026,7 @@
"BIOSSES": 80.43,
"BQ": 21.39,
"CDSC-R": 77.04,
+ "GermanSTSBenchmark": 61.27,
"LCQMC": 22.84,
"PAWSX": 6.44,
"RUParaPhraserSTS": 42.15,
@@ -20827,8 +39561,14 @@
"GeoreviewClusteringP2P": 53.37,
"HALClusteringS2S": 23.21,
"MLSUMClusteringP2P (ru)": 37.0,
+ "MLSUMClusteringP2P (de)": 33.37,
+ "MLSUMClusteringP2P (fr)": 37.96,
+ "MLSUMClusteringP2P (es)": 36.62,
"MLSUMClusteringP2P": 39.97,
"MLSUMClusteringS2S (ru)": 38.16,
+ "MLSUMClusteringS2S (de)": 34.26,
+ "MLSUMClusteringS2S (fr)": 35.87,
+ "MLSUMClusteringS2S (es)": 35.6,
"MLSUMClusteringS2S": 36.55,
"MasakhaNEWSClusteringP2P (amh)": 40.36,
"MasakhaNEWSClusteringP2P (eng)": 49.96,
@@ -20881,6 +39621,7 @@
{
"Model": "paraphrase-multilingual-MiniLM-L12-v2",
"CDSC-E": 72.22,
+ "FalseFriendsGermanEnglish": 48.8,
"OpusparcusPC (de)": 96.63,
"OpusparcusPC (en)": 98.59,
"OpusparcusPC (fi)": 93.2,
@@ -20904,6 +39645,7 @@
{
"Model": "paraphrase-multilingual-MiniLM-L12-v2",
"CDSC-E": 72.33,
+ "FalseFriendsGermanEnglish": 49.5,
"OpusparcusPC (de)": 96.63,
"OpusparcusPC (en)": 98.59,
"OpusparcusPC (fi)": 93.2,
@@ -20952,6 +39694,27 @@
"StackOverflowDupQuestions": 45.85,
"SyntecReranking": 75.03,
"T2Reranking": 65.28
+ },
+ {
+ "Model": "paraphrase-multilingual-MiniLM-L12-v2",
+ "MIRACLReranking (ar)": 41.35,
+ "MIRACLReranking (bn)": 7.91,
+ "MIRACLReranking (de)": 36.46,
+ "MIRACLReranking (en)": 47.91,
+ "MIRACLReranking (es)": 46.0,
+ "MIRACLReranking (fa)": 30.83,
+ "MIRACLReranking (fi)": 54.16,
+ "MIRACLReranking (fr)": 35.78,
+ "MIRACLReranking (hi)": 36.16,
+ "MIRACLReranking (id)": 41.57,
+ "MIRACLReranking (ja)": 33.84,
+ "MIRACLReranking (ko)": 35.56,
+ "MIRACLReranking (ru)": 39.88,
+ "MIRACLReranking (sw)": 24.09,
+ "MIRACLReranking (te)": 14.45,
+ "MIRACLReranking (th)": 44.64,
+ "MIRACLReranking (yo)": 40.82,
+ "MIRACLReranking (zh)": 35.72
}
]
},
@@ -20970,7 +39733,7 @@
"BSARDRetrieval": 0.0,
"CQADupstackRetrieval": 30.7,
"ClimateFEVER": 18.49,
- "CmedqaRetrieval": 10.78,
+ "CmedqaRetrieval": 10.79,
"CodeFeedbackMT": 12.53,
"CodeFeedbackST": 32.53,
"CodeSearchNetCCRetrieval (python)": 44.49,
@@ -20996,7 +39759,10 @@
"FEVER": 52.66,
"FiQA-PL": 12.49,
"FiQA2018": 20.33,
+ "GerDaLIR": 0.95,
"GerDaLIRSmall": 2.62,
+ "GermanDPR": 64.63,
+ "GermanQuAD-Retrieval": 82.14,
"HellaSwag": 16.98,
"HotpotQA": 30.01,
"HotpotQA-PL": 22.76,
@@ -21009,6 +39775,24 @@
"LegalBenchCorporateLobbying": 88.51,
"LegalQuAD": 13.31,
"LegalSummarization": 54.97,
+ "MIRACLRetrieval (ar)": 19.57,
+ "MIRACLRetrieval (bn)": 1.12,
+ "MIRACLRetrieval (de)": 22.43,
+ "MIRACLRetrieval (en)": 32.75,
+ "MIRACLRetrieval (es)": 25.87,
+ "MIRACLRetrieval (fa)": 13.33,
+ "MIRACLRetrieval (fi)": 34.14,
+ "MIRACLRetrieval (fr)": 21.75,
+ "MIRACLRetrieval (hi)": 18.91,
+ "MIRACLRetrieval (id)": 25.76,
+ "MIRACLRetrieval (ja)": 14.06,
+ "MIRACLRetrieval (ko)": 25.68,
+ "MIRACLRetrieval (ru)": 18.96,
+ "MIRACLRetrieval (sw)": 11.04,
+ "MIRACLRetrieval (te)": 0.95,
+ "MIRACLRetrieval (th)": 26.91,
+ "MIRACLRetrieval (yo)": 24.47,
+ "MIRACLRetrieval (zh)": 21.35,
"MMarcoRetrieval": 46.62,
"MSMARCO": 23.72,
"MSMARCO-PL": 10.39,
@@ -21053,6 +39837,9 @@
"Touche2020": 16.06,
"VideoRetrieval": 14.71,
"WinoGrande": 46.52,
+ "XMarket (de)": 5.95,
+ "XMarket (en)": 14.43,
+ "XMarket (es)": 7.11,
"XPQARetrieval (ara-ara)": 22.97,
"XPQARetrieval (eng-ara)": 17.17,
"XPQARetrieval (ara-eng)": 25.5,
@@ -21102,6 +39889,7 @@
"BIOSSES": 74.18,
"BQ": 38.53,
"CDSC-R": 88.98,
+ "GermanSTSBenchmark": 79.1,
"LCQMC": 63.96,
"PAWSX": 10.13,
"RUParaPhraserSTS": 61.87,
@@ -21531,20 +40319,26 @@
{
"Model": "paraphrase-multilingual-mpnet-base-v2",
"8TagsClustering": 25.62,
- "AlloProfClusteringP2P": 54.49,
- "AlloProfClusteringS2S": 44.79,
- "ArxivClusteringP2P": 37.78,
- "ArxivClusteringS2S": 31.68,
+ "AlloProfClusteringP2P": 53.84,
+ "AlloProfClusteringS2S": 40.83,
+ "ArxivClusteringP2P": 37.83,
+ "ArxivClusteringS2S": 31.69,
"BiorxivClusteringP2P": 33.02,
"BiorxivClusteringS2S": 29.45,
- "BlurbsClusteringP2P": 34.38,
- "BlurbsClusteringS2S": 15.81,
+ "BlurbsClusteringP2P": 34.0,
+ "BlurbsClusteringS2S": 15.31,
"GeoreviewClusteringP2P": 56.18,
- "HALClusteringS2S": 23.97,
+ "HALClusteringS2S": 23.56,
"MLSUMClusteringP2P": 40.55,
"MLSUMClusteringP2P (ru)": 35.95,
+ "MLSUMClusteringP2P (de)": 34.46,
+ "MLSUMClusteringP2P (fr)": 39.08,
+ "MLSUMClusteringP2P (es)": 37.58,
"MLSUMClusteringS2S": 37.53,
"MLSUMClusteringS2S (ru)": 38.88,
+ "MLSUMClusteringS2S (de)": 34.25,
+ "MLSUMClusteringS2S (fr)": 36.54,
+ "MLSUMClusteringS2S (es)": 36.99,
"MasakhaNEWSClusteringP2P (fra)": 53.3,
"MasakhaNEWSClusteringP2P (amh)": 46.85,
"MasakhaNEWSClusteringP2P (eng)": 47.3,
@@ -21585,8 +40379,8 @@
"RuSciBenchOECDClusteringP2P": 42.9,
"StackExchangeClustering": 52.99,
"StackExchangeClusteringP2P": 33.06,
- "TenKGnadClusteringP2P": 35.96,
- "TenKGnadClusteringS2S": 22.0,
+ "TenKGnadClusteringP2P": 33.62,
+ "TenKGnadClusteringS2S": 20.93,
"TwentyNewsgroupsClustering": 44.36
}
]
@@ -21608,6 +40402,7 @@
{
"Model": "paraphrase-multilingual-mpnet-base-v2",
"CDSC-E": 75.77,
+ "FalseFriendsGermanEnglish": 51.63,
"OpusparcusPC (fr)": 93.45,
"OpusparcusPC (de)": 97.34,
"OpusparcusPC (en)": 98.59,
@@ -21632,6 +40427,7 @@
{
"Model": "paraphrase-multilingual-mpnet-base-v2",
"CDSC-E": 75.77,
+ "FalseFriendsGermanEnglish": 51.35,
"OpusparcusPC (de)": 97.34,
"OpusparcusPC (en)": 98.59,
"OpusparcusPC (fi)": 95.33,
@@ -21667,6 +40463,27 @@
"StackOverflowDupQuestions": 46.78,
"SyntecReranking": 80.97,
"T2Reranking": 64.49
+ },
+ {
+ "Model": "paraphrase-multilingual-mpnet-base-v2",
+ "MIRACLReranking (ar)": 44.47,
+ "MIRACLReranking (bn)": 30.21,
+ "MIRACLReranking (de)": 40.7,
+ "MIRACLReranking (en)": 50.23,
+ "MIRACLReranking (es)": 50.96,
+ "MIRACLReranking (fa)": 37.18,
+ "MIRACLReranking (fi)": 59.56,
+ "MIRACLReranking (fr)": 40.93,
+ "MIRACLReranking (hi)": 40.36,
+ "MIRACLReranking (id)": 45.04,
+ "MIRACLReranking (ja)": 36.2,
+ "MIRACLReranking (ko)": 40.13,
+ "MIRACLReranking (ru)": 43.66,
+ "MIRACLReranking (sw)": 26.94,
+ "MIRACLReranking (te)": 23.65,
+ "MIRACLReranking (th)": 46.63,
+ "MIRACLReranking (yo)": 42.37,
+ "MIRACLReranking (zh)": 37.56
}
]
},
@@ -21711,7 +40528,10 @@
"FEVER": 56.76,
"FiQA-PL": 14.71,
"FiQA2018": 22.96,
+ "GerDaLIR": 1.14,
"GerDaLIRSmall": 3.0,
+ "GermanDPR": 67.88,
+ "GermanQuAD-Retrieval": 85.24,
"HellaSwag": 17.53,
"HotpotQA": 37.03,
"HotpotQA-PL": 29.36,
@@ -21724,8 +40544,26 @@
"LegalBenchCorporateLobbying": 87.62,
"LegalQuAD": 17.8,
"LegalSummarization": 56.8,
+ "MIRACLRetrieval (ar)": 20.69,
+ "MIRACLRetrieval (bn)": 11.04,
+ "MIRACLRetrieval (de)": 28.28,
+ "MIRACLRetrieval (en)": 32.78,
+ "MIRACLRetrieval (es)": 28.4,
+ "MIRACLRetrieval (fa)": 17.34,
+ "MIRACLRetrieval (fi)": 40.78,
+ "MIRACLRetrieval (fr)": 25.87,
+ "MIRACLRetrieval (hi)": 20.27,
+ "MIRACLRetrieval (id)": 29.58,
+ "MIRACLRetrieval (ja)": 13.91,
+ "MIRACLRetrieval (ko)": 30.01,
+ "MIRACLRetrieval (ru)": 22.87,
+ "MIRACLRetrieval (sw)": 17.24,
+ "MIRACLRetrieval (te)": 5.33,
+ "MIRACLRetrieval (th)": 25.49,
+ "MIRACLRetrieval (yo)": 28.34,
+ "MIRACLRetrieval (zh)": 20.19,
"MMarcoRetrieval": 44.62,
- "MSMARCO": 26.6,
+ "MSMARCO": 26.59,
"MSMARCO-PL": 12.45,
"MedicalRetrieval": 14.1,
"MintakaRetrieval (fr)": 24.45,
@@ -21743,7 +40581,7 @@
"PIQA": 18.65,
"Quail": 2.98,
"Quora-PL": 79.18,
- "QuoraRetrieval": 86.4,
+ "QuoraRetrieval": 86.41,
"RARbCode": 11.02,
"RARbMath": 30.93,
"RiaNewsRetrieval": 51.75,
@@ -21768,6 +40606,9 @@
"Touche2020": 17.4,
"VideoRetrieval": 14.18,
"WinoGrande": 49.01,
+ "XMarket (de)": 7.72,
+ "XMarket (en)": 16.28,
+ "XMarket (es)": 9.18,
"XPQARetrieval (fr)": 46.22,
"XPQARetrieval (ara-ara)": 24.86,
"XPQARetrieval (eng-ara)": 19.6,
@@ -21817,6 +40658,7 @@
"BIOSSES": 76.27,
"BQ": 36.33,
"CDSC-R": 88.8,
+ "GermanSTSBenchmark": 83.41,
"LCQMC": 63.3,
"PAWSX": 12.16,
"RUParaPhraserSTS": 65.74,
@@ -23254,6 +42096,7 @@
"f1": [
{
"Model": "LaBSE-ru-turbo",
+ "BornholmBitextMining": 37.59,
"Tatoeba (rus-eng)": 93.22
}
]
@@ -23262,15 +42105,169 @@
"accuracy": [
{
"Model": "LaBSE-ru-turbo",
+ "AllegroReviews": 24.9,
+ "AmazonCounterfactualClassification (en-ext)": 74.51,
+ "AmazonCounterfactualClassification (en)": 74.01,
+ "AmazonCounterfactualClassification (de)": 51.82,
+ "AmazonCounterfactualClassification (ja)": 56.28,
+ "AmazonPolarityClassification": 79.24,
+ "AmazonReviewsClassification (en)": 42.38,
+ "AmazonReviewsClassification (de)": 29.28,
+ "AmazonReviewsClassification (es)": 33.88,
+ "AmazonReviewsClassification (fr)": 32.2,
+ "AmazonReviewsClassification (ja)": 20.53,
+ "AmazonReviewsClassification (zh)": 20.93,
+ "AngryTweetsClassification": 46.87,
+ "Banking77Classification": 73.54,
+ "CBD": 53.58,
+ "DanishPoliticalCommentsClassification": 30.36,
+ "EmotionClassification": 45.58,
"GeoreviewClassification": 46.04,
"HeadlineClassification": 69.98,
+ "ImdbClassification": 71.58,
"InappropriatenessClassification": 61.39,
"KinopoiskClassification": 53.59,
+ "LccSentimentClassification": 48.0,
+ "MTOPDomainClassification (en)": 89.99,
+ "MTOPDomainClassification (de)": 64.42,
+ "MTOPDomainClassification (es)": 63.98,
+ "MTOPDomainClassification (fr)": 68.37,
+ "MTOPDomainClassification (hi)": 21.64,
+ "MTOPDomainClassification (th)": 15.28,
+ "MTOPIntentClassification (en)": 58.5,
+ "MTOPIntentClassification (de)": 36.38,
+ "MTOPIntentClassification (es)": 37.06,
+ "MTOPIntentClassification (fr)": 35.98,
+ "MTOPIntentClassification (hi)": 3.76,
+ "MTOPIntentClassification (th)": 4.99,
+ "MasakhaNEWSClassification (amh)": 35.64,
+ "MasakhaNEWSClassification (eng)": 79.59,
+ "MasakhaNEWSClassification (fra)": 75.43,
+ "MasakhaNEWSClassification (hau)": 57.16,
+ "MasakhaNEWSClassification (ibo)": 52.74,
+ "MasakhaNEWSClassification (lin)": 67.2,
+ "MasakhaNEWSClassification (lug)": 53.23,
+ "MasakhaNEWSClassification (orm)": 52.22,
+ "MasakhaNEWSClassification (pcm)": 93.05,
+ "MasakhaNEWSClassification (run)": 57.64,
+ "MasakhaNEWSClassification (sna)": 71.17,
+ "MasakhaNEWSClassification (som)": 44.97,
+ "MasakhaNEWSClassification (swa)": 52.67,
+ "MasakhaNEWSClassification (tir)": 26.95,
+ "MasakhaNEWSClassification (xho)": 62.36,
+ "MasakhaNEWSClassification (yor)": 63.11,
"MassiveIntentClassification (ru)": 66.08,
+ "MassiveIntentClassification (pt)": 39.34,
+ "MassiveIntentClassification (fi)": 35.23,
+ "MassiveIntentClassification (hu)": 30.01,
+ "MassiveIntentClassification (da)": 39.46,
+ "MassiveIntentClassification (ja)": 4.66,
+ "MassiveIntentClassification (ur)": 2.45,
+ "MassiveIntentClassification (fr)": 39.59,
+ "MassiveIntentClassification (km)": 5.34,
+ "MassiveIntentClassification (te)": 2.39,
+ "MassiveIntentClassification (ka)": 2.69,
+ "MassiveIntentClassification (mn)": 32.23,
+ "MassiveIntentClassification (hi)": 3.21,
+ "MassiveIntentClassification (is)": 27.36,
+ "MassiveIntentClassification (ro)": 36.04,
+ "MassiveIntentClassification (az)": 20.84,
+ "MassiveIntentClassification (sw)": 33.68,
+ "MassiveIntentClassification (sq)": 35.67,
+ "MassiveIntentClassification (bn)": 3.74,
+ "MassiveIntentClassification (vi)": 23.65,
+ "MassiveIntentClassification (my)": 3.37,
+ "MassiveIntentClassification (th)": 3.87,
+ "MassiveIntentClassification (en)": 64.92,
+ "MassiveIntentClassification (ar)": 4.44,
+ "MassiveIntentClassification (kn)": 2.58,
+ "MassiveIntentClassification (pl)": 30.78,
+ "MassiveIntentClassification (tr)": 29.76,
+ "MassiveIntentClassification (hy)": 2.28,
+ "MassiveIntentClassification (fa)": 3.46,
+ "MassiveIntentClassification (de)": 37.67,
+ "MassiveIntentClassification (id)": 36.68,
+ "MassiveIntentClassification (ta)": 1.65,
+ "MassiveIntentClassification (nl)": 36.79,
+ "MassiveIntentClassification (tl)": 35.52,
+ "MassiveIntentClassification (am)": 2.29,
+ "MassiveIntentClassification (ko)": 2.25,
+ "MassiveIntentClassification (el)": 10.36,
+ "MassiveIntentClassification (he)": 2.31,
+ "MassiveIntentClassification (sl)": 36.95,
+ "MassiveIntentClassification (ml)": 2.56,
+ "MassiveIntentClassification (cy)": 35.08,
+ "MassiveIntentClassification (ms)": 32.99,
+ "MassiveIntentClassification (jv)": 33.21,
+ "MassiveIntentClassification (es)": 37.45,
+ "MassiveIntentClassification (lv)": 24.75,
+ "MassiveIntentClassification (sv)": 35.51,
+ "MassiveIntentClassification (zh-TW)": 4.56,
+ "MassiveIntentClassification (zh-CN)": 3.76,
+ "MassiveIntentClassification (it)": 42.88,
+ "MassiveIntentClassification (af)": 33.65,
+ "MassiveIntentClassification (nb)": 37.7,
"MassiveScenarioClassification (ru)": 71.13,
+ "MassiveScenarioClassification (kn)": 7.55,
+ "MassiveScenarioClassification (pt)": 47.33,
+ "MassiveScenarioClassification (tl)": 42.97,
+ "MassiveScenarioClassification (hy)": 8.46,
+ "MassiveScenarioClassification (hu)": 37.92,
+ "MassiveScenarioClassification (lv)": 29.04,
+ "MassiveScenarioClassification (sq)": 44.31,
+ "MassiveScenarioClassification (it)": 50.78,
+ "MassiveScenarioClassification (sv)": 41.57,
+ "MassiveScenarioClassification (zh-CN)": 10.17,
+ "MassiveScenarioClassification (ro)": 46.49,
+ "MassiveScenarioClassification (sl)": 42.08,
+ "MassiveScenarioClassification (bn)": 8.95,
+ "MassiveScenarioClassification (es)": 45.47,
+ "MassiveScenarioClassification (fa)": 7.15,
+ "MassiveScenarioClassification (hi)": 7.71,
+ "MassiveScenarioClassification (is)": 35.45,
+ "MassiveScenarioClassification (nl)": 44.99,
+ "MassiveScenarioClassification (ms)": 40.41,
+ "MassiveScenarioClassification (de)": 46.58,
+ "MassiveScenarioClassification (nb)": 43.87,
+ "MassiveScenarioClassification (id)": 42.1,
+ "MassiveScenarioClassification (ta)": 7.02,
+ "MassiveScenarioClassification (vi)": 31.76,
+ "MassiveScenarioClassification (th)": 8.74,
+ "MassiveScenarioClassification (te)": 7.29,
+ "MassiveScenarioClassification (he)": 7.65,
+ "MassiveScenarioClassification (en)": 71.06,
+ "MassiveScenarioClassification (zh-TW)": 10.21,
+ "MassiveScenarioClassification (fr)": 47.94,
+ "MassiveScenarioClassification (cy)": 43.03,
+ "MassiveScenarioClassification (km)": 8.92,
+ "MassiveScenarioClassification (ml)": 7.16,
+ "MassiveScenarioClassification (da)": 48.37,
+ "MassiveScenarioClassification (jv)": 41.0,
+ "MassiveScenarioClassification (sw)": 42.72,
+ "MassiveScenarioClassification (tr)": 35.96,
+ "MassiveScenarioClassification (am)": 7.12,
+ "MassiveScenarioClassification (az)": 27.25,
+ "MassiveScenarioClassification (el)": 17.96,
+ "MassiveScenarioClassification (af)": 42.41,
+ "MassiveScenarioClassification (ka)": 6.95,
+ "MassiveScenarioClassification (ko)": 5.61,
+ "MassiveScenarioClassification (fi)": 40.85,
+ "MassiveScenarioClassification (mn)": 36.93,
+ "MassiveScenarioClassification (my)": 10.47,
+ "MassiveScenarioClassification (ja)": 10.34,
+ "MassiveScenarioClassification (ur)": 8.63,
+ "MassiveScenarioClassification (ar)": 11.75,
+ "MassiveScenarioClassification (pl)": 36.2,
+ "NoRecClassification": 40.1,
+ "NordicLangClassification": 46.52,
+ "PAC": 69.41,
+ "PolEmo2.0-IN": 44.06,
+ "PolEmo2.0-OUT": 27.02,
"RuReviewsClassification": 64.58,
"RuSciBenchGRNTIClassification": 56.67,
- "RuSciBenchOECDClassification": 43.58
+ "RuSciBenchOECDClassification": 43.58,
+ "ToxicConversationsClassification": 64.28,
+ "TweetSentimentExtractionClassification": 63.49
}
]
},
@@ -23278,11 +42275,67 @@
"v_measure": [
{
"Model": "LaBSE-ru-turbo",
+ "AlloProfClusteringP2P": 58.25,
+ "AlloProfClusteringS2S": 23.39,
+ "ArxivClusteringP2P": 35.03,
+ "ArxivClusteringS2S": 23.08,
+ "BiorxivClusteringP2P": 31.48,
+ "BiorxivClusteringS2S": 20.71,
+ "BlurbsClusteringP2P": 26.37,
+ "BlurbsClusteringS2S": 10.26,
"GeoreviewClusteringP2P": 64.55,
+ "HALClusteringS2S": 14.97,
"MLSUMClusteringP2P (ru)": 45.7,
+ "MLSUMClusteringP2P (de)": 32.73,
+ "MLSUMClusteringP2P (fr)": 40.75,
+ "MLSUMClusteringP2P (es)": 41.11,
"MLSUMClusteringS2S (ru)": 42.93,
- "RuSciBenchGRNTIClusteringP2P": 50.64,
- "RuSciBenchOECDClusteringP2P": 44.48
+ "MLSUMClusteringS2S (de)": 34.6,
+ "MLSUMClusteringS2S (fr)": 40.44,
+ "MLSUMClusteringS2S (es)": 40.4,
+ "MasakhaNEWSClusteringP2P (amh)": 42.64,
+ "MasakhaNEWSClusteringP2P (eng)": 53.41,
+ "MasakhaNEWSClusteringP2P (fra)": 62.23,
+ "MasakhaNEWSClusteringP2P (hau)": 35.53,
+ "MasakhaNEWSClusteringP2P (ibo)": 34.19,
+ "MasakhaNEWSClusteringP2P (lin)": 50.83,
+ "MasakhaNEWSClusteringP2P (lug)": 42.65,
+ "MasakhaNEWSClusteringP2P (orm)": 29.3,
+ "MasakhaNEWSClusteringP2P (pcm)": 92.96,
+ "MasakhaNEWSClusteringP2P (run)": 51.6,
+ "MasakhaNEWSClusteringP2P (sna)": 47.86,
+ "MasakhaNEWSClusteringP2P (som)": 31.78,
+ "MasakhaNEWSClusteringP2P (swa)": 21.21,
+ "MasakhaNEWSClusteringP2P (tir)": 44.22,
+ "MasakhaNEWSClusteringP2P (xho)": 29.11,
+ "MasakhaNEWSClusteringP2P (yor)": 27.34,
+ "MasakhaNEWSClusteringS2S (amh)": 43.99,
+ "MasakhaNEWSClusteringS2S (eng)": 13.78,
+ "MasakhaNEWSClusteringS2S (fra)": 34.26,
+ "MasakhaNEWSClusteringS2S (hau)": 14.67,
+ "MasakhaNEWSClusteringS2S (ibo)": 32.77,
+ "MasakhaNEWSClusteringS2S (lin)": 48.41,
+ "MasakhaNEWSClusteringS2S (lug)": 42.65,
+ "MasakhaNEWSClusteringS2S (orm)": 25.07,
+ "MasakhaNEWSClusteringS2S (pcm)": 64.67,
+ "MasakhaNEWSClusteringS2S (run)": 44.37,
+ "MasakhaNEWSClusteringS2S (sna)": 46.43,
+ "MasakhaNEWSClusteringS2S (som)": 25.16,
+ "MasakhaNEWSClusteringS2S (swa)": 16.84,
+ "MasakhaNEWSClusteringS2S (tir)": 43.8,
+ "MasakhaNEWSClusteringS2S (xho)": 29.51,
+ "MasakhaNEWSClusteringS2S (yor)": 24.19,
+ "MedrxivClusteringP2P": 29.44,
+ "MedrxivClusteringS2S": 23.93,
+ "RedditClustering": 35.73,
+ "RedditClusteringP2P": 51.74,
+ "RuSciBenchGRNTIClusteringP2P": 51.69,
+ "RuSciBenchOECDClusteringP2P": 45.56,
+ "StackExchangeClustering": 45.45,
+ "StackExchangeClusteringP2P": 31.13,
+ "TenKGnadClusteringP2P": 37.43,
+ "TenKGnadClusteringS2S": 13.19,
+ "TwentyNewsgroupsClustering": 25.67
}
]
},
@@ -23290,13 +42343,51 @@
"max_ap": [
{
"Model": "LaBSE-ru-turbo",
+ "CDSC-E": 52.92,
+ "FalseFriendsGermanEnglish": 47.15,
"OpusparcusPC (ru)": 89.32,
- "TERRa": 57.81
+ "OpusparcusPC (de)": 91.59,
+ "OpusparcusPC (en)": 98.04,
+ "OpusparcusPC (fi)": 85.53,
+ "OpusparcusPC (fr)": 88.54,
+ "OpusparcusPC (sv)": 86.76,
+ "PSC": 85.45,
+ "PawsXPairClassification (de)": 51.09,
+ "PawsXPairClassification (en)": 60.74,
+ "PawsXPairClassification (es)": 52.25,
+ "PawsXPairClassification (fr)": 54.84,
+ "PawsXPairClassification (ja)": 47.44,
+ "PawsXPairClassification (ko)": 47.04,
+ "PawsXPairClassification (zh)": 50.49,
+ "SICK-E-PL": 47.79,
+ "SprintDuplicateQuestions": 94.26,
+ "TERRa": 57.81,
+ "TwitterSemEval2015": 64.76,
+ "TwitterURLCorpus": 84.18
},
{
"Model": "LaBSE-ru-turbo",
+ "CDSC-E": 53.23,
+ "FalseFriendsGermanEnglish": 47.15,
"OpusparcusPC (ru)": 89.32,
- "TERRa": 57.81
+ "OpusparcusPC (de)": 91.62,
+ "OpusparcusPC (en)": 98.07,
+ "OpusparcusPC (fi)": 85.58,
+ "OpusparcusPC (fr)": 88.54,
+ "OpusparcusPC (sv)": 86.76,
+ "PSC": 85.45,
+ "PawsXPairClassification (de)": 51.25,
+ "PawsXPairClassification (en)": 60.87,
+ "PawsXPairClassification (es)": 52.31,
+ "PawsXPairClassification (fr)": 54.98,
+ "PawsXPairClassification (ja)": 47.44,
+ "PawsXPairClassification (ko)": 47.25,
+ "PawsXPairClassification (zh)": 50.79,
+ "SICK-E-PL": 47.9,
+ "SprintDuplicateQuestions": 94.39,
+ "TERRa": 57.81,
+ "TwitterSemEval2015": 64.76,
+ "TwitterURLCorpus": 84.18
}
]
},
@@ -23304,11 +42395,35 @@
"map": [
{
"Model": "LaBSE-ru-turbo",
- "MIRACLReranking (ru)": 57.44
+ "AlloprofReranking": 41.6,
+ "AskUbuntuDupQuestions": 55.91,
+ "MindSmallReranking": 29.19,
+ "RuBQReranking": 68.65,
+ "SciDocsRR": 69.74,
+ "StackOverflowDupQuestions": 45.98,
+ "SyntecReranking": 53.82,
+ "T2Reranking": 52.42
},
{
"Model": "LaBSE-ru-turbo",
- "RuBQReranking": 68.65
+ "MIRACLReranking (ru)": 57.44,
+ "MIRACLReranking (ar)": 2.29,
+ "MIRACLReranking (bn)": 2.07,
+ "MIRACLReranking (de)": 22.14,
+ "MIRACLReranking (en)": 51.47,
+ "MIRACLReranking (es)": 27.98,
+ "MIRACLReranking (fa)": 3.18,
+ "MIRACLReranking (fi)": 31.28,
+ "MIRACLReranking (fr)": 26.29,
+ "MIRACLReranking (hi)": 2.99,
+ "MIRACLReranking (id)": 19.64,
+ "MIRACLReranking (ja)": 2.58,
+ "MIRACLReranking (ko)": 5.09,
+ "MIRACLReranking (sw)": 29.43,
+ "MIRACLReranking (te)": 3.13,
+ "MIRACLReranking (th)": 2.47,
+ "MIRACLReranking (yo)": 48.48,
+ "MIRACLReranking (zh)": 4.2
}
]
},
@@ -23316,9 +42431,143 @@
"ndcg_at_10": [
{
"Model": "LaBSE-ru-turbo",
+ "AILACasedocs": 19.7,
+ "AILAStatutes": 15.9,
+ "ARCChallenge": 7.14,
+ "AlloprofRetrieval": 10.08,
+ "AlphaNLI": 23.76,
+ "AppsRetrieval": 3.21,
+ "ArguAna": 46.57,
+ "BSARDRetrieval": 1.71,
+ "ClimateFEVER": 13.19,
+ "CmedqaRetrieval": 1.27,
+ "CodeFeedbackMT": 28.06,
+ "CodeFeedbackST": 52.83,
+ "CodeSearchNetCCRetrieval (python)": 46.49,
+ "CodeSearchNetCCRetrieval (javascript)": 48.94,
+ "CodeSearchNetCCRetrieval (go)": 32.67,
+ "CodeSearchNetCCRetrieval (ruby)": 47.35,
+ "CodeSearchNetCCRetrieval (java)": 35.98,
+ "CodeSearchNetCCRetrieval (php)": 28.61,
+ "CodeSearchNetRetrieval (python)": 71.17,
+ "CodeSearchNetRetrieval (javascript)": 52.91,
+ "CodeSearchNetRetrieval (go)": 70.52,
+ "CodeSearchNetRetrieval (ruby)": 60.79,
+ "CodeSearchNetRetrieval (java)": 42.15,
+ "CodeSearchNetRetrieval (php)": 55.98,
+ "CodeTransOceanContest": 41.2,
+ "CodeTransOceanDL": 26.26,
+ "CosQA": 17.94,
+ "CovidRetrieval": 0.06,
+ "DBPedia": 29.74,
+ "FEVER": 48.04,
+ "FiQA2018": 21.01,
+ "GerDaLIR": 1.06,
+ "GerDaLIRSmall": 2.92,
+ "GermanQuAD-Retrieval": 66.35,
+ "HellaSwag": 19.23,
+ "HotpotQA": 54.51,
+ "LEMBNarrativeQARetrieval": 13.6,
+ "LEMBQMSumRetrieval": 21.47,
+ "LEMBSummScreenFDRetrieval": 57.41,
+ "LEMBWikimQARetrieval": 39.41,
+ "LeCaRDv2": 9.86,
+ "LegalBenchConsumerContractsQA": 62.58,
+ "LegalBenchCorporateLobbying": 86.41,
+ "LegalQuAD": 11.82,
+ "LegalSummarization": 57.64,
"MIRACLRetrieval (ru)": 55.97,
+ "MIRACLRetrieval (ar)": 0.01,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 10.18,
+ "MIRACLRetrieval (en)": 41.09,
+ "MIRACLRetrieval (es)": 10.08,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 13.8,
+ "MIRACLRetrieval (fr)": 13.74,
+ "MIRACLRetrieval (hi)": 0.18,
+ "MIRACLRetrieval (id)": 7.11,
+ "MIRACLRetrieval (ja)": 0.0,
+ "MIRACLRetrieval (ko)": 0.92,
+ "MIRACLRetrieval (sw)": 19.66,
+ "MIRACLRetrieval (te)": 0.0,
+ "MIRACLRetrieval (th)": 0.07,
+ "MIRACLRetrieval (yo)": 38.76,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 27.51,
+ "MintakaRetrieval (ar)": 0.5,
+ "MintakaRetrieval (de)": 9.55,
+ "MintakaRetrieval (es)": 11.36,
+ "MintakaRetrieval (fr)": 12.44,
+ "MintakaRetrieval (hi)": 0.82,
+ "MintakaRetrieval (it)": 11.4,
+ "MintakaRetrieval (ja)": 1.14,
+ "MintakaRetrieval (pt)": 12.64,
+ "NFCorpus": 22.08,
+ "NQ": 35.87,
+ "PIQA": 16.31,
+ "Quail": 2.52,
+ "QuoraRetrieval": 85.71,
+ "RARbCode": 9.48,
+ "RARbMath": 46.36,
"RiaNewsRetrieval": 69.36,
- "RuBQRetrieval": 65.71
+ "RuBQRetrieval": 65.71,
+ "SCIDOCS": 11.7,
+ "SIQA": 1.68,
+ "SciFact": 50.49,
+ "SciFact-PL": 23.95,
+ "SpartQA": 3.89,
+ "StackOverflowQA": 53.06,
+ "SyntecRetrieval": 43.26,
+ "SyntheticText2SQL": 44.08,
+ "TRECCOVID": 30.52,
+ "TRECCOVID-PL": 6.45,
+ "TempReasonL1": 1.06,
+ "TempReasonL2Fact": 8.84,
+ "TempReasonL2Pure": 0.1,
+ "TempReasonL3Fact": 9.51,
+ "TempReasonL3Pure": 4.66,
+ "Touche2020": 14.08,
+ "WinoGrande": 17.92,
+ "XMarket (de)": 5.95,
+ "XMarket (en)": 9.81,
+ "XMarket (es)": 7.06,
+ "XPQARetrieval (ara-ara)": 4.71,
+ "XPQARetrieval (eng-ara)": 4.3,
+ "XPQARetrieval (ara-eng)": 8.38,
+ "XPQARetrieval (deu-deu)": 41.62,
+ "XPQARetrieval (eng-deu)": 12.76,
+ "XPQARetrieval (deu-eng)": 35.52,
+ "XPQARetrieval (spa-spa)": 31.42,
+ "XPQARetrieval (eng-spa)": 13.48,
+ "XPQARetrieval (spa-eng)": 27.67,
+ "XPQARetrieval (fra-fra)": 39.24,
+ "XPQARetrieval (eng-fra)": 15.96,
+ "XPQARetrieval (fra-eng)": 33.89,
+ "XPQARetrieval (hin-hin)": 6.89,
+ "XPQARetrieval (eng-hin)": 6.29,
+ "XPQARetrieval (hin-eng)": 7.6,
+ "XPQARetrieval (ita-ita)": 47.91,
+ "XPQARetrieval (eng-ita)": 13.39,
+ "XPQARetrieval (ita-eng)": 34.7,
+ "XPQARetrieval (jpn-jpn)": 6.29,
+ "XPQARetrieval (eng-jpn)": 3.14,
+ "XPQARetrieval (jpn-eng)": 8.38,
+ "XPQARetrieval (kor-kor)": 2.51,
+ "XPQARetrieval (eng-kor)": 4.89,
+ "XPQARetrieval (kor-eng)": 3.54,
+ "XPQARetrieval (pol-pol)": 23.18,
+ "XPQARetrieval (eng-pol)": 9.8,
+ "XPQARetrieval (pol-eng)": 20.13,
+ "XPQARetrieval (por-por)": 27.19,
+ "XPQARetrieval (eng-por)": 10.36,
+ "XPQARetrieval (por-eng)": 22.76,
+ "XPQARetrieval (tam-tam)": 3.02,
+ "XPQARetrieval (eng-tam)": 5.27,
+ "XPQARetrieval (tam-eng)": 3.51,
+ "XPQARetrieval (cmn-cmn)": 14.0,
+ "XPQARetrieval (eng-cmn)": 7.68,
+ "XPQARetrieval (cmn-eng)": 13.48
}
]
},
@@ -23326,27 +42575,149 @@
"cosine_spearman": [
{
"Model": "LaBSE-ru-turbo",
+ "BIOSSES": 80.3,
+ "CDSC-R": 75.79,
+ "GermanSTSBenchmark": 61.54,
+ "SICK-R": 75.57,
+ "SICK-R-PL": 50.86,
+ "SICKFr": 64.97,
+ "STS12": 72.45,
+ "STS13": 78.98,
+ "STS14": 76.7,
+ "STS15": 85.62,
+ "STS16": 79.93,
+ "STS17 (fr-en)": 39.61,
+ "STS17 (en-en)": 86.49,
+ "STS17 (es-en)": 36.65,
+ "STS17 (it-en)": 23.36,
+ "STS17 (en-de)": 33.58,
+ "STS17 (nl-en)": 32.13,
+ "STS17 (ar-ar)": 14.47,
+ "STS17 (ko-ko)": 8.95,
+ "STS17 (en-ar)": 1.3,
+ "STS17 (es-es)": 71.77,
+ "STS17 (en-tr)": -9.64,
+ "STS22 (ru)": 67.57,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (de-pl)": 36.4,
+ "STS22 (de-en)": 44.31,
+ "STS22 (fr)": 74.66,
+ "STS22 (en)": 63.1,
+ "STS22 (es-en)": 68.55,
+ "STS22 (es-it)": 56.99,
+ "STS22 (zh-en)": 33.76,
+ "STS22 (tr)": 47.83,
+ "STS22 (es)": 57.06,
+ "STS22 (pl)": 29.24,
+ "STS22 (zh)": 33.15,
+ "STS22 (de-fr)": 48.38,
+ "STS22 (ar)": 29.74,
+ "STS22 (pl-en)": 65.09,
+ "STS22 (de)": 32.62,
+ "STS22 (it)": 66.02,
+ "STSB": 8.82,
+ "STSBenchmark": 81.82,
+ "STSBenchmarkMultilingualSTS (de)": 63.22,
+ "STSBenchmarkMultilingualSTS (es)": 57.44,
+ "STSBenchmarkMultilingualSTS (en)": 81.82,
+ "STSBenchmarkMultilingualSTS (pl)": 53.2,
+ "STSBenchmarkMultilingualSTS (nl)": 53.16,
+ "STSBenchmarkMultilingualSTS (it)": 63.22,
+ "STSBenchmarkMultilingualSTS (pt)": 54.98,
+ "STSBenchmarkMultilingualSTS (fr)": 64.24,
+ "STSBenchmarkMultilingualSTS (zh)": 10.91
+ },
+ {
+ "Model": "LaBSE-ru-turbo",
+ "BIOSSES": 80.3,
+ "CDSC-R": 75.79,
+ "GermanSTSBenchmark": 61.54,
"RUParaPhraserSTS": 72.97,
"RuSTSBenchmarkSTS": 81.77,
- "STS22 (ru)": 62.89,
- "STSBenchmarkMultilingualSTS (ru)": 81.81
+ "SICK-R": 75.57,
+ "SICK-R-PL": 50.85,
+ "SICKFr": 64.97,
+ "STS12": 72.45,
+ "STS13": 78.98,
+ "STS14": 76.7,
+ "STS15": 85.62,
+ "STS16": 79.93,
+ "STS17 (fr-en)": 39.61,
+ "STS17 (en-en)": 86.49,
+ "STS17 (es-en)": 36.65,
+ "STS17 (it-en)": 23.36,
+ "STS17 (en-de)": 33.58,
+ "STS17 (nl-en)": 32.13,
+ "STS17 (ar-ar)": 13.11,
+ "STS17 (ko-ko)": 8.96,
+ "STS17 (en-ar)": 1.3,
+ "STS17 (es-es)": 71.77,
+ "STS17 (en-tr)": -9.64,
+ "STS22 (ru)": 67.57,
+ "STS22 (fr-pl)": 73.25,
+ "STS22 (de-pl)": 36.4,
+ "STS22 (de-en)": 44.31,
+ "STS22 (fr)": 74.66,
+ "STS22 (en)": 63.1,
+ "STS22 (es-en)": 68.55,
+ "STS22 (es-it)": 56.99,
+ "STS22 (zh-en)": 33.76,
+ "STS22 (tr)": 47.83,
+ "STS22 (es)": 57.06,
+ "STS22 (pl)": 29.44,
+ "STS22 (zh)": 33.15,
+ "STS22 (de-fr)": 48.38,
+ "STS22 (ar)": 29.72,
+ "STS22 (pl-en)": 65.09,
+ "STS22 (de)": 32.62,
+ "STS22 (it)": 66.02,
+ "STSB": 8.79,
+ "STSBenchmark": 81.82,
+ "STSBenchmarkMultilingualSTS (ru)": 81.81,
+ "STSBenchmarkMultilingualSTS (de)": 63.22,
+ "STSBenchmarkMultilingualSTS (es)": 57.44,
+ "STSBenchmarkMultilingualSTS (en)": 81.82,
+ "STSBenchmarkMultilingualSTS (pl)": 53.2,
+ "STSBenchmarkMultilingualSTS (nl)": 53.16,
+ "STSBenchmarkMultilingualSTS (it)": 63.22,
+ "STSBenchmarkMultilingualSTS (pt)": 54.98,
+ "STSBenchmarkMultilingualSTS (fr)": 64.24,
+ "STSBenchmarkMultilingualSTS (zh)": 10.83
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "LaBSE-ru-turbo",
+ "SummEval": 30.12,
+ "SummEvalFr": 28.7
+ },
+ {
+ "Model": "LaBSE-ru-turbo",
+ "SummEval": 30.12,
+ "SummEvalFr": 28.7
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
{
"Model": "LaBSE-ru-turbo",
"CEDRClassification": 45.11,
- "SensitiveTopicsClassification": 27.52
+ "SensitiveTopicsClassification": 28.75
}
]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "LaBSE-ru-turbo",
+ "Core17InstructionRetrieval": 0.29,
+ "News21InstructionRetrieval": -0.47,
+ "Robust04InstructionRetrieval": -10.4
+ }
+ ]
}
},
"sergeyzh__rubert-tiny-turbo": {
@@ -23354,6 +42725,7 @@
"f1": [
{
"Model": "rubert-tiny-turbo",
+ "BornholmBitextMining": 42.96,
"Tatoeba (rus-eng)": 83.14
}
]
@@ -23362,14 +42734,57 @@
"accuracy": [
{
"Model": "rubert-tiny-turbo",
+ "AllegroReviews": 25.4,
+ "AmazonCounterfactualClassification (en-ext)": 69.94,
+ "AmazonCounterfactualClassification (en)": 71.04,
+ "AmazonCounterfactualClassification (de)": 61.07,
+ "AmazonCounterfactualClassification (ja)": 52.16,
"AmazonPolarityClassification": 68.36,
+ "AmazonReviewsClassification (en)": 36.73,
+ "AmazonReviewsClassification (de)": 27.69,
+ "AmazonReviewsClassification (es)": 30.91,
+ "AmazonReviewsClassification (fr)": 27.89,
+ "AmazonReviewsClassification (ja)": 20.84,
+ "AmazonReviewsClassification (zh)": 20.9,
+ "AngryTweetsClassification": 44.74,
"Banking77Classification": 59.86,
+ "CBD": 54.03,
+ "DanishPoliticalCommentsClassification": 27.44,
"EmotionClassification": 29.5,
"GeoreviewClassification": 41.36,
"HeadlineClassification": 68.9,
"ImdbClassification": 58.36,
"InappropriatenessClassification": 59.11,
"KinopoiskClassification": 50.47,
+ "LccSentimentClassification": 36.2,
+ "MTOPDomainClassification (en)": 78.49,
+ "MTOPDomainClassification (de)": 55.38,
+ "MTOPDomainClassification (es)": 54.62,
+ "MTOPDomainClassification (fr)": 62.52,
+ "MTOPDomainClassification (hi)": 21.52,
+ "MTOPDomainClassification (th)": 16.44,
+ "MTOPIntentClassification (en)": 42.46,
+ "MTOPIntentClassification (de)": 31.78,
+ "MTOPIntentClassification (es)": 28.4,
+ "MTOPIntentClassification (fr)": 31.28,
+ "MTOPIntentClassification (hi)": 2.9,
+ "MTOPIntentClassification (th)": 4.78,
+ "MasakhaNEWSClassification (amh)": 31.09,
+ "MasakhaNEWSClassification (eng)": 64.06,
+ "MasakhaNEWSClassification (fra)": 38.6,
+ "MasakhaNEWSClassification (hau)": 37.6,
+ "MasakhaNEWSClassification (ibo)": 30.33,
+ "MasakhaNEWSClassification (lin)": 57.89,
+ "MasakhaNEWSClassification (lug)": 38.79,
+ "MasakhaNEWSClassification (orm)": 34.65,
+ "MasakhaNEWSClassification (pcm)": 85.7,
+ "MasakhaNEWSClassification (run)": 37.95,
+ "MasakhaNEWSClassification (sna)": 54.58,
+ "MasakhaNEWSClassification (som)": 29.05,
+ "MasakhaNEWSClassification (swa)": 30.15,
+ "MasakhaNEWSClassification (tir)": 24.67,
+ "MasakhaNEWSClassification (xho)": 43.37,
+ "MasakhaNEWSClassification (yor)": 36.25,
"MassiveIntentClassification (zh-CN)": 5.21,
"MassiveIntentClassification (ko)": 2.53,
"MassiveIntentClassification (hi)": 2.56,
@@ -23472,6 +42887,11 @@
"MassiveScenarioClassification (he)": 7.61,
"MassiveScenarioClassification (nl)": 40.94,
"MassiveScenarioClassification (km)": 8.51,
+ "NoRecClassification": 40.31,
+ "NordicLangClassification": 49.9,
+ "PAC": 59.88,
+ "PolEmo2.0-IN": 38.1,
+ "PolEmo2.0-OUT": 23.74,
"RuReviewsClassification": 60.66,
"RuSciBenchGRNTIClassification": 52.93,
"RuSciBenchOECDClassification": 40.79,
@@ -23484,21 +42904,66 @@
"v_measure": [
{
"Model": "rubert-tiny-turbo",
+ "AlloProfClusteringP2P": 34.87,
+ "AlloProfClusteringS2S": 22.73,
"ArxivClusteringP2P": 24.83,
"ArxivClusteringS2S": 16.68,
"BiorxivClusteringP2P": 20.0,
"BiorxivClusteringS2S": 12.67,
+ "BlurbsClusteringP2P": 12.49,
+ "BlurbsClusteringS2S": 8.97,
"GeoreviewClusteringP2P": 59.71,
+ "HALClusteringS2S": 5.84,
"MLSUMClusteringP2P (ru)": 40.02,
+ "MLSUMClusteringP2P (de)": 11.49,
+ "MLSUMClusteringP2P (fr)": 30.26,
+ "MLSUMClusteringP2P (es)": 34.16,
"MLSUMClusteringS2S (ru)": 41.36,
+ "MLSUMClusteringS2S (de)": 11.17,
+ "MLSUMClusteringS2S (fr)": 29.34,
+ "MLSUMClusteringS2S (es)": 33.25,
+ "MasakhaNEWSClusteringP2P (amh)": 40.23,
+ "MasakhaNEWSClusteringP2P (eng)": 5.19,
+ "MasakhaNEWSClusteringP2P (fra)": 26.13,
+ "MasakhaNEWSClusteringP2P (hau)": 10.18,
+ "MasakhaNEWSClusteringP2P (ibo)": 21.87,
+ "MasakhaNEWSClusteringP2P (lin)": 42.77,
+ "MasakhaNEWSClusteringP2P (lug)": 42.51,
+ "MasakhaNEWSClusteringP2P (orm)": 21.39,
+ "MasakhaNEWSClusteringP2P (pcm)": 51.79,
+ "MasakhaNEWSClusteringP2P (run)": 42.72,
+ "MasakhaNEWSClusteringP2P (sna)": 43.34,
+ "MasakhaNEWSClusteringP2P (som)": 23.5,
+ "MasakhaNEWSClusteringP2P (swa)": 4.66,
+ "MasakhaNEWSClusteringP2P (tir)": 45.67,
+ "MasakhaNEWSClusteringP2P (xho)": 22.17,
+ "MasakhaNEWSClusteringP2P (yor)": 23.26,
+ "MasakhaNEWSClusteringS2S (amh)": 44.56,
+ "MasakhaNEWSClusteringS2S (eng)": 24.45,
+ "MasakhaNEWSClusteringS2S (fra)": 25.43,
+ "MasakhaNEWSClusteringS2S (hau)": 6.46,
+ "MasakhaNEWSClusteringS2S (ibo)": 34.22,
+ "MasakhaNEWSClusteringS2S (lin)": 44.27,
+ "MasakhaNEWSClusteringS2S (lug)": 43.53,
+ "MasakhaNEWSClusteringS2S (orm)": 26.63,
+ "MasakhaNEWSClusteringS2S (pcm)": 58.95,
+ "MasakhaNEWSClusteringS2S (run)": 47.69,
+ "MasakhaNEWSClusteringS2S (sna)": 42.32,
+ "MasakhaNEWSClusteringS2S (som)": 23.51,
+ "MasakhaNEWSClusteringS2S (swa)": 15.74,
+ "MasakhaNEWSClusteringS2S (tir)": 48.37,
+ "MasakhaNEWSClusteringS2S (xho)": 24.53,
+ "MasakhaNEWSClusteringS2S (yor)": 24.7,
"MedrxivClusteringP2P": 20.79,
"MedrxivClusteringS2S": 18.18,
"RedditClustering": 26.28,
"RedditClusteringP2P": 40.48,
- "RuSciBenchGRNTIClusteringP2P": 47.55,
- "RuSciBenchOECDClusteringP2P": 41.44,
+ "RuSciBenchGRNTIClusteringP2P": 48.1,
+ "RuSciBenchOECDClusteringP2P": 41.06,
"StackExchangeClustering": 33.51,
"StackExchangeClusteringP2P": 27.98,
+ "TenKGnadClusteringP2P": 8.65,
+ "TenKGnadClusteringS2S": 9.77,
"TwentyNewsgroupsClustering": 19.9
}
]
@@ -23507,13 +42972,51 @@
"max_ap": [
{
"Model": "rubert-tiny-turbo",
+ "CDSC-E": 48.4,
+ "FalseFriendsGermanEnglish": 47.21,
"OpusparcusPC (ru)": 87.58,
- "TERRa": 56.09
+ "OpusparcusPC (de)": 91.25,
+ "OpusparcusPC (en)": 97.27,
+ "OpusparcusPC (fi)": 84.56,
+ "OpusparcusPC (fr)": 85.13,
+ "OpusparcusPC (sv)": 84.27,
+ "PSC": 72.63,
+ "PawsXPairClassification (de)": 50.85,
+ "PawsXPairClassification (en)": 49.09,
+ "PawsXPairClassification (es)": 50.07,
+ "PawsXPairClassification (fr)": 51.42,
+ "PawsXPairClassification (ja)": 48.03,
+ "PawsXPairClassification (ko)": 47.09,
+ "PawsXPairClassification (zh)": 51.98,
+ "SICK-E-PL": 48.51,
+ "SprintDuplicateQuestions": 91.94,
+ "TERRa": 56.09,
+ "TwitterSemEval2015": 56.87,
+ "TwitterURLCorpus": 79.67
},
{
"Model": "rubert-tiny-turbo",
+ "CDSC-E": 48.65,
+ "FalseFriendsGermanEnglish": 47.33,
"OpusparcusPC (ru)": 87.58,
- "TERRa": 56.27
+ "OpusparcusPC (de)": 91.25,
+ "OpusparcusPC (en)": 97.28,
+ "OpusparcusPC (fi)": 84.56,
+ "OpusparcusPC (fr)": 85.13,
+ "OpusparcusPC (sv)": 84.27,
+ "PSC": 72.63,
+ "PawsXPairClassification (de)": 51.04,
+ "PawsXPairClassification (en)": 49.11,
+ "PawsXPairClassification (es)": 50.07,
+ "PawsXPairClassification (fr)": 51.43,
+ "PawsXPairClassification (ja)": 48.38,
+ "PawsXPairClassification (ko)": 47.29,
+ "PawsXPairClassification (zh)": 52.38,
+ "SICK-E-PL": 48.72,
+ "SprintDuplicateQuestions": 91.94,
+ "TERRa": 56.27,
+ "TwitterSemEval2015": 56.87,
+ "TwitterURLCorpus": 79.67
}
]
},
@@ -23521,11 +43024,35 @@
"map": [
{
"Model": "rubert-tiny-turbo",
- "MIRACLReranking (ru)": 47.73
+ "AlloprofReranking": 30.29,
+ "AskUbuntuDupQuestions": 49.48,
+ "MindSmallReranking": 29.14,
+ "RuBQReranking": 62.15,
+ "SciDocsRR": 59.49,
+ "StackOverflowDupQuestions": 37.97,
+ "SyntecReranking": 40.01,
+ "T2Reranking": 51.61
},
{
"Model": "rubert-tiny-turbo",
- "RuBQReranking": 62.15
+ "MIRACLReranking (ru)": 47.73,
+ "MIRACLReranking (ar)": 2.02,
+ "MIRACLReranking (bn)": 1.54,
+ "MIRACLReranking (de)": 12.08,
+ "MIRACLReranking (en)": 28.98,
+ "MIRACLReranking (es)": 14.02,
+ "MIRACLReranking (fa)": 3.73,
+ "MIRACLReranking (fi)": 23.27,
+ "MIRACLReranking (fr)": 10.09,
+ "MIRACLReranking (hi)": 3.65,
+ "MIRACLReranking (id)": 15.61,
+ "MIRACLReranking (ja)": 2.32,
+ "MIRACLReranking (ko)": 6.19,
+ "MIRACLReranking (sw)": 21.3,
+ "MIRACLReranking (te)": 1.5,
+ "MIRACLReranking (th)": 2.19,
+ "MIRACLReranking (yo)": 28.48,
+ "MIRACLReranking (zh)": 2.83
}
]
},
@@ -23536,13 +43063,140 @@
"AILACasedocs": 7.43,
"AILAStatutes": 13.62,
"ARCChallenge": 3.85,
+ "AlloprofRetrieval": 0.94,
"AlphaNLI": 14.15,
+ "AppsRetrieval": 1.25,
"ArguAna": 32.03,
+ "BSARDRetrieval": 1.06,
"ClimateFEVER": 5.56,
+ "CmedqaRetrieval": 1.14,
+ "CodeFeedbackMT": 18.67,
+ "CodeFeedbackST": 25.37,
+ "CodeSearchNetCCRetrieval (python)": 32.45,
+ "CodeSearchNetCCRetrieval (javascript)": 27.6,
+ "CodeSearchNetCCRetrieval (go)": 17.14,
+ "CodeSearchNetCCRetrieval (ruby)": 37.08,
+ "CodeSearchNetCCRetrieval (java)": 23.17,
+ "CodeSearchNetCCRetrieval (php)": 17.53,
+ "CodeSearchNetRetrieval (python)": 47.77,
+ "CodeSearchNetRetrieval (javascript)": 24.46,
+ "CodeSearchNetRetrieval (go)": 34.78,
+ "CodeSearchNetRetrieval (ruby)": 39.47,
+ "CodeSearchNetRetrieval (java)": 20.77,
+ "CodeSearchNetRetrieval (php)": 30.9,
+ "CodeTransOceanContest": 23.44,
+ "CodeTransOceanDL": 32.03,
+ "CosQA": 7.22,
+ "CovidRetrieval": 0.04,
"DBPedia": 9.61,
+ "FEVER": 16.42,
+ "FiQA2018": 7.18,
+ "GerDaLIR": 0.13,
+ "GerDaLIRSmall": 0.34,
+ "GermanQuAD-Retrieval": 19.17,
+ "HellaSwag": 10.6,
+ "HotpotQA": 29.51,
+ "LEMBNarrativeQARetrieval": 7.26,
+ "LEMBQMSumRetrieval": 19.0,
+ "LEMBSummScreenFDRetrieval": 59.64,
+ "LEMBWikimQARetrieval": 33.38,
+ "LeCaRDv2": 8.54,
+ "LegalBenchConsumerContractsQA": 35.72,
+ "LegalBenchCorporateLobbying": 70.01,
+ "LegalQuAD": 3.11,
+ "LegalSummarization": 45.44,
"MIRACLRetrieval (ru)": 37.07,
+ "MIRACLRetrieval (ar)": 0.0,
+ "MIRACLRetrieval (bn)": 0.0,
+ "MIRACLRetrieval (de)": 1.52,
+ "MIRACLRetrieval (en)": 12.01,
+ "MIRACLRetrieval (es)": 1.63,
+ "MIRACLRetrieval (fa)": 0.0,
+ "MIRACLRetrieval (fi)": 4.14,
+ "MIRACLRetrieval (fr)": 1.76,
+ "MIRACLRetrieval (hi)": 0.0,
+ "MIRACLRetrieval (id)": 3.51,
+ "MIRACLRetrieval (ja)": 0.02,
+ "MIRACLRetrieval (ko)": 1.41,
+ "MIRACLRetrieval (sw)": 9.46,
+ "MIRACLRetrieval (te)": 0.04,
+ "MIRACLRetrieval (th)": 0.04,
+ "MIRACLRetrieval (yo)": 13.83,
+ "MIRACLRetrieval (zh)": 0.0,
+ "MSMARCO": 5.79,
+ "MintakaRetrieval (ar)": 0.52,
+ "MintakaRetrieval (de)": 10.58,
+ "MintakaRetrieval (es)": 9.24,
+ "MintakaRetrieval (fr)": 9.63,
+ "MintakaRetrieval (hi)": 1.03,
+ "MintakaRetrieval (it)": 9.67,
+ "MintakaRetrieval (ja)": 1.04,
+ "MintakaRetrieval (pt)": 11.61,
+ "NFCorpus": 9.19,
+ "NQ": 4.96,
+ "PIQA": 6.27,
+ "Quail": 1.11,
+ "QuoraRetrieval": 73.99,
+ "RARbCode": 1.47,
+ "RARbMath": 26.36,
"RiaNewsRetrieval": 51.27,
- "RuBQRetrieval": 51.73
+ "RuBQRetrieval": 51.73,
+ "SCIDOCS": 4.07,
+ "SIQA": 0.84,
+ "SciFact": 27.32,
+ "SciFact-PL": 7.26,
+ "SpartQA": 6.31,
+ "StackOverflowQA": 31.97,
+ "SyntecRetrieval": 14.16,
+ "SyntheticText2SQL": 31.4,
+ "TRECCOVID": 17.53,
+ "TRECCOVID-PL": 5.64,
+ "TempReasonL1": 0.77,
+ "TempReasonL2Fact": 6.72,
+ "TempReasonL2Pure": 0.1,
+ "TempReasonL3Fact": 7.0,
+ "TempReasonL3Pure": 4.88,
+ "Touche2020": 4.43,
+ "WinoGrande": 27.9,
+ "XMarket (de)": 2.51,
+ "XMarket (en)": 3.46,
+ "XMarket (es)": 2.32,
+ "XPQARetrieval (ara-ara)": 3.95,
+ "XPQARetrieval (eng-ara)": 3.16,
+ "XPQARetrieval (ara-eng)": 5.85,
+ "XPQARetrieval (deu-deu)": 27.11,
+ "XPQARetrieval (eng-deu)": 5.17,
+ "XPQARetrieval (deu-eng)": 15.11,
+ "XPQARetrieval (spa-spa)": 22.36,
+ "XPQARetrieval (eng-spa)": 5.92,
+ "XPQARetrieval (spa-eng)": 13.22,
+ "XPQARetrieval (fra-fra)": 25.41,
+ "XPQARetrieval (eng-fra)": 6.09,
+ "XPQARetrieval (fra-eng)": 16.73,
+ "XPQARetrieval (hin-hin)": 6.3,
+ "XPQARetrieval (eng-hin)": 4.45,
+ "XPQARetrieval (hin-eng)": 5.13,
+ "XPQARetrieval (ita-ita)": 38.08,
+ "XPQARetrieval (eng-ita)": 6.83,
+ "XPQARetrieval (ita-eng)": 17.7,
+ "XPQARetrieval (jpn-jpn)": 5.73,
+ "XPQARetrieval (eng-jpn)": 2.03,
+ "XPQARetrieval (jpn-eng)": 5.5,
+ "XPQARetrieval (kor-kor)": 2.18,
+ "XPQARetrieval (eng-kor)": 3.99,
+ "XPQARetrieval (kor-eng)": 2.51,
+ "XPQARetrieval (pol-pol)": 14.8,
+ "XPQARetrieval (eng-pol)": 5.32,
+ "XPQARetrieval (pol-eng)": 12.01,
+ "XPQARetrieval (por-por)": 18.48,
+ "XPQARetrieval (eng-por)": 5.05,
+ "XPQARetrieval (por-eng)": 11.7,
+ "XPQARetrieval (tam-tam)": 2.8,
+ "XPQARetrieval (eng-tam)": 3.71,
+ "XPQARetrieval (tam-eng)": 2.73,
+ "XPQARetrieval (cmn-cmn)": 13.08,
+ "XPQARetrieval (eng-cmn)": 3.63,
+ "XPQARetrieval (cmn-eng)": 8.65
}
]
},
@@ -23550,12 +43204,71 @@
"cosine_spearman": [
{
"Model": "rubert-tiny-turbo",
+ "BIOSSES": 72.48,
+ "CDSC-R": 73.78,
+ "GermanSTSBenchmark": 57.5,
+ "SICK-R": 68.43,
+ "SICK-R-PL": 48.95,
+ "SICKFr": 57.71,
+ "STS12": 66.17,
+ "STS13": 60.34,
+ "STS14": 66.12,
+ "STS15": 77.41,
+ "STS16": 73.3,
+ "STS17 (ko-ko)": 10.27,
+ "STS17 (ar-ar)": 14.32,
+ "STS17 (en-ar)": 9.83,
+ "STS17 (en-de)": 25.27,
+ "STS17 (it-en)": 23.96,
+ "STS17 (es-es)": 69.98,
+ "STS17 (en-tr)": 2.36,
+ "STS17 (en-en)": 76.02,
+ "STS17 (es-en)": 22.68,
+ "STS17 (nl-en)": 20.77,
+ "STS17 (fr-en)": 21.97,
+ "STS22 (ru)": 64.56,
+ "STSB": 15.61,
+ "STSBenchmark": 68.97,
+ "STSBenchmarkMultilingualSTS (pl)": 53.05,
+ "STSBenchmarkMultilingualSTS (es)": 53.79,
+ "STSBenchmarkMultilingualSTS (zh)": 15.18,
+ "STSBenchmarkMultilingualSTS (fr)": 58.44,
+ "STSBenchmarkMultilingualSTS (it)": 56.09,
+ "STSBenchmarkMultilingualSTS (pt)": 48.9,
+ "STSBenchmarkMultilingualSTS (de)": 59.77,
+ "STSBenchmarkMultilingualSTS (en)": 68.97,
+ "STSBenchmarkMultilingualSTS (nl)": 55.13
+ },
+ {
+ "Model": "rubert-tiny-turbo",
+ "BIOSSES": 72.48,
+ "CDSC-R": 73.78,
+ "GermanSTSBenchmark": 57.5,
"RUParaPhraserSTS": 72.15,
"RuSTSBenchmarkSTS": 78.48,
+ "SICK-R": 68.43,
+ "SICK-R-PL": 48.95,
+ "SICKFr": 57.71,
+ "STS12": 66.17,
+ "STS13": 60.34,
+ "STS14": 66.12,
+ "STS15": 77.41,
+ "STS16": 73.3,
+ "STS17 (ko-ko)": 10.11,
+ "STS17 (ar-ar)": 16.06,
+ "STS17 (en-ar)": 9.82,
+ "STS17 (en-de)": 25.27,
+ "STS17 (it-en)": 23.96,
+ "STS17 (es-es)": 69.98,
+ "STS17 (en-tr)": 2.36,
+ "STS17 (en-en)": 76.02,
+ "STS17 (es-en)": 22.68,
+ "STS17 (nl-en)": 20.77,
+ "STS17 (fr-en)": 21.97,
"STS22 (zh)": 32.83,
"STS22 (de-fr)": 17.5,
"STS22 (pl-en)": 42.08,
- "STS22 (ru)": 60.06,
+ "STS22 (ru)": 64.56,
"STS22 (fr)": 42.0,
"STS22 (de)": 8.16,
"STS22 (tr)": 15.46,
@@ -23570,24 +43283,53 @@
"STS22 (es)": 45.31,
"STS22 (zh-en)": 31.25,
"STS22 (en)": 47.06,
- "STSBenchmarkMultilingualSTS (ru)": 78.12
+ "STSB": 15.57,
+ "STSBenchmark": 68.97,
+ "STSBenchmarkMultilingualSTS (ru)": 78.12,
+ "STSBenchmarkMultilingualSTS (pl)": 53.04,
+ "STSBenchmarkMultilingualSTS (es)": 53.79,
+ "STSBenchmarkMultilingualSTS (zh)": 15.17,
+ "STSBenchmarkMultilingualSTS (fr)": 58.44,
+ "STSBenchmarkMultilingualSTS (it)": 56.09,
+ "STSBenchmarkMultilingualSTS (pt)": 48.9,
+ "STSBenchmarkMultilingualSTS (de)": 59.76,
+ "STSBenchmarkMultilingualSTS (en)": 68.97,
+ "STSBenchmarkMultilingualSTS (nl)": 55.13
}
]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "rubert-tiny-turbo",
+ "SummEval": 31.12,
+ "SummEvalFr": 30.87
+ },
+ {
+ "Model": "rubert-tiny-turbo",
+ "SummEval": 31.12,
+ "SummEvalFr": 30.87
+ }
+ ]
},
"MultilabelClassification": {
"accuracy": [
{
"Model": "rubert-tiny-turbo",
"CEDRClassification": 38.95,
- "SensitiveTopicsClassification": 24.44
+ "SensitiveTopicsClassification": 25.16
+ }
+ ]
+ },
+ "InstructionRetrieval": {
+ "p-MRR": [
+ {
+ "Model": "rubert-tiny-turbo",
+ "Core17InstructionRetrieval": -4.06,
+ "News21InstructionRetrieval": 1.39,
+ "Robust04InstructionRetrieval": -4.16
}
]
- },
- "InstructionRetrieval": {
- "p-MRR": []
}
},
"shibing624__text2vec-base-chinese": {
@@ -23688,18 +43430,171 @@
},
"shibing624__text2vec-base-multilingual": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "text2vec-base-multilingual",
+ "BornholmBitextMining": 17.43,
+ "Tatoeba (zsm-eng)": 94.48,
+ "Tatoeba (ang-eng)": 9.91,
+ "Tatoeba (swh-eng)": 14.69,
+ "Tatoeba (afr-eng)": 53.85,
+ "Tatoeba (lfn-eng)": 42.06,
+ "Tatoeba (hsb-eng)": 36.15,
+ "Tatoeba (fra-eng)": 91.29,
+ "Tatoeba (tzl-eng)": 25.48,
+ "Tatoeba (rus-eng)": 91.73,
+ "Tatoeba (khm-eng)": 27.12,
+ "Tatoeba (mar-eng)": 91.27,
+ "Tatoeba (cmn-eng)": 93.67,
+ "Tatoeba (mon-eng)": 91.7,
+ "Tatoeba (nld-eng)": 94.35,
+ "Tatoeba (cat-eng)": 93.48,
+ "Tatoeba (ido-eng)": 32.99,
+ "Tatoeba (ell-eng)": 95.23,
+ "Tatoeba (bre-eng)": 4.44,
+ "Tatoeba (hye-eng)": 92.68,
+ "Tatoeba (dtp-eng)": 3.32,
+ "Tatoeba (hrv-eng)": 96.07,
+ "Tatoeba (ita-eng)": 92.18,
+ "Tatoeba (kur-eng)": 43.05,
+ "Tatoeba (tgl-eng)": 9.49,
+ "Tatoeba (kzj-eng)": 3.99,
+ "Tatoeba (arq-eng)": 15.17,
+ "Tatoeba (kab-eng)": 0.86,
+ "Tatoeba (ind-eng)": 91.57,
+ "Tatoeba (slv-eng)": 97.25,
+ "Tatoeba (isl-eng)": 20.93,
+ "Tatoeba (vie-eng)": 94.48,
+ "Tatoeba (arz-eng)": 46.38,
+ "Tatoeba (war-eng)": 5.47,
+ "Tatoeba (srp-eng)": 91.77,
+ "Tatoeba (hin-eng)": 97.17,
+ "Tatoeba (pam-eng)": 3.32,
+ "Tatoeba (tat-eng)": 6.16,
+ "Tatoeba (nov-eng)": 46.38,
+ "Tatoeba (ron-eng)": 94.22,
+ "Tatoeba (jpn-eng)": 88.66,
+ "Tatoeba (por-eng)": 92.58,
+ "Tatoeba (ina-eng)": 73.14,
+ "Tatoeba (deu-eng)": 96.03,
+ "Tatoeba (yue-eng)": 54.38,
+ "Tatoeba (swe-eng)": 93.92,
+ "Tatoeba (bos-eng)": 93.6,
+ "Tatoeba (awa-eng)": 26.57,
+ "Tatoeba (cym-eng)": 11.56,
+ "Tatoeba (nob-eng)": 98.67,
+ "Tatoeba (bul-eng)": 92.79,
+ "Tatoeba (aze-eng)": 50.48,
+ "Tatoeba (glg-eng)": 93.12,
+ "Tatoeba (nno-eng)": 70.14,
+ "Tatoeba (yid-eng)": 8.31,
+ "Tatoeba (epo-eng)": 32.67,
+ "Tatoeba (oci-eng)": 35.7,
+ "Tatoeba (spa-eng)": 95.57,
+ "Tatoeba (mhr-eng)": 3.97,
+ "Tatoeba (est-eng)": 97.75,
+ "Tatoeba (lat-eng)": 18.11,
+ "Tatoeba (dsb-eng)": 29.84,
+ "Tatoeba (fry-eng)": 26.27,
+ "Tatoeba (pol-eng)": 94.7,
+ "Tatoeba (fin-eng)": 93.86,
+ "Tatoeba (orv-eng)": 12.32,
+ "Tatoeba (lvs-eng)": 98.0,
+ "Tatoeba (ben-eng)": 19.9,
+ "Tatoeba (tuk-eng)": 12.3,
+ "Tatoeba (ast-eng)": 59.27,
+ "Tatoeba (ile-eng)": 56.1,
+ "Tatoeba (pms-eng)": 22.86,
+ "Tatoeba (jav-eng)": 11.43,
+ "Tatoeba (slk-eng)": 94.83,
+ "Tatoeba (ber-eng)": 2.53,
+ "Tatoeba (heb-eng)": 86.49,
+ "Tatoeba (gle-eng)": 11.26,
+ "Tatoeba (kor-eng)": 86.37,
+ "Tatoeba (uzb-eng)": 14.03,
+ "Tatoeba (cbk-eng)": 54.82,
+ "Tatoeba (sqi-eng)": 98.17,
+ "Tatoeba (uig-eng)": 9.65,
+ "Tatoeba (cha-eng)": 11.03,
+ "Tatoeba (max-eng)": 36.23,
+ "Tatoeba (swg-eng)": 22.4,
+ "Tatoeba (lit-eng)": 93.15,
+ "Tatoeba (tha-eng)": 95.99,
+ "Tatoeba (pes-eng)": 88.84,
+ "Tatoeba (tur-eng)": 94.73,
+ "Tatoeba (ceb-eng)": 5.72,
+ "Tatoeba (urd-eng)": 94.21,
+ "Tatoeba (ara-eng)": 88.48,
+ "Tatoeba (ukr-eng)": 92.51,
+ "Tatoeba (wuu-eng)": 52.99,
+ "Tatoeba (amh-eng)": 28.72,
+ "Tatoeba (csb-eng)": 16.97,
+ "Tatoeba (gla-eng)": 1.87,
+ "Tatoeba (tel-eng)": 41.89,
+ "Tatoeba (ces-eng)": 94.3,
+ "Tatoeba (kat-eng)": 93.76,
+ "Tatoeba (nds-eng)": 29.11,
+ "Tatoeba (hun-eng)": 90.12,
+ "Tatoeba (dan-eng)": 94.65,
+ "Tatoeba (bel-eng)": 57.88,
+ "Tatoeba (tam-eng)": 35.72,
+ "Tatoeba (cor-eng)": 3.09,
+ "Tatoeba (xho-eng)": 2.01,
+ "Tatoeba (mkd-eng)": 90.86,
+ "Tatoeba (mal-eng)": 7.61,
+ "Tatoeba (fao-eng)": 19.62,
+ "Tatoeba (kaz-eng)": 23.64,
+ "Tatoeba (eus-eng)": 16.35,
+ "Tatoeba (gsw-eng)": 27.33
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "text2vec-base-multilingual",
+ "AllegroReviews": 27.57,
+ "AmazonCounterfactualClassification (de)": 68.7,
+ "AmazonReviewsClassification (de)": 33.39,
"AmazonReviewsClassification (fr)": 34.25,
+ "AngryTweetsClassification": 48.59,
+ "CBD": 57.94,
+ "DanishPoliticalCommentsClassification": 36.55,
+ "GeoreviewClassification": 34.63,
+ "HeadlineClassification": 62.29,
+ "InappropriatenessClassification": 57.37,
+ "KinopoiskClassification": 42.09,
+ "LccSentimentClassification": 51.93,
+ "MTOPDomainClassification (de)": 78.61,
"MTOPDomainClassification (fr)": 71.83,
+ "MTOPIntentClassification (de)": 55.26,
"MTOPIntentClassification (fr)": 44.53,
+ "MasakhaNEWSClassification (amh)": 77.29,
+ "MasakhaNEWSClassification (eng)": 68.54,
"MasakhaNEWSClassification (fra)": 73.84,
+ "MasakhaNEWSClassification (hau)": 46.81,
+ "MasakhaNEWSClassification (ibo)": 41.72,
+ "MasakhaNEWSClassification (lin)": 55.6,
+ "MasakhaNEWSClassification (lug)": 34.71,
+ "MasakhaNEWSClassification (orm)": 36.65,
+ "MasakhaNEWSClassification (pcm)": 82.75,
+ "MasakhaNEWSClassification (run)": 42.11,
+ "MasakhaNEWSClassification (sna)": 54.12,
+ "MasakhaNEWSClassification (som)": 32.07,
+ "MasakhaNEWSClassification (swa)": 57.54,
+ "MasakhaNEWSClassification (tir)": 40.81,
+ "MasakhaNEWSClassification (xho)": 35.12,
+ "MasakhaNEWSClassification (yor)": 49.29,
"MassiveIntentClassification (fr)": 51.93,
- "MassiveScenarioClassification (fr)": 58.31
+ "MassiveScenarioClassification (fr)": 58.31,
+ "NoRecClassification": 43.98,
+ "NordicLangClassification": 38.13,
+ "PAC": 66.11,
+ "PolEmo2.0-IN": 54.7,
+ "PolEmo2.0-OUT": 20.51,
+ "RuReviewsClassification": 56.71,
+ "RuSciBenchGRNTIClassification": 49.18,
+ "RuSciBenchOECDClassification": 39.55
}
]
},
@@ -23709,11 +43604,56 @@
"Model": "text2vec-base-multilingual",
"AlloProfClusteringP2P": 49.11,
"AlloProfClusteringS2S": 32.72,
+ "BlurbsClusteringP2P": 27.75,
+ "BlurbsClusteringS2S": 13.22,
+ "GeoreviewClusteringP2P": 51.65,
"HALClusteringS2S": 16.19,
+ "MLSUMClusteringP2P (de)": 33.54,
+ "MLSUMClusteringP2P (fr)": 36.41,
+ "MLSUMClusteringP2P (ru)": 35.95,
+ "MLSUMClusteringP2P (es)": 37.46,
"MLSUMClusteringP2P": 36.19,
+ "MLSUMClusteringS2S (de)": 31.94,
+ "MLSUMClusteringS2S (fr)": 35.75,
+ "MLSUMClusteringS2S (ru)": 35.41,
+ "MLSUMClusteringS2S (es)": 37.23,
"MLSUMClusteringS2S": 30.39,
+ "MasakhaNEWSClusteringP2P (amh)": 52.2,
+ "MasakhaNEWSClusteringP2P (eng)": 42.5,
"MasakhaNEWSClusteringP2P (fra)": 38.51,
- "MasakhaNEWSClusteringS2S (fra)": 32.51
+ "MasakhaNEWSClusteringP2P (hau)": 27.39,
+ "MasakhaNEWSClusteringP2P (ibo)": 29.8,
+ "MasakhaNEWSClusteringP2P (lin)": 43.86,
+ "MasakhaNEWSClusteringP2P (lug)": 44.81,
+ "MasakhaNEWSClusteringP2P (orm)": 23.29,
+ "MasakhaNEWSClusteringP2P (pcm)": 53.31,
+ "MasakhaNEWSClusteringP2P (run)": 43.27,
+ "MasakhaNEWSClusteringP2P (sna)": 45.25,
+ "MasakhaNEWSClusteringP2P (som)": 27.97,
+ "MasakhaNEWSClusteringP2P (swa)": 26.53,
+ "MasakhaNEWSClusteringP2P (tir)": 46.64,
+ "MasakhaNEWSClusteringP2P (xho)": 21.6,
+ "MasakhaNEWSClusteringP2P (yor)": 31.63,
+ "MasakhaNEWSClusteringS2S (amh)": 52.13,
+ "MasakhaNEWSClusteringS2S (eng)": 9.86,
+ "MasakhaNEWSClusteringS2S (fra)": 32.51,
+ "MasakhaNEWSClusteringS2S (hau)": 15.48,
+ "MasakhaNEWSClusteringS2S (ibo)": 32.76,
+ "MasakhaNEWSClusteringS2S (lin)": 46.64,
+ "MasakhaNEWSClusteringS2S (lug)": 41.71,
+ "MasakhaNEWSClusteringS2S (orm)": 22.31,
+ "MasakhaNEWSClusteringS2S (pcm)": 44.62,
+ "MasakhaNEWSClusteringS2S (run)": 45.44,
+ "MasakhaNEWSClusteringS2S (sna)": 43.21,
+ "MasakhaNEWSClusteringS2S (som)": 26.3,
+ "MasakhaNEWSClusteringS2S (swa)": 17.91,
+ "MasakhaNEWSClusteringS2S (tir)": 45.61,
+ "MasakhaNEWSClusteringS2S (xho)": 21.36,
+ "MasakhaNEWSClusteringS2S (yor)": 24.11,
+ "RuSciBenchGRNTIClusteringP2P": 42.83,
+ "RuSciBenchOECDClusteringP2P": 38.58,
+ "TenKGnadClusteringP2P": 30.87,
+ "TenKGnadClusteringS2S": 14.84
}
]
},
@@ -23721,13 +43661,50 @@
"max_ap": [
{
"Model": "text2vec-base-multilingual",
+ "CDSC-E": 74.42,
+ "FalseFriendsGermanEnglish": 48.21,
+ "OpusparcusPC (de)": 96.39,
+ "OpusparcusPC (en)": 98.39,
+ "OpusparcusPC (fi)": 92.88,
"OpusparcusPC (fr)": 92.04,
- "PawsXPairClassification (fr)": 65.57
+ "OpusparcusPC (ru)": 87.84,
+ "OpusparcusPC (sv)": 93.65,
+ "PSC": 94.96,
+ "PawsXPairClassification (de)": 64.44,
+ "PawsXPairClassification (en)": 68.78,
+ "PawsXPairClassification (es)": 63.87,
+ "PawsXPairClassification (fr)": 65.57,
+ "PawsXPairClassification (ja)": 57.71,
+ "PawsXPairClassification (ko)": 49.51,
+ "PawsXPairClassification (zh)": 65.33,
+ "SICK-E-PL": 74.15,
+ "TERRa": 54.56
+ },
+ {
+ "Model": "text2vec-base-multilingual",
+ "CDSC-E": 74.42,
+ "FalseFriendsGermanEnglish": 48.49,
+ "OpusparcusPC (de)": 96.39,
+ "OpusparcusPC (en)": 98.45,
+ "OpusparcusPC (fi)": 92.88,
+ "OpusparcusPC (fr)": 92.04,
+ "OpusparcusPC (ru)": 87.84,
+ "OpusparcusPC (sv)": 93.65,
+ "PSC": 94.96,
+ "PawsXPairClassification (de)": 64.79,
+ "PawsXPairClassification (en)": 68.78,
+ "PawsXPairClassification (es)": 63.87,
+ "PawsXPairClassification (fr)": 65.6,
+ "PawsXPairClassification (ja)": 57.86,
+ "PawsXPairClassification (ko)": 49.51,
+ "PawsXPairClassification (zh)": 65.51,
+ "SICK-E-PL": 74.15,
+ "TERRa": 54.56
},
{
"Model": "text2vec-base-multilingual",
"OpusparcusPC (fr)": 92.04,
- "PawsXPairClassification (fr)": 65.6
+ "PawsXPairClassification (fr)": 65.57
}
]
},
@@ -23736,7 +43713,31 @@
{
"Model": "text2vec-base-multilingual",
"AlloprofReranking": 51.48,
- "SyntecReranking": 70.28
+ "MindSmallReranking": 29.64,
+ "RuBQReranking": 47.69,
+ "SyntecReranking": 70.28,
+ "T2Reranking": 65.07
+ },
+ {
+ "Model": "text2vec-base-multilingual",
+ "MIRACLReranking (ar)": 37.44,
+ "MIRACLReranking (bn)": 19.37,
+ "MIRACLReranking (de)": 25.45,
+ "MIRACLReranking (en)": 37.85,
+ "MIRACLReranking (es)": 36.76,
+ "MIRACLReranking (fa)": 22.75,
+ "MIRACLReranking (fi)": 42.38,
+ "MIRACLReranking (fr)": 26.52,
+ "MIRACLReranking (hi)": 23.05,
+ "MIRACLReranking (id)": 32.89,
+ "MIRACLReranking (ja)": 29.42,
+ "MIRACLReranking (ko)": 23.99,
+ "MIRACLReranking (ru)": 29.44,
+ "MIRACLReranking (sw)": 21.21,
+ "MIRACLReranking (te)": 18.51,
+ "MIRACLReranking (th)": 26.99,
+ "MIRACLReranking (yo)": 35.18,
+ "MIRACLReranking (zh)": 20.66
}
]
},
@@ -23744,16 +43745,188 @@
"ndcg_at_10": [
{
"Model": "text2vec-base-multilingual",
+ "AILACasedocs": 9.96,
+ "AILAStatutes": 15.39,
+ "ARCChallenge": 5.45,
"AlloprofRetrieval": 18.9,
+ "AlphaNLI": 23.29,
+ "AppsRetrieval": 1.31,
+ "ArguAna": 31.35,
"BSARDRetrieval": 0.0,
+ "ClimateFEVER": 11.19,
+ "CmedqaRetrieval": 12.1,
+ "CodeFeedbackMT": 12.67,
+ "CodeFeedbackST": 22.08,
+ "CodeSearchNetCCRetrieval (python)": 17.26,
+ "CodeSearchNetCCRetrieval (javascript)": 16.51,
+ "CodeSearchNetCCRetrieval (go)": 8.65,
+ "CodeSearchNetCCRetrieval (ruby)": 21.38,
+ "CodeSearchNetCCRetrieval (java)": 9.18,
+ "CodeSearchNetCCRetrieval (php)": 8.02,
+ "CodeSearchNetRetrieval (python)": 39.52,
+ "CodeSearchNetRetrieval (javascript)": 22.2,
+ "CodeSearchNetRetrieval (go)": 32.32,
+ "CodeSearchNetRetrieval (ruby)": 34.56,
+ "CodeSearchNetRetrieval (java)": 18.91,
+ "CodeSearchNetRetrieval (php)": 26.69,
+ "CodeTransOceanContest": 17.87,
+ "CodeTransOceanDL": 26.42,
+ "CosQA": 11.36,
+ "CovidRetrieval": 15.0,
+ "DBPedia": 15.87,
+ "FEVER": 30.69,
+ "FiQA2018": 12.34,
+ "GerDaLIR": 1.18,
+ "GerDaLIRSmall": 3.55,
+ "GermanQuAD-Retrieval": 68.44,
+ "HellaSwag": 15.59,
+ "HotpotQA": 16.63,
+ "LEMBNarrativeQARetrieval": 6.26,
+ "LEMBQMSumRetrieval": 12.2,
+ "LEMBSummScreenFDRetrieval": 40.45,
+ "LEMBWikimQARetrieval": 23.88,
+ "LeCaRDv2": 25.28,
+ "LegalBenchConsumerContractsQA": 48.83,
+ "LegalBenchCorporateLobbying": 81.56,
+ "LegalQuAD": 11.71,
+ "LegalSummarization": 48.68,
+ "MIRACLRetrieval (ar)": 17.44,
+ "MIRACLRetrieval (bn)": 3.55,
+ "MIRACLRetrieval (de)": 12.85,
+ "MIRACLRetrieval (en)": 18.93,
+ "MIRACLRetrieval (es)": 16.1,
+ "MIRACLRetrieval (fa)": 10.15,
+ "MIRACLRetrieval (fi)": 21.62,
+ "MIRACLRetrieval (fr)": 13.13,
+ "MIRACLRetrieval (hi)": 9.42,
+ "MIRACLRetrieval (id)": 17.33,
+ "MIRACLRetrieval (ja)": 10.28,
+ "MIRACLRetrieval (ko)": 16.31,
+ "MIRACLRetrieval (ru)": 11.99,
+ "MIRACLRetrieval (sw)": 8.6,
+ "MIRACLRetrieval (te)": 2.54,
+ "MIRACLRetrieval (th)": 10.66,
+ "MIRACLRetrieval (yo)": 17.03,
+ "MIRACLRetrieval (zh)": 9.32,
+ "MSMARCO": 14.57,
+ "MintakaRetrieval (ar)": 8.28,
+ "MintakaRetrieval (de)": 13.55,
+ "MintakaRetrieval (es)": 14.61,
"MintakaRetrieval (fr)": 14.81,
+ "MintakaRetrieval (hi)": 7.09,
+ "MintakaRetrieval (it)": 14.28,
+ "MintakaRetrieval (ja)": 9.2,
+ "MintakaRetrieval (pt)": 14.95,
+ "NFCorpus": 18.64,
+ "NQ": 20.7,
+ "PIQA": 11.75,
+ "Quail": 1.75,
+ "RARbCode": 3.59,
+ "RARbMath": 31.12,
+ "RiaNewsRetrieval": 23.85,
+ "RuBQRetrieval": 21.04,
+ "SCIDOCS": 9.8,
+ "SIQA": 0.38,
+ "SciFact": 38.99,
+ "SciFact-PL": 28.92,
+ "SpartQA": 8.55,
+ "StackOverflowQA": 32.47,
"SyntecRetrieval": 49.69,
+ "SyntheticText2SQL": 24.54,
+ "TRECCOVID": 32.1,
+ "TRECCOVID-PL": 30.14,
+ "TempReasonL1": 1.48,
+ "TempReasonL2Fact": 2.89,
+ "TempReasonL2Pure": 0.09,
+ "TempReasonL3Fact": 3.82,
+ "TempReasonL3Pure": 0.42,
+ "Touche2020": 14.08,
+ "WinoGrande": 31.61,
+ "XMarket (de)": 4.04,
+ "XMarket (en)": 8.18,
+ "XMarket (es)": 5.39,
+ "XPQARetrieval (ara-ara)": 22.16,
+ "XPQARetrieval (eng-ara)": 13.78,
+ "XPQARetrieval (ara-eng)": 20.65,
+ "XPQARetrieval (deu-deu)": 41.79,
+ "XPQARetrieval (eng-deu)": 19.11,
+ "XPQARetrieval (deu-eng)": 44.08,
+ "XPQARetrieval (spa-spa)": 35.97,
+ "XPQARetrieval (eng-spa)": 19.3,
+ "XPQARetrieval (spa-eng)": 35.72,
+ "XPQARetrieval (fra-fra)": 40.4,
+ "XPQARetrieval (eng-fra)": 17.9,
+ "XPQARetrieval (fra-eng)": 38.18,
+ "XPQARetrieval (hin-hin)": 53.69,
+ "XPQARetrieval (eng-hin)": 19.47,
+ "XPQARetrieval (hin-eng)": 43.22,
+ "XPQARetrieval (ita-ita)": 50.45,
+ "XPQARetrieval (eng-ita)": 19.99,
+ "XPQARetrieval (ita-eng)": 41.49,
+ "XPQARetrieval (jpn-jpn)": 51.88,
+ "XPQARetrieval (eng-jpn)": 16.12,
+ "XPQARetrieval (jpn-eng)": 40.69,
+ "XPQARetrieval (kor-kor)": 17.62,
+ "XPQARetrieval (eng-kor)": 16.7,
+ "XPQARetrieval (kor-eng)": 16.93,
+ "XPQARetrieval (pol-pol)": 28.06,
+ "XPQARetrieval (eng-pol)": 12.59,
+ "XPQARetrieval (pol-eng)": 25.14,
+ "XPQARetrieval (por-por)": 30.62,
+ "XPQARetrieval (eng-por)": 14.86,
+ "XPQARetrieval (por-eng)": 28.56,
+ "XPQARetrieval (tam-tam)": 15.47,
+ "XPQARetrieval (eng-tam)": 5.43,
+ "XPQARetrieval (tam-eng)": 8.94,
+ "XPQARetrieval (cmn-cmn)": 39.62,
+ "XPQARetrieval (eng-cmn)": 14.62,
+ "XPQARetrieval (cmn-eng)": 31.26,
"XPQARetrieval (fr)": 40.4
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "text2vec-base-multilingual",
+ "CDSC-R": 90.74,
+ "GermanSTSBenchmark": 83.32,
+ "RUParaPhraserSTS": 67.02,
+ "RuSTSBenchmarkSTS": 82.6,
+ "SICK-R-PL": 72.43,
+ "SICKFr": 77.25,
+ "STSB": 80.45,
+ "STSBenchmarkMultilingualSTS (es)": 84.9,
+ "STSBenchmarkMultilingualSTS (pt)": 84.06,
+ "STSBenchmarkMultilingualSTS (nl)": 82.82,
+ "STSBenchmarkMultilingualSTS (ru)": 82.7,
+ "STSBenchmarkMultilingualSTS (fr)": 83.47,
+ "STSBenchmarkMultilingualSTS (de)": 83.15,
+ "STSBenchmarkMultilingualSTS (zh)": 81.06,
+ "STSBenchmarkMultilingualSTS (en)": 86.45,
+ "STSBenchmarkMultilingualSTS (it)": 83.72,
+ "STSBenchmarkMultilingualSTS (pl)": 82.42
+ },
+ {
+ "Model": "text2vec-base-multilingual",
+ "CDSC-R": 90.74,
+ "GermanSTSBenchmark": 83.32,
+ "RUParaPhraserSTS": 67.02,
+ "RuSTSBenchmarkSTS": 82.6,
+ "SICK-R-PL": 72.43,
+ "SICKFr": 77.25,
+ "STSB": 80.45,
+ "STSBenchmarkMultilingualSTS (es)": 84.9,
+ "STSBenchmarkMultilingualSTS (pt)": 84.06,
+ "STSBenchmarkMultilingualSTS (nl)": 82.82,
+ "STSBenchmarkMultilingualSTS (ru)": 82.7,
+ "STSBenchmarkMultilingualSTS (fr)": 83.47,
+ "STSBenchmarkMultilingualSTS (de)": 83.15,
+ "STSBenchmarkMultilingualSTS (zh)": 81.06,
+ "STSBenchmarkMultilingualSTS (en)": 86.45,
+ "STSBenchmarkMultilingualSTS (it)": 83.72,
+ "STSBenchmarkMultilingualSTS (pl)": 82.42
+ },
{
"Model": "text2vec-base-multilingual",
"SICKFr": 77.25,
@@ -23764,6 +43937,14 @@
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "text2vec-base-multilingual",
+ "SummEvalFr": 29.33
+ },
+ {
+ "Model": "text2vec-base-multilingual",
+ "SummEvalFr": 29.33
+ },
{
"Model": "text2vec-base-multilingual",
"SummEvalFr": 29.33
@@ -23771,10 +43952,23 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "text2vec-base-multilingual",
+ "CEDRClassification": 36.37,
+ "SensitiveTopicsClassification": 22.47
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "text2vec-base-multilingual",
+ "Core17InstructionRetrieval": -2.75,
+ "News21InstructionRetrieval": -2.1,
+ "Robust04InstructionRetrieval": -7.06
+ }
+ ]
}
},
"shibing624__text2vec-large-chinese": {
@@ -24158,19 +44352,426 @@
},
"voyageai__voyage-3": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "voyage-3",
+ "BornholmBitextMining": 34.84,
+ "Tatoeba (lat-eng)": 50.17,
+ "Tatoeba (max-eng)": 41.21,
+ "Tatoeba (cbk-eng)": 54.97,
+ "Tatoeba (nds-eng)": 69.17,
+ "Tatoeba (swe-eng)": 86.61,
+ "Tatoeba (tzl-eng)": 23.68,
+ "Tatoeba (nob-eng)": 92.86,
+ "Tatoeba (swh-eng)": 24.15,
+ "Tatoeba (afr-eng)": 68.23,
+ "Tatoeba (ara-eng)": 76.19,
+ "Tatoeba (ido-eng)": 69.36,
+ "Tatoeba (isl-eng)": 83.52,
+ "Tatoeba (nov-eng)": 50.2,
+ "Tatoeba (dan-eng)": 91.25,
+ "Tatoeba (war-eng)": 21.4,
+ "Tatoeba (pes-eng)": 80.49,
+ "Tatoeba (pol-eng)": 89.37,
+ "Tatoeba (ell-eng)": 81.25,
+ "Tatoeba (kor-eng)": 80.99,
+ "Tatoeba (khm-eng)": 2.22,
+ "Tatoeba (spa-eng)": 90.48,
+ "Tatoeba (heb-eng)": 78.8,
+ "Tatoeba (pam-eng)": 8.62,
+ "Tatoeba (cha-eng)": 28.41,
+ "Tatoeba (ind-eng)": 84.33,
+ "Tatoeba (vie-eng)": 90.94,
+ "Tatoeba (hye-eng)": 75.17,
+ "Tatoeba (tgl-eng)": 68.92,
+ "Tatoeba (hin-eng)": 85.93,
+ "Tatoeba (yid-eng)": 48.54,
+ "Tatoeba (ukr-eng)": 86.36,
+ "Tatoeba (urd-eng)": 77.63,
+ "Tatoeba (bul-eng)": 84.4,
+ "Tatoeba (rus-eng)": 88.01,
+ "Tatoeba (jav-eng)": 24.94,
+ "Tatoeba (deu-eng)": 94.67,
+ "Tatoeba (por-eng)": 89.06,
+ "Tatoeba (tur-eng)": 83.06,
+ "Tatoeba (gle-eng)": 28.8,
+ "Tatoeba (slv-eng)": 76.75,
+ "Tatoeba (kaz-eng)": 70.94,
+ "Tatoeba (fra-eng)": 86.35,
+ "Tatoeba (hrv-eng)": 81.03,
+ "Tatoeba (gsw-eng)": 44.12,
+ "Tatoeba (mal-eng)": 76.57,
+ "Tatoeba (tha-eng)": 73.05,
+ "Tatoeba (awa-eng)": 49.75,
+ "Tatoeba (uzb-eng)": 43.48,
+ "Tatoeba (kab-eng)": 2.95,
+ "Tatoeba (ben-eng)": 68.0,
+ "Tatoeba (csb-eng)": 47.73,
+ "Tatoeba (swg-eng)": 56.37,
+ "Tatoeba (ceb-eng)": 21.09,
+ "Tatoeba (ita-eng)": 85.3,
+ "Tatoeba (oci-eng)": 47.55,
+ "Tatoeba (fin-eng)": 84.79,
+ "Tatoeba (est-eng)": 73.93,
+ "Tatoeba (glg-eng)": 83.89,
+ "Tatoeba (arz-eng)": 52.87,
+ "Tatoeba (amh-eng)": 30.35,
+ "Tatoeba (aze-eng)": 82.64,
+ "Tatoeba (hun-eng)": 77.69,
+ "Tatoeba (srp-eng)": 75.45,
+ "Tatoeba (ces-eng)": 85.54,
+ "Tatoeba (cmn-eng)": 90.34,
+ "Tatoeba (sqi-eng)": 84.32,
+ "Tatoeba (ast-eng)": 75.12,
+ "Tatoeba (epo-eng)": 83.2,
+ "Tatoeba (uig-eng)": 50.74,
+ "Tatoeba (cym-eng)": 63.42,
+ "Tatoeba (dtp-eng)": 6.47,
+ "Tatoeba (ang-eng)": 58.48,
+ "Tatoeba (eus-eng)": 62.28,
+ "Tatoeba (dsb-eng)": 45.88,
+ "Tatoeba (slk-eng)": 83.46,
+ "Tatoeba (bel-eng)": 86.29,
+ "Tatoeba (wuu-eng)": 74.44,
+ "Tatoeba (lfn-eng)": 58.61,
+ "Tatoeba (kur-eng)": 50.02,
+ "Tatoeba (bos-eng)": 80.62,
+ "Tatoeba (fry-eng)": 63.1,
+ "Tatoeba (xho-eng)": 9.51,
+ "Tatoeba (bre-eng)": 18.83,
+ "Tatoeba (kzj-eng)": 7.63,
+ "Tatoeba (nno-eng)": 81.17,
+ "Tatoeba (lvs-eng)": 76.45,
+ "Tatoeba (hsb-eng)": 60.76,
+ "Tatoeba (tel-eng)": 68.28,
+ "Tatoeba (zsm-eng)": 81.68,
+ "Tatoeba (arq-eng)": 18.83,
+ "Tatoeba (mon-eng)": 68.66,
+ "Tatoeba (mkd-eng)": 77.4,
+ "Tatoeba (jpn-eng)": 80.76,
+ "Tatoeba (gla-eng)": 12.44,
+ "Tatoeba (kat-eng)": 74.33,
+ "Tatoeba (lit-eng)": 79.76,
+ "Tatoeba (tat-eng)": 63.72,
+ "Tatoeba (fao-eng)": 61.51,
+ "Tatoeba (tuk-eng)": 34.36,
+ "Tatoeba (ber-eng)": 6.36,
+ "Tatoeba (ina-eng)": 83.88,
+ "Tatoeba (nld-eng)": 91.59,
+ "Tatoeba (pms-eng)": 57.89,
+ "Tatoeba (mhr-eng)": 13.48,
+ "Tatoeba (orv-eng)": 34.56,
+ "Tatoeba (ile-eng)": 69.65,
+ "Tatoeba (tam-eng)": 68.61,
+ "Tatoeba (cor-eng)": 6.41,
+ "Tatoeba (cat-eng)": 84.34,
+ "Tatoeba (yue-eng)": 71.38,
+ "Tatoeba (mar-eng)": 72.56,
+ "Tatoeba (ron-eng)": 83.85
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-3",
+ "AllegroReviews": 39.35,
+ "AmazonCounterfactualClassification (en-ext)": 62.11,
+ "AmazonCounterfactualClassification (en)": 60.37,
+ "AmazonCounterfactualClassification (de)": 57.76,
+ "AmazonCounterfactualClassification (ja)": 65.06,
+ "AmazonPolarityClassification": 86.02,
+ "AmazonReviewsClassification (en)": 36.97,
+ "AmazonReviewsClassification (de)": 38.1,
+ "AmazonReviewsClassification (es)": 36.32,
+ "AmazonReviewsClassification (fr)": 35.43,
+ "AmazonReviewsClassification (ja)": 37.39,
+ "AmazonReviewsClassification (zh)": 32.94,
+ "AngryTweetsClassification": 54.26,
+ "Banking77Classification": 73.8,
+ "CBD": 66.4,
+ "DanishPoliticalCommentsClassification": 36.51,
+ "EmotionClassification": 40.62,
+ "GeoreviewClassification": 43.02,
+ "HeadlineClassification": 76.74,
+ "ImdbClassification": 89.68,
+ "InappropriatenessClassification": 59.62,
+ "KinopoiskClassification": 62.14,
+ "LccSentimentClassification": 55.93,
+ "MTOPDomainClassification (en)": 87.15,
+ "MTOPDomainClassification (de)": 84.03,
+ "MTOPDomainClassification (es)": 84.36,
+ "MTOPDomainClassification (fr)": 80.39,
+ "MTOPDomainClassification (hi)": 84.78,
+ "MTOPDomainClassification (th)": 80.49,
+ "MTOPIntentClassification (en)": 56.34,
+ "MTOPIntentClassification (de)": 57.94,
+ "MTOPIntentClassification (es)": 58.01,
+ "MTOPIntentClassification (fr)": 52.87,
+ "MTOPIntentClassification (hi)": 54.37,
+ "MTOPIntentClassification (th)": 56.06,
+ "MasakhaNEWSClassification (amh)": 79.81,
+ "MasakhaNEWSClassification (eng)": 71.82,
+ "MasakhaNEWSClassification (fra)": 70.54,
+ "MasakhaNEWSClassification (hau)": 74.33,
+ "MasakhaNEWSClassification (ibo)": 63.15,
+ "MasakhaNEWSClassification (lin)": 70.46,
+ "MasakhaNEWSClassification (lug)": 67.71,
+ "MasakhaNEWSClassification (orm)": 73.88,
+ "MasakhaNEWSClassification (pcm)": 85.02,
+ "MasakhaNEWSClassification (run)": 69.29,
+ "MasakhaNEWSClassification (sna)": 83.33,
+ "MasakhaNEWSClassification (som)": 61.12,
+ "MasakhaNEWSClassification (swa)": 68.42,
+ "MasakhaNEWSClassification (tir)": 65.26,
+ "MasakhaNEWSClassification (xho)": 77.88,
+ "MasakhaNEWSClassification (yor)": 76.11,
+ "MassiveIntentClassification (ru)": 58.32,
+ "MassiveIntentClassification (ur)": 52.72,
+ "MassiveIntentClassification (sq)": 56.6,
+ "MassiveIntentClassification (sv)": 59.09,
+ "MassiveIntentClassification (th)": 55.36,
+ "MassiveIntentClassification (fa)": 59.38,
+ "MassiveIntentClassification (el)": 56.02,
+ "MassiveIntentClassification (hi)": 55.99,
+ "MassiveIntentClassification (vi)": 59.24,
+ "MassiveIntentClassification (tr)": 59.45,
+ "MassiveIntentClassification (it)": 59.0,
+ "MassiveIntentClassification (ta)": 49.98,
+ "MassiveIntentClassification (sw)": 44.49,
+ "MassiveIntentClassification (tl)": 53.96,
+ "MassiveIntentClassification (fi)": 60.96,
+ "MassiveIntentClassification (he)": 56.41,
+ "MassiveIntentClassification (id)": 58.52,
+ "MassiveIntentClassification (az)": 56.82,
+ "MassiveIntentClassification (pt)": 59.34,
+ "MassiveIntentClassification (my)": 38.76,
+ "MassiveIntentClassification (ro)": 53.33,
+ "MassiveIntentClassification (hu)": 55.93,
+ "MassiveIntentClassification (ko)": 59.3,
+ "MassiveIntentClassification (nl)": 57.24,
+ "MassiveIntentClassification (ar)": 49.51,
+ "MassiveIntentClassification (km)": 34.82,
+ "MassiveIntentClassification (de)": 57.95,
+ "MassiveIntentClassification (am)": 36.33,
+ "MassiveIntentClassification (nb)": 58.53,
+ "MassiveIntentClassification (ml)": 52.87,
+ "MassiveIntentClassification (bn)": 53.85,
+ "MassiveIntentClassification (sl)": 56.0,
+ "MassiveIntentClassification (te)": 52.91,
+ "MassiveIntentClassification (da)": 59.34,
+ "MassiveIntentClassification (cy)": 48.16,
+ "MassiveIntentClassification (es)": 58.5,
+ "MassiveIntentClassification (lv)": 55.67,
+ "MassiveIntentClassification (pl)": 57.36,
+ "MassiveIntentClassification (zh-TW)": 54.88,
+ "MassiveIntentClassification (ms)": 53.42,
+ "MassiveIntentClassification (af)": 49.97,
+ "MassiveIntentClassification (jv)": 43.79,
+ "MassiveIntentClassification (hy)": 54.2,
+ "MassiveIntentClassification (ja)": 60.23,
+ "MassiveIntentClassification (ka)": 49.24,
+ "MassiveIntentClassification (fr)": 57.54,
+ "MassiveIntentClassification (is)": 53.36,
+ "MassiveIntentClassification (mn)": 52.26,
+ "MassiveIntentClassification (kn)": 50.87,
+ "MassiveIntentClassification (zh-CN)": 60.13,
+ "MassiveIntentClassification (en)": 63.75,
+ "MassiveScenarioClassification (sv)": 68.38,
+ "MassiveScenarioClassification (fr)": 66.07,
+ "MassiveScenarioClassification (km)": 38.68,
+ "MassiveScenarioClassification (te)": 62.13,
+ "MassiveScenarioClassification (bn)": 59.88,
+ "MassiveScenarioClassification (my)": 45.17,
+ "MassiveScenarioClassification (tl)": 62.36,
+ "MassiveScenarioClassification (fi)": 66.28,
+ "MassiveScenarioClassification (hi)": 61.29,
+ "MassiveScenarioClassification (tr)": 66.8,
+ "MassiveScenarioClassification (pl)": 64.96,
+ "MassiveScenarioClassification (az)": 64.37,
+ "MassiveScenarioClassification (ms)": 63.1,
+ "MassiveScenarioClassification (fa)": 66.24,
+ "MassiveScenarioClassification (he)": 64.02,
+ "MassiveScenarioClassification (ka)": 55.48,
+ "MassiveScenarioClassification (nl)": 66.71,
+ "MassiveScenarioClassification (ru)": 66.18,
+ "MassiveScenarioClassification (vi)": 67.19,
+ "MassiveScenarioClassification (jv)": 50.63,
+ "MassiveScenarioClassification (af)": 61.28,
+ "MassiveScenarioClassification (zh-TW)": 63.5,
+ "MassiveScenarioClassification (it)": 66.3,
+ "MassiveScenarioClassification (lv)": 60.81,
+ "MassiveScenarioClassification (zh-CN)": 68.37,
+ "MassiveScenarioClassification (id)": 67.18,
+ "MassiveScenarioClassification (ja)": 67.03,
+ "MassiveScenarioClassification (ro)": 61.52,
+ "MassiveScenarioClassification (nb)": 67.81,
+ "MassiveScenarioClassification (en)": 71.0,
+ "MassiveScenarioClassification (ko)": 67.6,
+ "MassiveScenarioClassification (ur)": 59.09,
+ "MassiveScenarioClassification (am)": 42.89,
+ "MassiveScenarioClassification (cy)": 56.86,
+ "MassiveScenarioClassification (da)": 67.53,
+ "MassiveScenarioClassification (sq)": 65.72,
+ "MassiveScenarioClassification (is)": 61.24,
+ "MassiveScenarioClassification (mn)": 57.43,
+ "MassiveScenarioClassification (sw)": 51.89,
+ "MassiveScenarioClassification (es)": 66.28,
+ "MassiveScenarioClassification (hu)": 66.07,
+ "MassiveScenarioClassification (ta)": 57.44,
+ "MassiveScenarioClassification (th)": 65.01,
+ "MassiveScenarioClassification (hy)": 60.1,
+ "MassiveScenarioClassification (de)": 67.76,
+ "MassiveScenarioClassification (ar)": 57.93,
+ "MassiveScenarioClassification (el)": 63.1,
+ "MassiveScenarioClassification (ml)": 59.28,
+ "MassiveScenarioClassification (kn)": 58.94,
+ "MassiveScenarioClassification (sl)": 62.96,
+ "MassiveScenarioClassification (pt)": 65.62,
+ "NoRecClassification": 50.16,
+ "NordicLangClassification": 47.13,
+ "PAC": 68.47,
+ "PolEmo2.0-IN": 71.05,
+ "PolEmo2.0-OUT": 42.61,
+ "RuReviewsClassification": 58.37,
+ "RuSciBenchGRNTIClassification": 59.01,
+ "RuSciBenchOECDClassification": 45.37,
+ "ToxicConversationsClassification": 60.73,
+ "TweetSentimentExtractionClassification": 51.28
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "voyage-3",
+ "AlloProfClusteringP2P": 59.71,
+ "AlloProfClusteringS2S": 47.41,
+ "ArxivClusteringP2P": 46.38,
+ "ArxivClusteringS2S": 39.09,
+ "BiorxivClusteringP2P": 38.0,
+ "BiorxivClusteringS2S": 35.33,
+ "BlurbsClusteringP2P": 41.96,
+ "BlurbsClusteringS2S": 18.13,
+ "GeoreviewClusteringP2P": 72.23,
+ "HALClusteringS2S": 26.14,
+ "MLSUMClusteringP2P (de)": 35.65,
+ "MLSUMClusteringP2P (fr)": 42.12,
+ "MLSUMClusteringP2P (ru)": 37.26,
+ "MLSUMClusteringP2P (es)": 45.55,
+ "MLSUMClusteringS2S (de)": 37.51,
+ "MLSUMClusteringS2S (fr)": 42.26,
+ "MLSUMClusteringS2S (ru)": 40.34,
+ "MLSUMClusteringS2S (es)": 44.87,
+ "MasakhaNEWSClusteringP2P (amh)": 54.69,
+ "MasakhaNEWSClusteringP2P (eng)": 48.43,
+ "MasakhaNEWSClusteringP2P (fra)": 36.68,
+ "MasakhaNEWSClusteringP2P (hau)": 43.55,
+ "MasakhaNEWSClusteringP2P (ibo)": 48.5,
+ "MasakhaNEWSClusteringP2P (lin)": 46.08,
+ "MasakhaNEWSClusteringP2P (lug)": 47.42,
+ "MasakhaNEWSClusteringP2P (orm)": 51.58,
+ "MasakhaNEWSClusteringP2P (pcm)": 53.92,
+ "MasakhaNEWSClusteringP2P (run)": 57.08,
+ "MasakhaNEWSClusteringP2P (sna)": 56.38,
+ "MasakhaNEWSClusteringP2P (som)": 33.65,
+ "MasakhaNEWSClusteringP2P (swa)": 25.28,
+ "MasakhaNEWSClusteringP2P (tir)": 58.23,
+ "MasakhaNEWSClusteringP2P (xho)": 40.47,
+ "MasakhaNEWSClusteringP2P (yor)": 41.09,
+ "MasakhaNEWSClusteringS2S (amh)": 56.35,
+ "MasakhaNEWSClusteringS2S (eng)": 55.07,
+ "MasakhaNEWSClusteringS2S (fra)": 41.39,
+ "MasakhaNEWSClusteringS2S (hau)": 26.63,
+ "MasakhaNEWSClusteringS2S (ibo)": 35.71,
+ "MasakhaNEWSClusteringS2S (lin)": 48.81,
+ "MasakhaNEWSClusteringS2S (lug)": 42.37,
+ "MasakhaNEWSClusteringS2S (orm)": 27.6,
+ "MasakhaNEWSClusteringS2S (pcm)": 55.02,
+ "MasakhaNEWSClusteringS2S (run)": 52.14,
+ "MasakhaNEWSClusteringS2S (sna)": 48.84,
+ "MasakhaNEWSClusteringS2S (som)": 28.6,
+ "MasakhaNEWSClusteringS2S (swa)": 20.29,
+ "MasakhaNEWSClusteringS2S (tir)": 48.05,
+ "MasakhaNEWSClusteringS2S (xho)": 23.41,
+ "MasakhaNEWSClusteringS2S (yor)": 30.96,
+ "MedrxivClusteringP2P": 32.39,
+ "MedrxivClusteringS2S": 32.14,
+ "RedditClustering": 50.49,
+ "RedditClusteringP2P": 58.55,
+ "RuSciBenchGRNTIClusteringP2P": 52.22,
+ "RuSciBenchOECDClusteringP2P": 44.42,
+ "StackExchangeClustering": 60.36,
+ "StackExchangeClusteringP2P": 32.78,
+ "TenKGnadClusteringP2P": 38.06,
+ "TenKGnadClusteringS2S": 35.49,
+ "TwentyNewsgroupsClustering": 46.4
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "voyage-3",
+ "CDSC-E": 66.92,
+ "FalseFriendsGermanEnglish": 52.21,
+ "OpusparcusPC (de)": 95.28,
+ "OpusparcusPC (en)": 98.09,
+ "OpusparcusPC (fi)": 92.53,
+ "OpusparcusPC (fr)": 91.63,
+ "OpusparcusPC (ru)": 90.1,
+ "OpusparcusPC (sv)": 93.41,
+ "PSC": 99.63,
+ "PawsXPairClassification (de)": 60.28,
+ "PawsXPairClassification (en)": 63.74,
+ "PawsXPairClassification (es)": 60.92,
+ "PawsXPairClassification (fr)": 64.0,
+ "PawsXPairClassification (ja)": 52.59,
+ "PawsXPairClassification (ko)": 52.85,
+ "PawsXPairClassification (zh)": 58.53,
+ "SICK-E-PL": 65.89,
+ "SprintDuplicateQuestions": 90.2,
+ "TERRa": 51.98,
+ "TwitterSemEval2015": 61.84,
+ "TwitterURLCorpus": 83.61
+ },
+ {
+ "Model": "voyage-3",
+ "CDSC-E": 66.93,
+ "FalseFriendsGermanEnglish": 52.21,
+ "OpusparcusPC (de)": 95.28,
+ "OpusparcusPC (en)": 98.09,
+ "OpusparcusPC (fi)": 92.57,
+ "OpusparcusPC (fr)": 91.63,
+ "OpusparcusPC (ru)": 90.12,
+ "OpusparcusPC (sv)": 93.41,
+ "PSC": 99.64,
+ "PawsXPairClassification (de)": 60.28,
+ "PawsXPairClassification (en)": 63.74,
+ "PawsXPairClassification (es)": 60.94,
+ "PawsXPairClassification (fr)": 64.01,
+ "PawsXPairClassification (ja)": 52.71,
+ "PawsXPairClassification (ko)": 52.88,
+ "PawsXPairClassification (zh)": 58.84,
+ "SICK-E-PL": 65.92,
+ "SprintDuplicateQuestions": 90.2,
+ "TERRa": 52.04,
+ "TwitterSemEval2015": 61.84,
+ "TwitterURLCorpus": 83.61
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "voyage-3",
+ "AlloprofReranking": 84.65,
+ "AskUbuntuDupQuestions": 61.52,
+ "MindSmallReranking": 31.93,
+ "RuBQReranking": 72.6,
+ "T2Reranking": 65.27
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
@@ -24180,7 +44781,9 @@
"AILAStatutes": 42.49,
"AlloprofRetrieval": 73.76,
"AppsRetrieval": 73.03,
+ "ArguAna": 60.96,
"BSARDRetrieval": 44.87,
+ "CmedqaRetrieval": 34.83,
"CodeFeedbackMT": 66.69,
"CodeFeedbackST": 83.02,
"CodeSearchNetCCRetrieval (python)": 79.8,
@@ -24198,6 +44801,8 @@
"CodeTransOceanContest": 89.92,
"CodeTransOceanDL": 33.92,
"CosQA": 28.7,
+ "CovidRetrieval": 88.47,
+ "FiQA2018": 52.28,
"GerDaLIRSmall": 44.72,
"LEMBNarrativeQARetrieval": 54.12,
"LEMBQMSumRetrieval": 51.05,
@@ -24210,43 +44815,560 @@
"LegalSummarization": 69.23,
"MIRACLRetrieval (ru)": 68.43,
"MintakaRetrieval (fr)": 44.56,
+ "NFCorpus": 38.22,
"RiaNewsRetrieval": 88.02,
"RuBQRetrieval": 71.54,
+ "SCIDOCS": 21.43,
+ "SciFact": 74.94,
+ "SciFact-PL": 68.72,
+ "SpartQA": 10.73,
"StackOverflowQA": 94.33,
"SyntecRetrieval": 87.54,
"SyntheticText2SQL": 57.56,
+ "TRECCOVID": 80.46,
+ "TRECCOVID-PL": 76.11,
+ "TempReasonL1": 1.23,
+ "WinoGrande": 67.74,
"XPQARetrieval (fra-fra)": 76.08
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "voyage-3",
+ "BIOSSES": 87.92,
+ "CDSC-R": 86.77,
+ "GermanSTSBenchmark": 71.59,
+ "RUParaPhraserSTS": 65.57,
+ "RuSTSBenchmarkSTS": 69.9,
+ "SICK-R": 79.63,
+ "SICK-R-PL": 65.71,
+ "SICKFr": 73.58,
+ "STS12": 69.52,
+ "STS13": 80.56,
+ "STS14": 73.33,
+ "STS15": 80.39,
+ "STS16": 79.83,
+ "STS17 (en-de)": 82.12,
+ "STS17 (en-tr)": 67.44,
+ "STS17 (fr-en)": 77.55,
+ "STS17 (es-en)": 78.34,
+ "STS17 (es-es)": 84.23,
+ "STS17 (nl-en)": 80.56,
+ "STS17 (en-ar)": 62.98,
+ "STS17 (ar-ar)": 68.04,
+ "STS17 (it-en)": 81.22,
+ "STS17 (ko-ko)": 69.39,
+ "STS17 (en-en)": 86.81,
+ "STSB": 64.81,
+ "STSBenchmark": 79.53,
+ "STSBenchmarkMultilingualSTS (pt)": 72.26,
+ "STSBenchmarkMultilingualSTS (pl)": 71.28,
+ "STSBenchmarkMultilingualSTS (ru)": 70.05,
+ "STSBenchmarkMultilingualSTS (nl)": 73.64,
+ "STSBenchmarkMultilingualSTS (es)": 73.96,
+ "STSBenchmarkMultilingualSTS (zh)": 66.97,
+ "STSBenchmarkMultilingualSTS (it)": 72.42,
+ "STSBenchmarkMultilingualSTS (en)": 79.57,
+ "STSBenchmarkMultilingualSTS (de)": 73.01,
+ "STSBenchmarkMultilingualSTS (fr)": 74.22
+ },
+ {
+ "Model": "voyage-3",
+ "BIOSSES": 87.92,
+ "CDSC-R": 86.77,
+ "GermanSTSBenchmark": 71.59,
+ "RUParaPhraserSTS": 65.57,
+ "RuSTSBenchmarkSTS": 69.9,
+ "SICK-R": 79.63,
+ "SICK-R-PL": 65.71,
+ "SICKFr": 73.58,
+ "STS12": 69.52,
+ "STS13": 80.56,
+ "STS14": 73.33,
+ "STS15": 80.39,
+ "STS16": 79.83,
+ "STS17 (en-de)": 82.12,
+ "STS17 (en-tr)": 67.44,
+ "STS17 (fr-en)": 77.55,
+ "STS17 (es-en)": 78.34,
+ "STS17 (es-es)": 84.23,
+ "STS17 (nl-en)": 80.56,
+ "STS17 (en-ar)": 62.98,
+ "STS17 (ar-ar)": 68.04,
+ "STS17 (it-en)": 81.22,
+ "STS17 (ko-ko)": 69.39,
+ "STS17 (en-en)": 86.81,
+ "STSB": 64.81,
+ "STSBenchmark": 79.53,
+ "STSBenchmarkMultilingualSTS (pt)": 72.26,
+ "STSBenchmarkMultilingualSTS (pl)": 71.28,
+ "STSBenchmarkMultilingualSTS (ru)": 70.05,
+ "STSBenchmarkMultilingualSTS (nl)": 73.64,
+ "STSBenchmarkMultilingualSTS (es)": 73.96,
+ "STSBenchmarkMultilingualSTS (zh)": 66.97,
+ "STSBenchmarkMultilingualSTS (it)": 72.42,
+ "STSBenchmarkMultilingualSTS (en)": 79.57,
+ "STSBenchmarkMultilingualSTS (de)": 73.01,
+ "STSBenchmarkMultilingualSTS (fr)": 74.22
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "voyage-3",
+ "SummEval": 28.92
+ },
+ {
+ "Model": "voyage-3",
+ "SummEval": 28.92
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-3",
+ "CEDRClassification": 36.17,
+ "SensitiveTopicsClassification": 26.23
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "voyage-3",
+ "Core17InstructionRetrieval": 1.45,
+ "News21InstructionRetrieval": 4.59,
+ "Robust04InstructionRetrieval": -2.0
+ }
+ ]
}
},
"voyageai__voyage-3-lite": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "voyage-3-lite",
+ "BornholmBitextMining": 40.01,
+ "Tatoeba (glg-eng)": 83.71,
+ "Tatoeba (cor-eng)": 5.07,
+ "Tatoeba (war-eng)": 18.68,
+ "Tatoeba (kzj-eng)": 5.9,
+ "Tatoeba (heb-eng)": 76.32,
+ "Tatoeba (kat-eng)": 75.06,
+ "Tatoeba (csb-eng)": 32.04,
+ "Tatoeba (ber-eng)": 5.5,
+ "Tatoeba (hin-eng)": 83.54,
+ "Tatoeba (ceb-eng)": 17.46,
+ "Tatoeba (tel-eng)": 71.92,
+ "Tatoeba (urd-eng)": 82.72,
+ "Tatoeba (zsm-eng)": 83.35,
+ "Tatoeba (pms-eng)": 48.41,
+ "Tatoeba (bel-eng)": 86.51,
+ "Tatoeba (tam-eng)": 74.08,
+ "Tatoeba (ile-eng)": 70.53,
+ "Tatoeba (yue-eng)": 67.59,
+ "Tatoeba (tha-eng)": 85.63,
+ "Tatoeba (nds-eng)": 68.74,
+ "Tatoeba (ang-eng)": 44.62,
+ "Tatoeba (dsb-eng)": 46.75,
+ "Tatoeba (ind-eng)": 85.37,
+ "Tatoeba (sqi-eng)": 84.45,
+ "Tatoeba (afr-eng)": 75.83,
+ "Tatoeba (ina-eng)": 84.59,
+ "Tatoeba (gla-eng)": 9.4,
+ "Tatoeba (rus-eng)": 88.3,
+ "Tatoeba (hye-eng)": 78.89,
+ "Tatoeba (gsw-eng)": 42.12,
+ "Tatoeba (ben-eng)": 68.37,
+ "Tatoeba (tzl-eng)": 30.3,
+ "Tatoeba (pam-eng)": 6.05,
+ "Tatoeba (uig-eng)": 48.13,
+ "Tatoeba (lit-eng)": 80.2,
+ "Tatoeba (cha-eng)": 25.42,
+ "Tatoeba (khm-eng)": 38.22,
+ "Tatoeba (fry-eng)": 67.03,
+ "Tatoeba (ron-eng)": 85.52,
+ "Tatoeba (kab-eng)": 1.78,
+ "Tatoeba (hun-eng)": 75.75,
+ "Tatoeba (isl-eng)": 85.18,
+ "Tatoeba (slv-eng)": 77.15,
+ "Tatoeba (mon-eng)": 73.52,
+ "Tatoeba (uzb-eng)": 32.4,
+ "Tatoeba (dan-eng)": 91.92,
+ "Tatoeba (tgl-eng)": 57.23,
+ "Tatoeba (nld-eng)": 91.73,
+ "Tatoeba (nno-eng)": 85.64,
+ "Tatoeba (dtp-eng)": 5.26,
+ "Tatoeba (orv-eng)": 30.48,
+ "Tatoeba (fra-eng)": 85.98,
+ "Tatoeba (wuu-eng)": 77.34,
+ "Tatoeba (cbk-eng)": 55.37,
+ "Tatoeba (kor-eng)": 78.64,
+ "Tatoeba (arz-eng)": 49.44,
+ "Tatoeba (lfn-eng)": 50.19,
+ "Tatoeba (srp-eng)": 77.34,
+ "Tatoeba (mhr-eng)": 10.16,
+ "Tatoeba (max-eng)": 36.38,
+ "Tatoeba (oci-eng)": 48.04,
+ "Tatoeba (yid-eng)": 37.45,
+ "Tatoeba (ces-eng)": 87.47,
+ "Tatoeba (swe-eng)": 88.0,
+ "Tatoeba (nov-eng)": 57.51,
+ "Tatoeba (swg-eng)": 47.89,
+ "Tatoeba (tur-eng)": 86.98,
+ "Tatoeba (deu-eng)": 96.13,
+ "Tatoeba (ell-eng)": 80.58,
+ "Tatoeba (cym-eng)": 56.79,
+ "Tatoeba (jpn-eng)": 84.44,
+ "Tatoeba (awa-eng)": 45.66,
+ "Tatoeba (fao-eng)": 65.29,
+ "Tatoeba (est-eng)": 77.82,
+ "Tatoeba (ido-eng)": 64.81,
+ "Tatoeba (hsb-eng)": 58.22,
+ "Tatoeba (tat-eng)": 65.16,
+ "Tatoeba (hrv-eng)": 79.93,
+ "Tatoeba (swh-eng)": 16.1,
+ "Tatoeba (slk-eng)": 86.74,
+ "Tatoeba (ukr-eng)": 86.53,
+ "Tatoeba (ast-eng)": 75.33,
+ "Tatoeba (vie-eng)": 92.25,
+ "Tatoeba (bre-eng)": 17.51,
+ "Tatoeba (ita-eng)": 86.04,
+ "Tatoeba (ara-eng)": 74.73,
+ "Tatoeba (gle-eng)": 21.42,
+ "Tatoeba (pes-eng)": 82.83,
+ "Tatoeba (epo-eng)": 83.01,
+ "Tatoeba (cmn-eng)": 90.06,
+ "Tatoeba (eus-eng)": 70.02,
+ "Tatoeba (lat-eng)": 30.46,
+ "Tatoeba (mal-eng)": 83.27,
+ "Tatoeba (bos-eng)": 78.14,
+ "Tatoeba (nob-eng)": 93.75,
+ "Tatoeba (spa-eng)": 93.35,
+ "Tatoeba (arq-eng)": 18.26,
+ "Tatoeba (por-eng)": 90.37,
+ "Tatoeba (cat-eng)": 85.5,
+ "Tatoeba (tuk-eng)": 28.51,
+ "Tatoeba (mkd-eng)": 76.99,
+ "Tatoeba (kaz-eng)": 73.05,
+ "Tatoeba (jav-eng)": 23.65,
+ "Tatoeba (kur-eng)": 39.6,
+ "Tatoeba (mar-eng)": 77.58,
+ "Tatoeba (lvs-eng)": 78.75,
+ "Tatoeba (xho-eng)": 7.91,
+ "Tatoeba (fin-eng)": 87.41,
+ "Tatoeba (amh-eng)": 38.1,
+ "Tatoeba (bul-eng)": 84.43,
+ "Tatoeba (pol-eng)": 91.58,
+ "Tatoeba (aze-eng)": 82.44
+ }
+ ]
},
"Classification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-3-lite",
+ "AllegroReviews": 41.6,
+ "AmazonCounterfactualClassification (en-ext)": 65.74,
+ "AmazonCounterfactualClassification (en)": 63.58,
+ "AmazonCounterfactualClassification (de)": 64.15,
+ "AmazonCounterfactualClassification (ja)": 66.08,
+ "AmazonPolarityClassification": 75.7,
+ "AmazonReviewsClassification (en)": 37.63,
+ "AmazonReviewsClassification (de)": 41.0,
+ "AmazonReviewsClassification (es)": 39.26,
+ "AmazonReviewsClassification (fr)": 38.92,
+ "AmazonReviewsClassification (ja)": 38.58,
+ "AmazonReviewsClassification (zh)": 36.01,
+ "AngryTweetsClassification": 54.86,
+ "Banking77Classification": 76.85,
+ "CBD": 64.89,
+ "DanishPoliticalCommentsClassification": 37.46,
+ "EmotionClassification": 43.81,
+ "GeoreviewClassification": 45.4,
+ "HeadlineClassification": 78.34,
+ "ImdbClassification": 73.04,
+ "InappropriatenessClassification": 59.04,
+ "KinopoiskClassification": 57.39,
+ "LccSentimentClassification": 59.93,
+ "MTOPDomainClassification (en)": 90.52,
+ "MTOPDomainClassification (de)": 86.85,
+ "MTOPDomainClassification (es)": 84.68,
+ "MTOPDomainClassification (fr)": 82.89,
+ "MTOPDomainClassification (hi)": 83.19,
+ "MTOPDomainClassification (th)": 82.81,
+ "MTOPIntentClassification (en)": 62.23,
+ "MTOPIntentClassification (de)": 61.23,
+ "MTOPIntentClassification (es)": 61.31,
+ "MTOPIntentClassification (fr)": 55.38,
+ "MTOPIntentClassification (hi)": 56.72,
+ "MTOPIntentClassification (th)": 62.57,
+ "MasakhaNEWSClassification (amh)": 84.36,
+ "MasakhaNEWSClassification (eng)": 78.64,
+ "MasakhaNEWSClassification (fra)": 77.49,
+ "MasakhaNEWSClassification (hau)": 73.49,
+ "MasakhaNEWSClassification (ibo)": 65.95,
+ "MasakhaNEWSClassification (lin)": 73.49,
+ "MasakhaNEWSClassification (lug)": 66.32,
+ "MasakhaNEWSClassification (orm)": 72.92,
+ "MasakhaNEWSClassification (pcm)": 91.31,
+ "MasakhaNEWSClassification (run)": 72.48,
+ "MasakhaNEWSClassification (sna)": 85.07,
+ "MasakhaNEWSClassification (som)": 56.84,
+ "MasakhaNEWSClassification (swa)": 68.72,
+ "MasakhaNEWSClassification (tir)": 60.15,
+ "MasakhaNEWSClassification (xho)": 77.21,
+ "MasakhaNEWSClassification (yor)": 73.58,
+ "MassiveIntentClassification (sw)": 44.18,
+ "MassiveIntentClassification (vi)": 62.64,
+ "MassiveIntentClassification (sv)": 62.52,
+ "MassiveIntentClassification (km)": 38.5,
+ "MassiveIntentClassification (ja)": 64.02,
+ "MassiveIntentClassification (nb)": 62.3,
+ "MassiveIntentClassification (am)": 40.31,
+ "MassiveIntentClassification (is)": 57.27,
+ "MassiveIntentClassification (ka)": 50.07,
+ "MassiveIntentClassification (ar)": 51.41,
+ "MassiveIntentClassification (zh-TW)": 57.62,
+ "MassiveIntentClassification (bn)": 55.38,
+ "MassiveIntentClassification (sq)": 58.33,
+ "MassiveIntentClassification (te)": 53.72,
+ "MassiveIntentClassification (fi)": 61.16,
+ "MassiveIntentClassification (ta)": 51.63,
+ "MassiveIntentClassification (en)": 66.46,
+ "MassiveIntentClassification (zh-CN)": 64.98,
+ "MassiveIntentClassification (he)": 57.14,
+ "MassiveIntentClassification (cy)": 46.62,
+ "MassiveIntentClassification (jv)": 44.31,
+ "MassiveIntentClassification (el)": 56.16,
+ "MassiveIntentClassification (fa)": 63.87,
+ "MassiveIntentClassification (ro)": 54.23,
+ "MassiveIntentClassification (sl)": 58.45,
+ "MassiveIntentClassification (mn)": 55.9,
+ "MassiveIntentClassification (ko)": 60.7,
+ "MassiveIntentClassification (my)": 46.85,
+ "MassiveIntentClassification (az)": 60.51,
+ "MassiveIntentClassification (nl)": 60.61,
+ "MassiveIntentClassification (tr)": 62.05,
+ "MassiveIntentClassification (hy)": 59.17,
+ "MassiveIntentClassification (kn)": 51.19,
+ "MassiveIntentClassification (th)": 57.76,
+ "MassiveIntentClassification (lv)": 59.34,
+ "MassiveIntentClassification (id)": 60.44,
+ "MassiveIntentClassification (da)": 61.75,
+ "MassiveIntentClassification (de)": 59.82,
+ "MassiveIntentClassification (ur)": 55.39,
+ "MassiveIntentClassification (es)": 57.78,
+ "MassiveIntentClassification (hi)": 56.95,
+ "MassiveIntentClassification (ru)": 63.02,
+ "MassiveIntentClassification (ml)": 54.23,
+ "MassiveIntentClassification (pt)": 60.52,
+ "MassiveIntentClassification (pl)": 62.34,
+ "MassiveIntentClassification (af)": 51.75,
+ "MassiveIntentClassification (tl)": 52.13,
+ "MassiveIntentClassification (hu)": 57.9,
+ "MassiveIntentClassification (it)": 58.53,
+ "MassiveIntentClassification (fr)": 58.02,
+ "MassiveIntentClassification (ms)": 56.17,
+ "MassiveScenarioClassification (cy)": 56.54,
+ "MassiveScenarioClassification (ms)": 64.37,
+ "MassiveScenarioClassification (ur)": 61.78,
+ "MassiveScenarioClassification (th)": 65.75,
+ "MassiveScenarioClassification (da)": 69.41,
+ "MassiveScenarioClassification (it)": 64.76,
+ "MassiveScenarioClassification (ml)": 60.42,
+ "MassiveScenarioClassification (fr)": 65.51,
+ "MassiveScenarioClassification (pt)": 64.66,
+ "MassiveScenarioClassification (es)": 64.58,
+ "MassiveScenarioClassification (nl)": 67.55,
+ "MassiveScenarioClassification (bn)": 61.15,
+ "MassiveScenarioClassification (is)": 63.8,
+ "MassiveScenarioClassification (zh-TW)": 66.14,
+ "MassiveScenarioClassification (he)": 62.98,
+ "MassiveScenarioClassification (km)": 46.98,
+ "MassiveScenarioClassification (pl)": 65.13,
+ "MassiveScenarioClassification (tr)": 66.58,
+ "MassiveScenarioClassification (hu)": 66.29,
+ "MassiveScenarioClassification (zh-CN)": 70.44,
+ "MassiveScenarioClassification (sw)": 50.48,
+ "MassiveScenarioClassification (vi)": 67.33,
+ "MassiveScenarioClassification (am)": 47.4,
+ "MassiveScenarioClassification (az)": 65.45,
+ "MassiveScenarioClassification (de)": 68.76,
+ "MassiveScenarioClassification (el)": 64.34,
+ "MassiveScenarioClassification (ro)": 61.31,
+ "MassiveScenarioClassification (en)": 71.94,
+ "MassiveScenarioClassification (fi)": 65.14,
+ "MassiveScenarioClassification (ko)": 67.4,
+ "MassiveScenarioClassification (ta)": 59.58,
+ "MassiveScenarioClassification (jv)": 52.04,
+ "MassiveScenarioClassification (my)": 54.48,
+ "MassiveScenarioClassification (ka)": 57.01,
+ "MassiveScenarioClassification (sq)": 64.65,
+ "MassiveScenarioClassification (kn)": 58.69,
+ "MassiveScenarioClassification (tl)": 58.96,
+ "MassiveScenarioClassification (sv)": 69.08,
+ "MassiveScenarioClassification (ru)": 68.44,
+ "MassiveScenarioClassification (hy)": 64.17,
+ "MassiveScenarioClassification (nb)": 68.35,
+ "MassiveScenarioClassification (fa)": 67.14,
+ "MassiveScenarioClassification (ar)": 59.13,
+ "MassiveScenarioClassification (id)": 66.71,
+ "MassiveScenarioClassification (mn)": 60.82,
+ "MassiveScenarioClassification (sl)": 64.22,
+ "MassiveScenarioClassification (lv)": 64.23,
+ "MassiveScenarioClassification (af)": 59.74,
+ "MassiveScenarioClassification (hi)": 62.27,
+ "MassiveScenarioClassification (ja)": 69.19,
+ "MassiveScenarioClassification (te)": 59.89,
+ "NoRecClassification": 51.87,
+ "NordicLangClassification": 48.92,
+ "PAC": 70.35,
+ "PolEmo2.0-IN": 67.76,
+ "PolEmo2.0-OUT": 41.98,
+ "RuReviewsClassification": 59.14,
+ "RuSciBenchGRNTIClassification": 58.03,
+ "RuSciBenchOECDClassification": 44.54,
+ "ToxicConversationsClassification": 61.86,
+ "TweetSentimentExtractionClassification": 56.84
+ }
+ ]
},
"Clustering": {
- "v_measure": []
+ "v_measure": [
+ {
+ "Model": "voyage-3-lite",
+ "AlloProfClusteringP2P": 60.61,
+ "AlloProfClusteringS2S": 47.75,
+ "ArxivClusteringP2P": 43.4,
+ "ArxivClusteringS2S": 32.03,
+ "BiorxivClusteringP2P": 37.01,
+ "BiorxivClusteringS2S": 29.02,
+ "BlurbsClusteringP2P": 40.26,
+ "BlurbsClusteringS2S": 15.97,
+ "GeoreviewClusteringP2P": 63.53,
+ "HALClusteringS2S": 24.74,
+ "MLSUMClusteringP2P (de)": 44.05,
+ "MLSUMClusteringP2P (fr)": 45.69,
+ "MLSUMClusteringP2P (ru)": 48.37,
+ "MLSUMClusteringP2P (es)": 49.09,
+ "MLSUMClusteringS2S (de)": 44.99,
+ "MLSUMClusteringS2S (fr)": 45.62,
+ "MLSUMClusteringS2S (ru)": 49.14,
+ "MLSUMClusteringS2S (es)": 48.59,
+ "MasakhaNEWSClusteringP2P (amh)": 63.83,
+ "MasakhaNEWSClusteringP2P (eng)": 70.71,
+ "MasakhaNEWSClusteringP2P (fra)": 58.78,
+ "MasakhaNEWSClusteringP2P (hau)": 44.79,
+ "MasakhaNEWSClusteringP2P (ibo)": 47.36,
+ "MasakhaNEWSClusteringP2P (lin)": 60.43,
+ "MasakhaNEWSClusteringP2P (lug)": 43.85,
+ "MasakhaNEWSClusteringP2P (orm)": 47.96,
+ "MasakhaNEWSClusteringP2P (pcm)": 69.6,
+ "MasakhaNEWSClusteringP2P (run)": 57.35,
+ "MasakhaNEWSClusteringP2P (sna)": 61.77,
+ "MasakhaNEWSClusteringP2P (som)": 36.3,
+ "MasakhaNEWSClusteringP2P (swa)": 26.36,
+ "MasakhaNEWSClusteringP2P (tir)": 50.45,
+ "MasakhaNEWSClusteringP2P (xho)": 41.99,
+ "MasakhaNEWSClusteringP2P (yor)": 27.22,
+ "MasakhaNEWSClusteringS2S (amh)": 50.65,
+ "MasakhaNEWSClusteringS2S (eng)": 57.59,
+ "MasakhaNEWSClusteringS2S (fra)": 43.8,
+ "MasakhaNEWSClusteringS2S (hau)": 28.59,
+ "MasakhaNEWSClusteringS2S (ibo)": 42.81,
+ "MasakhaNEWSClusteringS2S (lin)": 53.67,
+ "MasakhaNEWSClusteringS2S (lug)": 47.26,
+ "MasakhaNEWSClusteringS2S (orm)": 33.55,
+ "MasakhaNEWSClusteringS2S (pcm)": 81.83,
+ "MasakhaNEWSClusteringS2S (run)": 48.94,
+ "MasakhaNEWSClusteringS2S (sna)": 46.16,
+ "MasakhaNEWSClusteringS2S (som)": 30.62,
+ "MasakhaNEWSClusteringS2S (swa)": 16.81,
+ "MasakhaNEWSClusteringS2S (tir)": 46.6,
+ "MasakhaNEWSClusteringS2S (xho)": 29.94,
+ "MasakhaNEWSClusteringS2S (yor)": 28.83,
+ "MedrxivClusteringP2P": 32.65,
+ "MedrxivClusteringS2S": 28.77,
+ "RedditClustering": 47.35,
+ "RedditClusteringP2P": 58.57,
+ "RuSciBenchGRNTIClusteringP2P": 53.76,
+ "RuSciBenchOECDClusteringP2P": 45.13,
+ "StackExchangeClustering": 59.88,
+ "StackExchangeClusteringP2P": 33.31,
+ "TenKGnadClusteringP2P": 46.79,
+ "TenKGnadClusteringS2S": 33.11,
+ "TwentyNewsgroupsClustering": 43.21
+ }
+ ]
},
"PairClassification": {
- "max_ap": []
+ "max_ap": [
+ {
+ "Model": "voyage-3-lite",
+ "CDSC-E": 66.64,
+ "FalseFriendsGermanEnglish": 51.59,
+ "OpusparcusPC (de)": 96.07,
+ "OpusparcusPC (en)": 98.12,
+ "OpusparcusPC (fi)": 93.18,
+ "OpusparcusPC (fr)": 91.54,
+ "OpusparcusPC (ru)": 87.84,
+ "OpusparcusPC (sv)": 93.37,
+ "PSC": 99.57,
+ "PawsXPairClassification (de)": 55.96,
+ "PawsXPairClassification (en)": 59.14,
+ "PawsXPairClassification (es)": 56.63,
+ "PawsXPairClassification (fr)": 58.75,
+ "PawsXPairClassification (ja)": 51.1,
+ "PawsXPairClassification (ko)": 51.15,
+ "PawsXPairClassification (zh)": 56.26,
+ "SICK-E-PL": 61.81,
+ "SprintDuplicateQuestions": 89.47,
+ "TERRa": 52.4,
+ "TwitterSemEval2015": 62.64,
+ "TwitterURLCorpus": 84.09
+ },
+ {
+ "Model": "voyage-3-lite",
+ "CDSC-E": 66.64,
+ "FalseFriendsGermanEnglish": 51.59,
+ "OpusparcusPC (de)": 96.07,
+ "OpusparcusPC (en)": 98.12,
+ "OpusparcusPC (fi)": 93.18,
+ "OpusparcusPC (fr)": 91.54,
+ "OpusparcusPC (ru)": 87.84,
+ "OpusparcusPC (sv)": 93.37,
+ "PSC": 99.57,
+ "PawsXPairClassification (de)": 56.07,
+ "PawsXPairClassification (en)": 59.14,
+ "PawsXPairClassification (es)": 56.63,
+ "PawsXPairClassification (fr)": 58.75,
+ "PawsXPairClassification (ja)": 51.23,
+ "PawsXPairClassification (ko)": 51.15,
+ "PawsXPairClassification (zh)": 56.55,
+ "SICK-E-PL": 61.84,
+ "SprintDuplicateQuestions": 89.47,
+ "TERRa": 52.68,
+ "TwitterSemEval2015": 62.64,
+ "TwitterURLCorpus": 84.09
+ }
+ ]
},
"Reranking": {
- "map": []
+ "map": [
+ {
+ "Model": "voyage-3-lite",
+ "AlloprofReranking": 75.02,
+ "AskUbuntuDupQuestions": 60.58,
+ "MindSmallReranking": 31.83,
+ "RuBQReranking": 67.85,
+ "T2Reranking": 65.4
+ }
+ ]
},
"Retrieval": {
"ndcg_at_10": [
@@ -24254,6 +45376,11 @@
"Model": "voyage-3-lite",
"AILACasedocs": 38.15,
"AILAStatutes": 35.03,
+ "AlloprofRetrieval": 47.86,
+ "ArguAna": 60.14,
+ "CmedqaRetrieval": 28.85,
+ "CovidRetrieval": 74.91,
+ "FiQA2018": 41.69,
"GerDaLIRSmall": 43.73,
"LEMBNarrativeQARetrieval": 51.67,
"LEMBQMSumRetrieval": 53.01,
@@ -24263,21 +45390,132 @@
"LegalBenchConsumerContractsQA": 83.22,
"LegalBenchCorporateLobbying": 94.53,
"LegalQuAD": 61.96,
- "LegalSummarization": 61.42
+ "LegalSummarization": 61.42,
+ "NFCorpus": 30.33,
+ "SCIDOCS": 19.12,
+ "SciFact": 70.75,
+ "SciFact-PL": 58.76,
+ "SpartQA": 0.45,
+ "StackOverflowQA": 89.41,
+ "TRECCOVID": 76.5,
+ "TRECCOVID-PL": 68.99,
+ "TempReasonL1": 1.85,
+ "WinoGrande": 41.17
}
]
},
"STS": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "voyage-3-lite",
+ "BIOSSES": 85.19,
+ "CDSC-R": 89.51,
+ "GermanSTSBenchmark": 69.78,
+ "RUParaPhraserSTS": 64.5,
+ "RuSTSBenchmarkSTS": 69.33,
+ "SICK-R": 76.65,
+ "SICK-R-PL": 67.25,
+ "SICKFr": 72.19,
+ "STS12": 70.76,
+ "STS13": 76.51,
+ "STS14": 71.92,
+ "STS15": 81.1,
+ "STS16": 80.8,
+ "STS17 (en-tr)": 63.71,
+ "STS17 (fr-en)": 73.69,
+ "STS17 (nl-en)": 77.19,
+ "STS17 (en-en)": 87.18,
+ "STS17 (es-es)": 82.43,
+ "STS17 (en-ar)": 61.43,
+ "STS17 (ar-ar)": 69.8,
+ "STS17 (it-en)": 74.99,
+ "STS17 (en-de)": 77.44,
+ "STS17 (es-en)": 69.69,
+ "STS17 (ko-ko)": 65.95,
+ "STSB": 66.07,
+ "STSBenchmark": 78.56,
+ "STSBenchmarkMultilingualSTS (it)": 71.4,
+ "STSBenchmarkMultilingualSTS (en)": 78.56,
+ "STSBenchmarkMultilingualSTS (pl)": 69.57,
+ "STSBenchmarkMultilingualSTS (de)": 71.71,
+ "STSBenchmarkMultilingualSTS (nl)": 70.65,
+ "STSBenchmarkMultilingualSTS (pt)": 69.01,
+ "STSBenchmarkMultilingualSTS (es)": 70.6,
+ "STSBenchmarkMultilingualSTS (ru)": 68.62,
+ "STSBenchmarkMultilingualSTS (fr)": 71.24,
+ "STSBenchmarkMultilingualSTS (zh)": 68.75
+ },
+ {
+ "Model": "voyage-3-lite",
+ "BIOSSES": 85.19,
+ "CDSC-R": 89.51,
+ "GermanSTSBenchmark": 69.78,
+ "RUParaPhraserSTS": 64.5,
+ "RuSTSBenchmarkSTS": 69.33,
+ "SICK-R": 76.65,
+ "SICK-R-PL": 67.25,
+ "SICKFr": 72.19,
+ "STS12": 70.76,
+ "STS13": 76.51,
+ "STS14": 71.92,
+ "STS15": 81.1,
+ "STS16": 80.8,
+ "STS17 (en-tr)": 63.71,
+ "STS17 (fr-en)": 73.69,
+ "STS17 (nl-en)": 77.19,
+ "STS17 (en-en)": 87.18,
+ "STS17 (es-es)": 82.43,
+ "STS17 (en-ar)": 61.43,
+ "STS17 (ar-ar)": 69.8,
+ "STS17 (it-en)": 74.99,
+ "STS17 (en-de)": 77.44,
+ "STS17 (es-en)": 69.69,
+ "STS17 (ko-ko)": 65.95,
+ "STSB": 66.07,
+ "STSBenchmark": 78.56,
+ "STSBenchmarkMultilingualSTS (it)": 71.4,
+ "STSBenchmarkMultilingualSTS (en)": 78.56,
+ "STSBenchmarkMultilingualSTS (pl)": 69.57,
+ "STSBenchmarkMultilingualSTS (de)": 71.71,
+ "STSBenchmarkMultilingualSTS (nl)": 70.65,
+ "STSBenchmarkMultilingualSTS (pt)": 69.01,
+ "STSBenchmarkMultilingualSTS (es)": 70.6,
+ "STSBenchmarkMultilingualSTS (ru)": 68.62,
+ "STSBenchmarkMultilingualSTS (fr)": 71.24,
+ "STSBenchmarkMultilingualSTS (zh)": 68.75
+ }
+ ]
},
"Summarization": {
- "cosine_spearman": []
+ "cosine_spearman": [
+ {
+ "Model": "voyage-3-lite",
+ "SummEval": 31.4
+ },
+ {
+ "Model": "voyage-3-lite",
+ "SummEval": 31.4
+ }
+ ]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-3-lite",
+ "CEDRClassification": 37.56,
+ "SensitiveTopicsClassification": 24.41
+ }
+ ]
},
"InstructionRetrieval": {
- "p-MRR": []
+ "p-MRR": [
+ {
+ "Model": "voyage-3-lite",
+ "Core17InstructionRetrieval": 0.41,
+ "News21InstructionRetrieval": 0.13,
+ "Robust04InstructionRetrieval": -4.06
+ }
+ ]
}
},
"voyageai__voyage-code-2": {
@@ -24373,24 +45611,276 @@
},
"voyageai__voyage-large-2-instruct": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "BornholmBitextMining": 34.66,
+ "Tatoeba (swg-eng)": 40.7,
+ "Tatoeba (aze-eng)": 35.41,
+ "Tatoeba (jpn-eng)": 84.32,
+ "Tatoeba (nov-eng)": 53.41,
+ "Tatoeba (ido-eng)": 54.53,
+ "Tatoeba (ina-eng)": 83.38,
+ "Tatoeba (jav-eng)": 11.06,
+ "Tatoeba (ara-eng)": 42.48,
+ "Tatoeba (bre-eng)": 5.07,
+ "Tatoeba (vie-eng)": 69.38,
+ "Tatoeba (fry-eng)": 39.02,
+ "Tatoeba (uig-eng)": 10.48,
+ "Tatoeba (amh-eng)": 0.18,
+ "Tatoeba (gla-eng)": 6.5,
+ "Tatoeba (awa-eng)": 22.77,
+ "Tatoeba (ind-eng)": 69.76,
+ "Tatoeba (tur-eng)": 58.51,
+ "Tatoeba (ces-eng)": 82.28,
+ "Tatoeba (deu-eng)": 96.21,
+ "Tatoeba (rus-eng)": 87.31,
+ "Tatoeba (lvs-eng)": 29.6,
+ "Tatoeba (slk-eng)": 77.44,
+ "Tatoeba (khm-eng)": 0.29,
+ "Tatoeba (ita-eng)": 87.11,
+ "Tatoeba (gsw-eng)": 43.62,
+ "Tatoeba (cha-eng)": 21.54,
+ "Tatoeba (orv-eng)": 30.21,
+ "Tatoeba (cmn-eng)": 91.87,
+ "Tatoeba (glg-eng)": 77.07,
+ "Tatoeba (pam-eng)": 5.97,
+ "Tatoeba (wuu-eng)": 74.56,
+ "Tatoeba (yid-eng)": 1.54,
+ "Tatoeba (kzj-eng)": 5.79,
+ "Tatoeba (ceb-eng)": 10.49,
+ "Tatoeba (pol-eng)": 88.57,
+ "Tatoeba (tha-eng)": 40.24,
+ "Tatoeba (kat-eng)": 13.22,
+ "Tatoeba (mal-eng)": 1.82,
+ "Tatoeba (nno-eng)": 69.29,
+ "Tatoeba (yue-eng)": 75.17,
+ "Tatoeba (epo-eng)": 58.0,
+ "Tatoeba (spa-eng)": 92.7,
+ "Tatoeba (ukr-eng)": 84.69,
+ "Tatoeba (tat-eng)": 10.24,
+ "Tatoeba (isl-eng)": 39.4,
+ "Tatoeba (heb-eng)": 24.59,
+ "Tatoeba (hye-eng)": 2.8,
+ "Tatoeba (swe-eng)": 84.16,
+ "Tatoeba (ell-eng)": 50.68,
+ "Tatoeba (sqi-eng)": 24.05,
+ "Tatoeba (dsb-eng)": 42.21,
+ "Tatoeba (mon-eng)": 12.51,
+ "Tatoeba (arz-eng)": 24.28,
+ "Tatoeba (lfn-eng)": 50.81,
+ "Tatoeba (fra-eng)": 89.27,
+ "Tatoeba (kaz-eng)": 14.08,
+ "Tatoeba (nds-eng)": 50.54,
+ "Tatoeba (fao-eng)": 44.19,
+ "Tatoeba (ber-eng)": 5.31,
+ "Tatoeba (ron-eng)": 80.44,
+ "Tatoeba (bos-eng)": 79.94,
+ "Tatoeba (xho-eng)": 7.3,
+ "Tatoeba (fin-eng)": 55.38,
+ "Tatoeba (pes-eng)": 43.87,
+ "Tatoeba (uzb-eng)": 12.82,
+ "Tatoeba (slv-eng)": 72.11,
+ "Tatoeba (hun-eng)": 69.19,
+ "Tatoeba (swh-eng)": 10.94,
+ "Tatoeba (nld-eng)": 89.88,
+ "Tatoeba (dan-eng)": 85.71,
+ "Tatoeba (kor-eng)": 67.28,
+ "Tatoeba (bul-eng)": 79.3,
+ "Tatoeba (mar-eng)": 12.22,
+ "Tatoeba (lat-eng)": 47.22,
+ "Tatoeba (war-eng)": 11.37,
+ "Tatoeba (kab-eng)": 1.36,
+ "Tatoeba (srp-eng)": 75.61,
+ "Tatoeba (hin-eng)": 39.36,
+ "Tatoeba (tuk-eng)": 7.38,
+ "Tatoeba (lit-eng)": 26.73,
+ "Tatoeba (hsb-eng)": 50.29,
+ "Tatoeba (zsm-eng)": 71.1,
+ "Tatoeba (mhr-eng)": 7.86,
+ "Tatoeba (tgl-eng)": 25.72,
+ "Tatoeba (hrv-eng)": 83.09,
+ "Tatoeba (tzl-eng)": 32.1,
+ "Tatoeba (por-eng)": 89.17,
+ "Tatoeba (urd-eng)": 21.4,
+ "Tatoeba (gle-eng)": 8.47,
+ "Tatoeba (cbk-eng)": 56.61,
+ "Tatoeba (mkd-eng)": 53.83,
+ "Tatoeba (ast-eng)": 69.08,
+ "Tatoeba (max-eng)": 35.91,
+ "Tatoeba (oci-eng)": 44.59,
+ "Tatoeba (est-eng)": 24.87,
+ "Tatoeba (tel-eng)": 1.26,
+ "Tatoeba (kur-eng)": 10.77,
+ "Tatoeba (bel-eng)": 59.58,
+ "Tatoeba (ile-eng)": 63.57,
+ "Tatoeba (cym-eng)": 10.27,
+ "Tatoeba (ben-eng)": 11.23,
+ "Tatoeba (pms-eng)": 41.25,
+ "Tatoeba (dtp-eng)": 4.32,
+ "Tatoeba (csb-eng)": 34.29,
+ "Tatoeba (afr-eng)": 63.53,
+ "Tatoeba (cor-eng)": 3.72,
+ "Tatoeba (nob-eng)": 85.34,
+ "Tatoeba (tam-eng)": 2.28,
+ "Tatoeba (cat-eng)": 84.06,
+ "Tatoeba (arq-eng)": 6.63,
+ "Tatoeba (eus-eng)": 13.67,
+ "Tatoeba (ang-eng)": 44.02
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "voyage-large-2-instruct",
- "AmazonCounterfactualClassification (en)": 77.6,
- "AmazonPolarityClassification": 96.58,
- "AmazonReviewsClassification (en)": 50.77,
- "Banking77Classification": 86.96,
- "EmotionClassification": 59.81,
- "ImdbClassification": 96.13,
- "MTOPDomainClassification (en)": 98.86,
- "MTOPIntentClassification (en)": 86.97,
- "MassiveIntentClassification (en)": 81.08,
- "MassiveScenarioClassification (en)": 87.95,
- "ToxicConversationsClassification": 83.58,
- "TweetSentimentExtractionClassification": 71.55
+ "AllegroReviews": 46.66,
+ "AmazonCounterfactualClassification (en)": 83.06,
+ "AmazonCounterfactualClassification (en-ext)": 85.13,
+ "AmazonCounterfactualClassification (de)": 64.61,
+ "AmazonCounterfactualClassification (ja)": 77.46,
+ "AmazonPolarityClassification": 96.11,
+ "AmazonReviewsClassification (en)": 51.4,
+ "AmazonReviewsClassification (de)": 47.52,
+ "AmazonReviewsClassification (es)": 46.78,
+ "AmazonReviewsClassification (fr)": 46.43,
+ "AmazonReviewsClassification (ja)": 45.09,
+ "AmazonReviewsClassification (zh)": 44.16,
+ "AngryTweetsClassification": 56.06,
+ "Banking77Classification": 87.62,
+ "CBD": 73.6,
+ "DanishPoliticalCommentsClassification": 38.61,
+ "EmotionClassification": 64.46,
+ "GeoreviewClassification": 47.18,
+ "HeadlineClassification": 75.1,
+ "ImdbClassification": 93.78,
+ "InappropriatenessClassification": 62.99,
+ "KinopoiskClassification": 64.43,
+ "LccSentimentClassification": 59.2,
+ "MTOPDomainClassification (en)": 97.46,
+ "MTOPDomainClassification (de)": 95.27,
+ "MTOPDomainClassification (es)": 95.28,
+ "MTOPDomainClassification (fr)": 93.57,
+ "MTOPDomainClassification (hi)": 79.0,
+ "MTOPDomainClassification (th)": 76.64,
+ "MTOPIntentClassification (en)": 75.01,
+ "MTOPIntentClassification (de)": 70.28,
+ "MTOPIntentClassification (es)": 74.9,
+ "MTOPIntentClassification (fr)": 68.15,
+ "MTOPIntentClassification (hi)": 40.64,
+ "MTOPIntentClassification (th)": 52.46,
+ "MassiveIntentClassification (en)": 76.62,
+ "MassiveIntentClassification (jv)": 42.59,
+ "MassiveIntentClassification (te)": 22.0,
+ "MassiveIntentClassification (km)": 21.07,
+ "MassiveIntentClassification (ur)": 38.9,
+ "MassiveIntentClassification (hy)": 32.94,
+ "MassiveIntentClassification (my)": 18.82,
+ "MassiveIntentClassification (pl)": 67.0,
+ "MassiveIntentClassification (tr)": 59.39,
+ "MassiveIntentClassification (hu)": 60.74,
+ "MassiveIntentClassification (kn)": 17.95,
+ "MassiveIntentClassification (tl)": 48.84,
+ "MassiveIntentClassification (ta)": 28.71,
+ "MassiveIntentClassification (da)": 65.75,
+ "MassiveIntentClassification (es)": 69.32,
+ "MassiveIntentClassification (fi)": 56.04,
+ "MassiveIntentClassification (fr)": 69.76,
+ "MassiveIntentClassification (ml)": 31.31,
+ "MassiveIntentClassification (ro)": 61.22,
+ "MassiveIntentClassification (cy)": 36.38,
+ "MassiveIntentClassification (ko)": 59.58,
+ "MassiveIntentClassification (el)": 51.05,
+ "MassiveIntentClassification (af)": 56.14,
+ "MassiveIntentClassification (he)": 46.56,
+ "MassiveIntentClassification (lv)": 45.89,
+ "MassiveIntentClassification (id)": 64.75,
+ "MassiveIntentClassification (it)": 68.79,
+ "MassiveIntentClassification (de)": 67.84,
+ "MassiveIntentClassification (pt)": 70.28,
+ "MassiveIntentClassification (th)": 45.59,
+ "MassiveIntentClassification (is)": 46.74,
+ "MassiveIntentClassification (ru)": 70.87,
+ "MassiveIntentClassification (vi)": 55.12,
+ "MassiveIntentClassification (sl)": 62.57,
+ "MassiveIntentClassification (sw)": 38.08,
+ "MassiveIntentClassification (sq)": 41.24,
+ "MassiveIntentClassification (az)": 50.48,
+ "MassiveIntentClassification (am)": 24.82,
+ "MassiveIntentClassification (hi)": 48.1,
+ "MassiveIntentClassification (ms)": 57.61,
+ "MassiveIntentClassification (bn)": 39.06,
+ "MassiveIntentClassification (nb)": 64.93,
+ "MassiveIntentClassification (nl)": 68.84,
+ "MassiveIntentClassification (ka)": 32.48,
+ "MassiveIntentClassification (sv)": 68.53,
+ "MassiveIntentClassification (mn)": 33.06,
+ "MassiveIntentClassification (zh-CN)": 69.49,
+ "MassiveIntentClassification (fa)": 55.04,
+ "MassiveIntentClassification (ja)": 67.62,
+ "MassiveIntentClassification (zh-TW)": 63.36,
+ "MassiveIntentClassification (ar)": 47.61,
+ "MassiveScenarioClassification (en)": 83.75,
+ "MassiveScenarioClassification (km)": 25.63,
+ "MassiveScenarioClassification (am)": 34.34,
+ "MassiveScenarioClassification (lv)": 56.15,
+ "MassiveScenarioClassification (th)": 56.57,
+ "MassiveScenarioClassification (hu)": 71.32,
+ "MassiveScenarioClassification (hy)": 40.24,
+ "MassiveScenarioClassification (ja)": 76.24,
+ "MassiveScenarioClassification (it)": 77.33,
+ "MassiveScenarioClassification (es)": 77.27,
+ "MassiveScenarioClassification (hi)": 56.05,
+ "MassiveScenarioClassification (da)": 76.76,
+ "MassiveScenarioClassification (kn)": 26.11,
+ "MassiveScenarioClassification (is)": 58.65,
+ "MassiveScenarioClassification (ms)": 67.79,
+ "MassiveScenarioClassification (de)": 78.79,
+ "MassiveScenarioClassification (vi)": 62.79,
+ "MassiveScenarioClassification (te)": 28.33,
+ "MassiveScenarioClassification (fi)": 64.86,
+ "MassiveScenarioClassification (cy)": 45.38,
+ "MassiveScenarioClassification (id)": 72.48,
+ "MassiveScenarioClassification (sv)": 78.4,
+ "MassiveScenarioClassification (sq)": 52.83,
+ "MassiveScenarioClassification (he)": 53.04,
+ "MassiveScenarioClassification (ru)": 78.34,
+ "MassiveScenarioClassification (sw)": 46.95,
+ "MassiveScenarioClassification (bn)": 47.38,
+ "MassiveScenarioClassification (af)": 67.7,
+ "MassiveScenarioClassification (fr)": 77.99,
+ "MassiveScenarioClassification (tr)": 67.67,
+ "MassiveScenarioClassification (jv)": 52.42,
+ "MassiveScenarioClassification (ka)": 41.12,
+ "MassiveScenarioClassification (nb)": 74.44,
+ "MassiveScenarioClassification (zh-TW)": 72.34,
+ "MassiveScenarioClassification (mn)": 41.83,
+ "MassiveScenarioClassification (el)": 60.8,
+ "MassiveScenarioClassification (ar)": 55.46,
+ "MassiveScenarioClassification (az)": 58.36,
+ "MassiveScenarioClassification (ko)": 68.71,
+ "MassiveScenarioClassification (tl)": 59.21,
+ "MassiveScenarioClassification (zh-CN)": 78.77,
+ "MassiveScenarioClassification (ro)": 70.68,
+ "MassiveScenarioClassification (ur)": 48.18,
+ "MassiveScenarioClassification (sl)": 71.12,
+ "MassiveScenarioClassification (nl)": 78.78,
+ "MassiveScenarioClassification (ml)": 38.14,
+ "MassiveScenarioClassification (pt)": 76.66,
+ "MassiveScenarioClassification (fa)": 60.92,
+ "MassiveScenarioClassification (ta)": 36.67,
+ "MassiveScenarioClassification (my)": 25.4,
+ "MassiveScenarioClassification (pl)": 76.06,
+ "NoRecClassification": 51.44,
+ "NordicLangClassification": 57.17,
+ "PAC": 65.35,
+ "PolEmo2.0-IN": 71.3,
+ "PolEmo2.0-OUT": 33.89,
+ "RuReviewsClassification": 65.99,
+ "RuSciBenchGRNTIClassification": 61.46,
+ "RuSciBenchOECDClassification": 47.86,
+ "ToxicConversationsClassification": 69.39,
+ "TweetSentimentExtractionClassification": 61.9
}
]
},
@@ -24398,17 +45888,42 @@
"v_measure": [
{
"Model": "voyage-large-2-instruct",
- "ArxivClusteringP2P": 51.81,
- "ArxivClusteringS2S": 44.73,
- "BiorxivClusteringP2P": 46.07,
- "BiorxivClusteringS2S": 40.64,
- "MedrxivClusteringP2P": 42.94,
- "MedrxivClusteringS2S": 41.44,
- "RedditClustering": 68.5,
- "RedditClusteringP2P": 64.86,
- "StackExchangeClustering": 74.16,
- "StackExchangeClusteringP2P": 45.1,
- "TwentyNewsgroupsClustering": 66.62
+ "AlloProfClusteringP2P": 62.06,
+ "AlloProfClusteringS2S": 48.05,
+ "ArxivClusteringP2P": 50.0,
+ "ArxivClusteringS2S": 43.99,
+ "BiorxivClusteringP2P": 41.6,
+ "BiorxivClusteringS2S": 37.75,
+ "BlurbsClusteringP2P": 40.78,
+ "BlurbsClusteringS2S": 20.59,
+ "GeoreviewClusteringP2P": 69.84,
+ "HALClusteringS2S": 27.07,
+ "MasakhaNEWSClusteringS2S (amh)": 42.05,
+ "MasakhaNEWSClusteringS2S (eng)": 55.35,
+ "MasakhaNEWSClusteringS2S (fra)": 41.79,
+ "MasakhaNEWSClusteringS2S (hau)": 22.56,
+ "MasakhaNEWSClusteringS2S (ibo)": 35.92,
+ "MasakhaNEWSClusteringS2S (lin)": 47.48,
+ "MasakhaNEWSClusteringS2S (lug)": 49.46,
+ "MasakhaNEWSClusteringS2S (orm)": 27.61,
+ "MasakhaNEWSClusteringS2S (pcm)": 48.95,
+ "MasakhaNEWSClusteringS2S (run)": 51.87,
+ "MasakhaNEWSClusteringS2S (sna)": 46.67,
+ "MasakhaNEWSClusteringS2S (som)": 38.65,
+ "MasakhaNEWSClusteringS2S (swa)": 17.78,
+ "MasakhaNEWSClusteringS2S (tir)": 42.67,
+ "MasakhaNEWSClusteringS2S (xho)": 22.1,
+ "MasakhaNEWSClusteringS2S (yor)": 29.25,
+ "MedrxivClusteringP2P": 36.13,
+ "MedrxivClusteringS2S": 35.82,
+ "RedditClustering": 63.37,
+ "RedditClusteringP2P": 62.66,
+ "RuSciBenchGRNTIClusteringP2P": 55.65,
+ "RuSciBenchOECDClusteringP2P": 47.23,
+ "StackExchangeClustering": 71.08,
+ "StackExchangeClusteringP2P": 36.02,
+ "TenKGnadClusteringS2S": 32.74,
+ "TwentyNewsgroupsClustering": 61.06
}
]
},
@@ -24416,13 +45931,55 @@
"max_ap": [
{
"Model": "voyage-large-2-instruct",
- "SprintDuplicateQuestions": 94.5,
- "TwitterSemEval2015": 86.32,
- "TwitterURLCorpus": 86.9
+ "CDSC-E": 68.05,
+ "FalseFriendsGermanEnglish": 52.72,
+ "OpusparcusPC (de)": 96.14,
+ "OpusparcusPC (en)": 98.3,
+ "OpusparcusPC (fi)": 87.37,
+ "OpusparcusPC (fr)": 92.98,
+ "OpusparcusPC (ru)": 89.62,
+ "OpusparcusPC (sv)": 92.63,
+ "PSC": 99.41,
+ "PawsXPairClassification (de)": 61.62,
+ "PawsXPairClassification (en)": 66.12,
+ "PawsXPairClassification (es)": 61.98,
+ "PawsXPairClassification (fr)": 64.41,
+ "PawsXPairClassification (ja)": 53.33,
+ "PawsXPairClassification (ko)": 53.16,
+ "PawsXPairClassification (zh)": 60.09,
+ "SICK-E-PL": 68.53,
+ "SprintDuplicateQuestions": 95.83,
+ "TERRa": 56.43,
+ "TwitterSemEval2015": 89.46,
+ "TwitterURLCorpus": 88.66
},
{
"Model": "voyage-large-2-instruct",
- "SprintDuplicateQuestions": 94.53,
+ "CDSC-E": 68.16,
+ "FalseFriendsGermanEnglish": 52.72,
+ "OpusparcusPC (de)": 96.15,
+ "OpusparcusPC (en)": 98.3,
+ "OpusparcusPC (fi)": 87.37,
+ "OpusparcusPC (fr)": 92.98,
+ "OpusparcusPC (ru)": 89.62,
+ "OpusparcusPC (sv)": 92.63,
+ "PSC": 99.41,
+ "PawsXPairClassification (de)": 61.69,
+ "PawsXPairClassification (en)": 66.12,
+ "PawsXPairClassification (es)": 61.99,
+ "PawsXPairClassification (fr)": 64.41,
+ "PawsXPairClassification (ja)": 53.42,
+ "PawsXPairClassification (ko)": 53.2,
+ "PawsXPairClassification (zh)": 60.09,
+ "SICK-E-PL": 68.53,
+ "SprintDuplicateQuestions": 95.83,
+ "TERRa": 56.43,
+ "TwitterSemEval2015": 89.46,
+ "TwitterURLCorpus": 88.66
+ },
+ {
+ "Model": "voyage-large-2-instruct",
+ "SprintDuplicateQuestions": 94.5,
"TwitterSemEval2015": 86.32,
"TwitterURLCorpus": 86.9
}
@@ -24432,8 +45989,9 @@
"map": [
{
"Model": "voyage-large-2-instruct",
- "AskUbuntuDupQuestions": 64.92,
- "MindSmallReranking": 30.97,
+ "AskUbuntuDupQuestions": 68.79,
+ "MindSmallReranking": 31.38,
+ "RuBQReranking": 70.59,
"SciDocsRR": 89.34,
"StackOverflowDupQuestions": 55.11
}
@@ -24443,7 +46001,8 @@
"ndcg_at_10": [
{
"Model": "voyage-large-2-instruct",
- "ArguAna": 64.06,
+ "AILAStatutes": 40.75,
+ "ArguAna": 58.61,
"BrightRetrieval (theoremqa_questions)": 26.06,
"BrightRetrieval (earth_science)": 25.09,
"BrightRetrieval (leetcode)": 30.6,
@@ -24460,16 +46019,21 @@
"ClimateFEVER": 32.65,
"DBPedia": 46.03,
"FEVER": 91.47,
- "FiQA2018": 59.76,
+ "FiQA2018": 54.84,
"HotpotQA": 70.86,
+ "LegalBenchCorporateLobbying": 95.32,
"MSMARCO": 40.6,
- "NFCorpus": 40.32,
+ "NFCorpus": 38.16,
"NQ": 65.92,
"QuoraRetrieval": 87.4,
"SCIDOCS": 24.32,
- "SciFact": 79.99,
+ "SciFact": 77.8,
+ "SciFact-PL": 68.06,
+ "SpartQA": 0.3,
"TRECCOVID": 85.07,
- "Touche2020": 39.16
+ "TempReasonL1": 1.1,
+ "Touche2020": 39.16,
+ "WinoGrande": 28.75
}
],
"recall_at_1": [
@@ -24500,6 +46064,80 @@
"STS17 (en-en)": 90.06,
"STS22 (en)": 66.32,
"STSBenchmark": 89.22
+ },
+ {
+ "Model": "voyage-large-2-instruct",
+ "CDSC-R": 90.05,
+ "GermanSTSBenchmark": 82.68,
+ "RUParaPhraserSTS": 69.05,
+ "RuSTSBenchmarkSTS": 81.77,
+ "SICK-R": 81.92,
+ "SICK-R-PL": 71.52,
+ "SICKFr": 77.05,
+ "STS13": 84.96,
+ "STS14": 82.67,
+ "STS15": 87.64,
+ "STS16": 84.5,
+ "STS17 (en-en)": 88.08,
+ "STS17 (fr-en)": 82.59,
+ "STS17 (en-tr)": 53.1,
+ "STS17 (ar-ar)": 68.95,
+ "STS17 (en-de)": 85.77,
+ "STS17 (it-en)": 82.75,
+ "STS17 (es-es)": 85.83,
+ "STS17 (nl-en)": 83.82,
+ "STS17 (ko-ko)": 77.63,
+ "STS17 (en-ar)": 30.33,
+ "STS17 (es-en)": 84.88,
+ "STSB": 78.75,
+ "STSBenchmark": 87.59,
+ "STSBenchmarkMultilingualSTS (it)": 82.77,
+ "STSBenchmarkMultilingualSTS (zh)": 80.35,
+ "STSBenchmarkMultilingualSTS (pl)": 79.15,
+ "STSBenchmarkMultilingualSTS (nl)": 82.06,
+ "STSBenchmarkMultilingualSTS (fr)": 83.68,
+ "STSBenchmarkMultilingualSTS (en)": 87.6,
+ "STSBenchmarkMultilingualSTS (pt)": 83.1,
+ "STSBenchmarkMultilingualSTS (ru)": 81.75,
+ "STSBenchmarkMultilingualSTS (es)": 84.57,
+ "STSBenchmarkMultilingualSTS (de)": 84.07
+ },
+ {
+ "Model": "voyage-large-2-instruct",
+ "CDSC-R": 90.05,
+ "GermanSTSBenchmark": 82.68,
+ "RUParaPhraserSTS": 69.05,
+ "RuSTSBenchmarkSTS": 81.77,
+ "SICK-R": 81.92,
+ "SICK-R-PL": 71.52,
+ "SICKFr": 77.05,
+ "STS13": 84.96,
+ "STS14": 82.67,
+ "STS15": 87.64,
+ "STS16": 84.5,
+ "STS17 (en-en)": 88.08,
+ "STS17 (fr-en)": 82.59,
+ "STS17 (en-tr)": 53.1,
+ "STS17 (ar-ar)": 68.95,
+ "STS17 (en-de)": 85.77,
+ "STS17 (it-en)": 82.75,
+ "STS17 (es-es)": 85.83,
+ "STS17 (nl-en)": 83.82,
+ "STS17 (ko-ko)": 77.63,
+ "STS17 (en-ar)": 30.33,
+ "STS17 (es-en)": 84.88,
+ "STSB": 78.75,
+ "STSBenchmark": 87.59,
+ "STSBenchmarkMultilingualSTS (it)": 82.77,
+ "STSBenchmarkMultilingualSTS (zh)": 80.35,
+ "STSBenchmarkMultilingualSTS (pl)": 79.15,
+ "STSBenchmarkMultilingualSTS (nl)": 82.06,
+ "STSBenchmarkMultilingualSTS (fr)": 83.68,
+ "STSBenchmarkMultilingualSTS (en)": 87.6,
+ "STSBenchmarkMultilingualSTS (pt)": 83.1,
+ "STSBenchmarkMultilingualSTS (ru)": 81.75,
+ "STSBenchmarkMultilingualSTS (es)": 84.57,
+ "STSBenchmarkMultilingualSTS (de)": 84.07
}
]
},
@@ -24508,11 +46146,25 @@
{
"Model": "voyage-large-2-instruct",
"SummEval": 30.84
+ },
+ {
+ "Model": "voyage-large-2-instruct",
+ "SummEval": 30.15
+ },
+ {
+ "Model": "voyage-large-2-instruct",
+ "SummEval": 30.15
}
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-large-2-instruct",
+ "CEDRClassification": 45.03,
+ "SensitiveTopicsClassification": 32.23
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": []
@@ -24867,18 +46519,277 @@
},
"voyageai__voyage-multilingual-2": {
"BitextMining": {
- "f1": []
+ "f1": [
+ {
+ "Model": "voyage-multilingual-2",
+ "BornholmBitextMining": 42.68,
+ "Tatoeba (bul-eng)": 88.43,
+ "Tatoeba (kaz-eng)": 64.49,
+ "Tatoeba (gle-eng)": 34.14,
+ "Tatoeba (uzb-eng)": 51.71,
+ "Tatoeba (por-eng)": 93.5,
+ "Tatoeba (gla-eng)": 23.63,
+ "Tatoeba (cha-eng)": 31.4,
+ "Tatoeba (kzj-eng)": 11.52,
+ "Tatoeba (ben-eng)": 80.05,
+ "Tatoeba (cat-eng)": 92.5,
+ "Tatoeba (heb-eng)": 83.4,
+ "Tatoeba (mkd-eng)": 79.84,
+ "Tatoeba (ara-eng)": 83.89,
+ "Tatoeba (slk-eng)": 91.15,
+ "Tatoeba (ces-eng)": 90.96,
+ "Tatoeba (swg-eng)": 70.94,
+ "Tatoeba (urd-eng)": 89.23,
+ "Tatoeba (mar-eng)": 62.12,
+ "Tatoeba (tha-eng)": 91.47,
+ "Tatoeba (hin-eng)": 93.82,
+ "Tatoeba (kur-eng)": 58.11,
+ "Tatoeba (lvs-eng)": 61.75,
+ "Tatoeba (jpn-eng)": 90.7,
+ "Tatoeba (tzl-eng)": 48.64,
+ "Tatoeba (war-eng)": 33.84,
+ "Tatoeba (epo-eng)": 90.17,
+ "Tatoeba (swh-eng)": 35.1,
+ "Tatoeba (hsb-eng)": 77.99,
+ "Tatoeba (cbk-eng)": 69.28,
+ "Tatoeba (kor-eng)": 87.75,
+ "Tatoeba (bel-eng)": 90.39,
+ "Tatoeba (kab-eng)": 4.22,
+ "Tatoeba (lit-eng)": 68.44,
+ "Tatoeba (nld-eng)": 93.56,
+ "Tatoeba (pol-eng)": 96.18,
+ "Tatoeba (ind-eng)": 91.69,
+ "Tatoeba (fin-eng)": 95.09,
+ "Tatoeba (cmn-eng)": 93.9,
+ "Tatoeba (est-eng)": 69.45,
+ "Tatoeba (fry-eng)": 74.56,
+ "Tatoeba (srp-eng)": 86.7,
+ "Tatoeba (khm-eng)": 8.52,
+ "Tatoeba (hye-eng)": 67.2,
+ "Tatoeba (nov-eng)": 72.82,
+ "Tatoeba (max-eng)": 56.28,
+ "Tatoeba (hun-eng)": 82.11,
+ "Tatoeba (tgl-eng)": 87.01,
+ "Tatoeba (tam-eng)": 53.72,
+ "Tatoeba (gsw-eng)": 57.86,
+ "Tatoeba (afr-eng)": 79.99,
+ "Tatoeba (pam-eng)": 14.03,
+ "Tatoeba (arz-eng)": 64.42,
+ "Tatoeba (kat-eng)": 56.25,
+ "Tatoeba (ina-eng)": 93.04,
+ "Tatoeba (amh-eng)": 4.07,
+ "Tatoeba (pes-eng)": 87.79,
+ "Tatoeba (eus-eng)": 67.7,
+ "Tatoeba (lfn-eng)": 70.49,
+ "Tatoeba (spa-eng)": 95.97,
+ "Tatoeba (mon-eng)": 88.26,
+ "Tatoeba (orv-eng)": 46.59,
+ "Tatoeba (wuu-eng)": 82.69,
+ "Tatoeba (swe-eng)": 93.29,
+ "Tatoeba (csb-eng)": 62.45,
+ "Tatoeba (fao-eng)": 76.43,
+ "Tatoeba (fra-eng)": 91.89,
+ "Tatoeba (xho-eng)": 13.94,
+ "Tatoeba (lat-eng)": 64.13,
+ "Tatoeba (cym-eng)": 46.28,
+ "Tatoeba (glg-eng)": 92.13,
+ "Tatoeba (ron-eng)": 92.57,
+ "Tatoeba (ido-eng)": 85.36,
+ "Tatoeba (bre-eng)": 20.24,
+ "Tatoeba (tel-eng)": 80.98,
+ "Tatoeba (isl-eng)": 92.75,
+ "Tatoeba (sqi-eng)": 71.18,
+ "Tatoeba (hrv-eng)": 91.01,
+ "Tatoeba (yid-eng)": 39.23,
+ "Tatoeba (tuk-eng)": 35.8,
+ "Tatoeba (cor-eng)": 8.09,
+ "Tatoeba (aze-eng)": 75.22,
+ "Tatoeba (vie-eng)": 94.83,
+ "Tatoeba (yue-eng)": 75.31,
+ "Tatoeba (oci-eng)": 65.69,
+ "Tatoeba (mhr-eng)": 14.52,
+ "Tatoeba (ceb-eng)": 29.27,
+ "Tatoeba (ast-eng)": 83.43,
+ "Tatoeba (jav-eng)": 44.22,
+ "Tatoeba (nds-eng)": 83.38,
+ "Tatoeba (tat-eng)": 56.92,
+ "Tatoeba (ang-eng)": 72.81,
+ "Tatoeba (uig-eng)": 39.35,
+ "Tatoeba (ukr-eng)": 92.07,
+ "Tatoeba (bos-eng)": 88.32,
+ "Tatoeba (deu-eng)": 97.2,
+ "Tatoeba (ber-eng)": 6.76,
+ "Tatoeba (ita-eng)": 92.12,
+ "Tatoeba (dan-eng)": 94.61,
+ "Tatoeba (awa-eng)": 62.93,
+ "Tatoeba (pms-eng)": 73.49,
+ "Tatoeba (dtp-eng)": 7.91,
+ "Tatoeba (nob-eng)": 97.2,
+ "Tatoeba (slv-eng)": 82.31,
+ "Tatoeba (ile-eng)": 82.73,
+ "Tatoeba (mal-eng)": 40.18,
+ "Tatoeba (dsb-eng)": 64.52,
+ "Tatoeba (ell-eng)": 73.34,
+ "Tatoeba (rus-eng)": 92.13,
+ "Tatoeba (zsm-eng)": 89.51,
+ "Tatoeba (tur-eng)": 81.52,
+ "Tatoeba (arq-eng)": 32.02,
+ "Tatoeba (nno-eng)": 92.31
+ }
+ ]
},
"Classification": {
"accuracy": [
{
"Model": "voyage-multilingual-2",
- "AmazonReviewsClassification (fr)": 43.36,
- "MTOPDomainClassification (fr)": 90.33,
- "MTOPIntentClassification (fr)": 60.52,
+ "AllegroReviews": 45.14,
+ "AmazonCounterfactualClassification (en-ext)": 72.42,
+ "AmazonCounterfactualClassification (en)": 72.73,
+ "AmazonCounterfactualClassification (de)": 63.02,
+ "AmazonCounterfactualClassification (ja)": 78.14,
+ "AmazonPolarityClassification": 87.78,
+ "AmazonReviewsClassification (fr)": 42.16,
+ "AmazonReviewsClassification (en)": 42.88,
+ "AmazonReviewsClassification (de)": 44.39,
+ "AmazonReviewsClassification (es)": 42.17,
+ "AmazonReviewsClassification (ja)": 42.63,
+ "AmazonReviewsClassification (zh)": 37.67,
+ "AngryTweetsClassification": 55.9,
+ "Banking77Classification": 78.61,
+ "CBD": 70.77,
+ "DanishPoliticalCommentsClassification": 41.0,
+ "EmotionClassification": 45.83,
+ "GeoreviewClassification": 45.45,
+ "HeadlineClassification": 79.3,
+ "ImdbClassification": 82.8,
+ "InappropriatenessClassification": 60.33,
+ "KinopoiskClassification": 57.99,
+ "LccSentimentClassification": 59.93,
+ "MTOPDomainClassification (fr)": 88.63,
+ "MTOPDomainClassification (en)": 93.04,
+ "MTOPDomainClassification (de)": 90.41,
+ "MTOPDomainClassification (es)": 90.95,
+ "MTOPDomainClassification (hi)": 88.56,
+ "MTOPDomainClassification (th)": 86.67,
+ "MTOPIntentClassification (fr)": 65.15,
+ "MTOPIntentClassification (en)": 68.46,
+ "MTOPIntentClassification (de)": 69.92,
+ "MTOPIntentClassification (es)": 69.54,
+ "MTOPIntentClassification (hi)": 62.33,
+ "MTOPIntentClassification (th)": 65.79,
"MasakhaNEWSClassification (fra)": 74.81,
- "MassiveIntentClassification (fr)": 68.06,
- "MassiveScenarioClassification (fr)": 74.29
+ "MassiveIntentClassification (fr)": 66.07,
+ "MassiveIntentClassification (lv)": 52.06,
+ "MassiveIntentClassification (hy)": 51.68,
+ "MassiveIntentClassification (he)": 63.97,
+ "MassiveIntentClassification (my)": 36.56,
+ "MassiveIntentClassification (ta)": 46.96,
+ "MassiveIntentClassification (ms)": 61.06,
+ "MassiveIntentClassification (mn)": 61.2,
+ "MassiveIntentClassification (pl)": 68.03,
+ "MassiveIntentClassification (ml)": 45.01,
+ "MassiveIntentClassification (ar)": 54.82,
+ "MassiveIntentClassification (da)": 67.76,
+ "MassiveIntentClassification (hu)": 61.11,
+ "MassiveIntentClassification (nl)": 66.54,
+ "MassiveIntentClassification (fa)": 65.38,
+ "MassiveIntentClassification (bn)": 61.35,
+ "MassiveIntentClassification (cy)": 46.17,
+ "MassiveIntentClassification (el)": 55.96,
+ "MassiveIntentClassification (ja)": 69.47,
+ "MassiveIntentClassification (sl)": 60.62,
+ "MassiveIntentClassification (az)": 57.64,
+ "MassiveIntentClassification (th)": 60.68,
+ "MassiveIntentClassification (it)": 67.51,
+ "MassiveIntentClassification (sw)": 48.45,
+ "MassiveIntentClassification (es)": 65.48,
+ "MassiveIntentClassification (tl)": 63.0,
+ "MassiveIntentClassification (fi)": 67.71,
+ "MassiveIntentClassification (zh-CN)": 67.46,
+ "MassiveIntentClassification (kn)": 46.24,
+ "MassiveIntentClassification (nb)": 67.58,
+ "MassiveIntentClassification (vi)": 63.81,
+ "MassiveIntentClassification (af)": 59.88,
+ "MassiveIntentClassification (jv)": 50.18,
+ "MassiveIntentClassification (ko)": 63.68,
+ "MassiveIntentClassification (ro)": 61.28,
+ "MassiveIntentClassification (ur)": 59.66,
+ "MassiveIntentClassification (pt)": 66.59,
+ "MassiveIntentClassification (is)": 63.95,
+ "MassiveIntentClassification (sv)": 68.56,
+ "MassiveIntentClassification (km)": 37.94,
+ "MassiveIntentClassification (ka)": 44.8,
+ "MassiveIntentClassification (de)": 66.71,
+ "MassiveIntentClassification (id)": 65.2,
+ "MassiveIntentClassification (zh-TW)": 62.52,
+ "MassiveIntentClassification (te)": 56.09,
+ "MassiveIntentClassification (am)": 26.5,
+ "MassiveIntentClassification (ru)": 67.89,
+ "MassiveIntentClassification (en)": 72.45,
+ "MassiveIntentClassification (tr)": 60.12,
+ "MassiveIntentClassification (sq)": 51.73,
+ "MassiveIntentClassification (hi)": 62.01,
+ "MassiveScenarioClassification (fr)": 72.06,
+ "MassiveScenarioClassification (hi)": 68.28,
+ "MassiveScenarioClassification (de)": 74.13,
+ "MassiveScenarioClassification (en)": 76.56,
+ "MassiveScenarioClassification (is)": 71.1,
+ "MassiveScenarioClassification (sw)": 57.39,
+ "MassiveScenarioClassification (cy)": 52.27,
+ "MassiveScenarioClassification (vi)": 71.04,
+ "MassiveScenarioClassification (my)": 41.78,
+ "MassiveScenarioClassification (sq)": 60.35,
+ "MassiveScenarioClassification (af)": 68.22,
+ "MassiveScenarioClassification (kn)": 55.63,
+ "MassiveScenarioClassification (fi)": 72.59,
+ "MassiveScenarioClassification (am)": 33.05,
+ "MassiveScenarioClassification (ko)": 70.92,
+ "MassiveScenarioClassification (sl)": 66.35,
+ "MassiveScenarioClassification (az)": 63.07,
+ "MassiveScenarioClassification (hy)": 58.24,
+ "MassiveScenarioClassification (es)": 70.93,
+ "MassiveScenarioClassification (bn)": 66.94,
+ "MassiveScenarioClassification (ka)": 50.52,
+ "MassiveScenarioClassification (jv)": 58.54,
+ "MassiveScenarioClassification (mn)": 65.72,
+ "MassiveScenarioClassification (nl)": 72.98,
+ "MassiveScenarioClassification (ur)": 67.57,
+ "MassiveScenarioClassification (ar)": 62.96,
+ "MassiveScenarioClassification (id)": 71.94,
+ "MassiveScenarioClassification (th)": 68.72,
+ "MassiveScenarioClassification (ro)": 67.79,
+ "MassiveScenarioClassification (ml)": 52.6,
+ "MassiveScenarioClassification (ru)": 73.86,
+ "MassiveScenarioClassification (el)": 62.86,
+ "MassiveScenarioClassification (sv)": 75.08,
+ "MassiveScenarioClassification (zh-CN)": 75.08,
+ "MassiveScenarioClassification (te)": 64.09,
+ "MassiveScenarioClassification (ms)": 69.26,
+ "MassiveScenarioClassification (it)": 72.94,
+ "MassiveScenarioClassification (nb)": 73.8,
+ "MassiveScenarioClassification (lv)": 59.79,
+ "MassiveScenarioClassification (ta)": 54.6,
+ "MassiveScenarioClassification (he)": 69.6,
+ "MassiveScenarioClassification (km)": 41.74,
+ "MassiveScenarioClassification (hu)": 68.02,
+ "MassiveScenarioClassification (fa)": 71.88,
+ "MassiveScenarioClassification (pl)": 72.57,
+ "MassiveScenarioClassification (ja)": 76.15,
+ "MassiveScenarioClassification (pt)": 71.42,
+ "MassiveScenarioClassification (da)": 73.23,
+ "MassiveScenarioClassification (tr)": 65.08,
+ "MassiveScenarioClassification (tl)": 68.64,
+ "MassiveScenarioClassification (zh-TW)": 70.91,
+ "NoRecClassification": 53.35,
+ "NordicLangClassification": 58.41,
+ "PAC": 70.59,
+ "PolEmo2.0-IN": 72.76,
+ "PolEmo2.0-OUT": 52.51,
+ "RuReviewsClassification": 63.12,
+ "RuSciBenchGRNTIClassification": 60.63,
+ "RuSciBenchOECDClassification": 46.05,
+ "ToxicConversationsClassification": 61.88,
+ "TweetSentimentExtractionClassification": 57.51
}
]
},
@@ -24886,13 +46797,44 @@
"v_measure": [
{
"Model": "voyage-multilingual-2",
- "AlloProfClusteringP2P": 65.37,
- "AlloProfClusteringS2S": 47.03,
- "HALClusteringS2S": 27.67,
+ "AlloProfClusteringP2P": 63.81,
+ "AlloProfClusteringS2S": 52.38,
+ "ArxivClusteringP2P": 47.84,
+ "ArxivClusteringS2S": 40.56,
+ "BiorxivClusteringS2S": 32.54,
+ "BlurbsClusteringP2P": 43.27,
+ "BlurbsClusteringS2S": 17.78,
+ "GeoreviewClusteringP2P": 71.94,
+ "HALClusteringS2S": 26.01,
"MLSUMClusteringP2P (fr)": 45.99,
"MLSUMClusteringS2S (fr)": 45.57,
"MasakhaNEWSClusteringP2P (fra)": 44.53,
- "MasakhaNEWSClusteringS2S (fra)": 49.8
+ "MasakhaNEWSClusteringS2S (fra)": 63.87,
+ "MasakhaNEWSClusteringS2S (amh)": 45.35,
+ "MasakhaNEWSClusteringS2S (eng)": 49.35,
+ "MasakhaNEWSClusteringS2S (hau)": 28.42,
+ "MasakhaNEWSClusteringS2S (ibo)": 35.48,
+ "MasakhaNEWSClusteringS2S (lin)": 56.02,
+ "MasakhaNEWSClusteringS2S (lug)": 43.66,
+ "MasakhaNEWSClusteringS2S (orm)": 34.32,
+ "MasakhaNEWSClusteringS2S (pcm)": 75.25,
+ "MasakhaNEWSClusteringS2S (run)": 46.39,
+ "MasakhaNEWSClusteringS2S (sna)": 49.4,
+ "MasakhaNEWSClusteringS2S (som)": 22.73,
+ "MasakhaNEWSClusteringS2S (swa)": 16.77,
+ "MasakhaNEWSClusteringS2S (tir)": 43.68,
+ "MasakhaNEWSClusteringS2S (xho)": 32.98,
+ "MasakhaNEWSClusteringS2S (yor)": 30.5,
+ "MedrxivClusteringS2S": 30.83,
+ "RedditClustering": 49.55,
+ "RedditClusteringP2P": 62.69,
+ "RuSciBenchGRNTIClusteringP2P": 50.93,
+ "RuSciBenchOECDClusteringP2P": 44.34,
+ "StackExchangeClustering": 64.13,
+ "StackExchangeClusteringP2P": 34.38,
+ "TenKGnadClusteringP2P": 43.87,
+ "TenKGnadClusteringS2S": 37.37,
+ "TwentyNewsgroupsClustering": 47.12
}
]
},
@@ -24900,13 +46842,56 @@
"max_ap": [
{
"Model": "voyage-multilingual-2",
- "OpusparcusPC (fr)": 93.68,
- "PawsXPairClassification (fr)": 63.64
+ "CDSC-E": 66.76,
+ "FalseFriendsGermanEnglish": 53.6,
+ "OpusparcusPC (de)": 96.95,
+ "OpusparcusPC (en)": 98.72,
+ "OpusparcusPC (fi)": 94.77,
+ "OpusparcusPC (fr)": 94.18,
+ "OpusparcusPC (ru)": 90.47,
+ "OpusparcusPC (sv)": 95.13,
+ "PSC": 99.63,
+ "PawsXPairClassification (de)": 59.28,
+ "PawsXPairClassification (en)": 61.13,
+ "PawsXPairClassification (es)": 59.53,
+ "PawsXPairClassification (fr)": 62.26,
+ "PawsXPairClassification (ja)": 52.18,
+ "PawsXPairClassification (ko)": 52.61,
+ "PawsXPairClassification (zh)": 57.97,
+ "SICK-E-PL": 70.8,
+ "SprintDuplicateQuestions": 81.08,
+ "TERRa": 51.48,
+ "TwitterSemEval2015": 68.86,
+ "TwitterURLCorpus": 84.37
+ },
+ {
+ "Model": "voyage-multilingual-2",
+ "CDSC-E": 66.76,
+ "FalseFriendsGermanEnglish": 53.62,
+ "OpusparcusPC (fr)": 94.18,
+ "OpusparcusPC (de)": 96.97,
+ "OpusparcusPC (en)": 98.72,
+ "OpusparcusPC (fi)": 94.78,
+ "OpusparcusPC (ru)": 90.47,
+ "OpusparcusPC (sv)": 95.14,
+ "PSC": 99.63,
+ "PawsXPairClassification (fr)": 62.28,
+ "PawsXPairClassification (de)": 59.28,
+ "PawsXPairClassification (en)": 61.13,
+ "PawsXPairClassification (es)": 59.57,
+ "PawsXPairClassification (ja)": 52.22,
+ "PawsXPairClassification (ko)": 52.61,
+ "PawsXPairClassification (zh)": 58.19,
+ "SICK-E-PL": 70.81,
+ "SprintDuplicateQuestions": 81.08,
+ "TERRa": 51.48,
+ "TwitterSemEval2015": 68.86,
+ "TwitterURLCorpus": 84.37
},
{
"Model": "voyage-multilingual-2",
"OpusparcusPC (fr)": 93.68,
- "PawsXPairClassification (fr)": 63.71
+ "PawsXPairClassification (fr)": 63.64
}
]
},
@@ -24915,6 +46900,8 @@
{
"Model": "voyage-multilingual-2",
"AlloprofReranking": 74.78,
+ "AskUbuntuDupQuestions": 61.76,
+ "MindSmallReranking": 33.05,
"SyntecReranking": 90.4
}
]
@@ -24923,20 +46910,111 @@
"ndcg_at_10": [
{
"Model": "voyage-multilingual-2",
+ "AILAStatutes": 45.0,
"AlloprofRetrieval": 58.27,
+ "ArguAna": 61.82,
"BSARDRetrieval": 5.14,
+ "CmedqaRetrieval": 34.4,
"LEMBNarrativeQARetrieval": 64.69,
"LEMBQMSumRetrieval": 51.49,
"LEMBSummScreenFDRetrieval": 99.11,
"LEMBWikimQARetrieval": 87.49,
+ "LegalBenchCorporateLobbying": 95.92,
"MintakaRetrieval (fr)": 49.19,
+ "NFCorpus": 39.28,
+ "SCIDOCS": 22.45,
+ "SciFact": 75.98,
+ "SciFact-PL": 69.27,
+ "SpartQA": 10.19,
"SyntecRetrieval": 87.28,
+ "TRECCOVID": 80.11,
+ "TRECCOVID-PL": 69.86,
+ "TempReasonL1": 1.37,
+ "WinoGrande": 39.09,
"XPQARetrieval (fr)": 72.92
}
]
},
"STS": {
"cosine_spearman": [
+ {
+ "Model": "voyage-multilingual-2",
+ "BIOSSES": 87.11,
+ "CDSC-R": 87.48,
+ "GermanSTSBenchmark": 74.08,
+ "RUParaPhraserSTS": 67.61,
+ "RuSTSBenchmarkSTS": 71.51,
+ "SICK-R": 78.97,
+ "SICK-R-PL": 70.42,
+ "SICKFr": 72.87,
+ "STS12": 67.3,
+ "STS13": 80.09,
+ "STS14": 71.98,
+ "STS15": 78.07,
+ "STS16": 77.36,
+ "STS17 (es-en)": 76.92,
+ "STS17 (it-en)": 81.58,
+ "STS17 (fr-en)": 78.76,
+ "STS17 (es-es)": 84.05,
+ "STS17 (ar-ar)": 72.42,
+ "STS17 (en-tr)": 62.87,
+ "STS17 (en-en)": 86.52,
+ "STS17 (en-de)": 78.95,
+ "STS17 (en-ar)": 73.01,
+ "STS17 (nl-en)": 81.95,
+ "STS17 (ko-ko)": 70.66,
+ "STSB": 68.25,
+ "STSBenchmark": 75.79,
+ "STSBenchmarkMultilingualSTS (pl)": 71.96,
+ "STSBenchmarkMultilingualSTS (fr)": 74.54,
+ "STSBenchmarkMultilingualSTS (it)": 72.94,
+ "STSBenchmarkMultilingualSTS (de)": 74.96,
+ "STSBenchmarkMultilingualSTS (nl)": 73.78,
+ "STSBenchmarkMultilingualSTS (ru)": 71.56,
+ "STSBenchmarkMultilingualSTS (pt)": 73.57,
+ "STSBenchmarkMultilingualSTS (zh)": 69.94,
+ "STSBenchmarkMultilingualSTS (en)": 75.83,
+ "STSBenchmarkMultilingualSTS (es)": 74.35
+ },
+ {
+ "Model": "voyage-multilingual-2",
+ "BIOSSES": 87.11,
+ "CDSC-R": 87.48,
+ "GermanSTSBenchmark": 74.08,
+ "RUParaPhraserSTS": 67.61,
+ "RuSTSBenchmarkSTS": 71.51,
+ "SICK-R": 78.97,
+ "SICK-R-PL": 70.42,
+ "SICKFr": 72.87,
+ "STS12": 67.3,
+ "STS13": 80.09,
+ "STS14": 71.98,
+ "STS15": 78.07,
+ "STS16": 77.36,
+ "STS17 (es-en)": 76.92,
+ "STS17 (it-en)": 81.58,
+ "STS17 (fr-en)": 78.76,
+ "STS17 (es-es)": 84.05,
+ "STS17 (ar-ar)": 72.42,
+ "STS17 (en-tr)": 62.87,
+ "STS17 (en-en)": 86.52,
+ "STS17 (en-de)": 78.95,
+ "STS17 (en-ar)": 73.01,
+ "STS17 (nl-en)": 81.95,
+ "STS17 (ko-ko)": 70.66,
+ "STSB": 68.25,
+ "STSBenchmark": 75.79,
+ "STSBenchmarkMultilingualSTS (pl)": 71.96,
+ "STSBenchmarkMultilingualSTS (fr)": 74.54,
+ "STSBenchmarkMultilingualSTS (it)": 72.94,
+ "STSBenchmarkMultilingualSTS (de)": 74.96,
+ "STSBenchmarkMultilingualSTS (nl)": 73.78,
+ "STSBenchmarkMultilingualSTS (ru)": 71.56,
+ "STSBenchmarkMultilingualSTS (pt)": 73.57,
+ "STSBenchmarkMultilingualSTS (zh)": 69.94,
+ "STSBenchmarkMultilingualSTS (en)": 75.83,
+ "STSBenchmarkMultilingualSTS (es)": 74.35
+ },
{
"Model": "voyage-multilingual-2",
"SICKFr": 74.9,
@@ -24947,6 +47025,14 @@
},
"Summarization": {
"cosine_spearman": [
+ {
+ "Model": "voyage-multilingual-2",
+ "SummEval": 28.44
+ },
+ {
+ "Model": "voyage-multilingual-2",
+ "SummEval": 28.44
+ },
{
"Model": "voyage-multilingual-2",
"SummEvalFr": 29.96
@@ -24954,7 +47040,13 @@
]
},
"MultilabelClassification": {
- "accuracy": []
+ "accuracy": [
+ {
+ "Model": "voyage-multilingual-2",
+ "CEDRClassification": 39.15,
+ "SensitiveTopicsClassification": 26.74
+ }
+ ]
},
"InstructionRetrieval": {
"p-MRR": []