diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json" --- "a/EXTERNAL_MODEL_RESULTS.json" +++ "b/EXTERNAL_MODEL_RESULTS.json" @@ -1,29 +1,294 @@ { "Alibaba-NLP__gte-Qwen1.5-7B-instruct": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "BornholmBitextMining": 40.62, + "Tatoeba (bos-eng)": 85.59, + "Tatoeba (ile-eng)": 74.66, + "Tatoeba (ukr-eng)": 88.42, + "Tatoeba (heb-eng)": 57.62, + "Tatoeba (eus-eng)": 12.59, + "Tatoeba (lvs-eng)": 68.21, + "Tatoeba (hrv-eng)": 88.6, + "Tatoeba (mhr-eng)": 8.29, + "Tatoeba (lfn-eng)": 57.45, + "Tatoeba (uig-eng)": 3.12, + "Tatoeba (zsm-eng)": 92.46, + "Tatoeba (tur-eng)": 83.9, + "Tatoeba (max-eng)": 41.02, + "Tatoeba (spa-eng)": 98.42, + "Tatoeba (pes-eng)": 83.46, + "Tatoeba (wuu-eng)": 90.55, + "Tatoeba (lat-eng)": 67.43, + "Tatoeba (hin-eng)": 88.22, + "Tatoeba (tel-eng)": 25.05, + "Tatoeba (ceb-eng)": 19.11, + "Tatoeba (dsb-eng)": 35.02, + "Tatoeba (amh-eng)": 0.6, + "Tatoeba (slk-eng)": 86.07, + "Tatoeba (gle-eng)": 24.33, + "Tatoeba (xho-eng)": 12.02, + "Tatoeba (ben-eng)": 65.12, + "Tatoeba (dan-eng)": 92.04, + "Tatoeba (cor-eng)": 4.07, + "Tatoeba (fao-eng)": 44.17, + "Tatoeba (pol-eng)": 95.56, + "Tatoeba (kor-eng)": 87.91, + "Tatoeba (pms-eng)": 41.88, + "Tatoeba (urd-eng)": 51.26, + "Tatoeba (glg-eng)": 87.85, + "Tatoeba (tha-eng)": 89.15, + "Tatoeba (tgl-eng)": 75.03, + "Tatoeba (nov-eng)": 61.22, + "Tatoeba (epo-eng)": 72.91, + "Tatoeba (nob-eng)": 94.97, + "Tatoeba (ina-eng)": 92.02, + "Tatoeba (ita-eng)": 92.62, + "Tatoeba (cmn-eng)": 96.35, + "Tatoeba (fra-eng)": 95.19, + "Tatoeba (fin-eng)": 85.6, + "Tatoeba (kur-eng)": 10.97, + "Tatoeba (gsw-eng)": 50.97, + "Tatoeba (awa-eng)": 41.64, + "Tatoeba (nds-eng)": 61.72, + "Tatoeba (csb-eng)": 25.18, + "Tatoeba (mar-eng)": 64.38, + "Tatoeba (kat-eng)": 39.63, + "Tatoeba (nno-eng)": 79.71, + "Tatoeba (ang-eng)": 62.42, + "Tatoeba (bre-eng)": 5.79, + "Tatoeba (ido-eng)": 62.61, + "Tatoeba (afr-eng)": 84.39, + "Tatoeba (bel-eng)": 57.68, + "Tatoeba (nld-eng)": 95.88, + "Tatoeba (ber-eng)": 5.17, + "Tatoeba (est-eng)": 56.72, + "Tatoeba (hye-eng)": 5.03, + "Tatoeba (mkd-eng)": 77.95, + "Tatoeba (ell-eng)": 85.69, + "Tatoeba (tat-eng)": 9.8, + "Tatoeba (ast-eng)": 80.38, + "Tatoeba (ind-eng)": 91.29, + "Tatoeba (por-eng)": 93.62, + "Tatoeba (arq-eng)": 22.07, + "Tatoeba (fry-eng)": 50.29, + "Tatoeba (tam-eng)": 52.97, + "Tatoeba (oci-eng)": 45.81, + "Tatoeba (sqi-eng)": 33.81, + "Tatoeba (ron-eng)": 93.03, + "Tatoeba (orv-eng)": 25.13, + "Tatoeba (cbk-eng)": 65.47, + "Tatoeba (cym-eng)": 20.45, + "Tatoeba (khm-eng)": 24.03, + "Tatoeba (cat-eng)": 88.4, + "Tatoeba (yid-eng)": 2.21, + "Tatoeba (slv-eng)": 80.4, + "Tatoeba (arz-eng)": 62.12, + "Tatoeba (kaz-eng)": 39.43, + "Tatoeba (aze-eng)": 63.53, + "Tatoeba (swg-eng)": 52.96, + "Tatoeba (bul-eng)": 85.33, + "Tatoeba (hsb-eng)": 46.82, + "Tatoeba (kab-eng)": 1.43, + "Tatoeba (jpn-eng)": 93.32, + "Tatoeba (hun-eng)": 81.38, + "Tatoeba (lit-eng)": 74.97, + "Tatoeba (rus-eng)": 92.95, + "Tatoeba (yue-eng)": 91.94, + "Tatoeba (ces-eng)": 91.46, + "Tatoeba (kzj-eng)": 5.69, + "Tatoeba (tzl-eng)": 40.33, + "Tatoeba (pam-eng)": 6.6, + "Tatoeba (vie-eng)": 94.92, + "Tatoeba (isl-eng)": 49.03, + "Tatoeba (ara-eng)": 85.19, + "Tatoeba (deu-eng)": 99.08, + "Tatoeba (swh-eng)": 38.89, + "Tatoeba (gla-eng)": 11.86, + "Tatoeba (war-eng)": 17.44, + "Tatoeba (dtp-eng)": 4.93, + "Tatoeba (swe-eng)": 92.5, + "Tatoeba (mal-eng)": 70.18, + "Tatoeba (uzb-eng)": 15.49, + "Tatoeba (srp-eng)": 85.04, + "Tatoeba (jav-eng)": 21.06, + "Tatoeba (mon-eng)": 33.92, + "Tatoeba (cha-eng)": 22.36, + "Tatoeba (tuk-eng)": 18.04 + } + ] }, "Classification": { "accuracy": [ { "Model": "gte-Qwen1.5-7B-instruct", + "AllegroReviews": 52.72, + "AmazonCounterfactualClassification (en-ext)": 80.55, "AmazonCounterfactualClassification (en)": 83.16, + "AmazonCounterfactualClassification (de)": 74.94, + "AmazonCounterfactualClassification (ja)": 81.6, "AmazonPolarityClassification": 96.7, "AmazonReviewsClassification (en)": 62.17, + "AmazonReviewsClassification (de)": 58.19, + "AmazonReviewsClassification (es)": 56.05, + "AmazonReviewsClassification (fr)": 54.59, + "AmazonReviewsClassification (ja)": 54.39, "AmazonReviewsClassification (zh)": 52.95, + "AngryTweetsClassification": 66.74, "Banking77Classification": 81.68, + "CBD": 70.61, + "DanishPoliticalCommentsClassification": 46.89, "EmotionClassification": 54.53, + "GeoreviewClassification": 59.31, + "HeadlineClassification": 82.32, "IFlyTek": 53.77, "ImdbClassification": 95.58, + "InappropriatenessClassification": 73.26, "JDReview": 88.2, + "KinopoiskClassification": 67.17, + "LccSentimentClassification": 69.4, "MTOPDomainClassification (en)": 95.75, + "MTOPDomainClassification (de)": 91.56, + "MTOPDomainClassification (es)": 91.96, + "MTOPDomainClassification (fr)": 89.66, + "MTOPDomainClassification (hi)": 86.31, + "MTOPDomainClassification (th)": 80.72, "MTOPIntentClassification (en)": 84.26, - "MassiveIntentClassification (zh-CN)": 76.25, + "MTOPIntentClassification (de)": 79.15, + "MTOPIntentClassification (es)": 81.06, + "MTOPIntentClassification (fr)": 77.69, + "MTOPIntentClassification (hi)": 67.88, + "MTOPIntentClassification (th)": 67.46, + "MasakhaNEWSClassification (amh)": 31.41, + "MasakhaNEWSClassification (eng)": 82.71, + "MasakhaNEWSClassification (fra)": 81.78, + "MasakhaNEWSClassification (hau)": 66.8, + "MasakhaNEWSClassification (ibo)": 68.62, + "MasakhaNEWSClassification (lin)": 80.74, + "MasakhaNEWSClassification (lug)": 65.34, + "MasakhaNEWSClassification (orm)": 62.31, + "MasakhaNEWSClassification (pcm)": 94.03, + "MasakhaNEWSClassification (run)": 79.53, + "MasakhaNEWSClassification (sna)": 79.76, + "MasakhaNEWSClassification (som)": 62.42, + "MasakhaNEWSClassification (swa)": 77.18, + "MasakhaNEWSClassification (tir)": 27.61, + "MasakhaNEWSClassification (xho)": 78.01, + "MasakhaNEWSClassification (yor)": 79.66, + "MassiveIntentClassification (ru)": 73.15, + "MassiveIntentClassification (fi)": 63.41, + "MassiveIntentClassification (sl)": 61.7, + "MassiveIntentClassification (lv)": 52.65, + "MassiveIntentClassification (te)": 33.68, + "MassiveIntentClassification (ka)": 38.5, + "MassiveIntentClassification (ko)": 67.71, + "MassiveIntentClassification (sq)": 39.98, + "MassiveIntentClassification (my)": 34.12, + "MassiveIntentClassification (jv)": 41.73, + "MassiveIntentClassification (zh-TW)": 70.87, + "MassiveIntentClassification (bn)": 60.69, + "MassiveIntentClassification (ta)": 39.49, + "MassiveIntentClassification (fa)": 66.15, + "MassiveIntentClassification (ar)": 57.09, + "MassiveIntentClassification (pl)": 69.2, + "MassiveIntentClassification (mn)": 38.69, + "MassiveIntentClassification (ms)": 64.78, + "MassiveIntentClassification (nl)": 72.74, + "MassiveIntentClassification (el)": 60.04, + "MassiveIntentClassification (fr)": 72.88, + "MassiveIntentClassification (km)": 33.4, + "MassiveIntentClassification (sv)": 70.44, + "MassiveIntentClassification (kn)": 29.46, + "MassiveIntentClassification (hi)": 65.42, + "MassiveIntentClassification (ml)": 49.24, + "MassiveIntentClassification (da)": 68.09, + "MassiveIntentClassification (ro)": 64.01, + "MassiveIntentClassification (it)": 72.46, + "MassiveIntentClassification (th)": 60.01, + "MassiveIntentClassification (nb)": 64.16, + "MassiveIntentClassification (sw)": 42.72, + "MassiveIntentClassification (ur)": 44.88, + "MassiveIntentClassification (tl)": 60.08, + "MassiveIntentClassification (tr)": 64.52, + "MassiveIntentClassification (cy)": 31.04, + "MassiveIntentClassification (is)": 37.19, + "MassiveIntentClassification (es)": 71.68, "MassiveIntentClassification (en)": 78.47, + "MassiveIntentClassification (hy)": 22.14, + "MassiveIntentClassification (he)": 45.05, + "MassiveIntentClassification (hu)": 60.33, + "MassiveIntentClassification (id)": 68.91, + "MassiveIntentClassification (pt)": 73.42, + "MassiveIntentClassification (ja)": 73.31, + "MassiveIntentClassification (vi)": 67.44, + "MassiveIntentClassification (af)": 62.62, + "MassiveIntentClassification (de)": 70.88, + "MassiveIntentClassification (zh-CN)": 76.25, + "MassiveIntentClassification (az)": 56.78, + "MassiveIntentClassification (am)": 19.46, + "MassiveScenarioClassification (ru)": 75.46, + "MassiveScenarioClassification (ml)": 56.2, + "MassiveScenarioClassification (km)": 46.63, + "MassiveScenarioClassification (te)": 42.86, + "MassiveScenarioClassification (zh-TW)": 74.5, + "MassiveScenarioClassification (hy)": 34.0, + "MassiveScenarioClassification (kn)": 41.11, + "MassiveScenarioClassification (tr)": 68.72, + "MassiveScenarioClassification (hu)": 67.01, + "MassiveScenarioClassification (lv)": 61.15, + "MassiveScenarioClassification (ja)": 74.96, + "MassiveScenarioClassification (pt)": 73.99, + "MassiveScenarioClassification (he)": 57.06, + "MassiveScenarioClassification (vi)": 71.88, + "MassiveScenarioClassification (af)": 69.27, "MassiveScenarioClassification (en)": 78.19, + "MassiveScenarioClassification (sl)": 66.4, + "MassiveScenarioClassification (fa)": 70.72, + "MassiveScenarioClassification (is)": 52.15, + "MassiveScenarioClassification (ro)": 68.82, + "MassiveScenarioClassification (pl)": 72.18, + "MassiveScenarioClassification (fi)": 66.32, + "MassiveScenarioClassification (es)": 74.89, + "MassiveScenarioClassification (az)": 61.39, + "MassiveScenarioClassification (nl)": 74.39, + "MassiveScenarioClassification (jv)": 52.93, + "MassiveScenarioClassification (de)": 75.34, + "MassiveScenarioClassification (fr)": 74.64, + "MassiveScenarioClassification (sq)": 52.41, + "MassiveScenarioClassification (hi)": 69.71, "MassiveScenarioClassification (zh-CN)": 77.26, + "MassiveScenarioClassification (mn)": 47.0, + "MassiveScenarioClassification (id)": 72.61, + "MassiveScenarioClassification (ms)": 71.61, + "MassiveScenarioClassification (sv)": 74.26, + "MassiveScenarioClassification (th)": 69.09, + "MassiveScenarioClassification (ta)": 50.26, + "MassiveScenarioClassification (nb)": 68.59, + "MassiveScenarioClassification (da)": 72.84, + "MassiveScenarioClassification (cy)": 44.52, + "MassiveScenarioClassification (am)": 27.95, + "MassiveScenarioClassification (ka)": 50.49, + "MassiveScenarioClassification (sw)": 54.13, + "MassiveScenarioClassification (ko)": 72.65, + "MassiveScenarioClassification (it)": 75.7, + "MassiveScenarioClassification (my)": 41.89, + "MassiveScenarioClassification (tl)": 66.9, + "MassiveScenarioClassification (ar)": 66.1, + "MassiveScenarioClassification (el)": 67.87, + "MassiveScenarioClassification (ur)": 54.9, + "MassiveScenarioClassification (bn)": 67.18, "MultilingualSentiment": 77.42, + "NoRecClassification": 55.81, + "NordicLangClassification": 72.94, "OnlineShopping": 94.48, + "PAC": 66.38, + "PolEmo2.0-IN": 75.48, + "PolEmo2.0-OUT": 47.04, + "RuReviewsClassification": 72.89, + "RuSciBenchGRNTIClassification": 67.35, + "RuSciBenchOECDClassification": 54.38, "TNews": 51.24, "ToxicConversationsClassification": 78.75, "TweetSentimentExtractionClassification": 66.0, @@ -35,18 +300,68 @@ "v_measure": [ { "Model": "gte-Qwen1.5-7B-instruct", + "AlloProfClusteringP2P": 70.99, + "AlloProfClusteringS2S": 58.27, "ArxivClusteringP2P": 56.4, "ArxivClusteringS2S": 51.45, "BiorxivClusteringP2P": 49.01, "BiorxivClusteringS2S": 45.06, + "BlurbsClusteringP2P": 45.28, + "BlurbsClusteringS2S": 20.1, "CLSClusteringP2P": 47.21, "CLSClusteringS2S": 45.79, + "GeoreviewClusteringP2P": 74.9, + "HALClusteringS2S": 28.35, + "MLSUMClusteringP2P (de)": 50.2, + "MLSUMClusteringP2P (fr)": 48.3, + "MLSUMClusteringP2P (ru)": 58.75, + "MLSUMClusteringP2P (es)": 50.48, + "MLSUMClusteringS2S (de)": 50.14, + "MLSUMClusteringS2S (fr)": 46.91, + "MLSUMClusteringS2S (ru)": 58.21, + "MLSUMClusteringS2S (es)": 48.13, + "MasakhaNEWSClusteringP2P (amh)": 42.33, + "MasakhaNEWSClusteringP2P (eng)": 63.81, + "MasakhaNEWSClusteringP2P (fra)": 68.07, + "MasakhaNEWSClusteringP2P (hau)": 45.04, + "MasakhaNEWSClusteringP2P (ibo)": 57.78, + "MasakhaNEWSClusteringP2P (lin)": 73.24, + "MasakhaNEWSClusteringP2P (lug)": 50.8, + "MasakhaNEWSClusteringP2P (orm)": 28.08, + "MasakhaNEWSClusteringP2P (pcm)": 93.82, + "MasakhaNEWSClusteringP2P (run)": 66.18, + "MasakhaNEWSClusteringP2P (sna)": 77.87, + "MasakhaNEWSClusteringP2P (som)": 41.27, + "MasakhaNEWSClusteringP2P (swa)": 42.45, + "MasakhaNEWSClusteringP2P (tir)": 41.57, + "MasakhaNEWSClusteringP2P (xho)": 50.68, + "MasakhaNEWSClusteringP2P (yor)": 55.25, + "MasakhaNEWSClusteringS2S (amh)": 41.39, + "MasakhaNEWSClusteringS2S (eng)": 59.16, + "MasakhaNEWSClusteringS2S (fra)": 70.81, + "MasakhaNEWSClusteringS2S (hau)": 17.93, + "MasakhaNEWSClusteringS2S (ibo)": 45.7, + "MasakhaNEWSClusteringS2S (lin)": 75.44, + "MasakhaNEWSClusteringS2S (lug)": 42.41, + "MasakhaNEWSClusteringS2S (orm)": 24.36, + "MasakhaNEWSClusteringS2S (pcm)": 81.96, + "MasakhaNEWSClusteringS2S (run)": 58.82, + "MasakhaNEWSClusteringS2S (sna)": 42.33, + "MasakhaNEWSClusteringS2S (som)": 31.87, + "MasakhaNEWSClusteringS2S (swa)": 35.13, + "MasakhaNEWSClusteringS2S (tir)": 42.06, + "MasakhaNEWSClusteringS2S (xho)": 27.07, + "MasakhaNEWSClusteringS2S (yor)": 49.06, "MedrxivClusteringP2P": 44.37, "MedrxivClusteringS2S": 42.0, "RedditClustering": 73.37, "RedditClusteringP2P": 72.51, + "RuSciBenchGRNTIClusteringP2P": 62.53, + "RuSciBenchOECDClusteringP2P": 53.89, "StackExchangeClustering": 79.07, "StackExchangeClusteringP2P": 49.57, + "TenKGnadClusteringP2P": 53.6, + "TenKGnadClusteringS2S": 39.3, "ThuNewsClusteringP2P": 87.43, "ThuNewsClusteringS2S": 87.9, "TwentyNewsgroupsClustering": 51.31 @@ -57,17 +372,56 @@ "max_ap": [ { "Model": "gte-Qwen1.5-7B-instruct", - "Cmnli": 91.81, - "Ocnli": 85.22, - "SprintDuplicateQuestions": 95.99, - "TwitterSemEval2015": 79.36, - "TwitterURLCorpus": 86.79 + "CDSC-E": 74.9, + "FalseFriendsGermanEnglish": 52.61, + "OpusparcusPC (de)": 97.61, + "OpusparcusPC (en)": 99.1, + "OpusparcusPC (fi)": 93.32, + "OpusparcusPC (fr)": 95.29, + "OpusparcusPC (ru)": 89.72, + "OpusparcusPC (sv)": 95.65, + "PSC": 98.05, + "PawsXPairClassification (de)": 74.44, + "PawsXPairClassification (en)": 84.33, + "PawsXPairClassification (es)": 76.88, + "PawsXPairClassification (fr)": 78.51, + "PawsXPairClassification (ja)": 66.5, + "PawsXPairClassification (ko)": 64.0, + "PawsXPairClassification (zh)": 75.39, + "SICK-E-PL": 79.81, + "TERRa": 69.44 }, { "Model": "gte-Qwen1.5-7B-instruct", + "CDSC-E": 74.94, "Cmnli": 91.85, + "FalseFriendsGermanEnglish": 52.61, "Ocnli": 85.28, + "OpusparcusPC (de)": 97.62, + "OpusparcusPC (en)": 99.1, + "OpusparcusPC (fi)": 93.32, + "OpusparcusPC (fr)": 95.29, + "OpusparcusPC (ru)": 89.76, + "OpusparcusPC (sv)": 95.68, + "PSC": 98.05, + "PawsXPairClassification (de)": 74.49, + "PawsXPairClassification (en)": 84.37, + "PawsXPairClassification (es)": 76.94, + "PawsXPairClassification (fr)": 78.51, + "PawsXPairClassification (ja)": 66.53, + "PawsXPairClassification (ko)": 64.07, + "PawsXPairClassification (zh)": 75.39, + "SICK-E-PL": 79.9, "SprintDuplicateQuestions": 96.07, + "TERRa": 69.5, + "TwitterSemEval2015": 79.36, + "TwitterURLCorpus": 86.79 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "Cmnli": 91.81, + "Ocnli": 85.22, + "SprintDuplicateQuestions": 95.99, "TwitterSemEval2015": 79.36, "TwitterURLCorpus": 86.79 } @@ -77,13 +431,37 @@ "map": [ { "Model": "gte-Qwen1.5-7B-instruct", + "AlloprofReranking": 75.56, "AskUbuntuDupQuestions": 66.0, "CMedQAv1": 86.37, "CMedQAv2": 87.41, "MindSmallReranking": 32.71, + "RuBQReranking": 70.28, "SciDocsRR": 87.89, "StackOverflowDupQuestions": 53.93, + "SyntecReranking": 86.98, "T2Reranking": 68.11 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "MIRACLReranking (ru)": 55.34, + "MIRACLReranking (ar)": 46.94, + "MIRACLReranking (bn)": 52.66, + "MIRACLReranking (de)": 49.61, + "MIRACLReranking (en)": 57.4, + "MIRACLReranking (es)": 55.89, + "MIRACLReranking (fa)": 33.31, + "MIRACLReranking (fi)": 60.99, + "MIRACLReranking (fr)": 47.51, + "MIRACLReranking (hi)": 46.49, + "MIRACLReranking (id)": 48.16, + "MIRACLReranking (ja)": 47.3, + "MIRACLReranking (ko)": 41.37, + "MIRACLReranking (sw)": 45.44, + "MIRACLReranking (te)": 50.46, + "MIRACLReranking (th)": 56.03, + "MIRACLReranking (yo)": 58.02, + "MIRACLReranking (zh)": 46.1 } ] }, @@ -91,7 +469,14 @@ "ndcg_at_10": [ { "Model": "gte-Qwen1.5-7B-instruct", + "AILACasedocs": 34.07, + "AILAStatutes": 31.46, + "ARCChallenge": 26.34, + "AlloprofRetrieval": 51.11, + "AlphaNLI": 33.84, + "AppsRetrieval": 30.19, "ArguAna": 62.65, + "BSARDRetrieval": 23.58, "BrightRetrieval (stackoverflow)": 19.85, "BrightRetrieval (earth_science)": 36.22, "BrightRetrieval (leetcode)": 25.46, @@ -107,25 +492,121 @@ "CQADupstackRetrieval": 40.64, "ClimateFEVER": 44.0, "CmedqaRetrieval": 43.47, + "CodeFeedbackMT": 52.15, + "CodeFeedbackST": 82.57, + "CodeSearchNetCCRetrieval (python)": 70.64, + "CodeSearchNetCCRetrieval (javascript)": 63.28, + "CodeSearchNetCCRetrieval (go)": 59.7, + "CodeSearchNetCCRetrieval (ruby)": 61.95, + "CodeSearchNetCCRetrieval (java)": 60.42, + "CodeSearchNetCCRetrieval (php)": 49.08, + "CodeSearchNetRetrieval (python)": 91.17, + "CodeSearchNetRetrieval (javascript)": 78.19, + "CodeSearchNetRetrieval (go)": 91.95, + "CodeSearchNetRetrieval (ruby)": 84.58, + "CodeSearchNetRetrieval (java)": 85.39, + "CodeSearchNetRetrieval (php)": 81.35, + "CodeTransOceanContest": 77.71, + "CodeTransOceanDL": 29.84, + "CosQA": 31.74, "CovidRetrieval": 80.87, "DBPedia": 48.04, "DuRetrieval": 86.01, "EcomRetrieval": 66.46, "FEVER": 93.35, "FiQA2018": 55.31, + "GerDaLIR": 6.51, + "GerDaLIRSmall": 14.81, + "GermanQuAD-Retrieval": 91.52, + "HellaSwag": 31.92, "HotpotQA": 72.25, + "LEMBNarrativeQARetrieval": 35.21, + "LEMBQMSumRetrieval": 27.03, + "LEMBSummScreenFDRetrieval": 70.43, + "LEMBWikimQARetrieval": 50.97, + "LeCaRDv2": 62.12, + "LegalBenchConsumerContractsQA": 76.06, + "LegalBenchCorporateLobbying": 94.51, + "LegalQuAD": 37.99, + "LegalSummarization": 61.94, "MMarcoRetrieval": 73.83, "MSMARCO": 41.68, "MedicalRetrieval": 61.33, + "MintakaRetrieval (ar)": 17.77, + "MintakaRetrieval (de)": 40.73, + "MintakaRetrieval (es)": 39.93, + "MintakaRetrieval (fr)": 42.44, + "MintakaRetrieval (hi)": 18.89, + "MintakaRetrieval (it)": 42.28, + "MintakaRetrieval (ja)": 24.74, + "MintakaRetrieval (pt)": 38.98, "NFCorpus": 38.25, "NQ": 61.79, + "PIQA": 40.98, + "Quail": 18.73, "QuoraRetrieval": 89.61, + "RARbCode": 87.67, + "RARbMath": 74.56, + "RiaNewsRetrieval": 61.1, + "RuBQRetrieval": 65.17, "SCIDOCS": 27.69, + "SIQA": 5.21, "SciFact": 75.31, + "SciFact-PL": 57.38, + "SpartQA": 16.95, + "StackOverflowQA": 91.02, + "SyntecRetrieval": 86.47, + "SyntheticText2SQL": 55.6, "T2Retrieval": 83.58, "TRECCOVID": 72.72, + "TRECCOVID-PL": 69.14, + "TempReasonL1": 1.84, + "TempReasonL2Fact": 44.81, + "TempReasonL2Pure": 5.34, + "TempReasonL3Fact": 33.49, + "TempReasonL3Pure": 9.01, "Touche2020": 20.3, - "VideoRetrieval": 69.41 + "VideoRetrieval": 69.41, + "WinoGrande": 64.8, + "XMarket (de)": 23.15, + "XMarket (en)": 36.25, + "XMarket (es)": 22.93, + "XPQARetrieval (ara-ara)": 36.85, + "XPQARetrieval (eng-ara)": 25.84, + "XPQARetrieval (ara-eng)": 37.12, + "XPQARetrieval (deu-deu)": 71.07, + "XPQARetrieval (eng-deu)": 41.29, + "XPQARetrieval (deu-eng)": 66.7, + "XPQARetrieval (spa-spa)": 55.62, + "XPQARetrieval (eng-spa)": 30.69, + "XPQARetrieval (spa-eng)": 55.47, + "XPQARetrieval (fra-fra)": 61.01, + "XPQARetrieval (eng-fra)": 40.46, + "XPQARetrieval (fra-eng)": 58.94, + "XPQARetrieval (hin-hin)": 64.15, + "XPQARetrieval (eng-hin)": 18.76, + "XPQARetrieval (hin-eng)": 50.65, + "XPQARetrieval (ita-ita)": 65.84, + "XPQARetrieval (eng-ita)": 35.49, + "XPQARetrieval (ita-eng)": 61.3, + "XPQARetrieval (jpn-jpn)": 66.64, + "XPQARetrieval (eng-jpn)": 38.88, + "XPQARetrieval (jpn-eng)": 64.6, + "XPQARetrieval (kor-kor)": 30.69, + "XPQARetrieval (eng-kor)": 27.28, + "XPQARetrieval (kor-eng)": 28.51, + "XPQARetrieval (pol-pol)": 39.11, + "XPQARetrieval (eng-pol)": 23.3, + "XPQARetrieval (pol-eng)": 37.17, + "XPQARetrieval (por-por)": 41.84, + "XPQARetrieval (eng-por)": 19.63, + "XPQARetrieval (por-eng)": 43.75, + "XPQARetrieval (tam-tam)": 23.55, + "XPQARetrieval (eng-tam)": 5.04, + "XPQARetrieval (tam-eng)": 13.74, + "XPQARetrieval (cmn-cmn)": 64.98, + "XPQARetrieval (eng-cmn)": 36.15, + "XPQARetrieval (cmn-eng)": 60.31 } ], "recall_at_1": [ @@ -164,6 +645,80 @@ "STS22 (zh)": 67.36, "STSB": 81.37, "STSBenchmark": 87.35 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "CDSC-R": 85.32, + "GermanSTSBenchmark": 81.1, + "RUParaPhraserSTS": 73.65, + "RuSTSBenchmarkSTS": 80.03, + "SICK-R-PL": 73.6, + "SICKFr": 76.53, + "STS22 (ru)": 60.37, + "STS22 (pl)": 40.2, + "STS22 (fr)": 81.74, + "STS22 (de-pl)": 53.46, + "STS22 (it)": 77.46, + "STS22 (de-fr)": 67.08, + "STS22 (fr-pl)": 84.52, + "STS22 (es-en)": 77.16, + "STS22 (es)": 67.35, + "STS22 (tr)": 65.5, + "STS22 (de)": 55.27, + "STS22 (zh)": 67.9, + "STS22 (de-en)": 53.99, + "STS22 (zh-en)": 69.8, + "STS22 (es-it)": 74.55, + "STS22 (ar)": 58.84, + "STS22 (en)": 67.1, + "STS22 (pl-en)": 76.53, + "STSBenchmarkMultilingualSTS (en)": 87.64, + "STSBenchmarkMultilingualSTS (zh)": 81.33, + "STSBenchmarkMultilingualSTS (es)": 83.68, + "STSBenchmarkMultilingualSTS (pl)": 77.17, + "STSBenchmarkMultilingualSTS (fr)": 82.69, + "STSBenchmarkMultilingualSTS (it)": 81.59, + "STSBenchmarkMultilingualSTS (nl)": 78.86, + "STSBenchmarkMultilingualSTS (de)": 82.29, + "STSBenchmarkMultilingualSTS (ru)": 80.48, + "STSBenchmarkMultilingualSTS (pt)": 81.98 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "CDSC-R": 85.32, + "GermanSTSBenchmark": 81.1, + "RUParaPhraserSTS": 73.65, + "RuSTSBenchmarkSTS": 80.03, + "SICK-R-PL": 73.6, + "SICKFr": 76.53, + "STS22 (ru)": 60.37, + "STS22 (pl)": 40.2, + "STS22 (fr)": 81.74, + "STS22 (de-pl)": 53.46, + "STS22 (it)": 77.46, + "STS22 (de-fr)": 67.08, + "STS22 (fr-pl)": 84.52, + "STS22 (es-en)": 77.16, + "STS22 (es)": 67.35, + "STS22 (tr)": 65.5, + "STS22 (de)": 55.27, + "STS22 (zh)": 67.9, + "STS22 (de-en)": 53.99, + "STS22 (zh-en)": 69.8, + "STS22 (es-it)": 74.55, + "STS22 (ar)": 58.84, + "STS22 (en)": 67.1, + "STS22 (pl-en)": 76.53, + "STSBenchmarkMultilingualSTS (en)": 87.64, + "STSBenchmarkMultilingualSTS (zh)": 81.33, + "STSBenchmarkMultilingualSTS (es)": 83.68, + "STSBenchmarkMultilingualSTS (pl)": 77.17, + "STSBenchmarkMultilingualSTS (fr)": 82.69, + "STSBenchmarkMultilingualSTS (it)": 81.59, + "STSBenchmarkMultilingualSTS (nl)": 78.86, + "STSBenchmarkMultilingualSTS (de)": 82.29, + "STSBenchmarkMultilingualSTS (ru)": 80.48, + "STSBenchmarkMultilingualSTS (pt)": 81.98 } ] }, @@ -172,36 +727,460 @@ { "Model": "gte-Qwen1.5-7B-instruct", "SummEval": 31.46 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "SummEvalFr": 30.04 + }, + { + "Model": "gte-Qwen1.5-7B-instruct", + "SummEvalFr": 30.04 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "CEDRClassification": 54.77, + "SensitiveTopicsClassification": 35.62 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "Core17InstructionRetrieval": 8.37, + "News21InstructionRetrieval": 1.82, + "Robust04InstructionRetrieval": 5.88 + } + ] } }, "Alibaba-NLP__gte-Qwen2-7B-instruct": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "gte-Qwen2-7B-instruct", + "BornholmBitextMining": 50.16, + "Tatoeba (dan-eng)": 94.29, + "Tatoeba (ara-eng)": 92.36, + "Tatoeba (vie-eng)": 96.7, + "Tatoeba (pam-eng)": 16.08, + "Tatoeba (lat-eng)": 81.91, + "Tatoeba (jpn-eng)": 94.72, + "Tatoeba (ina-eng)": 95.2, + "Tatoeba (kab-eng)": 5.1, + "Tatoeba (hun-eng)": 86.36, + "Tatoeba (bel-eng)": 91.13, + "Tatoeba (fao-eng)": 73.0, + "Tatoeba (glg-eng)": 93.65, + "Tatoeba (swg-eng)": 79.91, + "Tatoeba (yue-eng)": 92.0, + "Tatoeba (ell-eng)": 92.78, + "Tatoeba (kur-eng)": 39.98, + "Tatoeba (spa-eng)": 98.68, + "Tatoeba (fin-eng)": 92.1, + "Tatoeba (amh-eng)": 25.63, + "Tatoeba (pes-eng)": 93.35, + "Tatoeba (eus-eng)": 37.91, + "Tatoeba (mar-eng)": 81.77, + "Tatoeba (awa-eng)": 68.02, + "Tatoeba (zsm-eng)": 94.15, + "Tatoeba (mon-eng)": 69.42, + "Tatoeba (arq-eng)": 55.37, + "Tatoeba (tur-eng)": 95.67, + "Tatoeba (arz-eng)": 76.51, + "Tatoeba (tat-eng)": 44.61, + "Tatoeba (lfn-eng)": 78.37, + "Tatoeba (jav-eng)": 38.98, + "Tatoeba (hrv-eng)": 94.05, + "Tatoeba (csb-eng)": 65.8, + "Tatoeba (orv-eng)": 62.42, + "Tatoeba (swe-eng)": 93.82, + "Tatoeba (kat-eng)": 78.69, + "Tatoeba (hin-eng)": 95.8, + "Tatoeba (tgl-eng)": 91.13, + "Tatoeba (oci-eng)": 64.81, + "Tatoeba (pms-eng)": 73.33, + "Tatoeba (mkd-eng)": 86.9, + "Tatoeba (dsb-eng)": 68.89, + "Tatoeba (mhr-eng)": 17.17, + "Tatoeba (ceb-eng)": 39.57, + "Tatoeba (cor-eng)": 9.1, + "Tatoeba (gle-eng)": 65.0, + "Tatoeba (sqi-eng)": 61.21, + "Tatoeba (tzl-eng)": 50.98, + "Tatoeba (kaz-eng)": 70.45, + "Tatoeba (swh-eng)": 52.93, + "Tatoeba (tel-eng)": 55.11, + "Tatoeba (kor-eng)": 91.41, + "Tatoeba (gla-eng)": 48.68, + "Tatoeba (mal-eng)": 87.48, + "Tatoeba (yid-eng)": 58.32, + "Tatoeba (ile-eng)": 85.28, + "Tatoeba (dtp-eng)": 11.63, + "Tatoeba (ang-eng)": 80.97, + "Tatoeba (ukr-eng)": 92.95, + "Tatoeba (ron-eng)": 94.18, + "Tatoeba (ita-eng)": 94.42, + "Tatoeba (epo-eng)": 92.14, + "Tatoeba (deu-eng)": 99.07, + "Tatoeba (cha-eng)": 45.44, + "Tatoeba (est-eng)": 81.14, + "Tatoeba (nld-eng)": 97.0, + "Tatoeba (kzj-eng)": 13.24, + "Tatoeba (max-eng)": 65.98, + "Tatoeba (khm-eng)": 58.6, + "Tatoeba (bul-eng)": 92.65, + "Tatoeba (nds-eng)": 82.73, + "Tatoeba (xho-eng)": 31.28, + "Tatoeba (hye-eng)": 48.49, + "Tatoeba (bos-eng)": 92.23, + "Tatoeba (bre-eng)": 14.41, + "Tatoeba (lvs-eng)": 87.21, + "Tatoeba (por-eng)": 94.28, + "Tatoeba (nno-eng)": 90.4, + "Tatoeba (ast-eng)": 85.83, + "Tatoeba (cmn-eng)": 96.15, + "Tatoeba (cym-eng)": 65.3, + "Tatoeba (ben-eng)": 86.77, + "Tatoeba (tam-eng)": 72.01, + "Tatoeba (fry-eng)": 70.23, + "Tatoeba (slk-eng)": 92.82, + "Tatoeba (war-eng)": 38.19, + "Tatoeba (nob-eng)": 97.7, + "Tatoeba (pol-eng)": 97.65, + "Tatoeba (afr-eng)": 91.77, + "Tatoeba (rus-eng)": 93.87, + "Tatoeba (cbk-eng)": 82.18, + "Tatoeba (lit-eng)": 89.38, + "Tatoeba (ind-eng)": 94.17, + "Tatoeba (tha-eng)": 97.2, + "Tatoeba (ido-eng)": 83.79, + "Tatoeba (nov-eng)": 72.49, + "Tatoeba (wuu-eng)": 92.68, + "Tatoeba (srp-eng)": 90.92, + "Tatoeba (isl-eng)": 79.5, + "Tatoeba (cat-eng)": 92.59, + "Tatoeba (fra-eng)": 95.16, + "Tatoeba (ber-eng)": 9.88, + "Tatoeba (ces-eng)": 94.92, + "Tatoeba (urd-eng)": 89.88, + "Tatoeba (slv-eng)": 86.48, + "Tatoeba (tuk-eng)": 47.64, + "Tatoeba (uzb-eng)": 58.58, + "Tatoeba (hsb-eng)": 79.21, + "Tatoeba (heb-eng)": 88.88, + "Tatoeba (gsw-eng)": 55.2, + "Tatoeba (uig-eng)": 71.71, + "Tatoeba (aze-eng)": 88.29 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "gte-Qwen2-7B-instruct", + "AmazonCounterfactualClassification (en-ext)": 93.04, + "AmazonCounterfactualClassification (en)": 91.33, + "AmazonCounterfactualClassification (de)": 75.62, + "AmazonCounterfactualClassification (ja)": 83.59, + "AmazonReviewsClassification (en)": 62.11, + "AmazonReviewsClassification (de)": 59.37, + "AmazonReviewsClassification (es)": 55.85, + "AmazonReviewsClassification (fr)": 55.54, + "AmazonReviewsClassification (ja)": 57.01, + "AmazonReviewsClassification (zh)": 53.55, + "AngryTweetsClassification": 64.4, + "DanishPoliticalCommentsClassification": 48.46, + "GeoreviewClassification": 60.01, + "HeadlineClassification": 76.38, + "InappropriatenessClassification": 76.41, + "KinopoiskClassification": 71.04, + "LccSentimentClassification": 76.87, + "MTOPDomainClassification (en)": 99.04, + "MTOPDomainClassification (de)": 97.17, + "MTOPDomainClassification (es)": 97.77, + "MTOPDomainClassification (fr)": 96.69, + "MTOPDomainClassification (hi)": 96.25, + "MTOPDomainClassification (th)": 93.25, + "MTOPIntentClassification (en)": 91.78, + "MTOPIntentClassification (de)": 88.18, + "MTOPIntentClassification (es)": 91.14, + "MTOPIntentClassification (fr)": 87.47, + "MTOPIntentClassification (hi)": 84.08, + "MTOPIntentClassification (th)": 84.48, + "MasakhaNEWSClassification (amh)": 72.71, + "MasakhaNEWSClassification (eng)": 81.78, + "MasakhaNEWSClassification (fra)": 80.78, + "MasakhaNEWSClassification (hau)": 79.45, + "MasakhaNEWSClassification (ibo)": 75.87, + "MasakhaNEWSClassification (lin)": 83.6, + "MasakhaNEWSClassification (lug)": 74.75, + "MasakhaNEWSClassification (orm)": 83.17, + "MasakhaNEWSClassification (pcm)": 94.95, + "MasakhaNEWSClassification (run)": 83.11, + "MasakhaNEWSClassification (sna)": 88.64, + "MasakhaNEWSClassification (som)": 68.06, + "MasakhaNEWSClassification (swa)": 77.94, + "MasakhaNEWSClassification (tir)": 55.51, + "MasakhaNEWSClassification (xho)": 83.94, + "MasakhaNEWSClassification (yor)": 86.06, + "MassiveIntentClassification (ar)": 71.16, + "MassiveIntentClassification (ja)": 81.92, + "MassiveIntentClassification (zh-TW)": 76.96, + "MassiveIntentClassification (bn)": 75.95, + "MassiveIntentClassification (hy)": 42.18, + "MassiveIntentClassification (lv)": 69.49, + "MassiveIntentClassification (sw)": 52.05, + "MassiveIntentClassification (el)": 75.35, + "MassiveIntentClassification (pt)": 81.94, + "MassiveIntentClassification (tr)": 77.81, + "MassiveIntentClassification (ru)": 82.1, + "MassiveIntentClassification (my)": 56.7, + "MassiveIntentClassification (it)": 81.58, + "MassiveIntentClassification (nb)": 78.71, + "MassiveIntentClassification (kn)": 52.18, + "MassiveIntentClassification (af)": 76.01, + "MassiveIntentClassification (vi)": 79.89, + "MassiveIntentClassification (te)": 57.4, + "MassiveIntentClassification (az)": 72.24, + "MassiveIntentClassification (fa)": 77.86, + "MassiveIntentClassification (th)": 74.09, + "MassiveIntentClassification (sq)": 50.92, + "MassiveIntentClassification (fr)": 81.9, + "MassiveIntentClassification (es)": 81.29, + "MassiveIntentClassification (da)": 78.15, + "MassiveIntentClassification (sv)": 79.13, + "MassiveIntentClassification (jv)": 58.24, + "MassiveIntentClassification (ta)": 52.74, + "MassiveIntentClassification (cy)": 49.37, + "MassiveIntentClassification (tl)": 73.98, + "MassiveIntentClassification (ur)": 72.4, + "MassiveIntentClassification (en)": 85.43, + "MassiveIntentClassification (ro)": 76.41, + "MassiveIntentClassification (hi)": 78.0, + "MassiveIntentClassification (id)": 80.11, + "MassiveIntentClassification (he)": 74.39, + "MassiveIntentClassification (km)": 49.36, + "MassiveIntentClassification (nl)": 81.59, + "MassiveIntentClassification (sl)": 75.0, + "MassiveIntentClassification (ms)": 76.4, + "MassiveIntentClassification (ko)": 79.24, + "MassiveIntentClassification (ml)": 64.98, + "MassiveIntentClassification (pl)": 80.89, + "MassiveIntentClassification (fi)": 74.58, + "MassiveIntentClassification (zh-CN)": 81.09, + "MassiveIntentClassification (hu)": 72.1, + "MassiveIntentClassification (is)": 56.6, + "MassiveIntentClassification (am)": 32.29, + "MassiveIntentClassification (mn)": 53.96, + "MassiveIntentClassification (de)": 80.49, + "MassiveIntentClassification (ka)": 54.94, + "MassiveScenarioClassification (ru)": 87.43, + "MassiveScenarioClassification (is)": 64.96, + "MassiveScenarioClassification (en)": 89.7, + "MassiveScenarioClassification (fi)": 79.5, + "MassiveScenarioClassification (sw)": 61.58, + "MassiveScenarioClassification (af)": 83.1, + "MassiveScenarioClassification (hu)": 78.12, + "MassiveScenarioClassification (ms)": 82.99, + "MassiveScenarioClassification (ka)": 63.42, + "MassiveScenarioClassification (kn)": 64.91, + "MassiveScenarioClassification (id)": 86.1, + "MassiveScenarioClassification (it)": 87.3, + "MassiveScenarioClassification (de)": 86.82, + "MassiveScenarioClassification (km)": 56.31, + "MassiveScenarioClassification (pl)": 85.54, + "MassiveScenarioClassification (vi)": 84.95, + "MassiveScenarioClassification (pt)": 86.3, + "MassiveScenarioClassification (th)": 81.19, + "MassiveScenarioClassification (zh-CN)": 85.74, + "MassiveScenarioClassification (sv)": 85.01, + "MassiveScenarioClassification (es)": 87.02, + "MassiveScenarioClassification (ta)": 57.91, + "MassiveScenarioClassification (sq)": 57.85, + "MassiveScenarioClassification (ur)": 77.18, + "MassiveScenarioClassification (he)": 80.12, + "MassiveScenarioClassification (cy)": 57.57, + "MassiveScenarioClassification (jv)": 69.44, + "MassiveScenarioClassification (nb)": 84.15, + "MassiveScenarioClassification (am)": 41.09, + "MassiveScenarioClassification (hy)": 49.28, + "MassiveScenarioClassification (zh-TW)": 83.56, + "MassiveScenarioClassification (az)": 76.8, + "MassiveScenarioClassification (tl)": 79.26, + "MassiveScenarioClassification (te)": 68.13, + "MassiveScenarioClassification (hi)": 83.21, + "MassiveScenarioClassification (ja)": 87.44, + "MassiveScenarioClassification (fa)": 82.51, + "MassiveScenarioClassification (ml)": 71.49, + "MassiveScenarioClassification (sl)": 79.98, + "MassiveScenarioClassification (ar)": 77.71, + "MassiveScenarioClassification (ro)": 81.88, + "MassiveScenarioClassification (lv)": 76.39, + "MassiveScenarioClassification (bn)": 80.4, + "MassiveScenarioClassification (ko)": 86.28, + "MassiveScenarioClassification (tr)": 83.32, + "MassiveScenarioClassification (el)": 81.23, + "MassiveScenarioClassification (fr)": 86.64, + "MassiveScenarioClassification (da)": 84.33, + "MassiveScenarioClassification (my)": 60.44, + "MassiveScenarioClassification (mn)": 59.65, + "MassiveScenarioClassification (nl)": 86.68, + "NoRecClassification": 65.99, + "NordicLangClassification": 73.23, + "PAC": 66.32, + "PolEmo2.0-OUT": 54.49, + "RuReviewsClassification": 74.85, + "RuSciBenchGRNTIClassification": 71.01, + "RuSciBenchOECDClassification": 57.68, + "ToxicConversationsClassification": 85.74 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "gte-Qwen2-7B-instruct", + "BlurbsClusteringP2P": 47.0, + "BlurbsClusteringS2S": 21.74, + "GeoreviewClusteringP2P": 78.32, + "MLSUMClusteringP2P (de)": 49.66, + "MLSUMClusteringP2P (fr)": 49.71, + "MLSUMClusteringP2P (ru)": 58.58, + "MLSUMClusteringP2P (es)": 50.43, + "MLSUMClusteringS2S (de)": 47.75, + "MLSUMClusteringS2S (fr)": 48.91, + "MLSUMClusteringS2S (ru)": 55.79, + "MLSUMClusteringS2S (es)": 48.93, + "MasakhaNEWSClusteringP2P (amh)": 51.99, + "MasakhaNEWSClusteringP2P (eng)": 63.97, + "MasakhaNEWSClusteringP2P (fra)": 63.67, + "MasakhaNEWSClusteringP2P (hau)": 57.93, + "MasakhaNEWSClusteringP2P (ibo)": 66.36, + "MasakhaNEWSClusteringP2P (lin)": 84.66, + "MasakhaNEWSClusteringP2P (lug)": 50.98, + "MasakhaNEWSClusteringP2P (orm)": 56.51, + "MasakhaNEWSClusteringP2P (pcm)": 89.16, + "MasakhaNEWSClusteringP2P (run)": 63.84, + "MasakhaNEWSClusteringP2P (sna)": 80.08, + "MasakhaNEWSClusteringP2P (som)": 43.45, + "MasakhaNEWSClusteringP2P (swa)": 45.64, + "MasakhaNEWSClusteringP2P (tir)": 58.86, + "MasakhaNEWSClusteringP2P (xho)": 54.15, + "MasakhaNEWSClusteringP2P (yor)": 68.84, + "MasakhaNEWSClusteringS2S (amh)": 48.74, + "MasakhaNEWSClusteringS2S (eng)": 57.22, + "MasakhaNEWSClusteringS2S (fra)": 60.93, + "MasakhaNEWSClusteringS2S (hau)": 29.97, + "MasakhaNEWSClusteringS2S (ibo)": 55.42, + "MasakhaNEWSClusteringS2S (lin)": 69.5, + "MasakhaNEWSClusteringS2S (lug)": 49.28, + "MasakhaNEWSClusteringS2S (orm)": 35.88, + "MasakhaNEWSClusteringS2S (pcm)": 79.67, + "MasakhaNEWSClusteringS2S (run)": 59.68, + "MasakhaNEWSClusteringS2S (sna)": 63.13, + "MasakhaNEWSClusteringS2S (som)": 36.68, + "MasakhaNEWSClusteringS2S (swa)": 31.39, + "MasakhaNEWSClusteringS2S (tir)": 45.25, + "MasakhaNEWSClusteringS2S (xho)": 28.47, + "MasakhaNEWSClusteringS2S (yor)": 47.31, + "RuSciBenchGRNTIClusteringP2P": 65.19, + "RuSciBenchOECDClusteringP2P": 55.6, + "TenKGnadClusteringP2P": 52.6, + "TenKGnadClusteringS2S": 38.04 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "gte-Qwen2-7B-instruct", + "FalseFriendsGermanEnglish": 55.91, + "OpusparcusPC (de)": 98.51, + "OpusparcusPC (en)": 99.12, + "OpusparcusPC (fi)": 94.12, + "OpusparcusPC (fr)": 95.95, + "OpusparcusPC (ru)": 92.5, + "OpusparcusPC (sv)": 95.66, + "PawsXPairClassification (de)": 77.16, + "PawsXPairClassification (en)": 82.37, + "PawsXPairClassification (es)": 79.4, + "PawsXPairClassification (fr)": 80.69, + "PawsXPairClassification (ja)": 68.72, + "PawsXPairClassification (ko)": 68.65, + "PawsXPairClassification (zh)": 76.53, + "SprintDuplicateQuestions": 92.82, + "TERRa": 67.45, + "TwitterURLCorpus": 86.59 + }, + { + "Model": "gte-Qwen2-7B-instruct", + "FalseFriendsGermanEnglish": 55.91, + "OpusparcusPC (de)": 98.51, + "OpusparcusPC (en)": 99.12, + "OpusparcusPC (fi)": 94.12, + "OpusparcusPC (fr)": 95.95, + "OpusparcusPC (ru)": 92.5, + "OpusparcusPC (sv)": 95.7, + "PawsXPairClassification (de)": 77.71, + "PawsXPairClassification (en)": 82.39, + "PawsXPairClassification (es)": 79.44, + "PawsXPairClassification (fr)": 80.8, + "PawsXPairClassification (ja)": 68.82, + "PawsXPairClassification (ko)": 68.7, + "PawsXPairClassification (zh)": 76.81, + "SprintDuplicateQuestions": 93.14, + "TERRa": 67.61, + "TwitterURLCorpus": 86.61 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "gte-Qwen2-7B-instruct", + "AlloprofReranking": 81.1, + "RuBQReranking": 74.13, + "T2Reranking": 67.8 + }, + { + "Model": "gte-Qwen2-7B-instruct", + "MIRACLReranking (ru)": 54.95, + "MIRACLReranking (ar)": 68.47, + "MIRACLReranking (bn)": 63.87, + "MIRACLReranking (de)": 50.18, + "MIRACLReranking (en)": 62.37, + "MIRACLReranking (es)": 60.78, + "MIRACLReranking (fa)": 51.21, + "MIRACLReranking (fi)": 68.98, + "MIRACLReranking (fr)": 50.82, + "MIRACLReranking (hi)": 59.2, + "MIRACLReranking (id)": 52.97, + "MIRACLReranking (ja)": 60.39, + "MIRACLReranking (ko)": 50.56, + "MIRACLReranking (sw)": 53.39, + "MIRACLReranking (te)": 67.83, + "MIRACLReranking (th)": 69.34, + "MIRACLReranking (yo)": 64.19, + "MIRACLReranking (zh)": 52.11 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "gte-Qwen2-7B-instruct", + "AILACasedocs": 29.74, + "AILAStatutes": 33.76, + "ARCChallenge": 20.46, + "AlloprofRetrieval": 59.15, + "AlphaNLI": 43.93, + "AppsRetrieval": 28.39, + "ArguAna": 54.56, "BrightRetrieval (earth_science)": 40.66, "BrightRetrieval (sustainable_living)": 20.82, "BrightRetrieval (theoremqa_theorems)": 34.22, @@ -213,7 +1192,95 @@ "BrightRetrieval (biology)": 32.09, "BrightRetrieval (theoremqa_questions)": 29.9, "BrightRetrieval (robotics)": 12.82, - "BrightRetrieval (psychology)": 26.58 + "BrightRetrieval (psychology)": 26.58, + "CodeFeedbackMT": 57.66, + "CodeFeedbackST": 84.15, + "CodeSearchNetCCRetrieval (python)": 75.41, + "CodeSearchNetCCRetrieval (javascript)": 69.2, + "CodeSearchNetCCRetrieval (go)": 62.59, + "CodeSearchNetCCRetrieval (ruby)": 65.18, + "CodeSearchNetCCRetrieval (java)": 67.15, + "CodeSearchNetCCRetrieval (php)": 57.93, + "CodeSearchNetRetrieval (python)": 91.93, + "CodeSearchNetRetrieval (javascript)": 79.82, + "CodeSearchNetRetrieval (go)": 94.26, + "CodeSearchNetRetrieval (ruby)": 84.76, + "CodeSearchNetRetrieval (java)": 86.49, + "CodeSearchNetRetrieval (php)": 84.52, + "CodeTransOceanContest": 81.83, + "CodeTransOceanDL": 32.17, + "CosQA": 32.24, + "CovidRetrieval": 81.04, + "GerDaLIR": 7.75, + "GerDaLIRSmall": 16.94, + "GermanQuAD-Retrieval": 94.54, + "HellaSwag": 37.61, + "LEMBNarrativeQARetrieval": 45.46, + "LEMBQMSumRetrieval": 31.27, + "LEMBSummScreenFDRetrieval": 76.08, + "LEMBWikimQARetrieval": 61.15, + "LeCaRDv2": 69.72, + "LegalBenchConsumerContractsQA": 75.83, + "LegalBenchCorporateLobbying": 95.21, + "LegalQuAD": 46.1, + "LegalSummarization": 66.58, + "PIQA": 44.05, + "Quail": 26.57, + "RARbCode": 90.49, + "RARbMath": 85.27, + "RiaNewsRetrieval": 76.26, + "RuBQRetrieval": 72.72, + "SCIDOCS": 23.48, + "SIQA": 8.36, + "SpartQA": 18.78, + "StackOverflowQA": 84.35, + "SyntheticText2SQL": 53.22, + "TRECCOVID": 80.37, + "TempReasonL1": 2.18, + "TempReasonL2Fact": 64.12, + "TempReasonL2Pure": 6.32, + "TempReasonL3Fact": 47.39, + "TempReasonL3Pure": 9.79, + "WinoGrande": 66.81, + "XMarket (de)": 29.61, + "XMarket (en)": 39.89, + "XMarket (es)": 32.7, + "XPQARetrieval (ara-ara)": 50.05, + "XPQARetrieval (eng-ara)": 40.41, + "XPQARetrieval (ara-eng)": 48.41, + "XPQARetrieval (deu-deu)": 78.07, + "XPQARetrieval (eng-deu)": 59.84, + "XPQARetrieval (deu-eng)": 74.41, + "XPQARetrieval (spa-spa)": 63.68, + "XPQARetrieval (eng-spa)": 52.02, + "XPQARetrieval (spa-eng)": 61.5, + "XPQARetrieval (fra-fra)": 70.76, + "XPQARetrieval (eng-fra)": 57.21, + "XPQARetrieval (fra-eng)": 67.89, + "XPQARetrieval (hin-hin)": 72.32, + "XPQARetrieval (eng-hin)": 42.7, + "XPQARetrieval (hin-eng)": 67.74, + "XPQARetrieval (ita-ita)": 72.61, + "XPQARetrieval (eng-ita)": 51.55, + "XPQARetrieval (ita-eng)": 70.43, + "XPQARetrieval (jpn-jpn)": 74.54, + "XPQARetrieval (eng-jpn)": 51.62, + "XPQARetrieval (jpn-eng)": 71.94, + "XPQARetrieval (kor-kor)": 39.61, + "XPQARetrieval (eng-kor)": 38.93, + "XPQARetrieval (kor-eng)": 37.75, + "XPQARetrieval (pol-pol)": 47.81, + "XPQARetrieval (eng-pol)": 37.95, + "XPQARetrieval (pol-eng)": 44.67, + "XPQARetrieval (por-por)": 48.96, + "XPQARetrieval (eng-por)": 36.09, + "XPQARetrieval (por-eng)": 48.82, + "XPQARetrieval (tam-tam)": 36.09, + "XPQARetrieval (eng-tam)": 20.8, + "XPQARetrieval (tam-eng)": 26.95, + "XPQARetrieval (cmn-cmn)": 65.85, + "XPQARetrieval (eng-cmn)": 36.18, + "XPQARetrieval (cmn-eng)": 62.85 } ], "recall_at_1": [ @@ -231,80 +1298,849 @@ ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "gte-Qwen2-7B-instruct", + "GermanSTSBenchmark": 84.61, + "RUParaPhraserSTS": 77.45, + "RuSTSBenchmarkSTS": 83.82, + "SICK-R": 79.16, + "STS12": 79.53, + "STS13": 88.97, + "STS14": 83.87, + "STS15": 88.48, + "STS17 (en-en)": 88.75, + "STS17 (ko-ko)": 83.86, + "STS17 (it-en)": 87.98, + "STS17 (en-tr)": 77.15, + "STS17 (en-ar)": 84.05, + "STS17 (es-es)": 89.01, + "STS17 (ar-ar)": 85.92, + "STS17 (es-en)": 86.84, + "STS17 (en-de)": 87.22, + "STS17 (nl-en)": 86.65, + "STS17 (fr-en)": 87.14, + "STS22 (ru)": 63.82, + "STS22 (pl)": 40.87, + "STS22 (es-en)": 78.38, + "STS22 (en)": 67.16, + "STS22 (zh-en)": 72.08, + "STS22 (tr)": 67.44, + "STS22 (ar)": 56.69, + "STS22 (es-it)": 74.84, + "STS22 (es)": 67.24, + "STS22 (de-fr)": 64.76, + "STS22 (de-pl)": 54.6, + "STS22 (it)": 78.4, + "STS22 (pl-en)": 74.58, + "STS22 (fr)": 82.49, + "STS22 (de)": 58.05, + "STS22 (de-en)": 57.62, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 65.77, + "STSB": 81.05, + "STSBenchmark": 86.81, + "STSBenchmarkMultilingualSTS (fr)": 85.44, + "STSBenchmarkMultilingualSTS (pt)": 85.11, + "STSBenchmarkMultilingualSTS (en)": 86.85, + "STSBenchmarkMultilingualSTS (de)": 85.05, + "STSBenchmarkMultilingualSTS (it)": 84.73, + "STSBenchmarkMultilingualSTS (es)": 85.74, + "STSBenchmarkMultilingualSTS (zh)": 82.91, + "STSBenchmarkMultilingualSTS (ru)": 83.86, + "STSBenchmarkMultilingualSTS (pl)": 83.95, + "STSBenchmarkMultilingualSTS (nl)": 84.53 + }, + { + "Model": "gte-Qwen2-7B-instruct", + "GermanSTSBenchmark": 84.61, + "RUParaPhraserSTS": 77.45, + "RuSTSBenchmarkSTS": 83.82, + "SICK-R": 79.16, + "STS12": 79.53, + "STS13": 88.97, + "STS14": 83.87, + "STS15": 88.48, + "STS17 (en-en)": 88.75, + "STS17 (ko-ko)": 83.86, + "STS17 (it-en)": 87.98, + "STS17 (en-tr)": 77.15, + "STS17 (en-ar)": 84.05, + "STS17 (es-es)": 89.01, + "STS17 (ar-ar)": 85.92, + "STS17 (es-en)": 86.84, + "STS17 (en-de)": 87.22, + "STS17 (nl-en)": 86.65, + "STS17 (fr-en)": 87.14, + "STS22 (ru)": 63.82, + "STS22 (pl)": 40.87, + "STS22 (es-en)": 78.38, + "STS22 (en)": 67.16, + "STS22 (zh-en)": 72.08, + "STS22 (tr)": 67.44, + "STS22 (ar)": 56.69, + "STS22 (es-it)": 74.84, + "STS22 (es)": 67.24, + "STS22 (de-fr)": 64.76, + "STS22 (de-pl)": 54.6, + "STS22 (it)": 78.4, + "STS22 (pl-en)": 74.58, + "STS22 (fr)": 82.49, + "STS22 (de)": 58.05, + "STS22 (de-en)": 57.62, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 65.77, + "STSB": 81.05, + "STSBenchmark": 86.81, + "STSBenchmarkMultilingualSTS (fr)": 85.44, + "STSBenchmarkMultilingualSTS (pt)": 85.11, + "STSBenchmarkMultilingualSTS (en)": 86.85, + "STSBenchmarkMultilingualSTS (de)": 85.05, + "STSBenchmarkMultilingualSTS (it)": 84.73, + "STSBenchmarkMultilingualSTS (es)": 85.74, + "STSBenchmarkMultilingualSTS (zh)": 82.91, + "STSBenchmarkMultilingualSTS (ru)": 83.86, + "STSBenchmarkMultilingualSTS (pl)": 83.95, + "STSBenchmarkMultilingualSTS (nl)": 84.53 + } + ] }, "Summarization": { "cosine_spearman": [] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "gte-Qwen2-7B-instruct", + "CEDRClassification": 52.78, + "SensitiveTopicsClassification": 35.76 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "gte-Qwen2-7B-instruct", + "Core17InstructionRetrieval": 6.78, + "News21InstructionRetrieval": 4.11, + "Robust04InstructionRetrieval": 3.93 + } + ] } }, "BAAI__bge-base-en-v1.5": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "bge-base-en-v1.5", + "BornholmBitextMining": 27.6, + "Tatoeba (kzj-eng)": 3.89, + "Tatoeba (ile-eng)": 21.25, + "Tatoeba (ara-eng)": 0.3, + "Tatoeba (cha-eng)": 15.41, + "Tatoeba (tzl-eng)": 16.83, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (zsm-eng)": 8.0, + "Tatoeba (yid-eng)": 0.2, + "Tatoeba (vie-eng)": 5.01, + "Tatoeba (tat-eng)": 0.94, + "Tatoeba (nno-eng)": 7.28, + "Tatoeba (mal-eng)": 0.39, + "Tatoeba (nld-eng)": 12.18, + "Tatoeba (tgl-eng)": 3.79, + "Tatoeba (cym-eng)": 6.51, + "Tatoeba (eus-eng)": 7.21, + "Tatoeba (rus-eng)": 0.25, + "Tatoeba (orv-eng)": 0.13, + "Tatoeba (tha-eng)": 1.1, + "Tatoeba (ceb-eng)": 5.13, + "Tatoeba (ast-eng)": 17.75, + "Tatoeba (uig-eng)": 0.4, + "Tatoeba (jav-eng)": 4.87, + "Tatoeba (arq-eng)": 0.3, + "Tatoeba (hsb-eng)": 4.34, + "Tatoeba (srp-eng)": 3.12, + "Tatoeba (bul-eng)": 0.82, + "Tatoeba (tuk-eng)": 4.34, + "Tatoeba (sqi-eng)": 5.87, + "Tatoeba (ces-eng)": 4.76, + "Tatoeba (ell-eng)": 0.5, + "Tatoeba (kaz-eng)": 0.43, + "Tatoeba (spa-eng)": 18.16, + "Tatoeba (lit-eng)": 2.35, + "Tatoeba (ita-eng)": 18.82, + "Tatoeba (gsw-eng)": 14.92, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (cmn-eng)": 2.99, + "Tatoeba (awa-eng)": 0.51, + "Tatoeba (pms-eng)": 11.58, + "Tatoeba (mar-eng)": 0.09, + "Tatoeba (ang-eng)": 14.83, + "Tatoeba (ukr-eng)": 0.83, + "Tatoeba (jpn-eng)": 1.59, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (nob-eng)": 10.7, + "Tatoeba (mhr-eng)": 0.03, + "Tatoeba (gle-eng)": 3.39, + "Tatoeba (hrv-eng)": 6.18, + "Tatoeba (swe-eng)": 8.66, + "Tatoeba (pol-eng)": 5.87, + "Tatoeba (swh-eng)": 7.85, + "Tatoeba (dan-eng)": 10.08, + "Tatoeba (tam-eng)": 0.55, + "Tatoeba (ina-eng)": 29.74, + "Tatoeba (lvs-eng)": 4.24, + "Tatoeba (hun-eng)": 4.9, + "Tatoeba (fra-eng)": 20.65, + "Tatoeba (aze-eng)": 3.18, + "Tatoeba (cat-eng)": 14.93, + "Tatoeba (ido-eng)": 16.24, + "Tatoeba (kur-eng)": 6.67, + "Tatoeba (por-eng)": 16.47, + "Tatoeba (cbk-eng)": 13.72, + "Tatoeba (glg-eng)": 17.84, + "Tatoeba (dsb-eng)": 4.43, + "Tatoeba (mon-eng)": 1.42, + "Tatoeba (fin-eng)": 3.14, + "Tatoeba (cor-eng)": 3.17, + "Tatoeba (afr-eng)": 7.96, + "Tatoeba (ind-eng)": 7.52, + "Tatoeba (kor-eng)": 1.07, + "Tatoeba (xho-eng)": 3.86, + "Tatoeba (dtp-eng)": 3.32, + "Tatoeba (lat-eng)": 10.58, + "Tatoeba (kat-eng)": 0.55, + "Tatoeba (fao-eng)": 6.58, + "Tatoeba (swg-eng)": 11.45, + "Tatoeba (kab-eng)": 0.99, + "Tatoeba (csb-eng)": 6.79, + "Tatoeba (slk-eng)": 6.0, + "Tatoeba (fry-eng)": 14.25, + "Tatoeba (pam-eng)": 4.95, + "Tatoeba (yue-eng)": 1.54, + "Tatoeba (mkd-eng)": 0.21, + "Tatoeba (lfn-eng)": 12.2, + "Tatoeba (nov-eng)": 27.29, + "Tatoeba (tel-eng)": 0.24, + "Tatoeba (max-eng)": 7.97, + "Tatoeba (tur-eng)": 4.29, + "Tatoeba (hye-eng)": 0.3, + "Tatoeba (uzb-eng)": 3.6, + "Tatoeba (gla-eng)": 3.16, + "Tatoeba (khm-eng)": 0.42, + "Tatoeba (ron-eng)": 9.92, + "Tatoeba (isl-eng)": 3.36, + "Tatoeba (ben-eng)": 0.13, + "Tatoeba (amh-eng)": 0.6, + "Tatoeba (epo-eng)": 10.42, + "Tatoeba (deu-eng)": 15.53, + "Tatoeba (oci-eng)": 11.01, + "Tatoeba (wuu-eng)": 2.05, + "Tatoeba (ber-eng)": 5.5, + "Tatoeba (nds-eng)": 12.51, + "Tatoeba (bre-eng)": 3.44, + "Tatoeba (war-eng)": 5.61, + "Tatoeba (bos-eng)": 8.02, + "Tatoeba (pes-eng)": 0.1, + "Tatoeba (est-eng)": 3.75, + "Tatoeba (heb-eng)": 0.48, + "Tatoeba (slv-eng)": 5.44, + "Tatoeba (bel-eng)": 0.94 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-base-en-v1.5", + "AllegroReviews": 25.03, + "AmazonCounterfactualClassification (en-ext)": 74.66, + "AmazonCounterfactualClassification (en)": 75.01, + "AmazonCounterfactualClassification (de)": 55.79, + "AmazonCounterfactualClassification (ja)": 58.61, + "AmazonReviewsClassification (en)": 50.73, + "AmazonReviewsClassification (de)": 26.04, + "AmazonReviewsClassification (es)": 33.95, + "AmazonReviewsClassification (fr)": 29.29, + "AmazonReviewsClassification (ja)": 23.63, + "AmazonReviewsClassification (zh)": 23.65, + "AngryTweetsClassification": 43.69, + "CBD": 51.55, + "DanishPoliticalCommentsClassification": 29.44, + "GeoreviewClassification": 27.74, + "HeadlineClassification": 30.07, + "InappropriatenessClassification": 51.63, + "KinopoiskClassification": 35.58, + "LccSentimentClassification": 40.33, + "MTOPDomainClassification (en)": 93.18, + "MTOPDomainClassification (de)": 69.48, + "MTOPDomainClassification (es)": 76.31, + "MTOPDomainClassification (fr)": 75.85, + "MTOPDomainClassification (hi)": 36.44, + "MTOPDomainClassification (th)": 16.13, + "MTOPIntentClassification (en)": 67.14, + "MTOPIntentClassification (de)": 44.82, + "MTOPIntentClassification (es)": 42.67, + "MTOPIntentClassification (fr)": 39.54, + "MTOPIntentClassification (hi)": 15.37, + "MTOPIntentClassification (th)": 5.24, + "MasakhaNEWSClassification (amh)": 34.04, + "MasakhaNEWSClassification (eng)": 79.02, + "MasakhaNEWSClassification (fra)": 76.07, + "MasakhaNEWSClassification (hau)": 63.2, + "MasakhaNEWSClassification (ibo)": 59.92, + "MasakhaNEWSClassification (lin)": 72.51, + "MasakhaNEWSClassification (lug)": 55.92, + "MasakhaNEWSClassification (orm)": 57.11, + "MasakhaNEWSClassification (pcm)": 92.1, + "MasakhaNEWSClassification (run)": 63.51, + "MasakhaNEWSClassification (sna)": 73.63, + "MasakhaNEWSClassification (som)": 49.15, + "MasakhaNEWSClassification (swa)": 54.35, + "MasakhaNEWSClassification (tir)": 25.88, + "MasakhaNEWSClassification (xho)": 61.14, + "MasakhaNEWSClassification (yor)": 66.28, + "MassiveIntentClassification (ru)": 31.7, + "MassiveIntentClassification (fi)": 41.0, + "MassiveIntentClassification (lv)": 39.23, + "MassiveIntentClassification (sw)": 38.11, + "MassiveIntentClassification (ta)": 11.24, + "MassiveIntentClassification (az)": 38.58, + "MassiveIntentClassification (bn)": 15.79, + "MassiveIntentClassification (ml)": 3.24, + "MassiveIntentClassification (hu)": 36.2, + "MassiveIntentClassification (id)": 40.1, + "MassiveIntentClassification (th)": 12.65, + "MassiveIntentClassification (fr)": 43.91, + "MassiveIntentClassification (zh-CN)": 23.75, + "MassiveIntentClassification (ms)": 37.3, + "MassiveIntentClassification (sl)": 38.57, + "MassiveIntentClassification (ur)": 16.7, + "MassiveIntentClassification (ro)": 39.93, + "MassiveIntentClassification (de)": 41.73, + "MassiveIntentClassification (fa)": 24.44, + "MassiveIntentClassification (te)": 2.72, + "MassiveIntentClassification (pl)": 38.51, + "MassiveIntentClassification (nb)": 38.88, + "MassiveIntentClassification (es)": 43.08, + "MassiveIntentClassification (ja)": 29.99, + "MassiveIntentClassification (zh-TW)": 22.34, + "MassiveIntentClassification (nl)": 38.75, + "MassiveIntentClassification (it)": 44.42, + "MassiveIntentClassification (pt)": 43.45, + "MassiveIntentClassification (ka)": 11.76, + "MassiveIntentClassification (ar)": 20.63, + "MassiveIntentClassification (tr)": 39.25, + "MassiveIntentClassification (cy)": 33.54, + "MassiveIntentClassification (en)": 72.64, + "MassiveIntentClassification (mn)": 20.53, + "MassiveIntentClassification (hi)": 13.89, + "MassiveIntentClassification (ko)": 20.17, + "MassiveIntentClassification (af)": 36.89, + "MassiveIntentClassification (km)": 4.75, + "MassiveIntentClassification (vi)": 37.62, + "MassiveIntentClassification (my)": 3.8, + "MassiveIntentClassification (am)": 2.75, + "MassiveIntentClassification (tl)": 41.78, + "MassiveIntentClassification (sv)": 38.39, + "MassiveIntentClassification (kn)": 3.27, + "MassiveIntentClassification (jv)": 35.15, + "MassiveIntentClassification (hy)": 11.24, + "MassiveIntentClassification (da)": 41.04, + "MassiveIntentClassification (sq)": 38.12, + "MassiveIntentClassification (is)": 34.25, + "MassiveIntentClassification (el)": 27.29, + "MassiveIntentClassification (he)": 23.25, + "MassiveScenarioClassification (am)": 8.29, + "MassiveScenarioClassification (fa)": 30.38, + "MassiveScenarioClassification (nl)": 47.81, + "MassiveScenarioClassification (ar)": 30.58, + "MassiveScenarioClassification (ka)": 18.04, + "MassiveScenarioClassification (nb)": 48.41, + "MassiveScenarioClassification (af)": 45.94, + "MassiveScenarioClassification (km)": 9.02, + "MassiveScenarioClassification (is)": 45.11, + "MassiveScenarioClassification (ta)": 17.82, + "MassiveScenarioClassification (he)": 25.38, + "MassiveScenarioClassification (zh-TW)": 31.33, + "MassiveScenarioClassification (cy)": 39.07, + "MassiveScenarioClassification (ru)": 36.99, + "MassiveScenarioClassification (tl)": 51.55, + "MassiveScenarioClassification (bn)": 21.49, + "MassiveScenarioClassification (sq)": 47.4, + "MassiveScenarioClassification (lv)": 43.41, + "MassiveScenarioClassification (hi)": 19.69, + "MassiveScenarioClassification (zh-CN)": 33.62, + "MassiveScenarioClassification (jv)": 44.53, + "MassiveScenarioClassification (sv)": 47.55, + "MassiveScenarioClassification (pt)": 53.56, + "MassiveScenarioClassification (ro)": 49.47, + "MassiveScenarioClassification (az)": 44.58, + "MassiveScenarioClassification (ko)": 26.1, + "MassiveScenarioClassification (it)": 56.3, + "MassiveScenarioClassification (kn)": 8.44, + "MassiveScenarioClassification (el)": 37.53, + "MassiveScenarioClassification (fi)": 44.54, + "MassiveScenarioClassification (sl)": 42.65, + "MassiveScenarioClassification (tr)": 45.61, + "MassiveScenarioClassification (ml)": 6.64, + "MassiveScenarioClassification (da)": 49.22, + "MassiveScenarioClassification (sw)": 44.79, + "MassiveScenarioClassification (fr)": 53.77, + "MassiveScenarioClassification (en)": 76.51, + "MassiveScenarioClassification (id)": 47.3, + "MassiveScenarioClassification (my)": 10.25, + "MassiveScenarioClassification (ja)": 36.32, + "MassiveScenarioClassification (es)": 53.4, + "MassiveScenarioClassification (mn)": 26.68, + "MassiveScenarioClassification (th)": 23.0, + "MassiveScenarioClassification (ur)": 25.4, + "MassiveScenarioClassification (de)": 54.33, + "MassiveScenarioClassification (pl)": 47.15, + "MassiveScenarioClassification (te)": 6.6, + "MassiveScenarioClassification (vi)": 41.87, + "MassiveScenarioClassification (ms)": 46.35, + "MassiveScenarioClassification (hu)": 43.08, + "MassiveScenarioClassification (hy)": 17.38, + "NoRecClassification": 38.83, + "NordicLangClassification": 53.43, + "PAC": 59.61, + "PolEmo2.0-IN": 44.25, + "PolEmo2.0-OUT": 30.79, + "RuReviewsClassification": 43.47, + "RuSciBenchGRNTIClassification": 17.34, + "RuSciBenchOECDClassification": 13.16, + "ToxicConversationsClassification": 67.04 + } + ] }, "Clustering": { "v_measure": [ { "Model": "bge-base-en-v1.5", + "AlloProfClusteringP2P": 59.78, + "AlloProfClusteringS2S": 38.3, "BiorxivClusteringP2P": 39.44, "BiorxivClusteringS2S": 36.62, + "BlurbsClusteringP2P": 25.15, + "BlurbsClusteringS2S": 11.38, + "GeoreviewClusteringP2P": 23.09, + "HALClusteringS2S": 23.18, + "MLSUMClusteringP2P (de)": 39.14, + "MLSUMClusteringP2P (fr)": 41.97, + "MLSUMClusteringP2P (ru)": 21.47, + "MLSUMClusteringP2P (es)": 40.97, + "MLSUMClusteringS2S (de)": 38.56, + "MLSUMClusteringS2S (fr)": 41.47, + "MLSUMClusteringS2S (ru)": 20.52, + "MLSUMClusteringS2S (es)": 40.77, + "MasakhaNEWSClusteringP2P (amh)": 40.48, + "MasakhaNEWSClusteringP2P (eng)": 49.92, + "MasakhaNEWSClusteringP2P (fra)": 47.41, + "MasakhaNEWSClusteringP2P (hau)": 41.76, + "MasakhaNEWSClusteringP2P (ibo)": 38.01, + "MasakhaNEWSClusteringP2P (lin)": 66.31, + "MasakhaNEWSClusteringP2P (lug)": 48.4, + "MasakhaNEWSClusteringP2P (orm)": 24.88, + "MasakhaNEWSClusteringP2P (pcm)": 84.63, + "MasakhaNEWSClusteringP2P (run)": 51.28, + "MasakhaNEWSClusteringP2P (sna)": 47.27, + "MasakhaNEWSClusteringP2P (som)": 33.96, + "MasakhaNEWSClusteringP2P (swa)": 22.85, + "MasakhaNEWSClusteringP2P (tir)": 47.31, + "MasakhaNEWSClusteringP2P (xho)": 26.52, + "MasakhaNEWSClusteringP2P (yor)": 37.97, + "MasakhaNEWSClusteringS2S (amh)": 44.05, + "MasakhaNEWSClusteringS2S (eng)": 37.48, + "MasakhaNEWSClusteringS2S (fra)": 44.15, + "MasakhaNEWSClusteringS2S (hau)": 19.01, + "MasakhaNEWSClusteringS2S (ibo)": 41.69, + "MasakhaNEWSClusteringS2S (lin)": 54.94, + "MasakhaNEWSClusteringS2S (lug)": 46.07, + "MasakhaNEWSClusteringS2S (orm)": 25.37, + "MasakhaNEWSClusteringS2S (pcm)": 83.13, + "MasakhaNEWSClusteringS2S (run)": 54.53, + "MasakhaNEWSClusteringS2S (sna)": 47.65, + "MasakhaNEWSClusteringS2S (som)": 27.86, + "MasakhaNEWSClusteringS2S (swa)": 20.2, + "MasakhaNEWSClusteringS2S (tir)": 42.87, + "MasakhaNEWSClusteringS2S (xho)": 26.32, + "MasakhaNEWSClusteringS2S (yor)": 32.17, "MedrxivClusteringP2P": 33.21, "MedrxivClusteringS2S": 31.68, "RedditClustering": 56.61, "RedditClusteringP2P": 62.66, + "RuSciBenchGRNTIClusteringP2P": 15.54, + "RuSciBenchOECDClusteringP2P": 14.16, "StackExchangeClustering": 66.11, "StackExchangeClusteringP2P": 35.24, + "TenKGnadClusteringP2P": 42.11, + "TenKGnadClusteringS2S": 22.07, "TwentyNewsgroupsClustering": 50.75 } ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "bge-base-en-v1.5", + "CDSC-E": 46.84, + "FalseFriendsGermanEnglish": 47.76, + "OpusparcusPC (de)": 90.85, + "OpusparcusPC (en)": 98.58, + "OpusparcusPC (fi)": 85.23, + "OpusparcusPC (fr)": 85.92, + "OpusparcusPC (ru)": 80.09, + "OpusparcusPC (sv)": 82.52, + "PSC": 92.98, + "PawsXPairClassification (de)": 52.1, + "PawsXPairClassification (en)": 59.05, + "PawsXPairClassification (es)": 53.77, + "PawsXPairClassification (fr)": 55.17, + "PawsXPairClassification (ja)": 48.3, + "PawsXPairClassification (ko)": 51.05, + "PawsXPairClassification (zh)": 52.85, + "SICK-E-PL": 43.02, + "SprintDuplicateQuestions": 96.33, + "TERRa": 47.12, + "TwitterURLCorpus": 85.65 + }, + { + "Model": "bge-base-en-v1.5", + "CDSC-E": 46.88, + "FalseFriendsGermanEnglish": 47.77, + "OpusparcusPC (de)": 90.92, + "OpusparcusPC (en)": 98.58, + "OpusparcusPC (fi)": 85.23, + "OpusparcusPC (fr)": 85.92, + "OpusparcusPC (ru)": 80.09, + "OpusparcusPC (sv)": 82.52, + "PSC": 92.98, + "PawsXPairClassification (de)": 52.1, + "PawsXPairClassification (en)": 59.09, + "PawsXPairClassification (es)": 53.77, + "PawsXPairClassification (fr)": 55.17, + "PawsXPairClassification (ja)": 48.46, + "PawsXPairClassification (ko)": 51.06, + "PawsXPairClassification (zh)": 53.0, + "SICK-E-PL": 43.03, + "SprintDuplicateQuestions": 96.37, + "TERRa": 47.12, + "TwitterURLCorpus": 85.65 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bge-base-en-v1.5", + "AlloprofReranking": 62.19, + "RuBQReranking": 44.52, + "SyntecReranking": 73.0, + "T2Reranking": 63.09 + }, + { + "Model": "bge-base-en-v1.5", + "MIRACLReranking (ar)": 16.03, + "MIRACLReranking (bn)": 19.79, + "MIRACLReranking (de)": 25.83, + "MIRACLReranking (en)": 56.99, + "MIRACLReranking (es)": 38.38, + "MIRACLReranking (fa)": 15.77, + "MIRACLReranking (fi)": 48.23, + "MIRACLReranking (fr)": 27.9, + "MIRACLReranking (hi)": 10.78, + "MIRACLReranking (id)": 30.65, + "MIRACLReranking (ja)": 17.32, + "MIRACLReranking (ko)": 19.91, + "MIRACLReranking (ru)": 22.42, + "MIRACLReranking (sw)": 38.72, + "MIRACLReranking (te)": 1.55, + "MIRACLReranking (th)": 5.4, + "MIRACLReranking (yo)": 58.13, + "MIRACLReranking (zh)": 13.84 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "bge-base-en-v1.5", + "AILACasedocs": 27.36, + "AILAStatutes": 23.35, "ARCChallenge": 9.66, + "AlloprofRetrieval": 31.28, "AlphaNLI": 10.99, + "AppsRetrieval": 6.45, + "ArguAna": 63.75, + "BSARDRetrieval": 11.67, + "CmedqaRetrieval": 2.42, + "CodeFeedbackMT": 33.65, + "CodeFeedbackST": 70.0, + "CodeSearchNetCCRetrieval (python)": 59.77, + "CodeSearchNetCCRetrieval (javascript)": 54.23, + "CodeSearchNetCCRetrieval (go)": 34.6, + "CodeSearchNetCCRetrieval (ruby)": 56.37, + "CodeSearchNetCCRetrieval (java)": 56.21, + "CodeSearchNetCCRetrieval (php)": 44.15, + "CodeSearchNetRetrieval (python)": 89.09, + "CodeSearchNetRetrieval (javascript)": 75.78, + "CodeSearchNetRetrieval (go)": 95.42, + "CodeSearchNetRetrieval (ruby)": 81.33, + "CodeSearchNetRetrieval (java)": 81.05, + "CodeSearchNetRetrieval (php)": 87.99, + "CodeTransOceanContest": 45.65, + "CodeTransOceanDL": 23.49, + "CosQA": 33.71, + "CovidRetrieval": 23.21, + "GerDaLIR": 1.25, + "GerDaLIRSmall": 3.32, + "GermanQuAD-Retrieval": 79.77, "HellaSwag": 26.64, + "LEMBNarrativeQARetrieval": 25.63, + "LEMBQMSumRetrieval": 22.43, + "LEMBSummScreenFDRetrieval": 60.34, + "LEMBWikimQARetrieval": 51.67, + "LeCaRDv2": 23.34, + "LegalBenchConsumerContractsQA": 73.48, + "LegalBenchCorporateLobbying": 91.67, + "LegalQuAD": 16.01, + "LegalSummarization": 63.41, + "MIRACLRetrieval (ar)": 1.0, + "MIRACLRetrieval (bn)": 1.53, + "MIRACLRetrieval (de)": 15.18, + "MIRACLRetrieval (en)": 48.63, + "MIRACLRetrieval (es)": 25.17, + "MIRACLRetrieval (fa)": 0.94, + "MIRACLRetrieval (fi)": 31.93, + "MIRACLRetrieval (fr)": 17.69, + "MIRACLRetrieval (hi)": 0.63, + "MIRACLRetrieval (id)": 21.02, + "MIRACLRetrieval (ja)": 3.85, + "MIRACLRetrieval (ko)": 5.89, + "MIRACLRetrieval (ru)": 6.02, + "MIRACLRetrieval (sw)": 31.61, + "MIRACLRetrieval (te)": 0.11, + "MIRACLRetrieval (th)": 0.52, + "MIRACLRetrieval (yo)": 54.14, + "MIRACLRetrieval (zh)": 0.98, + "MintakaRetrieval (ar)": 4.82, + "MintakaRetrieval (de)": 17.38, + "MintakaRetrieval (es)": 15.89, + "MintakaRetrieval (fr)": 19.27, + "MintakaRetrieval (hi)": 3.67, + "MintakaRetrieval (it)": 14.21, + "MintakaRetrieval (ja)": 8.79, + "MintakaRetrieval (pt)": 14.08, + "NFCorpus": 37.37, "PIQA": 25.69, "Quail": 1.42, "RARbCode": 46.47, "RARbMath": 46.86, + "RiaNewsRetrieval": 19.6, + "RuBQRetrieval": 13.27, + "SCIDOCS": 21.73, "SIQA": 0.94, + "SciFact": 74.35, + "SciFact-PL": 40.81, "SpartQA": 3.37, + "StackOverflowQA": 80.23, + "SyntecRetrieval": 63.7, + "SyntheticText2SQL": 49.98, + "TRECCOVID": 78.03, + "TRECCOVID-PL": 37.34, "TempReasonL1": 1.07, "TempReasonL2Fact": 17.23, "TempReasonL2Pure": 1.29, "TempReasonL3Fact": 13.36, "TempReasonL3Pure": 5.2, - "WinoGrande": 13.76 + "WinoGrande": 13.76, + "XMarket (de)": 15.71, + "XMarket (en)": 33.61, + "XMarket (es)": 17.5, + "XPQARetrieval (ara-ara)": 10.64, + "XPQARetrieval (eng-ara)": 3.81, + "XPQARetrieval (ara-eng)": 8.07, + "XPQARetrieval (deu-deu)": 58.05, + "XPQARetrieval (eng-deu)": 9.9, + "XPQARetrieval (deu-eng)": 30.84, + "XPQARetrieval (spa-spa)": 44.76, + "XPQARetrieval (eng-spa)": 8.58, + "XPQARetrieval (spa-eng)": 25.23, + "XPQARetrieval (fra-fra)": 51.81, + "XPQARetrieval (eng-fra)": 14.4, + "XPQARetrieval (fra-eng)": 32.94, + "XPQARetrieval (hin-hin)": 32.67, + "XPQARetrieval (eng-hin)": 5.81, + "XPQARetrieval (hin-eng)": 6.86, + "XPQARetrieval (ita-ita)": 63.61, + "XPQARetrieval (eng-ita)": 9.4, + "XPQARetrieval (ita-eng)": 28.35, + "XPQARetrieval (jpn-jpn)": 42.12, + "XPQARetrieval (eng-jpn)": 6.0, + "XPQARetrieval (jpn-eng)": 16.81, + "XPQARetrieval (kor-kor)": 15.84, + "XPQARetrieval (eng-kor)": 8.51, + "XPQARetrieval (kor-eng)": 8.13, + "XPQARetrieval (pol-pol)": 30.99, + "XPQARetrieval (eng-pol)": 11.94, + "XPQARetrieval (pol-eng)": 18.29, + "XPQARetrieval (por-por)": 37.14, + "XPQARetrieval (eng-por)": 8.38, + "XPQARetrieval (por-eng)": 23.01, + "XPQARetrieval (tam-tam)": 13.05, + "XPQARetrieval (eng-tam)": 4.12, + "XPQARetrieval (tam-eng)": 3.44, + "XPQARetrieval (cmn-cmn)": 25.73, + "XPQARetrieval (eng-cmn)": 7.49, + "XPQARetrieval (cmn-eng)": 14.89 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bge-base-en-v1.5", + "CDSC-R": 82.49, + "GermanSTSBenchmark": 62.87, + "RUParaPhraserSTS": 48.73, + "RuSTSBenchmarkSTS": 59.7, + "SICK-R": 80.3, + "SICK-R-PL": 52.21, + "SICKFr": 65.48, + "STS12": 78.03, + "STS13": 84.18, + "STS14": 82.27, + "STS15": 87.96, + "STS17 (en-tr)": 9.04, + "STS17 (it-en)": 33.78, + "STS17 (es-es)": 79.63, + "STS17 (ar-ar)": 53.71, + "STS17 (fr-en)": 40.83, + "STS17 (en-en)": 86.41, + "STS17 (ko-ko)": 51.96, + "STS17 (en-de)": 37.37, + "STS17 (es-en)": 34.94, + "STS17 (nl-en)": 33.35, + "STS17 (en-ar)": 4.5, + "STS22 (de-fr)": 41.15, + "STS22 (fr)": 75.72, + "STS22 (pl)": 35.91, + "STS22 (tr)": 41.34, + "STS22 (es)": 51.51, + "STS22 (it)": 61.44, + "STS22 (pl-en)": 38.27, + "STS22 (zh-en)": 44.76, + "STS22 (ar)": 24.97, + "STS22 (zh)": 50.2, + "STS22 (en)": 66.54, + "STS22 (es-it)": 50.38, + "STS22 (ru)": 16.18, + "STS22 (fr-pl)": 39.44, + "STS22 (es-en)": 59.23, + "STS22 (de)": 32.68, + "STS22 (de-en)": 46.58, + "STS22 (de-pl)": 31.16, + "STSB": 41.69, + "STSBenchmark": 86.42, + "STSBenchmarkMultilingualSTS (en)": 86.42, + "STSBenchmarkMultilingualSTS (pt)": 65.71, + "STSBenchmarkMultilingualSTS (pl)": 59.86, + "STSBenchmarkMultilingualSTS (zh)": 41.25, + "STSBenchmarkMultilingualSTS (de)": 62.63, + "STSBenchmarkMultilingualSTS (es)": 68.01, + "STSBenchmarkMultilingualSTS (fr)": 66.28, + "STSBenchmarkMultilingualSTS (it)": 66.54, + "STSBenchmarkMultilingualSTS (nl)": 60.19, + "STSBenchmarkMultilingualSTS (ru)": 59.85 + }, + { + "Model": "bge-base-en-v1.5", + "CDSC-R": 82.49, + "GermanSTSBenchmark": 62.87, + "RUParaPhraserSTS": 48.73, + "RuSTSBenchmarkSTS": 59.7, + "SICK-R": 80.3, + "SICK-R-PL": 52.21, + "SICKFr": 65.48, + "STS12": 78.03, + "STS13": 84.18, + "STS14": 82.27, + "STS15": 87.96, + "STS17 (en-tr)": 9.04, + "STS17 (it-en)": 33.78, + "STS17 (es-es)": 79.63, + "STS17 (ar-ar)": 53.71, + "STS17 (fr-en)": 40.83, + "STS17 (en-en)": 86.41, + "STS17 (ko-ko)": 51.96, + "STS17 (en-de)": 37.37, + "STS17 (es-en)": 34.94, + "STS17 (nl-en)": 33.35, + "STS17 (en-ar)": 4.5, + "STS22 (de-fr)": 41.15, + "STS22 (fr)": 75.72, + "STS22 (pl)": 36.01, + "STS22 (tr)": 41.34, + "STS22 (es)": 51.51, + "STS22 (it)": 61.44, + "STS22 (pl-en)": 38.27, + "STS22 (zh-en)": 44.76, + "STS22 (ar)": 24.95, + "STS22 (zh)": 50.2, + "STS22 (en)": 66.54, + "STS22 (es-it)": 50.38, + "STS22 (ru)": 16.18, + "STS22 (fr-pl)": 39.44, + "STS22 (es-en)": 59.23, + "STS22 (de)": 32.67, + "STS22 (de-en)": 46.58, + "STS22 (de-pl)": 31.16, + "STSB": 41.69, + "STSBenchmark": 86.42, + "STSBenchmarkMultilingualSTS (en)": 86.42, + "STSBenchmarkMultilingualSTS (pt)": 65.71, + "STSBenchmarkMultilingualSTS (pl)": 59.86, + "STSBenchmarkMultilingualSTS (zh)": 41.24, + "STSBenchmarkMultilingualSTS (de)": 62.63, + "STSBenchmarkMultilingualSTS (es)": 68.01, + "STSBenchmarkMultilingualSTS (fr)": 66.28, + "STSBenchmarkMultilingualSTS (it)": 66.54, + "STSBenchmarkMultilingualSTS (nl)": 60.19, + "STSBenchmarkMultilingualSTS (ru)": 59.85 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bge-base-en-v1.5", + "SummEvalFr": 30.72 + }, + { + "Model": "bge-base-en-v1.5", + "SummEvalFr": 30.72 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-base-en-v1.5", + "CEDRClassification": 33.62, + "SensitiveTopicsClassification": 18.05 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "bge-base-en-v1.5", + "Core17InstructionRetrieval": -3.42, + "News21InstructionRetrieval": -1.0, + "Robust04InstructionRetrieval": -7.53 + } + ] } }, "BAAI__bge-base-en-v1.5-instruct": { @@ -456,28 +2292,441 @@ }, "BAAI__bge-large-en-v1.5": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "bge-large-en-v1.5", + "BornholmBitextMining": 34.09, + "Tatoeba (nld-eng)": 14.89, + "Tatoeba (isl-eng)": 4.57, + "Tatoeba (ces-eng)": 5.41, + "Tatoeba (ina-eng)": 38.55, + "Tatoeba (hye-eng)": 0.3, + "Tatoeba (heb-eng)": 0.7, + "Tatoeba (spa-eng)": 25.08, + "Tatoeba (slv-eng)": 5.77, + "Tatoeba (tat-eng)": 0.9, + "Tatoeba (kur-eng)": 7.31, + "Tatoeba (cbk-eng)": 18.32, + "Tatoeba (nob-eng)": 11.34, + "Tatoeba (ben-eng)": 0.01, + "Tatoeba (srp-eng)": 3.31, + "Tatoeba (tel-eng)": 0.78, + "Tatoeba (yue-eng)": 1.55, + "Tatoeba (hun-eng)": 5.15, + "Tatoeba (swe-eng)": 10.6, + "Tatoeba (bre-eng)": 4.2, + "Tatoeba (csb-eng)": 6.17, + "Tatoeba (epo-eng)": 11.93, + "Tatoeba (cat-eng)": 17.77, + "Tatoeba (jav-eng)": 5.46, + "Tatoeba (ast-eng)": 21.06, + "Tatoeba (ind-eng)": 7.67, + "Tatoeba (kaz-eng)": 0.61, + "Tatoeba (fry-eng)": 17.97, + "Tatoeba (ber-eng)": 5.06, + "Tatoeba (nds-eng)": 15.16, + "Tatoeba (est-eng)": 2.76, + "Tatoeba (zsm-eng)": 8.01, + "Tatoeba (mar-eng)": 0.09, + "Tatoeba (tgl-eng)": 5.95, + "Tatoeba (cmn-eng)": 2.95, + "Tatoeba (bos-eng)": 8.9, + "Tatoeba (sqi-eng)": 5.58, + "Tatoeba (cha-eng)": 14.46, + "Tatoeba (ita-eng)": 25.37, + "Tatoeba (kab-eng)": 0.97, + "Tatoeba (gsw-eng)": 15.41, + "Tatoeba (slk-eng)": 5.94, + "Tatoeba (lvs-eng)": 4.67, + "Tatoeba (nno-eng)": 6.71, + "Tatoeba (lat-eng)": 11.54, + "Tatoeba (nov-eng)": 30.37, + "Tatoeba (eus-eng)": 7.22, + "Tatoeba (lit-eng)": 2.78, + "Tatoeba (hrv-eng)": 7.32, + "Tatoeba (pol-eng)": 6.98, + "Tatoeba (ceb-eng)": 5.83, + "Tatoeba (ile-eng)": 27.22, + "Tatoeba (lfn-eng)": 16.0, + "Tatoeba (tuk-eng)": 6.19, + "Tatoeba (ell-eng)": 0.4, + "Tatoeba (afr-eng)": 9.78, + "Tatoeba (ang-eng)": 16.0, + "Tatoeba (tam-eng)": 0.38, + "Tatoeba (cym-eng)": 6.96, + "Tatoeba (rus-eng)": 0.4, + "Tatoeba (tha-eng)": 0.9, + "Tatoeba (vie-eng)": 4.96, + "Tatoeba (dsb-eng)": 5.92, + "Tatoeba (pam-eng)": 5.33, + "Tatoeba (uzb-eng)": 2.15, + "Tatoeba (yid-eng)": 0.0, + "Tatoeba (swg-eng)": 9.77, + "Tatoeba (awa-eng)": 0.2, + "Tatoeba (dtp-eng)": 3.45, + "Tatoeba (mon-eng)": 1.34, + "Tatoeba (cor-eng)": 3.37, + "Tatoeba (bel-eng)": 1.42, + "Tatoeba (ukr-eng)": 1.03, + "Tatoeba (max-eng)": 11.79, + "Tatoeba (por-eng)": 23.03, + "Tatoeba (uig-eng)": 0.53, + "Tatoeba (ido-eng)": 20.0, + "Tatoeba (hsb-eng)": 5.16, + "Tatoeba (kat-eng)": 0.59, + "Tatoeba (khm-eng)": 0.42, + "Tatoeba (orv-eng)": 0.24, + "Tatoeba (mal-eng)": 0.16, + "Tatoeba (swh-eng)": 7.79, + "Tatoeba (gla-eng)": 1.93, + "Tatoeba (gle-eng)": 3.14, + "Tatoeba (pes-eng)": 0.3, + "Tatoeba (wuu-eng)": 2.44, + "Tatoeba (dan-eng)": 12.75, + "Tatoeba (tzl-eng)": 18.68, + "Tatoeba (fin-eng)": 3.73, + "Tatoeba (war-eng)": 6.91, + "Tatoeba (ron-eng)": 11.35, + "Tatoeba (mhr-eng)": 0.07, + "Tatoeba (tur-eng)": 4.66, + "Tatoeba (kzj-eng)": 3.56, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (pms-eng)": 13.5, + "Tatoeba (bul-eng)": 0.93, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (aze-eng)": 3.79, + "Tatoeba (kor-eng)": 1.39, + "Tatoeba (ara-eng)": 0.58, + "Tatoeba (deu-eng)": 23.9, + "Tatoeba (fra-eng)": 34.94, + "Tatoeba (amh-eng)": 0.45, + "Tatoeba (mkd-eng)": 0.21, + "Tatoeba (glg-eng)": 22.82, + "Tatoeba (hin-eng)": 0.07, + "Tatoeba (jpn-eng)": 0.92, + "Tatoeba (xho-eng)": 4.22, + "Tatoeba (fao-eng)": 9.22, + "Tatoeba (oci-eng)": 13.59, + "Tatoeba (arq-eng)": 0.37 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-large-en-v1.5", + "AllegroReviews": 24.37, + "AmazonCounterfactualClassification (en-ext)": 75.04, + "AmazonCounterfactualClassification (en)": 73.94, + "AmazonCounterfactualClassification (de)": 55.25, + "AmazonCounterfactualClassification (ja)": 56.98, + "AmazonReviewsClassification (en)": 50.23, + "AmazonReviewsClassification (de)": 27.53, + "AmazonReviewsClassification (es)": 34.65, + "AmazonReviewsClassification (fr)": 31.55, + "AmazonReviewsClassification (ja)": 22.85, + "AmazonReviewsClassification (zh)": 22.27, + "AngryTweetsClassification": 44.97, + "CBD": 51.28, + "DanishPoliticalCommentsClassification": 29.79, + "GeoreviewClassification": 28.64, + "HeadlineClassification": 33.56, + "InappropriatenessClassification": 51.81, + "KinopoiskClassification": 35.68, + "LccSentimentClassification": 38.27, + "MTOPDomainClassification (en)": 94.0, + "MTOPDomainClassification (de)": 75.43, + "MTOPDomainClassification (es)": 78.81, + "MTOPDomainClassification (fr)": 80.27, + "MTOPDomainClassification (hi)": 32.73, + "MTOPDomainClassification (th)": 15.91, + "MTOPIntentClassification (en)": 69.96, + "MTOPIntentClassification (de)": 44.3, + "MTOPIntentClassification (es)": 47.38, + "MTOPIntentClassification (fr)": 41.36, + "MTOPIntentClassification (hi)": 13.53, + "MTOPIntentClassification (th)": 5.02, + "MasakhaNEWSClassification (amh)": 32.02, + "MasakhaNEWSClassification (eng)": 79.86, + "MasakhaNEWSClassification (fra)": 76.97, + "MasakhaNEWSClassification (hau)": 65.24, + "MasakhaNEWSClassification (ibo)": 62.03, + "MasakhaNEWSClassification (lin)": 75.43, + "MasakhaNEWSClassification (lug)": 57.31, + "MasakhaNEWSClassification (orm)": 59.08, + "MasakhaNEWSClassification (pcm)": 93.38, + "MasakhaNEWSClassification (run)": 65.19, + "MasakhaNEWSClassification (sna)": 75.07, + "MasakhaNEWSClassification (som)": 50.58, + "MasakhaNEWSClassification (swa)": 58.21, + "MasakhaNEWSClassification (tir)": 23.49, + "MasakhaNEWSClassification (xho)": 63.8, + "MasakhaNEWSClassification (yor)": 67.81, + "MassiveIntentClassification (am)": 3.01, + "MassiveIntentClassification (nb)": 39.67, + "MassiveIntentClassification (kn)": 3.14, + "MassiveIntentClassification (sq)": 36.62, + "MassiveIntentClassification (pl)": 38.6, + "MassiveIntentClassification (fa)": 26.89, + "MassiveIntentClassification (pt)": 46.21, + "MassiveIntentClassification (az)": 37.33, + "MassiveIntentClassification (en)": 74.34, + "MassiveIntentClassification (ta)": 7.96, + "MassiveIntentClassification (af)": 37.67, + "MassiveIntentClassification (bn)": 10.88, + "MassiveIntentClassification (he)": 23.98, + "MassiveIntentClassification (da)": 40.99, + "MassiveIntentClassification (fi)": 38.04, + "MassiveIntentClassification (ur)": 15.81, + "MassiveIntentClassification (is)": 32.31, + "MassiveIntentClassification (fr)": 47.83, + "MassiveIntentClassification (ko)": 14.8, + "MassiveIntentClassification (sv)": 39.28, + "MassiveIntentClassification (sw)": 36.96, + "MassiveIntentClassification (tr)": 37.77, + "MassiveIntentClassification (mn)": 22.38, + "MassiveIntentClassification (cy)": 31.92, + "MassiveIntentClassification (el)": 32.84, + "MassiveIntentClassification (te)": 2.54, + "MassiveIntentClassification (my)": 4.0, + "MassiveIntentClassification (ja)": 29.62, + "MassiveIntentClassification (ml)": 3.18, + "MassiveIntentClassification (sl)": 35.56, + "MassiveIntentClassification (id)": 38.11, + "MassiveIntentClassification (ro)": 39.82, + "MassiveIntentClassification (zh-TW)": 17.03, + "MassiveIntentClassification (tl)": 39.53, + "MassiveIntentClassification (ar)": 14.05, + "MassiveIntentClassification (ka)": 8.42, + "MassiveIntentClassification (th)": 10.87, + "MassiveIntentClassification (hi)": 13.46, + "MassiveIntentClassification (hu)": 34.38, + "MassiveIntentClassification (nl)": 41.43, + "MassiveIntentClassification (it)": 46.19, + "MassiveIntentClassification (lv)": 35.81, + "MassiveIntentClassification (vi)": 35.68, + "MassiveIntentClassification (km)": 4.44, + "MassiveIntentClassification (jv)": 33.82, + "MassiveIntentClassification (de)": 43.23, + "MassiveIntentClassification (zh-CN)": 18.44, + "MassiveIntentClassification (ms)": 36.34, + "MassiveIntentClassification (es)": 45.64, + "MassiveIntentClassification (hy)": 6.94, + "MassiveIntentClassification (ru)": 32.31, + "MassiveScenarioClassification (ru)": 38.22, + "MassiveScenarioClassification (ka)": 13.35, + "MassiveScenarioClassification (nb)": 50.68, + "MassiveScenarioClassification (is)": 43.01, + "MassiveScenarioClassification (am)": 7.36, + "MassiveScenarioClassification (it)": 55.15, + "MassiveScenarioClassification (ml)": 7.08, + "MassiveScenarioClassification (pt)": 56.07, + "MassiveScenarioClassification (sq)": 46.71, + "MassiveScenarioClassification (bn)": 16.2, + "MassiveScenarioClassification (ur)": 22.4, + "MassiveScenarioClassification (jv)": 42.62, + "MassiveScenarioClassification (he)": 27.41, + "MassiveScenarioClassification (ko)": 19.02, + "MassiveScenarioClassification (hi)": 17.95, + "MassiveScenarioClassification (tr)": 45.72, + "MassiveScenarioClassification (sl)": 41.89, + "MassiveScenarioClassification (my)": 10.1, + "MassiveScenarioClassification (da)": 51.76, + "MassiveScenarioClassification (sw)": 43.94, + "MassiveScenarioClassification (ar)": 22.25, + "MassiveScenarioClassification (zh-CN)": 29.79, + "MassiveScenarioClassification (cy)": 37.85, + "MassiveScenarioClassification (az)": 44.88, + "MassiveScenarioClassification (th)": 19.92, + "MassiveScenarioClassification (de)": 59.02, + "MassiveScenarioClassification (fa)": 30.39, + "MassiveScenarioClassification (kn)": 7.92, + "MassiveScenarioClassification (ms)": 47.21, + "MassiveScenarioClassification (vi)": 41.41, + "MassiveScenarioClassification (ro)": 50.86, + "MassiveScenarioClassification (km)": 9.31, + "MassiveScenarioClassification (hu)": 41.37, + "MassiveScenarioClassification (sv)": 49.22, + "MassiveScenarioClassification (te)": 6.68, + "MassiveScenarioClassification (fi)": 42.97, + "MassiveScenarioClassification (en)": 77.39, + "MassiveScenarioClassification (mn)": 27.9, + "MassiveScenarioClassification (nl)": 52.0, + "MassiveScenarioClassification (pl)": 46.56, + "MassiveScenarioClassification (fr)": 57.13, + "MassiveScenarioClassification (hy)": 12.46, + "MassiveScenarioClassification (af)": 48.03, + "MassiveScenarioClassification (ja)": 36.14, + "MassiveScenarioClassification (tl)": 50.66, + "MassiveScenarioClassification (ta)": 12.75, + "MassiveScenarioClassification (lv)": 40.06, + "MassiveScenarioClassification (es)": 54.89, + "MassiveScenarioClassification (el)": 41.93, + "MassiveScenarioClassification (zh-TW)": 26.79, + "MassiveScenarioClassification (id)": 46.94, + "NoRecClassification": 39.55, + "NordicLangClassification": 55.07, + "PAC": 60.88, + "PolEmo2.0-IN": 43.92, + "PolEmo2.0-OUT": 24.13, + "RuReviewsClassification": 44.62, + "RuSciBenchGRNTIClassification": 22.05, + "RuSciBenchOECDClassification": 16.53, + "ToxicConversationsClassification": 66.48 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "bge-large-en-v1.5", + "AlloProfClusteringP2P": 60.31, + "AlloProfClusteringS2S": 41.15, + "BlurbsClusteringP2P": 29.05, + "BlurbsClusteringS2S": 12.3, + "GeoreviewClusteringP2P": 21.87, + "HALClusteringS2S": 25.36, + "MLSUMClusteringP2P (de)": 42.55, + "MLSUMClusteringP2P (fr)": 42.64, + "MLSUMClusteringP2P (ru)": 24.24, + "MLSUMClusteringP2P (es)": 42.98, + "MLSUMClusteringS2S (de)": 41.87, + "MLSUMClusteringS2S (fr)": 42.61, + "MLSUMClusteringS2S (ru)": 21.48, + "MLSUMClusteringS2S (es)": 42.34, + "MasakhaNEWSClusteringP2P (amh)": 40.67, + "MasakhaNEWSClusteringP2P (eng)": 60.51, + "MasakhaNEWSClusteringP2P (fra)": 46.71, + "MasakhaNEWSClusteringP2P (hau)": 47.0, + "MasakhaNEWSClusteringP2P (ibo)": 43.33, + "MasakhaNEWSClusteringP2P (lin)": 69.98, + "MasakhaNEWSClusteringP2P (lug)": 54.39, + "MasakhaNEWSClusteringP2P (orm)": 32.23, + "MasakhaNEWSClusteringP2P (pcm)": 79.75, + "MasakhaNEWSClusteringP2P (run)": 57.72, + "MasakhaNEWSClusteringP2P (sna)": 60.44, + "MasakhaNEWSClusteringP2P (som)": 35.77, + "MasakhaNEWSClusteringP2P (swa)": 26.07, + "MasakhaNEWSClusteringP2P (tir)": 45.23, + "MasakhaNEWSClusteringP2P (xho)": 36.73, + "MasakhaNEWSClusteringP2P (yor)": 36.21, + "MasakhaNEWSClusteringS2S (amh)": 42.03, + "MasakhaNEWSClusteringS2S (eng)": 40.9, + "MasakhaNEWSClusteringS2S (fra)": 48.92, + "MasakhaNEWSClusteringS2S (hau)": 17.37, + "MasakhaNEWSClusteringS2S (ibo)": 40.42, + "MasakhaNEWSClusteringS2S (lin)": 55.71, + "MasakhaNEWSClusteringS2S (lug)": 43.59, + "MasakhaNEWSClusteringS2S (orm)": 26.56, + "MasakhaNEWSClusteringS2S (pcm)": 73.17, + "MasakhaNEWSClusteringS2S (run)": 54.44, + "MasakhaNEWSClusteringS2S (sna)": 42.07, + "MasakhaNEWSClusteringS2S (som)": 34.27, + "MasakhaNEWSClusteringS2S (swa)": 22.01, + "MasakhaNEWSClusteringS2S (tir)": 43.34, + "MasakhaNEWSClusteringS2S (xho)": 22.12, + "MasakhaNEWSClusteringS2S (yor)": 32.6, + "RuSciBenchGRNTIClusteringP2P": 19.01, + "RuSciBenchOECDClusteringP2P": 15.98, + "TenKGnadClusteringP2P": 44.52, + "TenKGnadClusteringS2S": 24.68 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "bge-large-en-v1.5", + "CDSC-E": 47.58, + "FalseFriendsGermanEnglish": 47.77, + "OpusparcusPC (de)": 90.71, + "OpusparcusPC (en)": 98.69, + "OpusparcusPC (fi)": 84.99, + "OpusparcusPC (fr)": 87.64, + "OpusparcusPC (ru)": 79.78, + "OpusparcusPC (sv)": 82.63, + "PSC": 93.8, + "PawsXPairClassification (de)": 52.43, + "PawsXPairClassification (en)": 61.79, + "PawsXPairClassification (es)": 53.65, + "PawsXPairClassification (fr)": 54.89, + "PawsXPairClassification (ja)": 47.84, + "PawsXPairClassification (ko)": 49.99, + "PawsXPairClassification (zh)": 52.14, + "SICK-E-PL": 45.73, + "SprintDuplicateQuestions": 96.73, + "TERRa": 47.52, + "TwitterURLCorpus": 85.6 + }, + { + "Model": "bge-large-en-v1.5", + "CDSC-E": 47.66, + "FalseFriendsGermanEnglish": 47.77, + "OpusparcusPC (de)": 90.71, + "OpusparcusPC (en)": 98.69, + "OpusparcusPC (fi)": 84.99, + "OpusparcusPC (fr)": 87.64, + "OpusparcusPC (ru)": 79.78, + "OpusparcusPC (sv)": 82.63, + "PSC": 93.8, + "PawsXPairClassification (de)": 52.43, + "PawsXPairClassification (en)": 61.81, + "PawsXPairClassification (es)": 53.65, + "PawsXPairClassification (fr)": 54.89, + "PawsXPairClassification (ja)": 48.03, + "PawsXPairClassification (ko)": 50.03, + "PawsXPairClassification (zh)": 52.2, + "SICK-E-PL": 45.75, + "SprintDuplicateQuestions": 96.75, + "TERRa": 47.55, + "TwitterURLCorpus": 85.6 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bge-large-en-v1.5", + "AlloprofReranking": 67.41, + "RuBQReranking": 47.66, + "SyntecReranking": 72.67, + "T2Reranking": 63.07 + }, + { + "Model": "bge-large-en-v1.5", + "MIRACLReranking (ru)": 25.96, + "MIRACLReranking (ar)": 25.08, + "MIRACLReranking (bn)": 15.35, + "MIRACLReranking (de)": 30.9, + "MIRACLReranking (en)": 57.39, + "MIRACLReranking (es)": 42.47, + "MIRACLReranking (fa)": 19.12, + "MIRACLReranking (fi)": 41.3, + "MIRACLReranking (fr)": 31.82, + "MIRACLReranking (hi)": 11.98, + "MIRACLReranking (id)": 28.43, + "MIRACLReranking (ja)": 20.48, + "MIRACLReranking (ko)": 23.85, + "MIRACLReranking (sw)": 37.71, + "MIRACLReranking (te)": 4.89, + "MIRACLReranking (th)": 9.36, + "MIRACLReranking (yo)": 55.88, + "MIRACLReranking (zh)": 15.02 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "bge-large-en-v1.5", - "AILACasedocs": 25.15, - "AILAStatutes": 20.74, + "AILACasedocs": 25.98, + "AILAStatutes": 23.06, "ARCChallenge": 9.99, + "AlloprofRetrieval": 38.74, "AlphaNLI": 13.13, + "AppsRetrieval": 7.57, + "ArguAna": 64.52, + "BSARDRetrieval": 10.92, "BrightRetrieval (stackoverflow)": 9.51, "BrightRetrieval (earth_science)": 24.15, "BrightRetrieval (aops)": 6.08, @@ -490,25 +2739,126 @@ "BrightRetrieval (theoremqa_questions)": 12.56, "BrightRetrieval (leetcode)": 26.68, "BrightRetrieval (economics)": 16.59, - "GerDaLIRSmall": 3.96, + "CmedqaRetrieval": 2.07, + "CodeFeedbackMT": 36.89, + "CodeFeedbackST": 71.77, + "CodeSearchNetCCRetrieval (python)": 60.47, + "CodeSearchNetCCRetrieval (javascript)": 53.23, + "CodeSearchNetCCRetrieval (go)": 41.05, + "CodeSearchNetCCRetrieval (ruby)": 58.4, + "CodeSearchNetCCRetrieval (java)": 57.06, + "CodeSearchNetCCRetrieval (php)": 46.01, + "CodeSearchNetRetrieval (python)": 90.8, + "CodeSearchNetRetrieval (javascript)": 76.92, + "CodeSearchNetRetrieval (go)": 94.95, + "CodeSearchNetRetrieval (ruby)": 82.41, + "CodeSearchNetRetrieval (java)": 85.4, + "CodeSearchNetRetrieval (php)": 87.83, + "CodeTransOceanContest": 53.13, + "CodeTransOceanDL": 21.04, + "CosQA": 34.36, + "CovidRetrieval": 17.18, + "GerDaLIR": 1.61, + "GerDaLIRSmall": 4.16, + "GermanQuAD-Retrieval": 83.41, "HellaSwag": 28.5, - "LeCaRDv2": 22.68, - "LegalBenchConsumerContractsQA": 73.52, - "LegalBenchCorporateLobbying": 91.51, - "LegalQuAD": 16.22, - "LegalSummarization": 59.99, + "LEMBNarrativeQARetrieval": 27.92, + "LEMBQMSumRetrieval": 23.29, + "LEMBSummScreenFDRetrieval": 68.73, + "LEMBWikimQARetrieval": 56.42, + "LeCaRDv2": 21.67, + "LegalBenchConsumerContractsQA": 73.33, + "LegalBenchCorporateLobbying": 90.77, + "LegalQuAD": 16.56, + "LegalSummarization": 61.12, + "MIRACLRetrieval (ru)": 10.73, + "MIRACLRetrieval (ar)": 3.6, + "MIRACLRetrieval (bn)": 4.92, + "MIRACLRetrieval (de)": 17.65, + "MIRACLRetrieval (en)": 49.54, + "MIRACLRetrieval (es)": 28.5, + "MIRACLRetrieval (fa)": 3.6, + "MIRACLRetrieval (fi)": 24.56, + "MIRACLRetrieval (fr)": 19.15, + "MIRACLRetrieval (hi)": 0.62, + "MIRACLRetrieval (id)": 17.48, + "MIRACLRetrieval (ja)": 4.12, + "MIRACLRetrieval (ko)": 9.89, + "MIRACLRetrieval (sw)": 29.89, + "MIRACLRetrieval (te)": 0.11, + "MIRACLRetrieval (th)": 0.59, + "MIRACLRetrieval (yo)": 54.39, + "MIRACLRetrieval (zh)": 0.87, + "MintakaRetrieval (ar)": 3.36, + "MintakaRetrieval (de)": 18.33, + "MintakaRetrieval (es)": 16.42, + "MintakaRetrieval (fr)": 20.44, + "MintakaRetrieval (hi)": 3.1, + "MintakaRetrieval (it)": 15.85, + "MintakaRetrieval (ja)": 8.48, + "MintakaRetrieval (pt)": 15.72, + "NFCorpus": 38.06, "PIQA": 27.99, "Quail": 1.83, "RARbCode": 48.12, "RARbMath": 57.36, + "RiaNewsRetrieval": 29.09, + "RuBQRetrieval": 21.05, + "SCIDOCS": 22.63, "SIQA": 1.04, + "SciFact": 74.64, + "SciFact-PL": 39.46, "SpartQA": 2.99, + "StackOverflowQA": 83.07, + "SyntecRetrieval": 62.29, + "SyntheticText2SQL": 50.64, + "TRECCOVID": 74.7, + "TRECCOVID-PL": 31.13, "TempReasonL1": 1.46, "TempReasonL2Fact": 24.25, "TempReasonL2Pure": 2.35, "TempReasonL3Fact": 20.64, "TempReasonL3Pure": 6.67, - "WinoGrande": 19.18 + "WinoGrande": 19.18, + "XMarket (de)": 14.52, + "XMarket (en)": 33.37, + "XMarket (es)": 16.0, + "XPQARetrieval (ara-ara)": 11.06, + "XPQARetrieval (eng-ara)": 2.97, + "XPQARetrieval (ara-eng)": 8.68, + "XPQARetrieval (deu-deu)": 59.66, + "XPQARetrieval (eng-deu)": 12.85, + "XPQARetrieval (deu-eng)": 32.48, + "XPQARetrieval (spa-spa)": 47.89, + "XPQARetrieval (eng-spa)": 9.27, + "XPQARetrieval (spa-eng)": 29.33, + "XPQARetrieval (fra-fra)": 55.47, + "XPQARetrieval (eng-fra)": 15.52, + "XPQARetrieval (fra-eng)": 35.39, + "XPQARetrieval (hin-hin)": 25.9, + "XPQARetrieval (eng-hin)": 6.93, + "XPQARetrieval (hin-eng)": 7.93, + "XPQARetrieval (ita-ita)": 60.43, + "XPQARetrieval (eng-ita)": 9.66, + "XPQARetrieval (ita-eng)": 29.93, + "XPQARetrieval (jpn-jpn)": 43.18, + "XPQARetrieval (eng-jpn)": 5.22, + "XPQARetrieval (jpn-eng)": 16.47, + "XPQARetrieval (kor-kor)": 17.32, + "XPQARetrieval (eng-kor)": 8.82, + "XPQARetrieval (kor-eng)": 8.32, + "XPQARetrieval (pol-pol)": 33.06, + "XPQARetrieval (eng-pol)": 11.27, + "XPQARetrieval (pol-eng)": 20.21, + "XPQARetrieval (por-por)": 38.57, + "XPQARetrieval (eng-por)": 8.14, + "XPQARetrieval (por-eng)": 25.24, + "XPQARetrieval (tam-tam)": 8.49, + "XPQARetrieval (eng-tam)": 4.38, + "XPQARetrieval (tam-eng)": 3.46, + "XPQARetrieval (cmn-cmn)": 24.99, + "XPQARetrieval (eng-cmn)": 8.52, + "XPQARetrieval (cmn-eng)": 16.81 } ], "recall_at_1": [ @@ -526,16 +2876,149 @@ ] }, "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, + "cosine_spearman": [ + { + "Model": "bge-large-en-v1.5", + "CDSC-R": 82.28, + "GermanSTSBenchmark": 63.74, + "RUParaPhraserSTS": 49.11, + "RuSTSBenchmarkSTS": 60.33, + "SICK-R": 81.68, + "SICK-R-PL": 52.73, + "SICKFr": 69.33, + "STS12": 79.05, + "STS13": 86.37, + "STS14": 82.78, + "STS15": 88.03, + "STS17 (ko-ko)": 38.5, + "STS17 (en-ar)": 6.25, + "STS17 (es-en)": 44.24, + "STS17 (ar-ar)": 43.61, + "STS17 (en-tr)": 10.12, + "STS17 (es-es)": 79.62, + "STS17 (en-de)": 45.46, + "STS17 (fr-en)": 48.28, + "STS17 (en-en)": 87.49, + "STS17 (it-en)": 44.48, + "STS17 (nl-en)": 40.92, + "STS22 (ru)": 25.18, + "STS22 (pl-en)": 54.46, + "STS22 (de-en)": 50.66, + "STS22 (de-fr)": 40.97, + "STS22 (zh)": 51.6, + "STS22 (pl)": 33.72, + "STS22 (zh-en)": 49.02, + "STS22 (en)": 67.52, + "STS22 (tr)": 44.96, + "STS22 (es-en)": 55.47, + "STS22 (es)": 57.92, + "STS22 (ar)": 22.55, + "STS22 (fr-pl)": 28.17, + "STS22 (de)": 40.26, + "STS22 (it)": 65.13, + "STS22 (fr)": 79.43, + "STS22 (de-pl)": 23.31, + "STS22 (es-it)": 57.7, + "STSB": 36.44, + "STSBenchmark": 87.52, + "STSBenchmarkMultilingualSTS (it)": 69.38, + "STSBenchmarkMultilingualSTS (zh)": 37.32, + "STSBenchmarkMultilingualSTS (es)": 71.47, + "STSBenchmarkMultilingualSTS (pl)": 60.99, + "STSBenchmarkMultilingualSTS (de)": 64.59, + "STSBenchmarkMultilingualSTS (nl)": 65.61, + "STSBenchmarkMultilingualSTS (fr)": 70.05, + "STSBenchmarkMultilingualSTS (pt)": 68.57, + "STSBenchmarkMultilingualSTS (en)": 87.52, + "STSBenchmarkMultilingualSTS (ru)": 60.06 + }, + { + "Model": "bge-large-en-v1.5", + "CDSC-R": 82.28, + "GermanSTSBenchmark": 63.74, + "RUParaPhraserSTS": 49.11, + "RuSTSBenchmarkSTS": 60.33, + "SICK-R": 81.68, + "SICK-R-PL": 52.73, + "SICKFr": 69.33, + "STS12": 79.05, + "STS13": 86.37, + "STS14": 82.78, + "STS15": 88.03, + "STS17 (ko-ko)": 38.5, + "STS17 (en-ar)": 6.25, + "STS17 (es-en)": 44.24, + "STS17 (ar-ar)": 43.61, + "STS17 (en-tr)": 10.12, + "STS17 (es-es)": 79.62, + "STS17 (en-de)": 45.46, + "STS17 (fr-en)": 48.28, + "STS17 (en-en)": 87.49, + "STS17 (it-en)": 44.48, + "STS17 (nl-en)": 40.92, + "STS22 (ru)": 25.18, + "STS22 (pl-en)": 54.46, + "STS22 (de-en)": 50.66, + "STS22 (de-fr)": 40.97, + "STS22 (zh)": 51.6, + "STS22 (pl)": 33.72, + "STS22 (zh-en)": 49.02, + "STS22 (en)": 67.52, + "STS22 (tr)": 44.96, + "STS22 (es-en)": 55.47, + "STS22 (es)": 57.92, + "STS22 (ar)": 22.53, + "STS22 (fr-pl)": 28.17, + "STS22 (de)": 40.27, + "STS22 (it)": 65.13, + "STS22 (fr)": 79.43, + "STS22 (de-pl)": 23.31, + "STS22 (es-it)": 57.7, + "STSB": 36.44, + "STSBenchmark": 87.52, + "STSBenchmarkMultilingualSTS (it)": 69.38, + "STSBenchmarkMultilingualSTS (zh)": 37.32, + "STSBenchmarkMultilingualSTS (es)": 71.47, + "STSBenchmarkMultilingualSTS (pl)": 60.99, + "STSBenchmarkMultilingualSTS (de)": 64.59, + "STSBenchmarkMultilingualSTS (nl)": 65.61, + "STSBenchmarkMultilingualSTS (fr)": 70.05, + "STSBenchmarkMultilingualSTS (pt)": 68.57, + "STSBenchmarkMultilingualSTS (en)": 87.52, + "STSBenchmarkMultilingualSTS (ru)": 60.07 + } + ] + }, + "Summarization": { + "cosine_spearman": [ + { + "Model": "bge-large-en-v1.5", + "SummEvalFr": 29.5 + }, + { + "Model": "bge-large-en-v1.5", + "SummEvalFr": 29.5 + } + ] + }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-large-en-v1.5", + "CEDRClassification": 36.15, + "SensitiveTopicsClassification": 17.97 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "bge-large-en-v1.5", + "Core17InstructionRetrieval": -1.93, + "News21InstructionRetrieval": 0.33, + "Robust04InstructionRetrieval": -5.26 + } + ] } }, "BAAI__bge-large-en-v1.5-instruct": { @@ -786,7 +3269,119 @@ "f1": [ { "Model": "bge-m3", - "Tatoeba (rus-eng)": 93.42 + "BornholmBitextMining": 44.11, + "Tatoeba (swh-eng)": 73.89, + "Tatoeba (arz-eng)": 68.59, + "Tatoeba (tam-eng)": 88.38, + "Tatoeba (gsw-eng)": 50.07, + "Tatoeba (fra-eng)": 94.3, + "Tatoeba (hsb-eng)": 61.14, + "Tatoeba (hrv-eng)": 95.35, + "Tatoeba (cha-eng)": 32.57, + "Tatoeba (ido-eng)": 71.22, + "Tatoeba (spa-eng)": 96.82, + "Tatoeba (tzl-eng)": 49.63, + "Tatoeba (cor-eng)": 5.9, + "Tatoeba (hun-eng)": 93.05, + "Tatoeba (max-eng)": 59.87, + "Tatoeba (uig-eng)": 79.95, + "Tatoeba (tel-eng)": 92.81, + "Tatoeba (kzj-eng)": 10.46, + "Tatoeba (fin-eng)": 95.7, + "Tatoeba (kat-eng)": 90.73, + "Tatoeba (awa-eng)": 74.37, + "Tatoeba (mhr-eng)": 11.36, + "Tatoeba (kab-eng)": 2.45, + "Tatoeba (eus-eng)": 73.6, + "Tatoeba (ceb-eng)": 32.14, + "Tatoeba (bre-eng)": 11.93, + "Tatoeba (ast-eng)": 76.64, + "Tatoeba (tur-eng)": 94.63, + "Tatoeba (ukr-eng)": 92.32, + "Tatoeba (kaz-eng)": 79.14, + "Tatoeba (csb-eng)": 42.61, + "Tatoeba (vie-eng)": 96.3, + "Tatoeba (isl-eng)": 93.35, + "Tatoeba (orv-eng)": 44.76, + "Tatoeba (bos-eng)": 92.98, + "Tatoeba (epo-eng)": 93.85, + "Tatoeba (ile-eng)": 77.32, + "Tatoeba (yid-eng)": 48.99, + "Tatoeba (por-eng)": 94.31, + "Tatoeba (khm-eng)": 76.02, + "Tatoeba (ina-eng)": 90.4, + "Tatoeba (deu-eng)": 99.1, + "Tatoeba (dtp-eng)": 10.16, + "Tatoeba (pms-eng)": 53.64, + "Tatoeba (ber-eng)": 7.78, + "Tatoeba (arq-eng)": 30.88, + "Tatoeba (swe-eng)": 93.63, + "Tatoeba (ind-eng)": 93.36, + "Tatoeba (urd-eng)": 90.48, + "Tatoeba (tha-eng)": 96.59, + "Tatoeba (rus-eng)": 93.27, + "Tatoeba (gla-eng)": 33.5, + "Tatoeba (pol-eng)": 96.6, + "Tatoeba (fao-eng)": 74.92, + "Tatoeba (ara-eng)": 87.76, + "Tatoeba (afr-eng)": 91.88, + "Tatoeba (cbk-eng)": 68.84, + "Tatoeba (mar-eng)": 88.81, + "Tatoeba (zsm-eng)": 95.25, + "Tatoeba (srp-eng)": 92.43, + "Tatoeba (jav-eng)": 60.86, + "Tatoeba (hye-eng)": 89.15, + "Tatoeba (lvs-eng)": 90.55, + "Tatoeba (tuk-eng)": 25.36, + "Tatoeba (ces-eng)": 94.75, + "Tatoeba (mkd-eng)": 89.26, + "Tatoeba (lfn-eng)": 65.76, + "Tatoeba (kor-eng)": 89.84, + "Tatoeba (ita-eng)": 92.57, + "Tatoeba (tgl-eng)": 79.91, + "Tatoeba (mon-eng)": 89.6, + "Tatoeba (ron-eng)": 95.62, + "Tatoeba (lit-eng)": 90.7, + "Tatoeba (uzb-eng)": 59.22, + "Tatoeba (cmn-eng)": 95.27, + "Tatoeba (swg-eng)": 47.69, + "Tatoeba (tat-eng)": 66.01, + "Tatoeba (ben-eng)": 85.97, + "Tatoeba (nno-eng)": 91.8, + "Tatoeba (dsb-eng)": 52.29, + "Tatoeba (wuu-eng)": 83.41, + "Tatoeba (glg-eng)": 91.11, + "Tatoeba (slk-eng)": 93.55, + "Tatoeba (pes-eng)": 92.13, + "Tatoeba (nds-eng)": 62.6, + "Tatoeba (nld-eng)": 95.78, + "Tatoeba (kur-eng)": 63.79, + "Tatoeba (nov-eng)": 75.48, + "Tatoeba (nob-eng)": 97.37, + "Tatoeba (heb-eng)": 87.5, + "Tatoeba (sqi-eng)": 93.65, + "Tatoeba (lat-eng)": 56.52, + "Tatoeba (cym-eng)": 61.64, + "Tatoeba (aze-eng)": 89.22, + "Tatoeba (xho-eng)": 64.92, + "Tatoeba (ell-eng)": 95.15, + "Tatoeba (cat-eng)": 93.49, + "Tatoeba (dan-eng)": 94.32, + "Tatoeba (pam-eng)": 10.89, + "Tatoeba (slv-eng)": 89.43, + "Tatoeba (bul-eng)": 91.63, + "Tatoeba (amh-eng)": 82.54, + "Tatoeba (jpn-eng)": 93.73, + "Tatoeba (yue-eng)": 85.48, + "Tatoeba (bel-eng)": 92.27, + "Tatoeba (oci-eng)": 55.5, + "Tatoeba (mal-eng)": 97.19, + "Tatoeba (ang-eng)": 46.95, + "Tatoeba (gle-eng)": 58.09, + "Tatoeba (est-eng)": 90.72, + "Tatoeba (hin-eng)": 95.45, + "Tatoeba (fry-eng)": 73.31, + "Tatoeba (war-eng)": 33.49 } ] }, @@ -794,15 +3389,93 @@ "accuracy": [ { "Model": "bge-m3", + "AmazonCounterfactualClassification (en-ext)": 76.23, + "AmazonCounterfactualClassification (en)": 75.63, + "AmazonCounterfactualClassification (de)": 69.11, + "AmazonCounterfactualClassification (ja)": 77.67, + "AmazonReviewsClassification (de)": 46.37, "GeoreviewClassification": 48.27, "HeadlineClassification": 70.32, "InappropriatenessClassification": 59.87, "KinopoiskClassification": 58.23, - "MassiveIntentClassification (ru)": 68.75, + "MTOPDomainClassification (de)": 91.22, + "MTOPIntentClassification (de)": 68.08, + "MasakhaNEWSClassification (amh)": 84.76, + "MasakhaNEWSClassification (eng)": 78.73, + "MasakhaNEWSClassification (fra)": 76.4, + "MasakhaNEWSClassification (hau)": 78.26, + "MasakhaNEWSClassification (ibo)": 64.26, + "MasakhaNEWSClassification (lin)": 72.46, + "MasakhaNEWSClassification (lug)": 65.25, + "MasakhaNEWSClassification (orm)": 74.58, + "MasakhaNEWSClassification (pcm)": 91.11, + "MasakhaNEWSClassification (run)": 76.74, + "MasakhaNEWSClassification (sna)": 84.74, + "MasakhaNEWSClassification (som)": 64.76, + "MasakhaNEWSClassification (swa)": 73.8, + "MasakhaNEWSClassification (tir)": 68.16, + "MasakhaNEWSClassification (xho)": 77.81, + "MasakhaNEWSClassification (yor)": 79.03, + "MassiveIntentClassification (sl)": 65.53, + "MassiveIntentClassification (ko)": 66.53, + "MassiveIntentClassification (tl)": 59.53, + "MassiveIntentClassification (sv)": 69.15, + "MassiveIntentClassification (pl)": 67.95, + "MassiveIntentClassification (ru)": 68.76, + "MassiveIntentClassification (en)": 71.08, + "MassiveIntentClassification (af)": 63.33, + "MassiveIntentClassification (az)": 64.24, + "MassiveIntentClassification (fr)": 67.45, + "MassiveIntentClassification (bn)": 62.95, + "MassiveIntentClassification (vi)": 67.59, + "MassiveIntentClassification (am)": 55.09, + "MassiveIntentClassification (fi)": 67.26, + "MassiveIntentClassification (tr)": 66.96, + "MassiveIntentClassification (el)": 66.22, + "MassiveIntentClassification (jv)": 52.32, + "MassiveIntentClassification (nl)": 68.26, + "MassiveIntentClassification (fa)": 69.44, + "MassiveIntentClassification (sw)": 56.82, + "MassiveIntentClassification (da)": 67.26, + "MassiveIntentClassification (ar)": 55.75, + "MassiveIntentClassification (nb)": 67.07, + "MassiveIntentClassification (hi)": 65.09, + "MassiveIntentClassification (hy)": 63.92, + "MassiveIntentClassification (ja)": 69.24, + "MassiveIntentClassification (mn)": 62.61, + "MassiveIntentClassification (zh-CN)": 69.02, + "MassiveIntentClassification (th)": 64.56, + "MassiveIntentClassification (cy)": 53.17, + "MassiveIntentClassification (te)": 62.85, + "MassiveIntentClassification (ka)": 55.21, + "MassiveIntentClassification (ro)": 65.09, + "MassiveIntentClassification (id)": 66.9, + "MassiveIntentClassification (de)": 65.74, + "MassiveIntentClassification (lv)": 64.9, + "MassiveIntentClassification (zh-TW)": 64.16, + "MassiveIntentClassification (sq)": 64.8, + "MassiveIntentClassification (my)": 60.0, + "MassiveIntentClassification (it)": 67.05, + "MassiveIntentClassification (he)": 65.16, + "MassiveIntentClassification (km)": 51.06, + "MassiveIntentClassification (ms)": 65.35, + "MassiveIntentClassification (hu)": 65.03, + "MassiveIntentClassification (is)": 61.64, + "MassiveIntentClassification (kn)": 61.66, + "MassiveIntentClassification (ml)": 64.67, + "MassiveIntentClassification (ta)": 60.4, + "MassiveIntentClassification (ur)": 63.17, + "MassiveIntentClassification (pt)": 67.63, + "MassiveIntentClassification (es)": 67.04, "MassiveScenarioClassification (ru)": 73.42, + "MassiveScenarioClassification (de)": 73.54, + "NordicLangClassification": 38.93, + "PAC": 69.13, + "PolEmo2.0-OUT": 49.47, "RuReviewsClassification": 66.91, "RuSciBenchGRNTIClassification": 55.81, - "RuSciBenchOECDClassification": 42.57 + "RuSciBenchOECDClassification": 42.57, + "ToxicConversationsClassification": 68.72 } ] }, @@ -810,7 +3483,25 @@ "v_measure": [ { "Model": "bge-m3", + "BlurbsClusteringP2P": 38.69, + "BlurbsClusteringS2S": 16.28, "GeoreviewClusteringP2P": 63.09, + "MasakhaNEWSClusteringS2S (amh)": 46.92, + "MasakhaNEWSClusteringS2S (eng)": 26.25, + "MasakhaNEWSClusteringS2S (fra)": 42.4, + "MasakhaNEWSClusteringS2S (hau)": 37.07, + "MasakhaNEWSClusteringS2S (ibo)": 38.82, + "MasakhaNEWSClusteringS2S (lin)": 52.16, + "MasakhaNEWSClusteringS2S (lug)": 48.08, + "MasakhaNEWSClusteringS2S (orm)": 28.09, + "MasakhaNEWSClusteringS2S (pcm)": 62.49, + "MasakhaNEWSClusteringS2S (run)": 47.41, + "MasakhaNEWSClusteringS2S (sna)": 48.44, + "MasakhaNEWSClusteringS2S (som)": 37.44, + "MasakhaNEWSClusteringS2S (swa)": 22.99, + "MasakhaNEWSClusteringS2S (tir)": 54.6, + "MasakhaNEWSClusteringS2S (xho)": 30.48, + "MasakhaNEWSClusteringS2S (yor)": 26.7, "RuSciBenchGRNTIClusteringP2P": 50.83, "RuSciBenchOECDClusteringP2P": 43.21 } @@ -820,13 +3511,41 @@ "max_ap": [ { "Model": "bge-m3", + "OpusparcusPC (de)": 96.6, + "OpusparcusPC (en)": 98.77, + "OpusparcusPC (fi)": 94.65, + "OpusparcusPC (fr)": 92.88, "OpusparcusPC (ru)": 89.64, - "TERRa": 60.6 + "OpusparcusPC (sv)": 95.36, + "PawsXPairClassification (de)": 56.79, + "PawsXPairClassification (en)": 61.09, + "PawsXPairClassification (es)": 57.3, + "PawsXPairClassification (fr)": 59.57, + "PawsXPairClassification (ja)": 51.7, + "PawsXPairClassification (ko)": 52.33, + "PawsXPairClassification (zh)": 57.05, + "SprintDuplicateQuestions": 97.33, + "TERRa": 60.6, + "TwitterURLCorpus": 85.89 }, { "Model": "bge-m3", - "OpusparcusPC (ru)": 89.64, - "TERRa": 60.6 + "OpusparcusPC (de)": 96.6, + "OpusparcusPC (en)": 98.77, + "OpusparcusPC (fi)": 94.65, + "OpusparcusPC (fr)": 92.88, + "OpusparcusPC (ru)": 89.65, + "OpusparcusPC (sv)": 95.36, + "PawsXPairClassification (de)": 57.12, + "PawsXPairClassification (en)": 61.13, + "PawsXPairClassification (es)": 57.35, + "PawsXPairClassification (fr)": 59.6, + "PawsXPairClassification (ja)": 51.79, + "PawsXPairClassification (ko)": 52.33, + "PawsXPairClassification (zh)": 57.11, + "SprintDuplicateQuestions": 97.33, + "TERRa": 60.6, + "TwitterURLCorpus": 85.89 } ] }, @@ -834,11 +3553,13 @@ "map": [ { "Model": "bge-m3", - "MIRACLReranking (ru)": 65.38 + "AlloprofReranking": 73.87, + "RuBQReranking": 74.03, + "T2Reranking": 66.83 }, { "Model": "bge-m3", - "RuBQReranking": 74.03 + "MIRACLReranking (ru)": 65.38 } ] }, @@ -846,13 +3567,17 @@ "ndcg_at_10": [ { "Model": "bge-m3", + "AILAStatutes": 29.04, "ARCChallenge": 9.02, "AlphaNLI": 24.73, + "ArguAna": 54.04, + "CovidRetrieval": 77.51, "HellaSwag": 25.67, "LEMBNarrativeQARetrieval": 45.76, "LEMBQMSumRetrieval": 35.54, "LEMBSummScreenFDRetrieval": 94.09, "LEMBWikimQARetrieval": 77.73, + "LegalBenchCorporateLobbying": 90.34, "MIRACLRetrieval (ru)": 70.16, "PIQA": 22.93, "Quail": 7.51, @@ -860,8 +3585,11 @@ "RARbMath": 69.19, "RiaNewsRetrieval": 82.99, "RuBQRetrieval": 71.22, + "SCIDOCS": 16.31, "SIQA": 4.89, "SpartQA": 7.49, + "StackOverflowQA": 80.6, + "TRECCOVID": 54.72, "TempReasonL1": 0.99, "TempReasonL2Fact": 33.23, "TempReasonL2Pure": 0.68, @@ -875,10 +3603,51 @@ "cosine_spearman": [ { "Model": "bge-m3", + "GermanSTSBenchmark": 80.79, "RUParaPhraserSTS": 74.9, "RuSTSBenchmarkSTS": 79.87, + "SICK-R": 79.72, + "STS12": 78.73, + "STS13": 79.6, + "STS14": 79.0, + "STS15": 87.81, + "STS17 (en-en)": 87.13, + "STS17 (es-es)": 87.34, + "STS17 (en-de)": 82.24, + "STS17 (en-tr)": 72.52, + "STS17 (en-ar)": 69.41, + "STS17 (ar-ar)": 80.74, + "STS17 (es-en)": 75.57, + "STS17 (nl-en)": 80.82, + "STS17 (fr-en)": 79.8, + "STS17 (it-en)": 79.12, + "STS17 (ko-ko)": 81.43, "STS22 (ru)": 66.26, + "STSB": 80.6, + "STSBenchmark": 84.87, "STSBenchmarkMultilingualSTS (ru)": 79.27 + }, + { + "Model": "bge-m3", + "GermanSTSBenchmark": 80.79, + "SICK-R": 79.72, + "STS12": 78.73, + "STS13": 79.6, + "STS14": 79.0, + "STS15": 87.81, + "STS17 (en-en)": 87.13, + "STS17 (es-es)": 87.34, + "STS17 (en-de)": 82.24, + "STS17 (en-tr)": 72.52, + "STS17 (en-ar)": 69.41, + "STS17 (ar-ar)": 80.74, + "STS17 (es-en)": 75.57, + "STS17 (nl-en)": 80.82, + "STS17 (fr-en)": 79.8, + "STS17 (it-en)": 79.12, + "STS17 (ko-ko)": 81.43, + "STSB": 80.6, + "STSBenchmark": 84.87 } ] }, @@ -895,7 +3664,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "bge-m3", + "Core17InstructionRetrieval": -1.25, + "News21InstructionRetrieval": -1.39, + "Robust04InstructionRetrieval": -6.69 + } + ] } }, "BAAI__bge-m3-instruct": { @@ -951,53 +3727,708 @@ }, "BAAI__bge-small-en-v1.5": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "bge-small-en-v1.5", + "BornholmBitextMining": 33.92, + "Tatoeba (arq-eng)": 0.39, + "Tatoeba (lfn-eng)": 9.26, + "Tatoeba (rus-eng)": 0.21, + "Tatoeba (ceb-eng)": 3.58, + "Tatoeba (cym-eng)": 6.99, + "Tatoeba (ile-eng)": 19.35, + "Tatoeba (nds-eng)": 11.51, + "Tatoeba (wuu-eng)": 1.9, + "Tatoeba (amh-eng)": 0.91, + "Tatoeba (max-eng)": 10.36, + "Tatoeba (jpn-eng)": 0.94, + "Tatoeba (tzl-eng)": 15.44, + "Tatoeba (xho-eng)": 3.87, + "Tatoeba (tur-eng)": 4.16, + "Tatoeba (urd-eng)": 0.03, + "Tatoeba (hrv-eng)": 5.55, + "Tatoeba (khm-eng)": 0.15, + "Tatoeba (gla-eng)": 2.71, + "Tatoeba (fao-eng)": 7.03, + "Tatoeba (kor-eng)": 0.85, + "Tatoeba (por-eng)": 10.93, + "Tatoeba (tat-eng)": 0.77, + "Tatoeba (pam-eng)": 4.81, + "Tatoeba (ces-eng)": 3.41, + "Tatoeba (uzb-eng)": 3.77, + "Tatoeba (yue-eng)": 1.18, + "Tatoeba (orv-eng)": 0.09, + "Tatoeba (aze-eng)": 3.68, + "Tatoeba (ang-eng)": 17.22, + "Tatoeba (est-eng)": 2.11, + "Tatoeba (kat-eng)": 0.58, + "Tatoeba (lvs-eng)": 3.5, + "Tatoeba (nob-eng)": 8.64, + "Tatoeba (epo-eng)": 9.42, + "Tatoeba (cor-eng)": 2.88, + "Tatoeba (yid-eng)": 0.21, + "Tatoeba (dan-eng)": 9.8, + "Tatoeba (hsb-eng)": 4.79, + "Tatoeba (bos-eng)": 7.18, + "Tatoeba (ara-eng)": 0.17, + "Tatoeba (swg-eng)": 9.47, + "Tatoeba (kab-eng)": 1.03, + "Tatoeba (eus-eng)": 7.21, + "Tatoeba (ido-eng)": 12.06, + "Tatoeba (glg-eng)": 12.43, + "Tatoeba (awa-eng)": 0.52, + "Tatoeba (hun-eng)": 4.22, + "Tatoeba (deu-eng)": 10.09, + "Tatoeba (nno-eng)": 6.76, + "Tatoeba (fra-eng)": 16.67, + "Tatoeba (tam-eng)": 0.08, + "Tatoeba (zsm-eng)": 6.99, + "Tatoeba (cmn-eng)": 2.47, + "Tatoeba (swh-eng)": 5.7, + "Tatoeba (ukr-eng)": 0.67, + "Tatoeba (gsw-eng)": 13.64, + "Tatoeba (ina-eng)": 24.66, + "Tatoeba (csb-eng)": 3.42, + "Tatoeba (oci-eng)": 8.07, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (ast-eng)": 13.66, + "Tatoeba (gle-eng)": 3.12, + "Tatoeba (bel-eng)": 1.22, + "Tatoeba (nld-eng)": 11.36, + "Tatoeba (mhr-eng)": 0.0, + "Tatoeba (isl-eng)": 3.78, + "Tatoeba (ron-eng)": 7.24, + "Tatoeba (mkd-eng)": 0.2, + "Tatoeba (fin-eng)": 3.49, + "Tatoeba (ind-eng)": 6.35, + "Tatoeba (cat-eng)": 12.93, + "Tatoeba (sqi-eng)": 5.09, + "Tatoeba (pms-eng)": 9.63, + "Tatoeba (tgl-eng)": 3.69, + "Tatoeba (lat-eng)": 7.06, + "Tatoeba (mar-eng)": 0.04, + "Tatoeba (srp-eng)": 2.25, + "Tatoeba (ben-eng)": 0.02, + "Tatoeba (afr-eng)": 6.77, + "Tatoeba (pol-eng)": 5.51, + "Tatoeba (mon-eng)": 1.6, + "Tatoeba (swe-eng)": 7.68, + "Tatoeba (bul-eng)": 0.56, + "Tatoeba (nov-eng)": 25.06, + "Tatoeba (uig-eng)": 0.59, + "Tatoeba (ell-eng)": 0.17, + "Tatoeba (hye-eng)": 0.41, + "Tatoeba (mal-eng)": 0.05, + "Tatoeba (dsb-eng)": 4.79, + "Tatoeba (slk-eng)": 3.97, + "Tatoeba (fry-eng)": 16.8, + "Tatoeba (dtp-eng)": 3.03, + "Tatoeba (kur-eng)": 7.53, + "Tatoeba (vie-eng)": 4.4, + "Tatoeba (war-eng)": 5.06, + "Tatoeba (kzj-eng)": 3.64, + "Tatoeba (ber-eng)": 4.47, + "Tatoeba (cbk-eng)": 10.19, + "Tatoeba (jav-eng)": 4.77, + "Tatoeba (slv-eng)": 3.89, + "Tatoeba (ita-eng)": 14.34, + "Tatoeba (tuk-eng)": 3.64, + "Tatoeba (arz-eng)": 0.06, + "Tatoeba (spa-eng)": 14.72, + "Tatoeba (bre-eng)": 4.08, + "Tatoeba (cha-eng)": 14.39, + "Tatoeba (tha-eng)": 1.28, + "Tatoeba (lit-eng)": 2.3, + "Tatoeba (kaz-eng)": 0.55, + "Tatoeba (heb-eng)": 0.47, + "Tatoeba (tel-eng)": 0.2, + "Tatoeba (pes-eng)": 0.63 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-small-en-v1.5", + "AllegroReviews": 25.64, + "AmazonCounterfactualClassification (en-ext)": 71.81, + "AmazonCounterfactualClassification (en)": 71.84, + "AmazonCounterfactualClassification (de)": 58.53, + "AmazonCounterfactualClassification (ja)": 58.03, + "AmazonReviewsClassification (en)": 49.19, + "AmazonReviewsClassification (de)": 26.82, + "AmazonReviewsClassification (es)": 32.81, + "AmazonReviewsClassification (fr)": 28.3, + "AmazonReviewsClassification (ja)": 23.8, + "AmazonReviewsClassification (zh)": 23.54, + "AngryTweetsClassification": 43.31, + "CBD": 51.9, + "DanishPoliticalCommentsClassification": 28.74, + "GeoreviewClassification": 27.23, + "HeadlineClassification": 29.75, + "InappropriatenessClassification": 51.22, + "KinopoiskClassification": 35.31, + "LccSentimentClassification": 38.53, + "MTOPDomainClassification (en)": 92.31, + "MTOPDomainClassification (de)": 69.72, + "MTOPDomainClassification (es)": 74.81, + "MTOPDomainClassification (fr)": 75.49, + "MTOPDomainClassification (hi)": 26.01, + "MTOPDomainClassification (th)": 16.95, + "MTOPIntentClassification (en)": 63.87, + "MTOPIntentClassification (de)": 40.14, + "MTOPIntentClassification (es)": 40.36, + "MTOPIntentClassification (fr)": 37.32, + "MTOPIntentClassification (hi)": 9.4, + "MTOPIntentClassification (th)": 5.26, + "MasakhaNEWSClassification (amh)": 35.05, + "MasakhaNEWSClassification (eng)": 80.13, + "MasakhaNEWSClassification (fra)": 76.35, + "MasakhaNEWSClassification (hau)": 60.16, + "MasakhaNEWSClassification (ibo)": 56.97, + "MasakhaNEWSClassification (lin)": 69.37, + "MasakhaNEWSClassification (lug)": 57.49, + "MasakhaNEWSClassification (orm)": 60.52, + "MasakhaNEWSClassification (pcm)": 93.93, + "MasakhaNEWSClassification (run)": 63.23, + "MasakhaNEWSClassification (sna)": 73.77, + "MasakhaNEWSClassification (som)": 52.99, + "MasakhaNEWSClassification (swa)": 55.53, + "MasakhaNEWSClassification (tir)": 27.46, + "MasakhaNEWSClassification (xho)": 63.64, + "MasakhaNEWSClassification (yor)": 63.63, + "MassiveIntentClassification (de)": 40.71, + "MassiveIntentClassification (it)": 42.67, + "MassiveIntentClassification (kn)": 3.33, + "MassiveIntentClassification (am)": 2.47, + "MassiveIntentClassification (af)": 36.68, + "MassiveIntentClassification (tl)": 38.83, + "MassiveIntentClassification (ro)": 40.92, + "MassiveIntentClassification (zh-CN)": 21.95, + "MassiveIntentClassification (is)": 34.1, + "MassiveIntentClassification (ml)": 2.65, + "MassiveIntentClassification (ru)": 24.69, + "MassiveIntentClassification (pt)": 44.25, + "MassiveIntentClassification (mn)": 21.87, + "MassiveIntentClassification (my)": 4.7, + "MassiveIntentClassification (bn)": 10.08, + "MassiveIntentClassification (te)": 2.3, + "MassiveIntentClassification (nl)": 38.9, + "MassiveIntentClassification (fi)": 40.13, + "MassiveIntentClassification (sw)": 35.33, + "MassiveIntentClassification (vi)": 36.18, + "MassiveIntentClassification (lv)": 38.48, + "MassiveIntentClassification (fa)": 21.05, + "MassiveIntentClassification (sl)": 38.52, + "MassiveIntentClassification (ja)": 27.8, + "MassiveIntentClassification (ms)": 36.06, + "MassiveIntentClassification (hy)": 9.7, + "MassiveIntentClassification (da)": 43.48, + "MassiveIntentClassification (km)": 4.7, + "MassiveIntentClassification (el)": 23.78, + "MassiveIntentClassification (jv)": 34.78, + "MassiveIntentClassification (ur)": 9.98, + "MassiveIntentClassification (tr)": 38.43, + "MassiveIntentClassification (he)": 18.24, + "MassiveIntentClassification (nb)": 40.05, + "MassiveIntentClassification (en)": 70.76, + "MassiveIntentClassification (fr)": 41.51, + "MassiveIntentClassification (sv)": 40.35, + "MassiveIntentClassification (ar)": 18.67, + "MassiveIntentClassification (th)": 12.11, + "MassiveIntentClassification (ko)": 17.23, + "MassiveIntentClassification (az)": 37.24, + "MassiveIntentClassification (pl)": 39.01, + "MassiveIntentClassification (es)": 40.74, + "MassiveIntentClassification (hi)": 8.72, + "MassiveIntentClassification (hu)": 38.68, + "MassiveIntentClassification (ka)": 11.32, + "MassiveIntentClassification (ta)": 10.09, + "MassiveIntentClassification (cy)": 34.79, + "MassiveIntentClassification (id)": 40.08, + "MassiveIntentClassification (sq)": 40.64, + "MassiveIntentClassification (zh-TW)": 20.78, + "MassiveScenarioClassification (fi)": 44.7, + "MassiveScenarioClassification (sl)": 44.36, + "MassiveScenarioClassification (zh-CN)": 32.17, + "MassiveScenarioClassification (th)": 22.44, + "MassiveScenarioClassification (jv)": 43.42, + "MassiveScenarioClassification (ms)": 42.98, + "MassiveScenarioClassification (ka)": 17.01, + "MassiveScenarioClassification (fa)": 28.07, + "MassiveScenarioClassification (id)": 44.04, + "MassiveScenarioClassification (ko)": 23.89, + "MassiveScenarioClassification (pl)": 46.1, + "MassiveScenarioClassification (az)": 43.56, + "MassiveScenarioClassification (af)": 45.56, + "MassiveScenarioClassification (he)": 22.47, + "MassiveScenarioClassification (da)": 51.6, + "MassiveScenarioClassification (ru)": 29.84, + "MassiveScenarioClassification (mn)": 28.31, + "MassiveScenarioClassification (nl)": 48.31, + "MassiveScenarioClassification (ml)": 7.97, + "MassiveScenarioClassification (lv)": 42.93, + "MassiveScenarioClassification (zh-TW)": 31.33, + "MassiveScenarioClassification (sq)": 48.7, + "MassiveScenarioClassification (kn)": 8.03, + "MassiveScenarioClassification (te)": 7.81, + "MassiveScenarioClassification (tr)": 43.77, + "MassiveScenarioClassification (ur)": 17.86, + "MassiveScenarioClassification (hy)": 16.49, + "MassiveScenarioClassification (nb)": 47.72, + "MassiveScenarioClassification (ja)": 36.78, + "MassiveScenarioClassification (sw)": 44.31, + "MassiveScenarioClassification (tl)": 48.6, + "MassiveScenarioClassification (ro)": 51.26, + "MassiveScenarioClassification (en)": 75.21, + "MassiveScenarioClassification (cy)": 40.22, + "MassiveScenarioClassification (am)": 7.53, + "MassiveScenarioClassification (el)": 33.87, + "MassiveScenarioClassification (fr)": 54.25, + "MassiveScenarioClassification (hi)": 14.9, + "MassiveScenarioClassification (it)": 51.93, + "MassiveScenarioClassification (km)": 9.89, + "MassiveScenarioClassification (pt)": 53.39, + "MassiveScenarioClassification (de)": 49.62, + "MassiveScenarioClassification (ar)": 26.89, + "MassiveScenarioClassification (my)": 10.37, + "MassiveScenarioClassification (sv)": 46.86, + "MassiveScenarioClassification (ta)": 17.11, + "MassiveScenarioClassification (vi)": 42.1, + "MassiveScenarioClassification (hu)": 44.92, + "MassiveScenarioClassification (bn)": 15.29, + "MassiveScenarioClassification (is)": 43.11, + "MassiveScenarioClassification (es)": 51.67, + "NoRecClassification": 37.22, + "NordicLangClassification": 52.02, + "PAC": 57.42, + "PolEmo2.0-IN": 42.92, + "PolEmo2.0-OUT": 24.05, + "RuReviewsClassification": 43.95, + "RuSciBenchGRNTIClassification": 13.96, + "RuSciBenchOECDClassification": 10.76, + "ToxicConversationsClassification": 65.48 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "bge-small-en-v1.5", + "AlloProfClusteringP2P": 58.48, + "AlloProfClusteringS2S": 33.09, + "BlurbsClusteringP2P": 23.89, + "BlurbsClusteringS2S": 10.61, + "GeoreviewClusteringP2P": 21.15, + "HALClusteringS2S": 21.68, + "MLSUMClusteringP2P (de)": 37.74, + "MLSUMClusteringP2P (fr)": 40.52, + "MLSUMClusteringP2P (ru)": 21.86, + "MLSUMClusteringP2P (es)": 40.18, + "MLSUMClusteringS2S (de)": 37.93, + "MLSUMClusteringS2S (fr)": 39.47, + "MLSUMClusteringS2S (ru)": 21.02, + "MLSUMClusteringS2S (es)": 39.84, + "MasakhaNEWSClusteringP2P (amh)": 41.69, + "MasakhaNEWSClusteringP2P (eng)": 63.38, + "MasakhaNEWSClusteringP2P (fra)": 58.71, + "MasakhaNEWSClusteringP2P (hau)": 39.11, + "MasakhaNEWSClusteringP2P (ibo)": 52.02, + "MasakhaNEWSClusteringP2P (lin)": 54.46, + "MasakhaNEWSClusteringP2P (lug)": 54.67, + "MasakhaNEWSClusteringP2P (orm)": 33.71, + "MasakhaNEWSClusteringP2P (pcm)": 80.96, + "MasakhaNEWSClusteringP2P (run)": 58.78, + "MasakhaNEWSClusteringP2P (sna)": 55.31, + "MasakhaNEWSClusteringP2P (som)": 32.43, + "MasakhaNEWSClusteringP2P (swa)": 26.27, + "MasakhaNEWSClusteringP2P (tir)": 42.83, + "MasakhaNEWSClusteringP2P (xho)": 31.5, + "MasakhaNEWSClusteringP2P (yor)": 35.94, + "MasakhaNEWSClusteringS2S (amh)": 41.91, + "MasakhaNEWSClusteringS2S (eng)": 32.88, + "MasakhaNEWSClusteringS2S (fra)": 48.14, + "MasakhaNEWSClusteringS2S (hau)": 13.35, + "MasakhaNEWSClusteringS2S (ibo)": 40.28, + "MasakhaNEWSClusteringS2S (lin)": 60.73, + "MasakhaNEWSClusteringS2S (lug)": 50.01, + "MasakhaNEWSClusteringS2S (orm)": 21.4, + "MasakhaNEWSClusteringS2S (pcm)": 67.86, + "MasakhaNEWSClusteringS2S (run)": 56.95, + "MasakhaNEWSClusteringS2S (sna)": 48.25, + "MasakhaNEWSClusteringS2S (som)": 26.53, + "MasakhaNEWSClusteringS2S (swa)": 19.1, + "MasakhaNEWSClusteringS2S (tir)": 42.42, + "MasakhaNEWSClusteringS2S (xho)": 26.22, + "MasakhaNEWSClusteringS2S (yor)": 32.34, + "RuSciBenchGRNTIClusteringP2P": 14.22, + "RuSciBenchOECDClusteringP2P": 12.15, + "TenKGnadClusteringP2P": 41.47, + "TenKGnadClusteringS2S": 20.34 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "bge-small-en-v1.5", + "CDSC-E": 52.32, + "FalseFriendsGermanEnglish": 48.35, + "OpusparcusPC (de)": 89.96, + "OpusparcusPC (en)": 98.67, + "OpusparcusPC (fi)": 85.45, + "OpusparcusPC (fr)": 87.13, + "OpusparcusPC (ru)": 78.74, + "OpusparcusPC (sv)": 83.59, + "PSC": 91.06, + "PawsXPairClassification (de)": 51.35, + "PawsXPairClassification (en)": 56.61, + "PawsXPairClassification (es)": 52.88, + "PawsXPairClassification (fr)": 53.94, + "PawsXPairClassification (ja)": 49.31, + "PawsXPairClassification (ko)": 51.04, + "PawsXPairClassification (zh)": 53.16, + "SICK-E-PL": 47.15, + "SprintDuplicateQuestions": 96.67, + "TERRa": 44.15, + "TwitterURLCorpus": 84.84 + }, + { + "Model": "bge-small-en-v1.5", + "CDSC-E": 52.37, + "FalseFriendsGermanEnglish": 48.44, + "OpusparcusPC (de)": 89.96, + "OpusparcusPC (en)": 98.67, + "OpusparcusPC (fi)": 85.45, + "OpusparcusPC (fr)": 87.13, + "OpusparcusPC (ru)": 78.74, + "OpusparcusPC (sv)": 83.59, + "PSC": 91.06, + "PawsXPairClassification (de)": 51.35, + "PawsXPairClassification (en)": 56.77, + "PawsXPairClassification (es)": 52.91, + "PawsXPairClassification (fr)": 53.94, + "PawsXPairClassification (ja)": 49.56, + "PawsXPairClassification (ko)": 51.04, + "PawsXPairClassification (zh)": 53.27, + "SICK-E-PL": 47.23, + "SprintDuplicateQuestions": 96.67, + "TERRa": 44.15, + "TwitterURLCorpus": 84.84 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "bge-small-en-v1.5", + "AlloprofReranking": 61.74, + "RuBQReranking": 38.37, + "SyntecReranking": 69.4, + "T2Reranking": 63.0 + }, + { + "Model": "bge-small-en-v1.5", + "MIRACLReranking (ar)": 8.5, + "MIRACLReranking (bn)": 9.78, + "MIRACLReranking (de)": 26.04, + "MIRACLReranking (en)": 54.81, + "MIRACLReranking (es)": 36.01, + "MIRACLReranking (fa)": 8.72, + "MIRACLReranking (fi)": 44.66, + "MIRACLReranking (fr)": 27.89, + "MIRACLReranking (hi)": 8.33, + "MIRACLReranking (id)": 28.47, + "MIRACLReranking (ja)": 16.44, + "MIRACLReranking (ko)": 13.52, + "MIRACLReranking (ru)": 15.81, + "MIRACLReranking (sw)": 37.39, + "MIRACLReranking (te)": 2.58, + "MIRACLReranking (th)": 5.14, + "MIRACLReranking (yo)": 57.13, + "MIRACLReranking (zh)": 14.4 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "bge-small-en-v1.5", + "AILACasedocs": 23.5, + "AILAStatutes": 23.0, "ARCChallenge": 8.95, + "AlloprofRetrieval": 30.77, "AlphaNLI": 11.64, + "AppsRetrieval": 5.64, + "ArguAna": 60.35, + "BSARDRetrieval": 12.89, + "CmedqaRetrieval": 2.13, + "CodeFeedbackMT": 35.09, + "CodeFeedbackST": 67.79, + "CodeSearchNetCCRetrieval (python)": 56.45, + "CodeSearchNetCCRetrieval (javascript)": 50.45, + "CodeSearchNetCCRetrieval (go)": 28.97, + "CodeSearchNetCCRetrieval (ruby)": 55.13, + "CodeSearchNetCCRetrieval (java)": 53.71, + "CodeSearchNetCCRetrieval (php)": 42.44, + "CodeSearchNetRetrieval (python)": 88.79, + "CodeSearchNetRetrieval (javascript)": 73.73, + "CodeSearchNetRetrieval (go)": 94.51, + "CodeSearchNetRetrieval (ruby)": 79.53, + "CodeSearchNetRetrieval (java)": 83.33, + "CodeSearchNetRetrieval (php)": 87.08, + "CodeTransOceanContest": 48.22, + "CodeTransOceanDL": 25.67, + "CosQA": 32.04, + "CovidRetrieval": 14.97, + "GerDaLIR": 0.87, + "GerDaLIRSmall": 2.33, + "GermanQuAD-Retrieval": 75.24, "HellaSwag": 25.44, + "LEMBNarrativeQARetrieval": 22.18, + "LEMBQMSumRetrieval": 21.29, + "LEMBSummScreenFDRetrieval": 57.75, + "LEMBWikimQARetrieval": 43.52, + "LeCaRDv2": 20.06, + "LegalBenchConsumerContractsQA": 73.37, + "LegalBenchCorporateLobbying": 90.32, + "LegalQuAD": 11.74, + "LegalSummarization": 61.49, + "MIRACLRetrieval (ar)": 0.2, + "MIRACLRetrieval (bn)": 0.13, + "MIRACLRetrieval (de)": 14.63, + "MIRACLRetrieval (en)": 46.02, + "MIRACLRetrieval (es)": 22.74, + "MIRACLRetrieval (fa)": 0.1, + "MIRACLRetrieval (fi)": 30.02, + "MIRACLRetrieval (fr)": 17.19, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 17.85, + "MIRACLRetrieval (ja)": 3.71, + "MIRACLRetrieval (ko)": 3.69, + "MIRACLRetrieval (ru)": 2.42, + "MIRACLRetrieval (sw)": 30.78, + "MIRACLRetrieval (te)": 0.06, + "MIRACLRetrieval (th)": 0.47, + "MIRACLRetrieval (yo)": 54.53, + "MIRACLRetrieval (zh)": 0.82, + "MintakaRetrieval (ar)": 1.54, + "MintakaRetrieval (de)": 18.36, + "MintakaRetrieval (es)": 17.71, + "MintakaRetrieval (fr)": 18.69, + "MintakaRetrieval (hi)": 2.74, + "MintakaRetrieval (it)": 16.13, + "MintakaRetrieval (ja)": 8.54, + "MintakaRetrieval (pt)": 17.06, + "NFCorpus": 34.26, "PIQA": 23.92, "Quail": 1.75, "RARbCode": 42.36, "RARbMath": 44.98, + "RiaNewsRetrieval": 10.58, + "RuBQRetrieval": 8.17, + "SCIDOCS": 20.52, "SIQA": 0.77, + "SciFact": 71.27, + "SciFact-PL": 30.48, "SpartQA": 3.55, + "StackOverflowQA": 78.05, + "SyntecRetrieval": 60.74, + "SyntheticText2SQL": 45.12, + "TRECCOVID": 75.53, + "TRECCOVID-PL": 24.85, "TempReasonL1": 1.41, "TempReasonL2Fact": 17.56, "TempReasonL2Pure": 1.05, "TempReasonL3Fact": 13.88, "TempReasonL3Pure": 4.76, - "WinoGrande": 10.28 + "WinoGrande": 10.28, + "XMarket (de)": 13.22, + "XMarket (en)": 31.95, + "XMarket (es)": 14.97, + "XPQARetrieval (ara-ara)": 7.5, + "XPQARetrieval (eng-ara)": 3.82, + "XPQARetrieval (ara-eng)": 8.35, + "XPQARetrieval (deu-deu)": 56.72, + "XPQARetrieval (eng-deu)": 11.29, + "XPQARetrieval (deu-eng)": 27.53, + "XPQARetrieval (spa-spa)": 41.62, + "XPQARetrieval (eng-spa)": 10.11, + "XPQARetrieval (spa-eng)": 22.43, + "XPQARetrieval (fra-fra)": 51.78, + "XPQARetrieval (eng-fra)": 12.23, + "XPQARetrieval (fra-eng)": 29.32, + "XPQARetrieval (hin-hin)": 19.32, + "XPQARetrieval (eng-hin)": 5.74, + "XPQARetrieval (hin-eng)": 5.66, + "XPQARetrieval (ita-ita)": 60.16, + "XPQARetrieval (eng-ita)": 9.89, + "XPQARetrieval (ita-eng)": 25.8, + "XPQARetrieval (jpn-jpn)": 39.11, + "XPQARetrieval (eng-jpn)": 5.05, + "XPQARetrieval (jpn-eng)": 14.37, + "XPQARetrieval (kor-kor)": 12.77, + "XPQARetrieval (eng-kor)": 7.18, + "XPQARetrieval (kor-eng)": 6.38, + "XPQARetrieval (pol-pol)": 30.33, + "XPQARetrieval (eng-pol)": 9.87, + "XPQARetrieval (pol-eng)": 17.83, + "XPQARetrieval (por-por)": 37.41, + "XPQARetrieval (eng-por)": 8.41, + "XPQARetrieval (por-eng)": 22.67, + "XPQARetrieval (tam-tam)": 10.25, + "XPQARetrieval (eng-tam)": 4.19, + "XPQARetrieval (tam-eng)": 3.42, + "XPQARetrieval (cmn-cmn)": 24.2, + "XPQARetrieval (eng-cmn)": 7.81, + "XPQARetrieval (cmn-eng)": 15.38 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bge-small-en-v1.5", + "CDSC-R": 84.98, + "GermanSTSBenchmark": 64.25, + "RUParaPhraserSTS": 44.65, + "RuSTSBenchmarkSTS": 57.44, + "SICK-R": 79.41, + "SICK-R-PL": 54.12, + "SICKFr": 62.43, + "STS12": 77.44, + "STS13": 82.98, + "STS14": 81.84, + "STS15": 87.27, + "STS17 (en-de)": 28.84, + "STS17 (fr-en)": 35.02, + "STS17 (en-ar)": 6.64, + "STS17 (en-tr)": 13.56, + "STS17 (it-en)": 28.17, + "STS17 (es-en)": 28.15, + "STS17 (ar-ar)": 49.71, + "STS17 (nl-en)": 31.57, + "STS17 (ko-ko)": 45.13, + "STS17 (es-es)": 77.73, + "STS17 (en-en)": 87.14, + "STS22 (es-en)": 58.66, + "STS22 (de-pl)": 18.17, + "STS22 (zh-en)": 48.61, + "STS22 (pl)": 31.95, + "STS22 (de)": 32.8, + "STS22 (ru)": 17.88, + "STS22 (de-en)": 44.93, + "STS22 (zh)": 47.48, + "STS22 (tr)": 44.15, + "STS22 (it)": 63.98, + "STS22 (de-fr)": 44.7, + "STS22 (fr-pl)": 61.98, + "STS22 (es)": 55.47, + "STS22 (en)": 65.79, + "STS22 (ar)": 16.42, + "STS22 (fr)": 73.95, + "STS22 (pl-en)": 37.46, + "STS22 (es-it)": 50.23, + "STSB": 40.8, + "STSBenchmark": 85.86, + "STSBenchmarkMultilingualSTS (it)": 65.48, + "STSBenchmarkMultilingualSTS (zh)": 41.47, + "STSBenchmarkMultilingualSTS (en)": 85.86, + "STSBenchmarkMultilingualSTS (de)": 64.6, + "STSBenchmarkMultilingualSTS (ru)": 57.13, + "STSBenchmarkMultilingualSTS (pt)": 65.68, + "STSBenchmarkMultilingualSTS (es)": 66.5, + "STSBenchmarkMultilingualSTS (nl)": 61.82, + "STSBenchmarkMultilingualSTS (fr)": 63.67, + "STSBenchmarkMultilingualSTS (pl)": 60.25 + }, + { + "Model": "bge-small-en-v1.5", + "CDSC-R": 84.98, + "GermanSTSBenchmark": 64.25, + "RUParaPhraserSTS": 44.65, + "RuSTSBenchmarkSTS": 57.44, + "SICK-R": 79.41, + "SICK-R-PL": 54.12, + "SICKFr": 62.43, + "STS12": 77.44, + "STS13": 82.98, + "STS14": 81.84, + "STS15": 87.27, + "STS17 (en-de)": 28.84, + "STS17 (fr-en)": 35.02, + "STS17 (en-ar)": 6.64, + "STS17 (en-tr)": 13.56, + "STS17 (it-en)": 28.17, + "STS17 (es-en)": 28.15, + "STS17 (ar-ar)": 49.71, + "STS17 (nl-en)": 31.57, + "STS17 (ko-ko)": 45.13, + "STS17 (es-es)": 77.73, + "STS17 (en-en)": 87.14, + "STS22 (es-en)": 58.66, + "STS22 (de-pl)": 18.17, + "STS22 (zh-en)": 48.61, + "STS22 (pl)": 31.94, + "STS22 (de)": 32.79, + "STS22 (ru)": 17.88, + "STS22 (de-en)": 44.93, + "STS22 (zh)": 47.48, + "STS22 (tr)": 44.15, + "STS22 (it)": 63.98, + "STS22 (de-fr)": 44.7, + "STS22 (fr-pl)": 61.98, + "STS22 (es)": 55.47, + "STS22 (en)": 65.79, + "STS22 (ar)": 16.42, + "STS22 (fr)": 73.95, + "STS22 (pl-en)": 37.46, + "STS22 (es-it)": 50.23, + "STSB": 40.8, + "STSBenchmark": 85.86, + "STSBenchmarkMultilingualSTS (it)": 65.48, + "STSBenchmarkMultilingualSTS (zh)": 41.46, + "STSBenchmarkMultilingualSTS (en)": 85.86, + "STSBenchmarkMultilingualSTS (de)": 64.6, + "STSBenchmarkMultilingualSTS (ru)": 57.13, + "STSBenchmarkMultilingualSTS (pt)": 65.68, + "STSBenchmarkMultilingualSTS (es)": 66.5, + "STSBenchmarkMultilingualSTS (nl)": 61.82, + "STSBenchmarkMultilingualSTS (fr)": 63.67, + "STSBenchmarkMultilingualSTS (pl)": 60.25 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "bge-small-en-v1.5", + "SummEvalFr": 29.59 + }, + { + "Model": "bge-small-en-v1.5", + "SummEvalFr": 29.59 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "bge-small-en-v1.5", + "CEDRClassification": 36.1, + "SensitiveTopicsClassification": 18.09 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "bge-small-en-v1.5", + "Core17InstructionRetrieval": 1.51, + "News21InstructionRetrieval": -1.32, + "Robust04InstructionRetrieval": -5.77 + } + ] } }, "BAAI__bge-small-en-v1.5-instruct": { @@ -1149,28 +4580,435 @@ }, "Cohere__Cohere-embed-english-v3.0": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "Cohere-embed-english-v3.0", + "BornholmBitextMining": 40.85, + "Tatoeba (wuu-eng)": 2.65, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (ell-eng)": 0.74, + "Tatoeba (jpn-eng)": 2.11, + "Tatoeba (cym-eng)": 7.26, + "Tatoeba (fin-eng)": 6.23, + "Tatoeba (bel-eng)": 3.28, + "Tatoeba (aze-eng)": 5.38, + "Tatoeba (cat-eng)": 27.67, + "Tatoeba (eus-eng)": 8.12, + "Tatoeba (yid-eng)": 0.35, + "Tatoeba (ast-eng)": 34.32, + "Tatoeba (ron-eng)": 18.57, + "Tatoeba (gle-eng)": 3.57, + "Tatoeba (vie-eng)": 6.03, + "Tatoeba (cbk-eng)": 28.59, + "Tatoeba (cor-eng)": 3.45, + "Tatoeba (epo-eng)": 17.08, + "Tatoeba (uzb-eng)": 4.59, + "Tatoeba (tam-eng)": 0.35, + "Tatoeba (ceb-eng)": 7.2, + "Tatoeba (max-eng)": 14.66, + "Tatoeba (sqi-eng)": 10.45, + "Tatoeba (ile-eng)": 35.53, + "Tatoeba (hrv-eng)": 14.32, + "Tatoeba (yue-eng)": 2.33, + "Tatoeba (por-eng)": 44.85, + "Tatoeba (swh-eng)": 7.11, + "Tatoeba (cmn-eng)": 3.42, + "Tatoeba (ber-eng)": 5.21, + "Tatoeba (tzl-eng)": 21.77, + "Tatoeba (fra-eng)": 44.84, + "Tatoeba (mkd-eng)": 1.39, + "Tatoeba (dsb-eng)": 10.77, + "Tatoeba (hin-eng)": 0.14, + "Tatoeba (kab-eng)": 1.43, + "Tatoeba (ind-eng)": 8.79, + "Tatoeba (tel-eng)": 0.22, + "Tatoeba (ang-eng)": 28.1, + "Tatoeba (lfn-eng)": 23.83, + "Tatoeba (tur-eng)": 5.85, + "Tatoeba (ara-eng)": 0.7, + "Tatoeba (nld-eng)": 28.42, + "Tatoeba (dtp-eng)": 3.79, + "Tatoeba (pam-eng)": 5.71, + "Tatoeba (ces-eng)": 7.45, + "Tatoeba (kat-eng)": 1.1, + "Tatoeba (tuk-eng)": 3.14, + "Tatoeba (bul-eng)": 3.45, + "Tatoeba (lit-eng)": 4.03, + "Tatoeba (fry-eng)": 25.55, + "Tatoeba (lat-eng)": 13.73, + "Tatoeba (war-eng)": 6.91, + "Tatoeba (swg-eng)": 17.59, + "Tatoeba (tgl-eng)": 8.22, + "Tatoeba (xho-eng)": 2.92, + "Tatoeba (arq-eng)": 0.63, + "Tatoeba (urd-eng)": 0.03, + "Tatoeba (zsm-eng)": 10.32, + "Tatoeba (kaz-eng)": 1.39, + "Tatoeba (pol-eng)": 10.17, + "Tatoeba (bre-eng)": 4.21, + "Tatoeba (ido-eng)": 23.31, + "Tatoeba (mon-eng)": 2.28, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (swe-eng)": 22.17, + "Tatoeba (gla-eng)": 3.13, + "Tatoeba (hun-eng)": 7.57, + "Tatoeba (dan-eng)": 27.16, + "Tatoeba (khm-eng)": 0.28, + "Tatoeba (mhr-eng)": 0.81, + "Tatoeba (ukr-eng)": 2.56, + "Tatoeba (uig-eng)": 0.6, + "Tatoeba (slk-eng)": 10.11, + "Tatoeba (ben-eng)": 0.05, + "Tatoeba (kzj-eng)": 5.02, + "Tatoeba (rus-eng)": 3.58, + "Tatoeba (isl-eng)": 8.1, + "Tatoeba (nov-eng)": 37.09, + "Tatoeba (srp-eng)": 7.44, + "Tatoeba (kor-eng)": 1.5, + "Tatoeba (hye-eng)": 0.67, + "Tatoeba (afr-eng)": 13.68, + "Tatoeba (lvs-eng)": 6.8, + "Tatoeba (deu-eng)": 54.19, + "Tatoeba (orv-eng)": 0.7, + "Tatoeba (oci-eng)": 16.78, + "Tatoeba (csb-eng)": 14.03, + "Tatoeba (kur-eng)": 9.75, + "Tatoeba (cha-eng)": 19.06, + "Tatoeba (nds-eng)": 23.19, + "Tatoeba (mar-eng)": 0.35, + "Tatoeba (slv-eng)": 11.7, + "Tatoeba (glg-eng)": 38.0, + "Tatoeba (hsb-eng)": 10.78, + "Tatoeba (fao-eng)": 17.84, + "Tatoeba (tat-eng)": 0.87, + "Tatoeba (bos-eng)": 15.81, + "Tatoeba (pms-eng)": 15.39, + "Tatoeba (est-eng)": 4.89, + "Tatoeba (awa-eng)": 0.34, + "Tatoeba (ina-eng)": 47.01, + "Tatoeba (tha-eng)": 1.23, + "Tatoeba (heb-eng)": 0.94, + "Tatoeba (ita-eng)": 30.21, + "Tatoeba (jav-eng)": 5.61, + "Tatoeba (pes-eng)": 0.71, + "Tatoeba (spa-eng)": 46.47, + "Tatoeba (gsw-eng)": 20.78, + "Tatoeba (nno-eng)": 17.71, + "Tatoeba (arz-eng)": 0.17, + "Tatoeba (nob-eng)": 24.8 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "Cohere-embed-english-v3.0", + "AllegroReviews": 28.84, + "AmazonCounterfactualClassification (en-ext)": 78.05, + "AmazonCounterfactualClassification (en)": 80.64, + "AmazonCounterfactualClassification (de)": 56.48, + "AmazonCounterfactualClassification (ja)": 54.93, + "AmazonReviewsClassification (en)": 53.73, + "AmazonReviewsClassification (de)": 37.55, + "AmazonReviewsClassification (es)": 41.49, + "AmazonReviewsClassification (fr)": 39.11, + "AmazonReviewsClassification (ja)": 23.23, + "AmazonReviewsClassification (zh)": 23.82, + "AngryTweetsClassification": 48.63, + "CBD": 52.93, + "DanishPoliticalCommentsClassification": 30.06, + "GeoreviewClassification": 32.95, + "HeadlineClassification": 45.37, + "InappropriatenessClassification": 54.71, + "KinopoiskClassification": 36.31, + "LccSentimentClassification": 45.4, + "MTOPDomainClassification (en)": 94.71, + "MTOPDomainClassification (de)": 81.83, + "MTOPDomainClassification (es)": 84.21, + "MTOPDomainClassification (fr)": 82.72, + "MTOPDomainClassification (hi)": 38.3, + "MTOPDomainClassification (th)": 15.41, + "MTOPIntentClassification (en)": 68.08, + "MTOPIntentClassification (de)": 50.58, + "MTOPIntentClassification (es)": 50.62, + "MTOPIntentClassification (fr)": 43.71, + "MTOPIntentClassification (hi)": 17.12, + "MTOPIntentClassification (th)": 4.9, + "MasakhaNEWSClassification (amh)": 34.57, + "MasakhaNEWSClassification (eng)": 79.54, + "MasakhaNEWSClassification (fra)": 79.41, + "MasakhaNEWSClassification (hau)": 67.38, + "MasakhaNEWSClassification (ibo)": 63.87, + "MasakhaNEWSClassification (lin)": 76.29, + "MasakhaNEWSClassification (lug)": 66.14, + "MasakhaNEWSClassification (orm)": 64.4, + "MasakhaNEWSClassification (pcm)": 91.44, + "MasakhaNEWSClassification (run)": 69.29, + "MasakhaNEWSClassification (sna)": 79.35, + "MasakhaNEWSClassification (som)": 52.76, + "MasakhaNEWSClassification (swa)": 61.41, + "MasakhaNEWSClassification (tir)": 23.31, + "MasakhaNEWSClassification (xho)": 66.77, + "MasakhaNEWSClassification (yor)": 73.43, + "MassiveIntentClassification (el)": 33.62, + "MassiveIntentClassification (sl)": 40.83, + "MassiveIntentClassification (fa)": 32.68, + "MassiveIntentClassification (ta)": 13.08, + "MassiveIntentClassification (ml)": 1.96, + "MassiveIntentClassification (ar)": 26.91, + "MassiveIntentClassification (sq)": 44.17, + "MassiveIntentClassification (zh-TW)": 21.64, + "MassiveIntentClassification (fi)": 41.16, + "MassiveIntentClassification (nl)": 43.97, + "MassiveIntentClassification (hu)": 40.04, + "MassiveIntentClassification (tr)": 42.88, + "MassiveIntentClassification (ka)": 12.09, + "MassiveIntentClassification (pl)": 41.81, + "MassiveIntentClassification (vi)": 36.32, + "MassiveIntentClassification (hy)": 13.01, + "MassiveIntentClassification (sw)": 40.11, + "MassiveIntentClassification (fr)": 52.0, + "MassiveIntentClassification (id)": 44.09, + "MassiveIntentClassification (en)": 71.8, + "MassiveIntentClassification (ru)": 41.81, + "MassiveIntentClassification (cy)": 39.46, + "MassiveIntentClassification (az)": 40.37, + "MassiveIntentClassification (ms)": 41.36, + "MassiveIntentClassification (de)": 50.54, + "MassiveIntentClassification (ur)": 21.32, + "MassiveIntentClassification (kn)": 2.52, + "MassiveIntentClassification (it)": 49.63, + "MassiveIntentClassification (da)": 46.16, + "MassiveIntentClassification (am)": 2.64, + "MassiveIntentClassification (hi)": 19.98, + "MassiveIntentClassification (tl)": 44.05, + "MassiveIntentClassification (es)": 51.43, + "MassiveIntentClassification (mn)": 27.68, + "MassiveIntentClassification (km)": 3.45, + "MassiveIntentClassification (ro)": 45.61, + "MassiveIntentClassification (he)": 27.04, + "MassiveIntentClassification (af)": 42.61, + "MassiveIntentClassification (ja)": 33.04, + "MassiveIntentClassification (te)": 2.36, + "MassiveIntentClassification (bn)": 24.44, + "MassiveIntentClassification (zh-CN)": 22.45, + "MassiveIntentClassification (ko)": 23.43, + "MassiveIntentClassification (nb)": 43.9, + "MassiveIntentClassification (jv)": 39.44, + "MassiveIntentClassification (sv)": 43.98, + "MassiveIntentClassification (is)": 36.44, + "MassiveIntentClassification (th)": 11.25, + "MassiveIntentClassification (lv)": 41.86, + "MassiveIntentClassification (pt)": 51.93, + "MassiveIntentClassification (my)": 2.83, + "MassiveScenarioClassification (is)": 45.12, + "MassiveScenarioClassification (da)": 55.72, + "MassiveScenarioClassification (tl)": 53.33, + "MassiveScenarioClassification (zh-CN)": 31.89, + "MassiveScenarioClassification (el)": 44.14, + "MassiveScenarioClassification (sw)": 46.93, + "MassiveScenarioClassification (bn)": 32.0, + "MassiveScenarioClassification (fi)": 47.83, + "MassiveScenarioClassification (cy)": 46.83, + "MassiveScenarioClassification (pt)": 59.42, + "MassiveScenarioClassification (vi)": 42.58, + "MassiveScenarioClassification (th)": 20.07, + "MassiveScenarioClassification (kn)": 6.86, + "MassiveScenarioClassification (my)": 5.23, + "MassiveScenarioClassification (sl)": 48.23, + "MassiveScenarioClassification (fa)": 37.23, + "MassiveScenarioClassification (mn)": 32.36, + "MassiveScenarioClassification (ml)": 5.92, + "MassiveScenarioClassification (hi)": 24.26, + "MassiveScenarioClassification (az)": 46.84, + "MassiveScenarioClassification (ms)": 49.86, + "MassiveScenarioClassification (ka)": 19.95, + "MassiveScenarioClassification (hu)": 48.55, + "MassiveScenarioClassification (tr)": 49.89, + "MassiveScenarioClassification (de)": 62.39, + "MassiveScenarioClassification (ko)": 30.41, + "MassiveScenarioClassification (am)": 7.6, + "MassiveScenarioClassification (it)": 57.91, + "MassiveScenarioClassification (ro)": 54.28, + "MassiveScenarioClassification (jv)": 47.24, + "MassiveScenarioClassification (km)": 5.83, + "MassiveScenarioClassification (sq)": 51.95, + "MassiveScenarioClassification (pl)": 50.47, + "MassiveScenarioClassification (nl)": 54.86, + "MassiveScenarioClassification (ru)": 46.46, + "MassiveScenarioClassification (es)": 60.4, + "MassiveScenarioClassification (hy)": 19.38, + "MassiveScenarioClassification (af)": 52.75, + "MassiveScenarioClassification (nb)": 53.54, + "MassiveScenarioClassification (ur)": 29.99, + "MassiveScenarioClassification (en)": 76.58, + "MassiveScenarioClassification (he)": 30.33, + "MassiveScenarioClassification (fr)": 59.83, + "MassiveScenarioClassification (id)": 50.43, + "MassiveScenarioClassification (lv)": 46.74, + "MassiveScenarioClassification (sv)": 53.84, + "MassiveScenarioClassification (ta)": 17.77, + "MassiveScenarioClassification (ar)": 33.7, + "MassiveScenarioClassification (zh-TW)": 30.9, + "MassiveScenarioClassification (te)": 7.52, + "MassiveScenarioClassification (ja)": 41.94, + "NoRecClassification": 41.54, + "NordicLangClassification": 58.2, + "PAC": 69.12, + "PolEmo2.0-IN": 50.18, + "PolEmo2.0-OUT": 26.82, + "RuReviewsClassification": 48.71, + "RuSciBenchGRNTIClassification": 24.06, + "RuSciBenchOECDClassification": 19.18 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "Cohere-embed-english-v3.0", + "AlloProfClusteringP2P": 61.15, + "AlloProfClusteringS2S": 35.69, + "BlurbsClusteringP2P": 33.8, + "BlurbsClusteringS2S": 13.31, + "GeoreviewClusteringP2P": 27.27, + "HALClusteringS2S": 23.61, + "MLSUMClusteringP2P (de)": 45.78, + "MLSUMClusteringP2P (fr)": 44.54, + "MLSUMClusteringP2P (ru)": 27.93, + "MLSUMClusteringP2P (es)": 44.53, + "MLSUMClusteringS2S (de)": 44.11, + "MLSUMClusteringS2S (fr)": 43.62, + "MLSUMClusteringS2S (ru)": 26.85, + "MLSUMClusteringS2S (es)": 44.08, + "MasakhaNEWSClusteringP2P (amh)": 42.0, + "MasakhaNEWSClusteringP2P (eng)": 59.86, + "MasakhaNEWSClusteringP2P (fra)": 68.68, + "MasakhaNEWSClusteringP2P (hau)": 41.5, + "MasakhaNEWSClusteringP2P (ibo)": 43.18, + "MasakhaNEWSClusteringP2P (lin)": 71.26, + "MasakhaNEWSClusteringP2P (lug)": 58.01, + "MasakhaNEWSClusteringP2P (orm)": 27.48, + "MasakhaNEWSClusteringP2P (pcm)": 86.01, + "MasakhaNEWSClusteringP2P (run)": 53.68, + "MasakhaNEWSClusteringP2P (sna)": 67.47, + "MasakhaNEWSClusteringP2P (som)": 34.74, + "MasakhaNEWSClusteringP2P (swa)": 31.4, + "MasakhaNEWSClusteringP2P (tir)": 44.31, + "MasakhaNEWSClusteringP2P (xho)": 37.29, + "MasakhaNEWSClusteringP2P (yor)": 40.79, + "MasakhaNEWSClusteringS2S (amh)": 44.55, + "MasakhaNEWSClusteringS2S (eng)": 44.41, + "MasakhaNEWSClusteringS2S (fra)": 39.8, + "MasakhaNEWSClusteringS2S (hau)": 20.74, + "MasakhaNEWSClusteringS2S (ibo)": 46.43, + "MasakhaNEWSClusteringS2S (lin)": 52.66, + "MasakhaNEWSClusteringS2S (lug)": 42.28, + "MasakhaNEWSClusteringS2S (orm)": 28.05, + "MasakhaNEWSClusteringS2S (pcm)": 77.26, + "MasakhaNEWSClusteringS2S (run)": 48.73, + "MasakhaNEWSClusteringS2S (sna)": 40.39, + "MasakhaNEWSClusteringS2S (som)": 33.5, + "MasakhaNEWSClusteringS2S (swa)": 18.32, + "MasakhaNEWSClusteringS2S (tir)": 43.27, + "MasakhaNEWSClusteringS2S (xho)": 27.07, + "MasakhaNEWSClusteringS2S (yor)": 34.08, + "RuSciBenchGRNTIClusteringP2P": 23.4, + "RuSciBenchOECDClusteringP2P": 20.64, + "TenKGnadClusteringP2P": 47.9, + "TenKGnadClusteringS2S": 26.9 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "Cohere-embed-english-v3.0", + "CDSC-E": 62.91, + "FalseFriendsGermanEnglish": 47.58, + "OpusparcusPC (de)": 91.52, + "OpusparcusPC (en)": 98.38, + "OpusparcusPC (fi)": 87.04, + "OpusparcusPC (fr)": 88.39, + "OpusparcusPC (ru)": 80.87, + "OpusparcusPC (sv)": 84.44, + "PSC": 96.09, + "PawsXPairClassification (de)": 51.6, + "PawsXPairClassification (en)": 63.44, + "PawsXPairClassification (es)": 53.64, + "PawsXPairClassification (fr)": 56.8, + "PawsXPairClassification (ja)": 49.85, + "PawsXPairClassification (ko)": 50.82, + "PawsXPairClassification (zh)": 52.47, + "SICK-E-PL": 61.81, + "TERRa": 47.68 + }, + { + "Model": "Cohere-embed-english-v3.0", + "CDSC-E": 62.91, + "FalseFriendsGermanEnglish": 47.59, + "OpusparcusPC (de)": 91.52, + "OpusparcusPC (en)": 98.39, + "OpusparcusPC (fi)": 87.11, + "OpusparcusPC (fr)": 88.44, + "OpusparcusPC (ru)": 80.88, + "OpusparcusPC (sv)": 84.45, + "PSC": 96.09, + "PawsXPairClassification (de)": 52.18, + "PawsXPairClassification (en)": 63.59, + "PawsXPairClassification (es)": 53.68, + "PawsXPairClassification (fr)": 56.82, + "PawsXPairClassification (ja)": 49.94, + "PawsXPairClassification (ko)": 51.02, + "PawsXPairClassification (zh)": 52.49, + "SICK-E-PL": 61.81, + "TERRa": 47.68 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "Cohere-embed-english-v3.0", + "AlloprofReranking": 68.26, + "RuBQReranking": 51.31, + "SyntecReranking": 76.13, + "T2Reranking": 62.84 + }, + { + "Model": "Cohere-embed-english-v3.0", + "MIRACLReranking (ar)": 29.78, + "MIRACLReranking (bn)": 29.69, + "MIRACLReranking (de)": 36.5, + "MIRACLReranking (en)": 62.59, + "MIRACLReranking (es)": 49.73, + "MIRACLReranking (fa)": 18.93, + "MIRACLReranking (fi)": 50.03, + "MIRACLReranking (fr)": 44.25, + "MIRACLReranking (hi)": 22.73, + "MIRACLReranking (id)": 33.97, + "MIRACLReranking (ja)": 22.4, + "MIRACLReranking (ko)": 20.14, + "MIRACLReranking (ru)": 28.25, + "MIRACLReranking (sw)": 34.97, + "MIRACLReranking (te)": 2.74, + "MIRACLReranking (th)": 4.46, + "MIRACLReranking (yo)": 56.78, + "MIRACLReranking (zh)": 15.67 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "Cohere-embed-english-v3.0", - "AILACasedocs": 31.54, - "AILAStatutes": 27.15, - "ARCChallenge": 9.89, - "AlphaNLI": 15.1, + "AILACasedocs": 32.35, + "AILAStatutes": 26.38, + "ARCChallenge": 9.92, + "AlloprofRetrieval": 38.83, + "AlphaNLI": 15.09, + "AppsRetrieval": 13.72, + "BSARDRetrieval": 13.19, "BrightRetrieval (psychology)": 21.82, "BrightRetrieval (economics)": 20.18, "BrightRetrieval (robotics)": 16.21, @@ -1183,25 +5021,104 @@ "BrightRetrieval (theoremqa_questions)": 15.07, "BrightRetrieval (leetcode)": 26.78, "BrightRetrieval (earth_science)": 27.45, - "GerDaLIRSmall": 6.05, - "HellaSwag": 26.35, - "LeCaRDv2": 21.02, - "LegalBenchConsumerContractsQA": 77.12, - "LegalBenchCorporateLobbying": 93.68, - "LegalQuAD": 26.08, - "LegalSummarization": 61.7, - "PIQA": 28.49, - "Quail": 4.1, - "RARbCode": 57.19, - "RARbMath": 72.26, - "SIQA": 4.26, - "SpartQA": 3.75, - "TempReasonL1": 1.5, - "TempReasonL2Fact": 35.91, + "CmedqaRetrieval": 2.95, + "CodeFeedbackMT": 47.02, + "CodeFeedbackST": 74.82, + "CodeSearchNetCCRetrieval (python)": 60.38, + "CodeSearchNetCCRetrieval (javascript)": 57.62, + "CodeSearchNetCCRetrieval (go)": 36.29, + "CodeSearchNetCCRetrieval (ruby)": 59.15, + "CodeSearchNetCCRetrieval (java)": 56.76, + "CodeSearchNetCCRetrieval (php)": 46.64, + "CodeSearchNetRetrieval (python)": 87.89, + "CodeSearchNetRetrieval (javascript)": 72.14, + "CodeSearchNetRetrieval (go)": 93.29, + "CodeSearchNetRetrieval (ruby)": 80.01, + "CodeSearchNetRetrieval (java)": 80.68, + "CodeSearchNetRetrieval (php)": 84.07, + "CodeTransOceanContest": 65.28, + "CodeTransOceanDL": 31.38, + "CosQA": 30.65, + "CovidRetrieval": 27.82, + "GerDaLIR": 2.38, + "GerDaLIRSmall": 6.22, + "GermanQuAD-Retrieval": 88.23, + "HellaSwag": 26.38, + "LEMBNarrativeQARetrieval": 25.03, + "LEMBQMSumRetrieval": 23.82, + "LEMBSummScreenFDRetrieval": 75.77, + "LEMBWikimQARetrieval": 59.03, + "LeCaRDv2": 21.03, + "LegalBenchConsumerContractsQA": 77.42, + "LegalBenchCorporateLobbying": 93.64, + "LegalQuAD": 25.77, + "LegalSummarization": 61.92, + "MintakaRetrieval (ar)": 3.12, + "MintakaRetrieval (de)": 30.88, + "MintakaRetrieval (es)": 29.12, + "MintakaRetrieval (fr)": 30.19, + "MintakaRetrieval (hi)": 6.22, + "MintakaRetrieval (it)": 26.8, + "MintakaRetrieval (ja)": 9.35, + "MintakaRetrieval (pt)": 28.46, + "PIQA": 28.55, + "Quail": 4.09, + "RARbCode": 57.27, + "RARbMath": 72.25, + "RiaNewsRetrieval": 21.74, + "RuBQRetrieval": 26.66, + "SIQA": 4.27, + "SciFact-PL": 39.36, + "SpartQA": 3.74, + "StackOverflowQA": 89.35, + "SyntecRetrieval": 72.17, + "SyntheticText2SQL": 57.2, + "TRECCOVID-PL": 47.27, + "TempReasonL1": 1.51, + "TempReasonL2Fact": 35.94, "TempReasonL2Pure": 1.89, - "TempReasonL3Fact": 27.51, + "TempReasonL3Fact": 27.52, "TempReasonL3Pure": 8.53, - "WinoGrande": 58.01 + "WinoGrande": 57.94, + "XMarket (de)": 14.36, + "XMarket (en)": 32.99, + "XMarket (es)": 15.81, + "XPQARetrieval (ara-ara)": 15.56, + "XPQARetrieval (eng-ara)": 5.23, + "XPQARetrieval (ara-eng)": 10.23, + "XPQARetrieval (deu-deu)": 61.94, + "XPQARetrieval (eng-deu)": 24.72, + "XPQARetrieval (deu-eng)": 38.58, + "XPQARetrieval (spa-spa)": 51.1, + "XPQARetrieval (eng-spa)": 21.07, + "XPQARetrieval (spa-eng)": 32.96, + "XPQARetrieval (fra-fra)": 57.61, + "XPQARetrieval (eng-fra)": 26.21, + "XPQARetrieval (fra-eng)": 40.77, + "XPQARetrieval (hin-hin)": 36.4, + "XPQARetrieval (eng-hin)": 10.72, + "XPQARetrieval (hin-eng)": 10.19, + "XPQARetrieval (ita-ita)": 63.39, + "XPQARetrieval (eng-ita)": 17.67, + "XPQARetrieval (ita-eng)": 36.49, + "XPQARetrieval (jpn-jpn)": 47.23, + "XPQARetrieval (eng-jpn)": 9.74, + "XPQARetrieval (jpn-eng)": 20.05, + "XPQARetrieval (kor-kor)": 15.81, + "XPQARetrieval (eng-kor)": 10.11, + "XPQARetrieval (kor-eng)": 8.63, + "XPQARetrieval (pol-pol)": 35.5, + "XPQARetrieval (eng-pol)": 15.66, + "XPQARetrieval (pol-eng)": 22.57, + "XPQARetrieval (por-por)": 40.45, + "XPQARetrieval (eng-por)": 15.6, + "XPQARetrieval (por-eng)": 29.83, + "XPQARetrieval (tam-tam)": 6.63, + "XPQARetrieval (eng-tam)": 6.34, + "XPQARetrieval (tam-eng)": 5.56, + "XPQARetrieval (cmn-cmn)": 23.53, + "XPQARetrieval (eng-cmn)": 10.92, + "XPQARetrieval (cmn-eng)": 15.4 } ], "recall_at_1": [ @@ -1219,21 +5136,113 @@ ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "Cohere-embed-english-v3.0", + "CDSC-R": 79.92, + "GermanSTSBenchmark": 67.07, + "RUParaPhraserSTS": 48.32, + "RuSTSBenchmarkSTS": 58.74, + "SICK-R-PL": 58.33, + "SICKFr": 70.9, + "STS22 (de-en)": 53.28, + "STS22 (zh)": 49.65, + "STS22 (pl-en)": 68.66, + "STS22 (de-pl)": 40.65, + "STS22 (es)": 60.81, + "STS22 (fr)": 81.09, + "STS22 (es-en)": 73.36, + "STS22 (en)": 68.15, + "STS22 (de-fr)": 67.87, + "STS22 (tr)": 53.62, + "STS22 (fr-pl)": 84.52, + "STS22 (zh-en)": 47.47, + "STS22 (ar)": 38.69, + "STS22 (pl)": 36.17, + "STS22 (de)": 43.62, + "STS22 (ru)": 36.52, + "STS22 (it)": 73.12, + "STS22 (es-it)": 68.51, + "STSB": 38.46, + "STSBenchmarkMultilingualSTS (pl)": 58.93, + "STSBenchmarkMultilingualSTS (nl)": 66.15, + "STSBenchmarkMultilingualSTS (it)": 67.54, + "STSBenchmarkMultilingualSTS (en)": 86.52, + "STSBenchmarkMultilingualSTS (fr)": 71.11, + "STSBenchmarkMultilingualSTS (pt)": 68.8, + "STSBenchmarkMultilingualSTS (de)": 67.95, + "STSBenchmarkMultilingualSTS (zh)": 37.44, + "STSBenchmarkMultilingualSTS (ru)": 58.77, + "STSBenchmarkMultilingualSTS (es)": 72.96 + }, + { + "Model": "Cohere-embed-english-v3.0", + "CDSC-R": 79.92, + "GermanSTSBenchmark": 67.07, + "RUParaPhraserSTS": 48.32, + "RuSTSBenchmarkSTS": 58.74, + "SICK-R-PL": 58.33, + "SICKFr": 70.9, + "STS22 (de-en)": 53.28, + "STS22 (zh)": 49.65, + "STS22 (pl-en)": 68.66, + "STS22 (de-pl)": 40.65, + "STS22 (es)": 60.81, + "STS22 (fr)": 81.09, + "STS22 (es-en)": 73.36, + "STS22 (en)": 68.15, + "STS22 (de-fr)": 67.87, + "STS22 (tr)": 53.62, + "STS22 (fr-pl)": 84.52, + "STS22 (zh-en)": 47.47, + "STS22 (ar)": 38.69, + "STS22 (pl)": 36.17, + "STS22 (de)": 43.62, + "STS22 (ru)": 36.52, + "STS22 (it)": 73.12, + "STS22 (es-it)": 68.51, + "STSB": 38.46, + "STSBenchmarkMultilingualSTS (pl)": 58.93, + "STSBenchmarkMultilingualSTS (nl)": 66.15, + "STSBenchmarkMultilingualSTS (it)": 67.54, + "STSBenchmarkMultilingualSTS (en)": 86.52, + "STSBenchmarkMultilingualSTS (fr)": 71.11, + "STSBenchmarkMultilingualSTS (pt)": 68.8, + "STSBenchmarkMultilingualSTS (de)": 67.95, + "STSBenchmarkMultilingualSTS (zh)": 37.44, + "STSBenchmarkMultilingualSTS (ru)": 58.77, + "STSBenchmarkMultilingualSTS (es)": 72.96 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "Cohere-embed-english-v3.0", + "SummEvalFr": 30.15 + }, + { + "Model": "Cohere-embed-english-v3.0", + "SummEvalFr": 30.15 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "Cohere-embed-english-v3.0", + "CEDRClassification": 35.56, + "SensitiveTopicsClassification": 18.73 + } + ] }, "InstructionRetrieval": { "p-MRR": [ { "Model": "Cohere-embed-english-v3.0", - "Core17InstructionRetrieval": 2.8, - "News21InstructionRetrieval": 0.2, - "Robust04InstructionRetrieval": -3.63 + "Core17InstructionRetrieval": 2.68, + "News21InstructionRetrieval": 0.3, + "Robust04InstructionRetrieval": -3.49 } ] } @@ -1291,18 +5300,286 @@ }, "Cohere__Cohere-embed-multilingual-light-v3.0": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "BornholmBitextMining": 36.64, + "Tatoeba (slv-eng)": 62.38, + "Tatoeba (kaz-eng)": 61.91, + "Tatoeba (bos-eng)": 73.84, + "Tatoeba (hin-eng)": 88.19, + "Tatoeba (wuu-eng)": 55.0, + "Tatoeba (epo-eng)": 79.45, + "Tatoeba (deu-eng)": 94.81, + "Tatoeba (xho-eng)": 40.42, + "Tatoeba (cym-eng)": 48.44, + "Tatoeba (kzj-eng)": 2.77, + "Tatoeba (nld-eng)": 86.46, + "Tatoeba (rus-eng)": 86.86, + "Tatoeba (hsb-eng)": 20.25, + "Tatoeba (por-eng)": 79.97, + "Tatoeba (hrv-eng)": 78.21, + "Tatoeba (ben-eng)": 75.07, + "Tatoeba (ido-eng)": 49.3, + "Tatoeba (ceb-eng)": 24.63, + "Tatoeba (eus-eng)": 37.3, + "Tatoeba (ron-eng)": 76.3, + "Tatoeba (arq-eng)": 16.72, + "Tatoeba (war-eng)": 24.29, + "Tatoeba (tzl-eng)": 17.23, + "Tatoeba (max-eng)": 40.8, + "Tatoeba (tur-eng)": 81.36, + "Tatoeba (hye-eng)": 70.67, + "Tatoeba (swg-eng)": 35.43, + "Tatoeba (uzb-eng)": 42.95, + "Tatoeba (vie-eng)": 84.12, + "Tatoeba (pam-eng)": 3.37, + "Tatoeba (lit-eng)": 46.27, + "Tatoeba (ast-eng)": 45.84, + "Tatoeba (kur-eng)": 22.44, + "Tatoeba (lfn-eng)": 36.33, + "Tatoeba (pes-eng)": 73.91, + "Tatoeba (ind-eng)": 81.83, + "Tatoeba (ita-eng)": 80.7, + "Tatoeba (nno-eng)": 61.07, + "Tatoeba (yid-eng)": 47.46, + "Tatoeba (fin-eng)": 60.74, + "Tatoeba (gle-eng)": 41.46, + "Tatoeba (isl-eng)": 52.69, + "Tatoeba (kab-eng)": 5.98, + "Tatoeba (urd-eng)": 78.17, + "Tatoeba (nov-eng)": 45.93, + "Tatoeba (tgl-eng)": 66.58, + "Tatoeba (est-eng)": 47.9, + "Tatoeba (khm-eng)": 26.01, + "Tatoeba (tel-eng)": 78.42, + "Tatoeba (lvs-eng)": 46.31, + "Tatoeba (awa-eng)": 59.5, + "Tatoeba (cat-eng)": 62.02, + "Tatoeba (cbk-eng)": 36.88, + "Tatoeba (ile-eng)": 54.61, + "Tatoeba (orv-eng)": 8.05, + "Tatoeba (srp-eng)": 74.4, + "Tatoeba (mon-eng)": 60.97, + "Tatoeba (pol-eng)": 78.49, + "Tatoeba (nds-eng)": 37.97, + "Tatoeba (uig-eng)": 44.38, + "Tatoeba (pms-eng)": 22.31, + "Tatoeba (cmn-eng)": 82.2, + "Tatoeba (ces-eng)": 66.62, + "Tatoeba (gsw-eng)": 28.46, + "Tatoeba (swe-eng)": 81.61, + "Tatoeba (mkd-eng)": 55.13, + "Tatoeba (amh-eng)": 54.67, + "Tatoeba (nob-eng)": 84.58, + "Tatoeba (spa-eng)": 85.6, + "Tatoeba (csb-eng)": 7.02, + "Tatoeba (yue-eng)": 57.38, + "Tatoeba (fry-eng)": 34.59, + "Tatoeba (ang-eng)": 30.55, + "Tatoeba (zsm-eng)": 85.68, + "Tatoeba (ara-eng)": 71.5, + "Tatoeba (bel-eng)": 73.25, + "Tatoeba (heb-eng)": 61.63, + "Tatoeba (swh-eng)": 49.84, + "Tatoeba (bre-eng)": 3.93, + "Tatoeba (mal-eng)": 93.51, + "Tatoeba (arz-eng)": 43.94, + "Tatoeba (hun-eng)": 60.87, + "Tatoeba (tha-eng)": 84.66, + "Tatoeba (bul-eng)": 79.47, + "Tatoeba (tuk-eng)": 7.74, + "Tatoeba (kat-eng)": 59.68, + "Tatoeba (fao-eng)": 40.75, + "Tatoeba (ina-eng)": 69.4, + "Tatoeba (slk-eng)": 65.58, + "Tatoeba (jpn-eng)": 68.54, + "Tatoeba (dtp-eng)": 3.32, + "Tatoeba (kor-eng)": 62.94, + "Tatoeba (ell-eng)": 80.02, + "Tatoeba (glg-eng)": 33.35, + "Tatoeba (tam-eng)": 76.32, + "Tatoeba (mar-eng)": 81.79, + "Tatoeba (oci-eng)": 12.17, + "Tatoeba (dsb-eng)": 15.5, + "Tatoeba (jav-eng)": 36.99, + "Tatoeba (lat-eng)": 24.35, + "Tatoeba (sqi-eng)": 79.78, + "Tatoeba (tat-eng)": 47.6, + "Tatoeba (ukr-eng)": 75.97, + "Tatoeba (aze-eng)": 71.26, + "Tatoeba (cha-eng)": 13.4, + "Tatoeba (cor-eng)": 3.63, + "Tatoeba (dan-eng)": 83.41, + "Tatoeba (afr-eng)": 73.58, + "Tatoeba (ber-eng)": 7.63, + "Tatoeba (gla-eng)": 24.94, + "Tatoeba (mhr-eng)": 2.33, + "Tatoeba (fra-eng)": 83.62 + } + ] }, "Classification": { "accuracy": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "AmazonReviewsClassification (fr)": 38.6, - "MTOPDomainClassification (fr)": 80.79, - "MTOPIntentClassification (fr)": 50.01, - "MasakhaNEWSClassification (fra)": 82.58, - "MassiveIntentClassification (fr)": 56.31, - "MassiveScenarioClassification (fr)": 59.5 + "AllegroReviews": 37.55, + "AmazonCounterfactualClassification (en-ext)": 70.76, + "AmazonCounterfactualClassification (en)": 69.96, + "AmazonCounterfactualClassification (de)": 68.67, + "AmazonCounterfactualClassification (ja)": 59.69, + "AmazonReviewsClassification (fr)": 41.14, + "AmazonReviewsClassification (en)": 46.54, + "AmazonReviewsClassification (de)": 42.42, + "AmazonReviewsClassification (es)": 41.96, + "AmazonReviewsClassification (ja)": 38.41, + "AmazonReviewsClassification (zh)": 38.37, + "AngryTweetsClassification": 54.8, + "CBD": 61.72, + "DanishPoliticalCommentsClassification": 36.95, + "GeoreviewClassification": 44.47, + "HeadlineClassification": 76.3, + "InappropriatenessClassification": 59.73, + "KinopoiskClassification": 50.88, + "LccSentimentClassification": 56.53, + "MTOPDomainClassification (fr)": 86.06, + "MTOPDomainClassification (en)": 91.87, + "MTOPDomainClassification (de)": 88.77, + "MTOPDomainClassification (es)": 90.4, + "MTOPDomainClassification (hi)": 88.21, + "MTOPDomainClassification (th)": 85.55, + "MTOPIntentClassification (fr)": 53.11, + "MTOPIntentClassification (en)": 62.23, + "MTOPIntentClassification (de)": 61.57, + "MTOPIntentClassification (es)": 62.33, + "MTOPIntentClassification (hi)": 59.26, + "MTOPIntentClassification (th)": 59.12, + "MasakhaNEWSClassification (fra)": 79.31, + "MasakhaNEWSClassification (amh)": 87.29, + "MasakhaNEWSClassification (eng)": 80.14, + "MasakhaNEWSClassification (hau)": 75.75, + "MasakhaNEWSClassification (ibo)": 68.08, + "MasakhaNEWSClassification (lin)": 75.09, + "MasakhaNEWSClassification (lug)": 71.12, + "MasakhaNEWSClassification (orm)": 71.05, + "MasakhaNEWSClassification (pcm)": 93.31, + "MasakhaNEWSClassification (run)": 80.0, + "MasakhaNEWSClassification (sna)": 86.31, + "MasakhaNEWSClassification (som)": 62.31, + "MasakhaNEWSClassification (swa)": 74.03, + "MasakhaNEWSClassification (tir)": 69.78, + "MasakhaNEWSClassification (xho)": 76.77, + "MasakhaNEWSClassification (yor)": 76.96, + "MassiveIntentClassification (fr)": 61.85, + "MassiveIntentClassification (da)": 59.39, + "MassiveIntentClassification (lv)": 51.66, + "MassiveIntentClassification (tl)": 55.25, + "MassiveIntentClassification (ru)": 61.88, + "MassiveIntentClassification (pl)": 60.69, + "MassiveIntentClassification (ur)": 57.27, + "MassiveIntentClassification (tr)": 59.69, + "MassiveIntentClassification (hi)": 60.13, + "MassiveIntentClassification (az)": 56.0, + "MassiveIntentClassification (ko)": 59.68, + "MassiveIntentClassification (kn)": 55.04, + "MassiveIntentClassification (ro)": 57.71, + "MassiveIntentClassification (zh-TW)": 58.92, + "MassiveIntentClassification (fi)": 56.94, + "MassiveIntentClassification (mn)": 54.36, + "MassiveIntentClassification (nb)": 58.27, + "MassiveIntentClassification (id)": 60.96, + "MassiveIntentClassification (ar)": 51.3, + "MassiveIntentClassification (af)": 55.18, + "MassiveIntentClassification (jv)": 49.66, + "MassiveIntentClassification (hu)": 56.36, + "MassiveIntentClassification (cy)": 45.42, + "MassiveIntentClassification (bn)": 57.56, + "MassiveIntentClassification (sv)": 60.87, + "MassiveIntentClassification (en)": 67.36, + "MassiveIntentClassification (it)": 61.58, + "MassiveIntentClassification (ja)": 64.44, + "MassiveIntentClassification (ta)": 55.05, + "MassiveIntentClassification (pt)": 63.52, + "MassiveIntentClassification (my)": 53.29, + "MassiveIntentClassification (he)": 54.14, + "MassiveIntentClassification (is)": 49.53, + "MassiveIntentClassification (ml)": 59.72, + "MassiveIntentClassification (sw)": 52.52, + "MassiveIntentClassification (te)": 55.63, + "MassiveIntentClassification (vi)": 58.43, + "MassiveIntentClassification (ka)": 46.25, + "MassiveIntentClassification (am)": 49.31, + "MassiveIntentClassification (de)": 58.69, + "MassiveIntentClassification (ms)": 56.8, + "MassiveIntentClassification (km)": 40.79, + "MassiveIntentClassification (zh-CN)": 64.86, + "MassiveIntentClassification (es)": 62.17, + "MassiveIntentClassification (fa)": 61.88, + "MassiveIntentClassification (hy)": 54.73, + "MassiveIntentClassification (sq)": 54.53, + "MassiveIntentClassification (el)": 57.33, + "MassiveIntentClassification (nl)": 62.38, + "MassiveIntentClassification (th)": 59.84, + "MassiveIntentClassification (sl)": 53.55, + "MassiveScenarioClassification (fr)": 67.5, + "MassiveScenarioClassification (vi)": 64.61, + "MassiveScenarioClassification (tr)": 64.68, + "MassiveScenarioClassification (ta)": 59.22, + "MassiveScenarioClassification (fa)": 67.76, + "MassiveScenarioClassification (el)": 65.25, + "MassiveScenarioClassification (fi)": 63.44, + "MassiveScenarioClassification (hu)": 64.85, + "MassiveScenarioClassification (is)": 58.15, + "MassiveScenarioClassification (lv)": 57.28, + "MassiveScenarioClassification (mn)": 58.41, + "MassiveScenarioClassification (zh-CN)": 71.66, + "MassiveScenarioClassification (pl)": 66.65, + "MassiveScenarioClassification (id)": 66.54, + "MassiveScenarioClassification (ja)": 70.82, + "MassiveScenarioClassification (hy)": 57.86, + "MassiveScenarioClassification (sl)": 59.98, + "MassiveScenarioClassification (sq)": 61.63, + "MassiveScenarioClassification (bn)": 63.41, + "MassiveScenarioClassification (pt)": 65.3, + "MassiveScenarioClassification (hi)": 66.29, + "MassiveScenarioClassification (jv)": 57.1, + "MassiveScenarioClassification (am)": 56.32, + "MassiveScenarioClassification (sw)": 59.54, + "MassiveScenarioClassification (nb)": 65.64, + "MassiveScenarioClassification (az)": 59.42, + "MassiveScenarioClassification (es)": 67.2, + "MassiveScenarioClassification (en)": 72.63, + "MassiveScenarioClassification (ko)": 68.03, + "MassiveScenarioClassification (th)": 67.91, + "MassiveScenarioClassification (zh-TW)": 66.67, + "MassiveScenarioClassification (nl)": 69.05, + "MassiveScenarioClassification (my)": 58.03, + "MassiveScenarioClassification (de)": 68.12, + "MassiveScenarioClassification (ms)": 64.24, + "MassiveScenarioClassification (ka)": 51.97, + "MassiveScenarioClassification (ur)": 63.09, + "MassiveScenarioClassification (ar)": 58.24, + "MassiveScenarioClassification (da)": 67.38, + "MassiveScenarioClassification (af)": 63.79, + "MassiveScenarioClassification (kn)": 59.3, + "MassiveScenarioClassification (km)": 47.04, + "MassiveScenarioClassification (cy)": 53.0, + "MassiveScenarioClassification (sv)": 68.99, + "MassiveScenarioClassification (ml)": 66.25, + "MassiveScenarioClassification (ru)": 67.1, + "MassiveScenarioClassification (te)": 61.28, + "MassiveScenarioClassification (he)": 62.46, + "MassiveScenarioClassification (it)": 66.58, + "MassiveScenarioClassification (ro)": 64.57, + "MassiveScenarioClassification (tl)": 61.11, + "NoRecClassification": 49.0, + "NordicLangClassification": 65.97, + "PAC": 67.11, + "PolEmo2.0-IN": 69.0, + "PolEmo2.0-OUT": 43.36, + "RuReviewsClassification": 61.96, + "RuSciBenchGRNTIClassification": 58.37, + "RuSciBenchOECDClassification": 45.1 } ] }, @@ -1310,13 +5587,58 @@ "v_measure": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloProfClusteringP2P": 61.96, - "AlloProfClusteringS2S": 31.36, - "HALClusteringS2S": 17.31, + "AlloProfClusteringP2P": 63.22, + "AlloProfClusteringS2S": 40.34, + "BlurbsClusteringP2P": 38.9, + "BlurbsClusteringS2S": 16.12, + "GeoreviewClusteringP2P": 61.06, + "HALClusteringS2S": 24.67, "MLSUMClusteringP2P": 42.8, + "MLSUMClusteringP2P (de)": 42.98, + "MLSUMClusteringP2P (fr)": 43.54, + "MLSUMClusteringP2P (ru)": 44.81, + "MLSUMClusteringP2P (es)": 46.01, "MLSUMClusteringS2S": 32.72, - "MasakhaNEWSClusteringP2P (fra)": 56.81, - "MasakhaNEWSClusteringS2S (fra)": 29.41 + "MLSUMClusteringS2S (de)": 42.84, + "MLSUMClusteringS2S (fr)": 43.6, + "MLSUMClusteringS2S (ru)": 43.16, + "MLSUMClusteringS2S (es)": 44.91, + "MasakhaNEWSClusteringP2P (fra)": 62.82, + "MasakhaNEWSClusteringP2P (amh)": 68.51, + "MasakhaNEWSClusteringP2P (eng)": 64.65, + "MasakhaNEWSClusteringP2P (hau)": 62.53, + "MasakhaNEWSClusteringP2P (ibo)": 54.8, + "MasakhaNEWSClusteringP2P (lin)": 62.84, + "MasakhaNEWSClusteringP2P (lug)": 71.91, + "MasakhaNEWSClusteringP2P (orm)": 35.6, + "MasakhaNEWSClusteringP2P (pcm)": 83.55, + "MasakhaNEWSClusteringP2P (run)": 57.36, + "MasakhaNEWSClusteringP2P (sna)": 60.32, + "MasakhaNEWSClusteringP2P (som)": 43.44, + "MasakhaNEWSClusteringP2P (swa)": 31.63, + "MasakhaNEWSClusteringP2P (tir)": 67.09, + "MasakhaNEWSClusteringP2P (xho)": 41.42, + "MasakhaNEWSClusteringP2P (yor)": 44.26, + "MasakhaNEWSClusteringS2S (fra)": 48.97, + "MasakhaNEWSClusteringS2S (amh)": 54.37, + "MasakhaNEWSClusteringS2S (eng)": 56.49, + "MasakhaNEWSClusteringS2S (hau)": 20.89, + "MasakhaNEWSClusteringS2S (ibo)": 32.92, + "MasakhaNEWSClusteringS2S (lin)": 62.98, + "MasakhaNEWSClusteringS2S (lug)": 43.72, + "MasakhaNEWSClusteringS2S (orm)": 21.5, + "MasakhaNEWSClusteringS2S (pcm)": 71.32, + "MasakhaNEWSClusteringS2S (run)": 47.4, + "MasakhaNEWSClusteringS2S (sna)": 50.36, + "MasakhaNEWSClusteringS2S (som)": 27.7, + "MasakhaNEWSClusteringS2S (swa)": 18.25, + "MasakhaNEWSClusteringS2S (tir)": 57.3, + "MasakhaNEWSClusteringS2S (xho)": 35.72, + "MasakhaNEWSClusteringS2S (yor)": 29.02, + "RuSciBenchGRNTIClusteringP2P": 53.84, + "RuSciBenchOECDClusteringP2P": 46.46, + "TenKGnadClusteringP2P": 47.32, + "TenKGnadClusteringS2S": 29.27 } ] }, @@ -1324,13 +5646,50 @@ "max_ap": [ { "Model": "Cohere-embed-multilingual-light-v3.0", + "CDSC-E": 70.98, + "FalseFriendsGermanEnglish": 49.82, + "OpusparcusPC (de)": 95.03, + "OpusparcusPC (en)": 98.3, + "OpusparcusPC (fi)": 88.58, "OpusparcusPC (fr)": 90.92, - "PawsXPairClassification (fr)": 57.32 + "OpusparcusPC (ru)": 85.41, + "OpusparcusPC (sv)": 91.14, + "PSC": 99.33, + "PawsXPairClassification (de)": 54.78, + "PawsXPairClassification (en)": 57.12, + "PawsXPairClassification (es)": 55.06, + "PawsXPairClassification (fr)": 57.33, + "PawsXPairClassification (ja)": 49.82, + "PawsXPairClassification (ko)": 50.79, + "PawsXPairClassification (zh)": 55.2, + "SICK-E-PL": 66.56, + "TERRa": 56.66 + }, + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "CDSC-E": 71.05, + "FalseFriendsGermanEnglish": 49.82, + "OpusparcusPC (fr)": 90.92, + "OpusparcusPC (de)": 95.03, + "OpusparcusPC (en)": 98.3, + "OpusparcusPC (fi)": 88.58, + "OpusparcusPC (ru)": 85.41, + "OpusparcusPC (sv)": 91.14, + "PSC": 99.33, + "PawsXPairClassification (fr)": 57.35, + "PawsXPairClassification (de)": 54.88, + "PawsXPairClassification (en)": 57.24, + "PawsXPairClassification (es)": 55.09, + "PawsXPairClassification (ja)": 49.85, + "PawsXPairClassification (ko)": 50.79, + "PawsXPairClassification (zh)": 55.22, + "SICK-E-PL": 66.57, + "TERRa": 56.67 }, { "Model": "Cohere-embed-multilingual-light-v3.0", "OpusparcusPC (fr)": 90.92, - "PawsXPairClassification (fr)": 57.35 + "PawsXPairClassification (fr)": 57.32 } ] }, @@ -1338,8 +5697,31 @@ "map": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloprofReranking": 51.6, - "SyntecReranking": 88.03 + "AlloprofReranking": 72.49, + "RuBQReranking": 71.12, + "SyntecReranking": 85.96, + "T2Reranking": 67.46 + }, + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "MIRACLReranking (ar)": 74.34, + "MIRACLReranking (bn)": 71.23, + "MIRACLReranking (de)": 48.25, + "MIRACLReranking (en)": 60.8, + "MIRACLReranking (es)": 62.1, + "MIRACLReranking (fa)": 54.52, + "MIRACLReranking (fi)": 76.11, + "MIRACLReranking (fr)": 51.29, + "MIRACLReranking (hi)": 60.07, + "MIRACLReranking (id)": 57.15, + "MIRACLReranking (ja)": 63.91, + "MIRACLReranking (ko)": 54.58, + "MIRACLReranking (ru)": 61.22, + "MIRACLReranking (sw)": 63.28, + "MIRACLReranking (te)": 78.62, + "MIRACLReranking (th)": 73.62, + "MIRACLReranking (yo)": 67.25, + "MIRACLReranking (zh)": 46.58 } ] }, @@ -1347,16 +5729,190 @@ "ndcg_at_10": [ { "Model": "Cohere-embed-multilingual-light-v3.0", - "AlloprofRetrieval": 35.39, - "BSARDRetrieval": 0.0, - "MintakaRetrieval (fr)": 23.0, - "SyntecRetrieval": 76.88, - "XPQARetrieval (fr)": 45.23 + "AILACasedocs": 26.92, + "AILAStatutes": 29.12, + "ARCChallenge": 7.73, + "AlloprofRetrieval": 46.43, + "AlphaNLI": 21.33, + "AppsRetrieval": 8.27, + "BSARDRetrieval": 17.34, + "CmedqaRetrieval": 26.17, + "CodeFeedbackMT": 39.34, + "CodeFeedbackST": 70.71, + "CodeSearchNetCCRetrieval (python)": 61.34, + "CodeSearchNetCCRetrieval (javascript)": 60.8, + "CodeSearchNetCCRetrieval (go)": 45.26, + "CodeSearchNetCCRetrieval (ruby)": 58.99, + "CodeSearchNetCCRetrieval (java)": 55.42, + "CodeSearchNetCCRetrieval (php)": 46.96, + "CodeSearchNetRetrieval (python)": 86.75, + "CodeSearchNetRetrieval (javascript)": 73.5, + "CodeSearchNetRetrieval (go)": 92.13, + "CodeSearchNetRetrieval (ruby)": 77.16, + "CodeSearchNetRetrieval (java)": 71.04, + "CodeSearchNetRetrieval (php)": 81.35, + "CodeTransOceanContest": 59.35, + "CodeTransOceanDL": 29.06, + "CosQA": 27.99, + "CovidRetrieval": 72.97, + "GerDaLIR": 7.02, + "GerDaLIRSmall": 15.97, + "GermanQuAD-Retrieval": 92.25, + "LEMBNarrativeQARetrieval": 21.36, + "LEMBQMSumRetrieval": 22.08, + "LEMBSummScreenFDRetrieval": 66.32, + "LEMBWikimQARetrieval": 57.84, + "LeCaRDv2": 59.75, + "LegalBenchConsumerContractsQA": 71.43, + "LegalBenchCorporateLobbying": 92.64, + "LegalQuAD": 44.21, + "LegalSummarization": 61.37, + "MintakaRetrieval (fr)": 26.68, + "MintakaRetrieval (ar)": 19.86, + "MintakaRetrieval (de)": 25.36, + "MintakaRetrieval (es)": 26.03, + "MintakaRetrieval (hi)": 20.16, + "MintakaRetrieval (it)": 26.62, + "MintakaRetrieval (ja)": 21.28, + "MintakaRetrieval (pt)": 26.98, + "PIQA": 24.67, + "Quail": 3.05, + "RARbCode": 41.03, + "RiaNewsRetrieval": 74.28, + "RuBQRetrieval": 67.24, + "SciFact-PL": 62.27, + "SpartQA": 5.4, + "StackOverflowQA": 81.47, + "SyntecRetrieval": 82.68, + "SyntheticText2SQL": 53.26, + "TRECCOVID-PL": 74.68, + "TempReasonL1": 0.94, + "TempReasonL2Fact": 27.02, + "TempReasonL2Pure": 0.75, + "TempReasonL3Fact": 22.9, + "TempReasonL3Pure": 6.32, + "WinoGrande": 61.87, + "XMarket (de)": 20.96, + "XMarket (en)": 26.68, + "XMarket (es)": 19.19, + "XPQARetrieval (fr)": 45.23, + "XPQARetrieval (ara-ara)": 42.71, + "XPQARetrieval (eng-ara)": 24.05, + "XPQARetrieval (ara-eng)": 34.64, + "XPQARetrieval (deu-deu)": 71.54, + "XPQARetrieval (eng-deu)": 35.15, + "XPQARetrieval (deu-eng)": 59.76, + "XPQARetrieval (spa-spa)": 57.85, + "XPQARetrieval (eng-spa)": 32.72, + "XPQARetrieval (spa-eng)": 49.1, + "XPQARetrieval (fra-fra)": 64.68, + "XPQARetrieval (eng-fra)": 36.9, + "XPQARetrieval (fra-eng)": 52.26, + "XPQARetrieval (hin-hin)": 73.61, + "XPQARetrieval (eng-hin)": 31.79, + "XPQARetrieval (hin-eng)": 65.19, + "XPQARetrieval (ita-ita)": 70.98, + "XPQARetrieval (eng-ita)": 30.47, + "XPQARetrieval (ita-eng)": 53.46, + "XPQARetrieval (jpn-jpn)": 70.31, + "XPQARetrieval (eng-jpn)": 33.82, + "XPQARetrieval (jpn-eng)": 57.97, + "XPQARetrieval (kor-kor)": 33.69, + "XPQARetrieval (eng-kor)": 28.25, + "XPQARetrieval (kor-eng)": 25.06, + "XPQARetrieval (pol-pol)": 45.37, + "XPQARetrieval (eng-pol)": 23.99, + "XPQARetrieval (pol-eng)": 36.88, + "XPQARetrieval (por-por)": 43.62, + "XPQARetrieval (eng-por)": 25.14, + "XPQARetrieval (por-eng)": 35.36, + "XPQARetrieval (tam-tam)": 38.79, + "XPQARetrieval (eng-tam)": 17.54, + "XPQARetrieval (tam-eng)": 30.35, + "XPQARetrieval (cmn-cmn)": 63.71, + "XPQARetrieval (eng-cmn)": 22.11, + "XPQARetrieval (cmn-eng)": 47.18 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "CDSC-R": 90.9, + "GermanSTSBenchmark": 76.78, + "RUParaPhraserSTS": 69.33, + "RuSTSBenchmarkSTS": 77.15, + "SICK-R-PL": 68.14, + "SICKFr": 75.51, + "STS22 (it)": 78.41, + "STS22 (fr)": 82.8, + "STS22 (pl-en)": 79.82, + "STS22 (de-en)": 53.01, + "STS22 (ar)": 58.03, + "STS22 (de-fr)": 65.06, + "STS22 (en)": 67.5, + "STS22 (de)": 60.0, + "STS22 (zh)": 67.02, + "STS22 (es)": 67.44, + "STS22 (fr-pl)": 84.52, + "STS22 (es-it)": 73.91, + "STS22 (de-pl)": 46.31, + "STS22 (ru)": 63.39, + "STS22 (pl)": 39.33, + "STS22 (tr)": 66.08, + "STS22 (zh-en)": 65.99, + "STS22 (es-en)": 77.32, + "STSB": 76.03, + "STSBenchmarkMultilingualSTS (zh)": 76.59, + "STSBenchmarkMultilingualSTS (ru)": 77.14, + "STSBenchmarkMultilingualSTS (fr)": 76.48, + "STSBenchmarkMultilingualSTS (es)": 78.79, + "STSBenchmarkMultilingualSTS (pl)": 71.3, + "STSBenchmarkMultilingualSTS (pt)": 73.29, + "STSBenchmarkMultilingualSTS (nl)": 74.54, + "STSBenchmarkMultilingualSTS (en)": 83.42, + "STSBenchmarkMultilingualSTS (it)": 75.75, + "STSBenchmarkMultilingualSTS (de)": 77.52 + }, + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "CDSC-R": 90.9, + "GermanSTSBenchmark": 76.78, + "RUParaPhraserSTS": 69.33, + "RuSTSBenchmarkSTS": 77.15, + "SICK-R-PL": 68.14, + "SICKFr": 75.51, + "STS22 (it)": 78.41, + "STS22 (fr)": 82.8, + "STS22 (pl-en)": 79.82, + "STS22 (de-en)": 53.01, + "STS22 (ar)": 58.03, + "STS22 (de-fr)": 65.06, + "STS22 (en)": 67.5, + "STS22 (de)": 60.0, + "STS22 (zh)": 67.02, + "STS22 (es)": 67.44, + "STS22 (fr-pl)": 84.52, + "STS22 (es-it)": 73.91, + "STS22 (de-pl)": 46.31, + "STS22 (ru)": 63.39, + "STS22 (pl)": 39.33, + "STS22 (tr)": 66.08, + "STS22 (zh-en)": 65.99, + "STS22 (es-en)": 77.32, + "STSB": 76.03, + "STSBenchmarkMultilingualSTS (zh)": 76.59, + "STSBenchmarkMultilingualSTS (ru)": 77.14, + "STSBenchmarkMultilingualSTS (fr)": 76.48, + "STSBenchmarkMultilingualSTS (es)": 78.79, + "STSBenchmarkMultilingualSTS (pl)": 71.3, + "STSBenchmarkMultilingualSTS (pt)": 73.29, + "STSBenchmarkMultilingualSTS (nl)": 74.54, + "STSBenchmarkMultilingualSTS (en)": 83.42, + "STSBenchmarkMultilingualSTS (it)": 75.75, + "STSBenchmarkMultilingualSTS (de)": 77.52 + }, { "Model": "Cohere-embed-multilingual-light-v3.0", "SICKFr": 75.5, @@ -1370,30 +5926,319 @@ { "Model": "Cohere-embed-multilingual-light-v3.0", "SummEvalFr": 31.4 + }, + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "SummEvalFr": 31.41 + }, + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "SummEvalFr": 31.41 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "CEDRClassification": 38.5, + "SensitiveTopicsClassification": 27.29 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "Cohere-embed-multilingual-light-v3.0", + "Core17InstructionRetrieval": 1.58, + "News21InstructionRetrieval": -0.83, + "Robust04InstructionRetrieval": -8.11 + } + ] } }, "Cohere__Cohere-embed-multilingual-v3.0": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "BornholmBitextMining": 35.6, + "Tatoeba (tur-eng)": 95.33, + "Tatoeba (wuu-eng)": 81.85, + "Tatoeba (mkd-eng)": 86.69, + "Tatoeba (xho-eng)": 71.03, + "Tatoeba (ina-eng)": 88.51, + "Tatoeba (hrv-eng)": 95.08, + "Tatoeba (cha-eng)": 23.45, + "Tatoeba (dtp-eng)": 5.68, + "Tatoeba (ceb-eng)": 50.4, + "Tatoeba (gle-eng)": 68.24, + "Tatoeba (dan-eng)": 94.99, + "Tatoeba (swh-eng)": 68.22, + "Tatoeba (nov-eng)": 66.11, + "Tatoeba (fao-eng)": 71.4, + "Tatoeba (slk-eng)": 92.69, + "Tatoeba (por-eng)": 92.88, + "Tatoeba (vie-eng)": 96.15, + "Tatoeba (lit-eng)": 86.7, + "Tatoeba (pam-eng)": 7.22, + "Tatoeba (uzb-eng)": 66.08, + "Tatoeba (bel-eng)": 91.35, + "Tatoeba (pes-eng)": 90.23, + "Tatoeba (sqi-eng)": 93.85, + "Tatoeba (cmn-eng)": 94.85, + "Tatoeba (bul-eng)": 92.37, + "Tatoeba (war-eng)": 51.66, + "Tatoeba (bre-eng)": 10.28, + "Tatoeba (pms-eng)": 49.51, + "Tatoeba (tat-eng)": 67.82, + "Tatoeba (csb-eng)": 34.22, + "Tatoeba (epo-eng)": 94.75, + "Tatoeba (orv-eng)": 33.34, + "Tatoeba (mar-eng)": 87.77, + "Tatoeba (cym-eng)": 75.97, + "Tatoeba (ast-eng)": 72.4, + "Tatoeba (khm-eng)": 49.34, + "Tatoeba (jpn-eng)": 92.6, + "Tatoeba (nno-eng)": 89.31, + "Tatoeba (ben-eng)": 82.93, + "Tatoeba (ukr-eng)": 92.97, + "Tatoeba (heb-eng)": 87.09, + "Tatoeba (nds-eng)": 62.46, + "Tatoeba (gla-eng)": 50.55, + "Tatoeba (mal-eng)": 96.7, + "Tatoeba (jav-eng)": 68.65, + "Tatoeba (kaz-eng)": 78.64, + "Tatoeba (kab-eng)": 25.52, + "Tatoeba (ang-eng)": 40.19, + "Tatoeba (arq-eng)": 31.67, + "Tatoeba (est-eng)": 83.62, + "Tatoeba (tel-eng)": 88.06, + "Tatoeba (tzl-eng)": 42.22, + "Tatoeba (ara-eng)": 87.68, + "Tatoeba (aze-eng)": 86.04, + "Tatoeba (ber-eng)": 26.19, + "Tatoeba (uig-eng)": 66.63, + "Tatoeba (ido-eng)": 79.36, + "Tatoeba (yid-eng)": 70.75, + "Tatoeba (mhr-eng)": 6.95, + "Tatoeba (srp-eng)": 92.97, + "Tatoeba (nob-eng)": 96.72, + "Tatoeba (ell-eng)": 93.2, + "Tatoeba (dsb-eng)": 42.47, + "Tatoeba (slv-eng)": 88.65, + "Tatoeba (amh-eng)": 76.53, + "Tatoeba (lfn-eng)": 58.58, + "Tatoeba (lat-eng)": 51.06, + "Tatoeba (tha-eng)": 95.74, + "Tatoeba (hye-eng)": 88.23, + "Tatoeba (arz-eng)": 72.54, + "Tatoeba (cor-eng)": 5.99, + "Tatoeba (urd-eng)": 87.2, + "Tatoeba (glg-eng)": 75.7, + "Tatoeba (cat-eng)": 88.53, + "Tatoeba (ita-eng)": 92.48, + "Tatoeba (spa-eng)": 96.52, + "Tatoeba (awa-eng)": 70.39, + "Tatoeba (isl-eng)": 90.92, + "Tatoeba (ron-eng)": 94.45, + "Tatoeba (oci-eng)": 37.4, + "Tatoeba (max-eng)": 54.89, + "Tatoeba (mon-eng)": 84.82, + "Tatoeba (swg-eng)": 56.43, + "Tatoeba (kur-eng)": 56.24, + "Tatoeba (hin-eng)": 93.47, + "Tatoeba (nld-eng)": 96.38, + "Tatoeba (ces-eng)": 94.36, + "Tatoeba (cbk-eng)": 63.39, + "Tatoeba (tam-eng)": 85.65, + "Tatoeba (fra-eng)": 93.82, + "Tatoeba (tgl-eng)": 87.63, + "Tatoeba (tuk-eng)": 25.92, + "Tatoeba (hun-eng)": 92.96, + "Tatoeba (deu-eng)": 99.2, + "Tatoeba (yue-eng)": 85.87, + "Tatoeba (eus-eng)": 70.8, + "Tatoeba (ind-eng)": 89.99, + "Tatoeba (zsm-eng)": 94.77, + "Tatoeba (kor-eng)": 88.98, + "Tatoeba (kat-eng)": 81.09, + "Tatoeba (gsw-eng)": 47.07, + "Tatoeba (ile-eng)": 76.22, + "Tatoeba (rus-eng)": 92.32, + "Tatoeba (lvs-eng)": 88.62, + "Tatoeba (afr-eng)": 88.23, + "Tatoeba (pol-eng)": 96.92, + "Tatoeba (kzj-eng)": 6.13, + "Tatoeba (hsb-eng)": 53.8, + "Tatoeba (bos-eng)": 92.28, + "Tatoeba (swe-eng)": 93.88, + "Tatoeba (fry-eng)": 57.25, + "Tatoeba (fin-eng)": 94.15 + } + ] }, "Classification": { "accuracy": [ { "Model": "Cohere-embed-multilingual-v3.0", - "AmazonReviewsClassification (fr)": 41.89, - "MTOPDomainClassification (fr)": 86.23, - "MTOPIntentClassification (fr)": 61.07, - "MasakhaNEWSClassification (fra)": 83.06, - "MassiveIntentClassification (fr)": 62.94, - "MassiveScenarioClassification (fr)": 67.29 + "AllegroReviews": 47.3, + "AmazonCounterfactualClassification (en-ext)": 77.41, + "AmazonCounterfactualClassification (en)": 77.67, + "AmazonCounterfactualClassification (de)": 68.58, + "AmazonCounterfactualClassification (ja)": 77.99, + "AmazonReviewsClassification (fr)": 45.61, + "AmazonReviewsClassification (en)": 51.97, + "AmazonReviewsClassification (de)": 48.05, + "AmazonReviewsClassification (es)": 45.77, + "AmazonReviewsClassification (ja)": 42.46, + "AmazonReviewsClassification (zh)": 40.71, + "AngryTweetsClassification": 58.92, + "CBD": 71.67, + "DanishPoliticalCommentsClassification": 42.69, + "GeoreviewClassification": 50.88, + "HeadlineClassification": 79.5, + "InappropriatenessClassification": 62.46, + "KinopoiskClassification": 61.84, + "LccSentimentClassification": 60.47, + "MTOPDomainClassification (fr)": 90.32, + "MTOPDomainClassification (en)": 94.44, + "MTOPDomainClassification (de)": 92.86, + "MTOPDomainClassification (es)": 92.76, + "MTOPDomainClassification (hi)": 90.81, + "MTOPDomainClassification (th)": 88.56, + "MTOPIntentClassification (fr)": 63.64, + "MTOPIntentClassification (en)": 69.95, + "MTOPIntentClassification (de)": 69.81, + "MTOPIntentClassification (es)": 70.37, + "MTOPIntentClassification (hi)": 65.94, + "MTOPIntentClassification (th)": 68.74, + "MasakhaNEWSClassification (fra)": 79.5, + "MasakhaNEWSClassification (amh)": 87.39, + "MasakhaNEWSClassification (eng)": 82.01, + "MasakhaNEWSClassification (hau)": 81.81, + "MasakhaNEWSClassification (ibo)": 74.62, + "MasakhaNEWSClassification (lin)": 80.17, + "MasakhaNEWSClassification (lug)": 77.58, + "MasakhaNEWSClassification (orm)": 81.14, + "MasakhaNEWSClassification (pcm)": 93.34, + "MasakhaNEWSClassification (run)": 82.86, + "MasakhaNEWSClassification (sna)": 90.0, + "MasakhaNEWSClassification (som)": 65.99, + "MasakhaNEWSClassification (swa)": 76.39, + "MasakhaNEWSClassification (tir)": 74.63, + "MasakhaNEWSClassification (xho)": 85.12, + "MasakhaNEWSClassification (yor)": 82.21, + "MassiveIntentClassification (fr)": 67.37, + "MassiveIntentClassification (zh-TW)": 63.14, + "MassiveIntentClassification (sw)": 55.88, + "MassiveIntentClassification (ur)": 61.8, + "MassiveIntentClassification (ja)": 69.2, + "MassiveIntentClassification (sq)": 63.61, + "MassiveIntentClassification (ar)": 57.88, + "MassiveIntentClassification (tl)": 62.23, + "MassiveIntentClassification (ru)": 69.08, + "MassiveIntentClassification (tr)": 67.23, + "MassiveIntentClassification (kn)": 59.99, + "MassiveIntentClassification (de)": 66.25, + "MassiveIntentClassification (it)": 67.58, + "MassiveIntentClassification (zh-CN)": 68.14, + "MassiveIntentClassification (id)": 67.09, + "MassiveIntentClassification (ms)": 64.21, + "MassiveIntentClassification (hy)": 59.61, + "MassiveIntentClassification (el)": 66.86, + "MassiveIntentClassification (sv)": 69.6, + "MassiveIntentClassification (fi)": 66.69, + "MassiveIntentClassification (ml)": 63.87, + "MassiveIntentClassification (da)": 66.49, + "MassiveIntentClassification (lv)": 63.81, + "MassiveIntentClassification (he)": 64.47, + "MassiveIntentClassification (nl)": 68.13, + "MassiveIntentClassification (th)": 64.94, + "MassiveIntentClassification (ko)": 65.89, + "MassiveIntentClassification (jv)": 55.1, + "MassiveIntentClassification (mn)": 58.64, + "MassiveIntentClassification (pt)": 68.7, + "MassiveIntentClassification (te)": 61.75, + "MassiveIntentClassification (cy)": 53.01, + "MassiveIntentClassification (bn)": 62.12, + "MassiveIntentClassification (am)": 52.95, + "MassiveIntentClassification (es)": 67.59, + "MassiveIntentClassification (ka)": 49.77, + "MassiveIntentClassification (km)": 43.05, + "MassiveIntentClassification (hi)": 66.02, + "MassiveIntentClassification (ro)": 64.5, + "MassiveIntentClassification (ta)": 60.78, + "MassiveIntentClassification (en)": 72.11, + "MassiveIntentClassification (fa)": 68.69, + "MassiveIntentClassification (hu)": 66.59, + "MassiveIntentClassification (my)": 58.0, + "MassiveIntentClassification (az)": 61.82, + "MassiveIntentClassification (is)": 60.44, + "MassiveIntentClassification (af)": 59.89, + "MassiveIntentClassification (pl)": 68.48, + "MassiveIntentClassification (vi)": 65.76, + "MassiveIntentClassification (nb)": 67.14, + "MassiveIntentClassification (sl)": 64.79, + "MassiveScenarioClassification (fr)": 73.11, + "MassiveScenarioClassification (jv)": 62.3, + "MassiveScenarioClassification (sl)": 71.41, + "MassiveScenarioClassification (hi)": 71.49, + "MassiveScenarioClassification (hy)": 64.16, + "MassiveScenarioClassification (da)": 74.32, + "MassiveScenarioClassification (he)": 69.79, + "MassiveScenarioClassification (fi)": 71.3, + "MassiveScenarioClassification (my)": 61.86, + "MassiveScenarioClassification (pt)": 71.8, + "MassiveScenarioClassification (ta)": 66.38, + "MassiveScenarioClassification (lv)": 69.35, + "MassiveScenarioClassification (tl)": 67.43, + "MassiveScenarioClassification (id)": 73.29, + "MassiveScenarioClassification (fa)": 73.25, + "MassiveScenarioClassification (it)": 72.5, + "MassiveScenarioClassification (el)": 72.69, + "MassiveScenarioClassification (zh-TW)": 69.52, + "MassiveScenarioClassification (bn)": 68.62, + "MassiveScenarioClassification (ja)": 75.32, + "MassiveScenarioClassification (de)": 74.29, + "MassiveScenarioClassification (mn)": 63.42, + "MassiveScenarioClassification (nl)": 74.14, + "MassiveScenarioClassification (tr)": 71.79, + "MassiveScenarioClassification (is)": 67.99, + "MassiveScenarioClassification (sq)": 70.47, + "MassiveScenarioClassification (nb)": 73.82, + "MassiveScenarioClassification (ru)": 74.26, + "MassiveScenarioClassification (cy)": 60.29, + "MassiveScenarioClassification (sw)": 63.81, + "MassiveScenarioClassification (th)": 71.08, + "MassiveScenarioClassification (af)": 67.77, + "MassiveScenarioClassification (ms)": 69.28, + "MassiveScenarioClassification (ur)": 68.42, + "MassiveScenarioClassification (az)": 65.33, + "MassiveScenarioClassification (vi)": 72.03, + "MassiveScenarioClassification (ro)": 70.12, + "MassiveScenarioClassification (es)": 72.32, + "MassiveScenarioClassification (hu)": 72.82, + "MassiveScenarioClassification (ka)": 57.6, + "MassiveScenarioClassification (pl)": 72.81, + "MassiveScenarioClassification (sv)": 75.69, + "MassiveScenarioClassification (am)": 59.54, + "MassiveScenarioClassification (te)": 67.72, + "MassiveScenarioClassification (ml)": 69.96, + "MassiveScenarioClassification (ar)": 64.82, + "MassiveScenarioClassification (zh-CN)": 74.61, + "MassiveScenarioClassification (km)": 49.84, + "MassiveScenarioClassification (kn)": 66.78, + "MassiveScenarioClassification (ko)": 73.42, + "MassiveScenarioClassification (en)": 76.37, + "NoRecClassification": 60.27, + "NordicLangClassification": 78.74, + "PAC": 68.29, + "PolEmo2.0-IN": 82.31, + "PolEmo2.0-OUT": 63.0, + "RuReviewsClassification": 66.83, + "RuSciBenchGRNTIClassification": 62.17, + "RuSciBenchOECDClassification": 48.22 } ] }, @@ -1401,13 +6246,58 @@ "v_measure": [ { "Model": "Cohere-embed-multilingual-v3.0", - "AlloProfClusteringP2P": 63.53, - "AlloProfClusteringS2S": 36.18, - "HALClusteringS2S": 19.9, + "AlloProfClusteringP2P": 62.87, + "AlloProfClusteringS2S": 46.5, + "BlurbsClusteringP2P": 42.32, + "BlurbsClusteringS2S": 19.26, + "GeoreviewClusteringP2P": 64.4, + "HALClusteringS2S": 27.61, "MLSUMClusteringP2P": 45.08, + "MLSUMClusteringP2P (de)": 44.04, + "MLSUMClusteringP2P (fr)": 45.43, + "MLSUMClusteringP2P (ru)": 47.6, + "MLSUMClusteringP2P (es)": 48.14, "MLSUMClusteringS2S": 34.75, - "MasakhaNEWSClusteringP2P (fra)": 53.18, - "MasakhaNEWSClusteringS2S (fra)": 32.31 + "MLSUMClusteringS2S (de)": 43.06, + "MLSUMClusteringS2S (fr)": 45.69, + "MLSUMClusteringS2S (ru)": 45.76, + "MLSUMClusteringS2S (es)": 47.72, + "MasakhaNEWSClusteringP2P (fra)": 64.89, + "MasakhaNEWSClusteringP2P (amh)": 67.44, + "MasakhaNEWSClusteringP2P (eng)": 71.02, + "MasakhaNEWSClusteringP2P (hau)": 79.53, + "MasakhaNEWSClusteringP2P (ibo)": 68.03, + "MasakhaNEWSClusteringP2P (lin)": 60.21, + "MasakhaNEWSClusteringP2P (lug)": 67.34, + "MasakhaNEWSClusteringP2P (orm)": 63.94, + "MasakhaNEWSClusteringP2P (pcm)": 82.02, + "MasakhaNEWSClusteringP2P (run)": 65.24, + "MasakhaNEWSClusteringP2P (sna)": 61.69, + "MasakhaNEWSClusteringP2P (som)": 39.54, + "MasakhaNEWSClusteringP2P (swa)": 36.93, + "MasakhaNEWSClusteringP2P (tir)": 61.32, + "MasakhaNEWSClusteringP2P (xho)": 45.56, + "MasakhaNEWSClusteringP2P (yor)": 48.43, + "MasakhaNEWSClusteringS2S (fra)": 55.29, + "MasakhaNEWSClusteringS2S (amh)": 50.53, + "MasakhaNEWSClusteringS2S (eng)": 57.31, + "MasakhaNEWSClusteringS2S (hau)": 41.86, + "MasakhaNEWSClusteringS2S (ibo)": 34.99, + "MasakhaNEWSClusteringS2S (lin)": 43.77, + "MasakhaNEWSClusteringS2S (lug)": 43.26, + "MasakhaNEWSClusteringS2S (orm)": 29.75, + "MasakhaNEWSClusteringS2S (pcm)": 61.13, + "MasakhaNEWSClusteringS2S (run)": 53.39, + "MasakhaNEWSClusteringS2S (sna)": 69.47, + "MasakhaNEWSClusteringS2S (som)": 35.33, + "MasakhaNEWSClusteringS2S (swa)": 12.76, + "MasakhaNEWSClusteringS2S (tir)": 54.47, + "MasakhaNEWSClusteringS2S (xho)": 31.21, + "MasakhaNEWSClusteringS2S (yor)": 36.85, + "RuSciBenchGRNTIClusteringP2P": 56.95, + "RuSciBenchOECDClusteringP2P": 48.45, + "TenKGnadClusteringP2P": 48.44, + "TenKGnadClusteringS2S": 37.86 } ] }, @@ -1415,8 +6305,45 @@ "max_ap": [ { "Model": "Cohere-embed-multilingual-v3.0", + "CDSC-E": 73.47, + "FalseFriendsGermanEnglish": 54.25, + "OpusparcusPC (de)": 97.44, + "OpusparcusPC (en)": 98.67, + "OpusparcusPC (fi)": 94.57, + "OpusparcusPC (fr)": 94.07, + "OpusparcusPC (ru)": 90.55, + "OpusparcusPC (sv)": 95.4, + "PSC": 99.51, + "PawsXPairClassification (de)": 59.47, + "PawsXPairClassification (en)": 64.94, + "PawsXPairClassification (es)": 59.28, + "PawsXPairClassification (fr)": 61.24, + "PawsXPairClassification (ja)": 51.82, + "PawsXPairClassification (ko)": 53.09, + "PawsXPairClassification (zh)": 58.59, + "SICK-E-PL": 79.27, + "TERRa": 58.5 + }, + { + "Model": "Cohere-embed-multilingual-v3.0", + "CDSC-E": 73.47, + "FalseFriendsGermanEnglish": 54.29, "OpusparcusPC (fr)": 94.08, - "PawsXPairClassification (fr)": 61.26 + "OpusparcusPC (de)": 97.47, + "OpusparcusPC (en)": 98.7, + "OpusparcusPC (fi)": 94.57, + "OpusparcusPC (ru)": 90.55, + "OpusparcusPC (sv)": 95.4, + "PSC": 99.51, + "PawsXPairClassification (fr)": 61.24, + "PawsXPairClassification (de)": 59.52, + "PawsXPairClassification (en)": 65.06, + "PawsXPairClassification (es)": 59.28, + "PawsXPairClassification (ja)": 52.08, + "PawsXPairClassification (ko)": 53.4, + "PawsXPairClassification (zh)": 58.59, + "SICK-E-PL": 79.27, + "TERRa": 58.61 }, { "Model": "Cohere-embed-multilingual-v3.0", @@ -1429,8 +6356,31 @@ "map": [ { "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofReranking": 51.01, - "SyntecReranking": 85.72 + "AlloprofReranking": 75.41, + "RuBQReranking": 75.26, + "SyntecReranking": 91.2, + "T2Reranking": 67.8 + }, + { + "Model": "Cohere-embed-multilingual-v3.0", + "MIRACLReranking (ar)": 77.86, + "MIRACLReranking (bn)": 75.74, + "MIRACLReranking (de)": 55.61, + "MIRACLReranking (en)": 63.53, + "MIRACLReranking (es)": 64.33, + "MIRACLReranking (fa)": 60.26, + "MIRACLReranking (fi)": 79.69, + "MIRACLReranking (fr)": 57.2, + "MIRACLReranking (hi)": 66.53, + "MIRACLReranking (id)": 58.68, + "MIRACLReranking (ja)": 66.93, + "MIRACLReranking (ko)": 55.89, + "MIRACLReranking (ru)": 64.88, + "MIRACLReranking (sw)": 66.32, + "MIRACLReranking (te)": 80.68, + "MIRACLReranking (th)": 77.09, + "MIRACLReranking (yo)": 67.58, + "MIRACLReranking (zh)": 53.72 } ] }, @@ -1438,16 +6388,193 @@ "ndcg_at_10": [ { "Model": "Cohere-embed-multilingual-v3.0", - "AlloprofRetrieval": 38.36, - "BSARDRetrieval": 0.14, - "MintakaRetrieval (fr)": 25.44, - "SyntecRetrieval": 79.27, - "XPQARetrieval (fr)": 58.87 + "AILACasedocs": 28.31, + "AILAStatutes": 29.69, + "ARCChallenge": 11.6, + "AlloprofRetrieval": 51.51, + "AlphaNLI": 18.47, + "AppsRetrieval": 31.91, + "BSARDRetrieval": 22.91, + "CmedqaRetrieval": 30.2, + "CodeFeedbackMT": 42.9, + "CodeFeedbackST": 74.19, + "CodeSearchNetCCRetrieval (python)": 66.39, + "CodeSearchNetCCRetrieval (javascript)": 60.54, + "CodeSearchNetCCRetrieval (go)": 46.68, + "CodeSearchNetCCRetrieval (ruby)": 61.72, + "CodeSearchNetCCRetrieval (java)": 60.47, + "CodeSearchNetCCRetrieval (php)": 49.65, + "CodeSearchNetRetrieval (python)": 89.87, + "CodeSearchNetRetrieval (javascript)": 76.56, + "CodeSearchNetRetrieval (go)": 92.42, + "CodeSearchNetRetrieval (ruby)": 81.6, + "CodeSearchNetRetrieval (java)": 80.11, + "CodeSearchNetRetrieval (php)": 84.47, + "CodeTransOceanContest": 70.25, + "CodeTransOceanDL": 30.14, + "CosQA": 32.58, + "CovidRetrieval": 77.12, + "GerDaLIR": 8.62, + "GerDaLIRSmall": 19.6, + "GermanQuAD-Retrieval": 94.31, + "HellaSwag": 30.57, + "LEMBNarrativeQARetrieval": 21.94, + "LEMBQMSumRetrieval": 23.05, + "LEMBSummScreenFDRetrieval": 70.03, + "LEMBWikimQARetrieval": 56.59, + "LeCaRDv2": 58.31, + "LegalBenchConsumerContractsQA": 75.9, + "LegalBenchCorporateLobbying": 93.76, + "LegalQuAD": 46.85, + "LegalSummarization": 64.22, + "MintakaRetrieval (fr)": 34.56, + "MintakaRetrieval (ar)": 25.53, + "MintakaRetrieval (de)": 34.09, + "MintakaRetrieval (es)": 33.82, + "MintakaRetrieval (hi)": 27.88, + "MintakaRetrieval (it)": 35.76, + "MintakaRetrieval (ja)": 26.37, + "MintakaRetrieval (pt)": 36.2, + "PIQA": 30.62, + "Quail": 5.42, + "RARbCode": 60.08, + "RARbMath": 71.31, + "RiaNewsRetrieval": 82.5, + "RuBQRetrieval": 73.04, + "SIQA": 5.58, + "SciFact-PL": 65.68, + "SpartQA": 4.66, + "StackOverflowQA": 89.42, + "SyntecRetrieval": 88.59, + "SyntheticText2SQL": 59.79, + "TRECCOVID-PL": 80.26, + "TempReasonL1": 0.91, + "TempReasonL2Fact": 34.23, + "TempReasonL2Pure": 1.92, + "TempReasonL3Fact": 29.08, + "TempReasonL3Pure": 7.81, + "WinoGrande": 58.44, + "XMarket (de)": 20.17, + "XMarket (en)": 26.07, + "XMarket (es)": 19.47, + "XPQARetrieval (fr)": 58.87, + "XPQARetrieval (ara-ara)": 47.98, + "XPQARetrieval (eng-ara)": 33.72, + "XPQARetrieval (ara-eng)": 44.1, + "XPQARetrieval (deu-deu)": 79.11, + "XPQARetrieval (eng-deu)": 51.12, + "XPQARetrieval (deu-eng)": 74.31, + "XPQARetrieval (spa-spa)": 64.45, + "XPQARetrieval (eng-spa)": 42.89, + "XPQARetrieval (spa-eng)": 59.59, + "XPQARetrieval (fra-fra)": 69.72, + "XPQARetrieval (eng-fra)": 46.91, + "XPQARetrieval (fra-eng)": 65.47, + "XPQARetrieval (hin-hin)": 74.06, + "XPQARetrieval (eng-hin)": 37.98, + "XPQARetrieval (hin-eng)": 71.01, + "XPQARetrieval (ita-ita)": 77.46, + "XPQARetrieval (eng-ita)": 44.84, + "XPQARetrieval (ita-eng)": 69.11, + "XPQARetrieval (jpn-jpn)": 75.36, + "XPQARetrieval (eng-jpn)": 43.83, + "XPQARetrieval (jpn-eng)": 70.16, + "XPQARetrieval (kor-kor)": 38.21, + "XPQARetrieval (eng-kor)": 37.74, + "XPQARetrieval (kor-eng)": 36.38, + "XPQARetrieval (pol-pol)": 51.53, + "XPQARetrieval (eng-pol)": 34.7, + "XPQARetrieval (pol-eng)": 47.96, + "XPQARetrieval (por-por)": 48.46, + "XPQARetrieval (eng-por)": 28.12, + "XPQARetrieval (por-eng)": 47.01, + "XPQARetrieval (tam-tam)": 45.46, + "XPQARetrieval (eng-tam)": 22.35, + "XPQARetrieval (tam-eng)": 39.33, + "XPQARetrieval (cmn-cmn)": 65.78, + "XPQARetrieval (eng-cmn)": 34.24, + "XPQARetrieval (cmn-eng)": 57.79 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "CDSC-R": 90.43, + "GermanSTSBenchmark": 81.1, + "RUParaPhraserSTS": 71.11, + "RuSTSBenchmarkSTS": 81.91, + "SICK-R-PL": 76.21, + "SICKFr": 79.24, + "STS22 (zh-en)": 74.01, + "STS22 (ar)": 60.53, + "STS22 (es-it)": 76.51, + "STS22 (fr-pl)": 50.71, + "STS22 (fr)": 82.76, + "STS22 (de)": 61.71, + "STS22 (es)": 68.78, + "STS22 (ru)": 65.39, + "STS22 (en)": 69.63, + "STS22 (pl-en)": 78.24, + "STS22 (zh)": 68.83, + "STS22 (de-en)": 61.8, + "STS22 (tr)": 66.67, + "STS22 (pl)": 41.69, + "STS22 (de-fr)": 65.96, + "STS22 (it)": 79.78, + "STS22 (de-pl)": 55.89, + "STS22 (es-en)": 79.94, + "STSB": 80.81, + "STSBenchmarkMultilingualSTS (en)": 86.7, + "STSBenchmarkMultilingualSTS (it)": 80.41, + "STSBenchmarkMultilingualSTS (de)": 82.3, + "STSBenchmarkMultilingualSTS (nl)": 80.29, + "STSBenchmarkMultilingualSTS (pt)": 76.18, + "STSBenchmarkMultilingualSTS (zh)": 80.14, + "STSBenchmarkMultilingualSTS (es)": 82.82, + "STSBenchmarkMultilingualSTS (fr)": 81.86, + "STSBenchmarkMultilingualSTS (ru)": 81.61, + "STSBenchmarkMultilingualSTS (pl)": 79.44 + }, + { + "Model": "Cohere-embed-multilingual-v3.0", + "CDSC-R": 90.43, + "GermanSTSBenchmark": 81.1, + "RUParaPhraserSTS": 71.11, + "RuSTSBenchmarkSTS": 81.91, + "SICK-R-PL": 76.21, + "SICKFr": 79.24, + "STS22 (zh-en)": 74.01, + "STS22 (ar)": 60.53, + "STS22 (es-it)": 76.51, + "STS22 (fr-pl)": 50.71, + "STS22 (fr)": 82.76, + "STS22 (de)": 61.71, + "STS22 (es)": 68.78, + "STS22 (ru)": 65.39, + "STS22 (en)": 69.63, + "STS22 (pl-en)": 78.24, + "STS22 (zh)": 68.83, + "STS22 (de-en)": 61.8, + "STS22 (tr)": 66.67, + "STS22 (pl)": 41.69, + "STS22 (de-fr)": 65.96, + "STS22 (it)": 79.78, + "STS22 (de-pl)": 55.89, + "STS22 (es-en)": 79.94, + "STSB": 80.81, + "STSBenchmarkMultilingualSTS (en)": 86.7, + "STSBenchmarkMultilingualSTS (it)": 80.41, + "STSBenchmarkMultilingualSTS (de)": 82.3, + "STSBenchmarkMultilingualSTS (nl)": 80.29, + "STSBenchmarkMultilingualSTS (pt)": 76.18, + "STSBenchmarkMultilingualSTS (zh)": 80.14, + "STSBenchmarkMultilingualSTS (es)": 82.82, + "STSBenchmarkMultilingualSTS (fr)": 81.86, + "STSBenchmarkMultilingualSTS (ru)": 81.61, + "STSBenchmarkMultilingualSTS (pl)": 79.44 + }, { "Model": "Cohere-embed-multilingual-v3.0", "SICKFr": 79.23, @@ -1461,14 +6588,35 @@ { "Model": "Cohere-embed-multilingual-v3.0", "SummEvalFr": 31.26 + }, + { + "Model": "Cohere-embed-multilingual-v3.0", + "SummEvalFr": 31.24 + }, + { + "Model": "Cohere-embed-multilingual-v3.0", + "SummEvalFr": 31.24 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "CEDRClassification": 45.67, + "SensitiveTopicsClassification": 30.83 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "Cohere-embed-multilingual-v3.0", + "Core17InstructionRetrieval": 0.29, + "News21InstructionRetrieval": 0.83, + "Robust04InstructionRetrieval": -6.8 + } + ] } }, "DeepPavlov__distilrubert-small-cased-conversational": { @@ -1476,6 +6624,7 @@ "f1": [ { "Model": "distilrubert-small-cased-conversational", + "BornholmBitextMining": 13.33, "Tatoeba (rus-eng)": 24.16 } ] @@ -1484,15 +6633,169 @@ "accuracy": [ { "Model": "distilrubert-small-cased-conversational", + "AllegroReviews": 23.31, + "AmazonCounterfactualClassification (en-ext)": 69.65, + "AmazonCounterfactualClassification (en)": 72.24, + "AmazonCounterfactualClassification (de)": 63.37, + "AmazonCounterfactualClassification (ja)": 56.24, + "AmazonPolarityClassification": 57.61, + "AmazonReviewsClassification (en)": 28.83, + "AmazonReviewsClassification (de)": 25.01, + "AmazonReviewsClassification (es)": 24.62, + "AmazonReviewsClassification (fr)": 25.9, + "AmazonReviewsClassification (ja)": 21.21, + "AmazonReviewsClassification (zh)": 21.67, + "AngryTweetsClassification": 42.73, + "Banking77Classification": 64.28, + "CBD": 52.53, + "DanishPoliticalCommentsClassification": 27.46, + "EmotionClassification": 22.08, "GeoreviewClassification": 38.95, "HeadlineClassification": 75.59, + "ImdbClassification": 54.55, "InappropriatenessClassification": 60.68, "KinopoiskClassification": 49.67, + "LccSentimentClassification": 42.87, + "MTOPDomainClassification (en)": 71.22, + "MTOPDomainClassification (de)": 54.5, + "MTOPDomainClassification (es)": 62.17, + "MTOPDomainClassification (fr)": 54.21, + "MTOPDomainClassification (hi)": 23.0, + "MTOPDomainClassification (th)": 15.66, + "MTOPIntentClassification (en)": 53.1, + "MTOPIntentClassification (de)": 46.41, + "MTOPIntentClassification (es)": 46.94, + "MTOPIntentClassification (fr)": 39.64, + "MTOPIntentClassification (hi)": 5.12, + "MTOPIntentClassification (th)": 4.9, + "MasakhaNEWSClassification (amh)": 27.79, + "MasakhaNEWSClassification (eng)": 58.73, + "MasakhaNEWSClassification (fra)": 40.14, + "MasakhaNEWSClassification (hau)": 46.64, + "MasakhaNEWSClassification (ibo)": 34.13, + "MasakhaNEWSClassification (lin)": 46.11, + "MasakhaNEWSClassification (lug)": 43.72, + "MasakhaNEWSClassification (orm)": 42.98, + "MasakhaNEWSClassification (pcm)": 72.98, + "MasakhaNEWSClassification (run)": 39.6, + "MasakhaNEWSClassification (sna)": 56.26, + "MasakhaNEWSClassification (som)": 28.37, + "MasakhaNEWSClassification (swa)": 34.12, + "MasakhaNEWSClassification (tir)": 24.49, + "MasakhaNEWSClassification (xho)": 52.05, + "MasakhaNEWSClassification (yor)": 40.02, "MassiveIntentClassification (ru)": 63.12, + "MassiveIntentClassification (tr)": 35.68, + "MassiveIntentClassification (fi)": 38.38, + "MassiveIntentClassification (hi)": 3.66, + "MassiveIntentClassification (ar)": 10.51, + "MassiveIntentClassification (id)": 42.01, + "MassiveIntentClassification (de)": 38.92, + "MassiveIntentClassification (sq)": 39.72, + "MassiveIntentClassification (tl)": 38.01, + "MassiveIntentClassification (pt)": 41.53, + "MassiveIntentClassification (te)": 2.92, + "MassiveIntentClassification (ro)": 35.01, + "MassiveIntentClassification (am)": 3.09, + "MassiveIntentClassification (hy)": 3.44, + "MassiveIntentClassification (th)": 4.5, + "MassiveIntentClassification (hu)": 35.67, + "MassiveIntentClassification (sw)": 37.86, + "MassiveIntentClassification (nl)": 38.91, + "MassiveIntentClassification (nb)": 39.67, + "MassiveIntentClassification (kn)": 3.64, + "MassiveIntentClassification (fa)": 9.91, + "MassiveIntentClassification (mn)": 36.35, + "MassiveIntentClassification (sl)": 41.18, + "MassiveIntentClassification (ta)": 3.02, + "MassiveIntentClassification (ja)": 5.08, + "MassiveIntentClassification (sv)": 40.52, + "MassiveIntentClassification (bn)": 3.2, + "MassiveIntentClassification (jv)": 37.07, + "MassiveIntentClassification (km)": 4.63, + "MassiveIntentClassification (af)": 39.21, + "MassiveIntentClassification (he)": 17.66, + "MassiveIntentClassification (my)": 3.94, + "MassiveIntentClassification (zh-TW)": 7.38, + "MassiveIntentClassification (da)": 41.31, + "MassiveIntentClassification (fr)": 38.49, + "MassiveIntentClassification (lv)": 37.51, + "MassiveIntentClassification (is)": 35.77, + "MassiveIntentClassification (es)": 38.33, + "MassiveIntentClassification (ur)": 9.69, + "MassiveIntentClassification (ml)": 3.03, + "MassiveIntentClassification (cy)": 37.7, + "MassiveIntentClassification (zh-CN)": 6.54, + "MassiveIntentClassification (en)": 51.68, + "MassiveIntentClassification (el)": 22.94, + "MassiveIntentClassification (vi)": 28.04, + "MassiveIntentClassification (ka)": 2.84, + "MassiveIntentClassification (ko)": 2.9, + "MassiveIntentClassification (it)": 43.34, + "MassiveIntentClassification (az)": 35.64, + "MassiveIntentClassification (pl)": 38.29, + "MassiveIntentClassification (ms)": 40.91, "MassiveScenarioClassification (ru)": 68.08, + "MassiveScenarioClassification (ml)": 7.2, + "MassiveScenarioClassification (he)": 23.39, + "MassiveScenarioClassification (da)": 39.88, + "MassiveScenarioClassification (lv)": 35.41, + "MassiveScenarioClassification (my)": 9.7, + "MassiveScenarioClassification (ja)": 9.41, + "MassiveScenarioClassification (az)": 33.91, + "MassiveScenarioClassification (de)": 38.2, + "MassiveScenarioClassification (es)": 38.77, + "MassiveScenarioClassification (tr)": 32.96, + "MassiveScenarioClassification (zh-CN)": 10.71, + "MassiveScenarioClassification (sw)": 39.73, + "MassiveScenarioClassification (fr)": 38.54, + "MassiveScenarioClassification (hy)": 8.77, + "MassiveScenarioClassification (is)": 34.28, + "MassiveScenarioClassification (pl)": 36.87, + "MassiveScenarioClassification (zh-TW)": 12.68, + "MassiveScenarioClassification (af)": 39.15, + "MassiveScenarioClassification (nl)": 39.05, + "MassiveScenarioClassification (am)": 7.69, + "MassiveScenarioClassification (id)": 40.03, + "MassiveScenarioClassification (sq)": 38.93, + "MassiveScenarioClassification (cy)": 39.15, + "MassiveScenarioClassification (ro)": 36.54, + "MassiveScenarioClassification (km)": 9.91, + "MassiveScenarioClassification (bn)": 7.78, + "MassiveScenarioClassification (jv)": 37.71, + "MassiveScenarioClassification (it)": 41.1, + "MassiveScenarioClassification (mn)": 36.03, + "MassiveScenarioClassification (ko)": 7.46, + "MassiveScenarioClassification (th)": 9.65, + "MassiveScenarioClassification (sl)": 41.38, + "MassiveScenarioClassification (ur)": 14.69, + "MassiveScenarioClassification (sv)": 39.18, + "MassiveScenarioClassification (en)": 55.21, + "MassiveScenarioClassification (tl)": 35.29, + "MassiveScenarioClassification (ms)": 42.59, + "MassiveScenarioClassification (fa)": 13.46, + "MassiveScenarioClassification (nb)": 38.43, + "MassiveScenarioClassification (el)": 24.37, + "MassiveScenarioClassification (te)": 7.29, + "MassiveScenarioClassification (ka)": 7.29, + "MassiveScenarioClassification (kn)": 8.07, + "MassiveScenarioClassification (vi)": 30.81, + "MassiveScenarioClassification (hu)": 34.26, + "MassiveScenarioClassification (pt)": 40.5, + "MassiveScenarioClassification (ar)": 15.72, + "MassiveScenarioClassification (hi)": 8.73, + "MassiveScenarioClassification (ta)": 6.82, + "MassiveScenarioClassification (fi)": 37.41, + "NoRecClassification": 39.2, + "NordicLangClassification": 59.34, + "PAC": 57.98, + "PolEmo2.0-IN": 40.42, + "PolEmo2.0-OUT": 30.89, "RuReviewsClassification": 54.05, "RuSciBenchGRNTIClassification": 48.53, - "RuSciBenchOECDClassification": 37.65 + "RuSciBenchOECDClassification": 37.65, + "ToxicConversationsClassification": 58.89, + "TweetSentimentExtractionClassification": 43.44 } ] }, @@ -1500,11 +6803,67 @@ "v_measure": [ { "Model": "distilrubert-small-cased-conversational", + "AlloProfClusteringP2P": 31.59, + "AlloProfClusteringS2S": 21.2, + "ArxivClusteringP2P": 18.41, + "ArxivClusteringS2S": 12.81, + "BiorxivClusteringP2P": 13.07, + "BiorxivClusteringS2S": 7.72, + "BlurbsClusteringP2P": 8.59, + "BlurbsClusteringS2S": 7.36, "GeoreviewClusteringP2P": 43.26, + "HALClusteringS2S": 3.35, "MLSUMClusteringP2P (ru)": 50.08, + "MLSUMClusteringP2P (de)": 7.49, + "MLSUMClusteringP2P (fr)": 20.49, + "MLSUMClusteringP2P (es)": 27.16, "MLSUMClusteringS2S (ru)": 51.12, + "MLSUMClusteringS2S (de)": 7.63, + "MLSUMClusteringS2S (fr)": 20.18, + "MLSUMClusteringS2S (es)": 26.82, + "MasakhaNEWSClusteringP2P (amh)": 40.28, + "MasakhaNEWSClusteringP2P (eng)": 7.58, + "MasakhaNEWSClusteringP2P (fra)": 22.87, + "MasakhaNEWSClusteringP2P (hau)": 4.18, + "MasakhaNEWSClusteringP2P (ibo)": 20.88, + "MasakhaNEWSClusteringP2P (lin)": 42.94, + "MasakhaNEWSClusteringP2P (lug)": 44.9, + "MasakhaNEWSClusteringP2P (orm)": 27.54, + "MasakhaNEWSClusteringP2P (pcm)": 22.4, + "MasakhaNEWSClusteringP2P (run)": 42.28, + "MasakhaNEWSClusteringP2P (sna)": 42.22, + "MasakhaNEWSClusteringP2P (som)": 26.63, + "MasakhaNEWSClusteringP2P (swa)": 5.87, + "MasakhaNEWSClusteringP2P (tir)": 42.99, + "MasakhaNEWSClusteringP2P (xho)": 21.02, + "MasakhaNEWSClusteringP2P (yor)": 21.54, + "MasakhaNEWSClusteringS2S (amh)": 40.06, + "MasakhaNEWSClusteringS2S (eng)": 8.71, + "MasakhaNEWSClusteringS2S (fra)": 22.43, + "MasakhaNEWSClusteringS2S (hau)": 6.42, + "MasakhaNEWSClusteringS2S (ibo)": 22.5, + "MasakhaNEWSClusteringS2S (lin)": 52.19, + "MasakhaNEWSClusteringS2S (lug)": 43.89, + "MasakhaNEWSClusteringS2S (orm)": 23.6, + "MasakhaNEWSClusteringS2S (pcm)": 42.75, + "MasakhaNEWSClusteringS2S (run)": 44.59, + "MasakhaNEWSClusteringS2S (sna)": 42.25, + "MasakhaNEWSClusteringS2S (som)": 24.67, + "MasakhaNEWSClusteringS2S (swa)": 14.31, + "MasakhaNEWSClusteringS2S (tir)": 44.02, + "MasakhaNEWSClusteringS2S (xho)": 25.97, + "MasakhaNEWSClusteringS2S (yor)": 21.79, + "MedrxivClusteringP2P": 18.17, + "MedrxivClusteringS2S": 15.25, + "RedditClustering": 11.95, + "RedditClusteringP2P": 22.76, "RuSciBenchGRNTIClusteringP2P": 37.84, - "RuSciBenchOECDClusteringP2P": 34.12 + "RuSciBenchOECDClusteringP2P": 34.12, + "StackExchangeClustering": 21.41, + "StackExchangeClusteringP2P": 22.98, + "TenKGnadClusteringP2P": 7.42, + "TenKGnadClusteringS2S": 4.29, + "TwentyNewsgroupsClustering": 11.65 } ] }, @@ -1512,13 +6871,51 @@ "max_ap": [ { "Model": "distilrubert-small-cased-conversational", + "CDSC-E": 46.48, + "FalseFriendsGermanEnglish": 48.72, "OpusparcusPC (ru)": 84.35, - "TERRa": 52.48 + "OpusparcusPC (de)": 88.05, + "OpusparcusPC (en)": 93.94, + "OpusparcusPC (fi)": 82.18, + "OpusparcusPC (fr)": 85.23, + "OpusparcusPC (sv)": 81.83, + "PSC": 62.14, + "PawsXPairClassification (de)": 48.55, + "PawsXPairClassification (en)": 43.39, + "PawsXPairClassification (es)": 47.18, + "PawsXPairClassification (fr)": 49.14, + "PawsXPairClassification (ja)": 45.87, + "PawsXPairClassification (ko)": 46.69, + "PawsXPairClassification (zh)": 49.73, + "SICK-E-PL": 45.62, + "SprintDuplicateQuestions": 33.2, + "TERRa": 52.48, + "TwitterSemEval2015": 50.27, + "TwitterURLCorpus": 63.78 }, { "Model": "distilrubert-small-cased-conversational", + "CDSC-E": 46.84, + "FalseFriendsGermanEnglish": 49.57, "OpusparcusPC (ru)": 84.35, - "TERRa": 53.02 + "OpusparcusPC (de)": 88.57, + "OpusparcusPC (en)": 93.94, + "OpusparcusPC (fi)": 82.18, + "OpusparcusPC (fr)": 85.23, + "OpusparcusPC (sv)": 81.84, + "PSC": 62.23, + "PawsXPairClassification (de)": 48.55, + "PawsXPairClassification (en)": 47.47, + "PawsXPairClassification (es)": 47.18, + "PawsXPairClassification (fr)": 49.14, + "PawsXPairClassification (ja)": 47.68, + "PawsXPairClassification (ko)": 46.69, + "PawsXPairClassification (zh)": 50.2, + "SICK-E-PL": 45.78, + "SprintDuplicateQuestions": 33.2, + "TERRa": 53.02, + "TwitterSemEval2015": 50.38, + "TwitterURLCorpus": 63.78 } ] }, @@ -1526,11 +6923,35 @@ "map": [ { "Model": "distilrubert-small-cased-conversational", - "MIRACLReranking (ru)": 13.09 + "AlloprofReranking": 32.82, + "AskUbuntuDupQuestions": 45.48, + "MindSmallReranking": 24.95, + "RuBQReranking": 42.58, + "SciDocsRR": 47.81, + "StackOverflowDupQuestions": 33.41, + "SyntecReranking": 36.86, + "T2Reranking": 52.07 }, { "Model": "distilrubert-small-cased-conversational", - "RuBQReranking": 42.58 + "MIRACLReranking (ru)": 13.09, + "MIRACLReranking (ar)": 3.73, + "MIRACLReranking (bn)": 2.86, + "MIRACLReranking (de)": 4.48, + "MIRACLReranking (en)": 10.73, + "MIRACLReranking (es)": 7.05, + "MIRACLReranking (fa)": 3.92, + "MIRACLReranking (fi)": 12.52, + "MIRACLReranking (fr)": 5.53, + "MIRACLReranking (hi)": 4.02, + "MIRACLReranking (id)": 8.43, + "MIRACLReranking (ja)": 2.56, + "MIRACLReranking (ko)": 3.66, + "MIRACLReranking (sw)": 11.13, + "MIRACLReranking (te)": 2.57, + "MIRACLReranking (th)": 2.15, + "MIRACLReranking (yo)": 9.3, + "MIRACLReranking (zh)": 2.83 } ] }, @@ -1538,9 +6959,143 @@ "ndcg_at_10": [ { "Model": "distilrubert-small-cased-conversational", + "AILACasedocs": 9.74, + "AILAStatutes": 14.09, + "ARCChallenge": 2.13, + "AlloprofRetrieval": 0.35, + "AlphaNLI": 2.5, + "AppsRetrieval": 0.07, + "ArguAna": 15.13, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 0.3, + "CmedqaRetrieval": 0.12, + "CodeFeedbackMT": 11.84, + "CodeFeedbackST": 7.42, + "CodeSearchNetCCRetrieval (python)": 13.81, + "CodeSearchNetCCRetrieval (javascript)": 22.77, + "CodeSearchNetCCRetrieval (go)": 12.62, + "CodeSearchNetCCRetrieval (ruby)": 26.22, + "CodeSearchNetCCRetrieval (java)": 16.29, + "CodeSearchNetCCRetrieval (php)": 14.17, + "CodeSearchNetRetrieval (python)": 19.08, + "CodeSearchNetRetrieval (javascript)": 10.38, + "CodeSearchNetRetrieval (go)": 7.54, + "CodeSearchNetRetrieval (ruby)": 12.38, + "CodeSearchNetRetrieval (java)": 11.9, + "CodeSearchNetRetrieval (php)": 11.26, + "CodeTransOceanContest": 15.26, + "CodeTransOceanDL": 31.1, + "CosQA": 2.93, + "CovidRetrieval": 0.0, + "DBPedia": 1.58, + "FEVER": 1.69, + "FiQA2018": 1.66, + "GerDaLIR": 0.22, + "GerDaLIRSmall": 0.51, + "GermanQuAD-Retrieval": 5.35, + "HellaSwag": 5.59, + "HotpotQA": 5.82, + "LEMBNarrativeQARetrieval": 1.9, + "LEMBQMSumRetrieval": 6.27, + "LEMBSummScreenFDRetrieval": 5.86, + "LEMBWikimQARetrieval": 16.75, + "LeCaRDv2": 7.95, + "LegalBenchConsumerContractsQA": 7.61, + "LegalBenchCorporateLobbying": 40.71, + "LegalQuAD": 3.7, + "LegalSummarization": 34.95, "MIRACLRetrieval (ru)": 2.39, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.28, + "MIRACLRetrieval (en)": 1.23, + "MIRACLRetrieval (es)": 0.08, + "MIRACLRetrieval (fa)": 0.03, + "MIRACLRetrieval (fi)": 1.02, + "MIRACLRetrieval (fr)": 0.16, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.28, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.22, + "MIRACLRetrieval (sw)": 1.95, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 4.42, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 0.63, + "MintakaRetrieval (ar)": 0.51, + "MintakaRetrieval (de)": 0.75, + "MintakaRetrieval (es)": 0.69, + "MintakaRetrieval (fr)": 1.06, + "MintakaRetrieval (hi)": 0.42, + "MintakaRetrieval (it)": 1.21, + "MintakaRetrieval (ja)": 0.21, + "MintakaRetrieval (pt)": 0.86, + "NFCorpus": 3.51, + "NQ": 0.1, + "PIQA": 5.05, + "Quail": 0.08, + "QuoraRetrieval": 65.11, + "RARbCode": 0.0, + "RARbMath": 6.96, "RiaNewsRetrieval": 4.14, - "RuBQRetrieval": 10.6 + "RuBQRetrieval": 10.6, + "SCIDOCS": 0.48, + "SIQA": 0.1, + "SciFact": 6.63, + "SciFact-PL": 0.71, + "SpartQA": 5.81, + "StackOverflowQA": 13.99, + "SyntecRetrieval": 10.17, + "SyntheticText2SQL": 3.4, + "TRECCOVID": 15.18, + "TRECCOVID-PL": 4.67, + "TempReasonL1": 0.88, + "TempReasonL2Fact": 1.91, + "TempReasonL2Pure": 0.09, + "TempReasonL3Fact": 2.41, + "TempReasonL3Pure": 2.14, + "Touche2020": 1.9, + "WinoGrande": 0.62, + "XMarket (de)": 3.29, + "XMarket (en)": 3.24, + "XMarket (es)": 2.58, + "XPQARetrieval (ara-ara)": 1.83, + "XPQARetrieval (eng-ara)": 0.67, + "XPQARetrieval (ara-eng)": 1.23, + "XPQARetrieval (deu-deu)": 19.74, + "XPQARetrieval (eng-deu)": 0.54, + "XPQARetrieval (deu-eng)": 1.64, + "XPQARetrieval (spa-spa)": 13.1, + "XPQARetrieval (eng-spa)": 0.9, + "XPQARetrieval (spa-eng)": 1.78, + "XPQARetrieval (fra-fra)": 19.06, + "XPQARetrieval (eng-fra)": 1.01, + "XPQARetrieval (fra-eng)": 2.85, + "XPQARetrieval (hin-hin)": 4.81, + "XPQARetrieval (eng-hin)": 1.63, + "XPQARetrieval (hin-eng)": 0.93, + "XPQARetrieval (ita-ita)": 30.64, + "XPQARetrieval (eng-ita)": 2.4, + "XPQARetrieval (ita-eng)": 4.45, + "XPQARetrieval (jpn-jpn)": 1.8, + "XPQARetrieval (eng-jpn)": 0.53, + "XPQARetrieval (jpn-eng)": 0.82, + "XPQARetrieval (kor-kor)": 1.95, + "XPQARetrieval (eng-kor)": 1.11, + "XPQARetrieval (kor-eng)": 1.03, + "XPQARetrieval (pol-pol)": 10.77, + "XPQARetrieval (eng-pol)": 0.84, + "XPQARetrieval (pol-eng)": 2.65, + "XPQARetrieval (por-por)": 11.69, + "XPQARetrieval (eng-por)": 1.09, + "XPQARetrieval (por-eng)": 1.8, + "XPQARetrieval (tam-tam)": 0.81, + "XPQARetrieval (eng-tam)": 1.15, + "XPQARetrieval (tam-eng)": 0.54, + "XPQARetrieval (cmn-cmn)": 4.74, + "XPQARetrieval (eng-cmn)": 0.63, + "XPQARetrieval (cmn-eng)": 0.75 } ] }, @@ -1548,15 +7103,129 @@ "cosine_spearman": [ { "Model": "distilrubert-small-cased-conversational", + "BIOSSES": 47.99, + "CDSC-R": 66.98, + "GermanSTSBenchmark": 40.07, + "SICK-R": 56.7, + "SICK-R-PL": 43.36, + "SICKFr": 54.27, + "STS12": 40.83, + "STS13": 52.22, + "STS14": 46.76, + "STS15": 60.64, + "STS16": 64.37, + "STS17 (ko-ko)": 7.82, + "STS17 (fr-en)": 26.82, + "STS17 (it-en)": 22.21, + "STS17 (ar-ar)": 27.9, + "STS17 (es-es)": 47.95, + "STS17 (en-tr)": 19.37, + "STS17 (en-en)": 60.3, + "STS17 (nl-en)": 18.33, + "STS17 (es-en)": 16.25, + "STS17 (en-de)": 16.62, + "STS17 (en-ar)": 11.54, + "STS22 (pl-en)": 14.03, + "STS22 (fr-pl)": 39.44, + "STS22 (ar)": 26.69, + "STS22 (fr)": 50.98, + "STS22 (en)": 44.27, + "STS22 (pl)": 16.55, + "STS22 (de)": 23.32, + "STS22 (de-en)": 27.38, + "STS22 (it)": 38.44, + "STS22 (es)": 37.91, + "STS22 (zh-en)": 7.34, + "STS22 (de-pl)": -29.47, + "STS22 (tr)": 10.5, + "STS22 (es-en)": 21.73, + "STS22 (es-it)": 20.72, + "STS22 (de-fr)": 1.78, + "STS22 (zh)": 18.15, + "STSB": 17.17, + "STSBenchmark": 55.97, + "STSBenchmarkMultilingualSTS (en)": 55.97, + "STSBenchmarkMultilingualSTS (es)": 49.87, + "STSBenchmarkMultilingualSTS (nl)": 46.12, + "STSBenchmarkMultilingualSTS (pl)": 46.61, + "STSBenchmarkMultilingualSTS (zh)": 15.41, + "STSBenchmarkMultilingualSTS (fr)": 51.0, + "STSBenchmarkMultilingualSTS (it)": 48.59, + "STSBenchmarkMultilingualSTS (pt)": 45.29, + "STSBenchmarkMultilingualSTS (de)": 42.19 + }, + { + "Model": "distilrubert-small-cased-conversational", + "BIOSSES": 47.99, + "CDSC-R": 66.98, + "GermanSTSBenchmark": 40.07, "RUParaPhraserSTS": 55.01, "RuSTSBenchmarkSTS": 61.72, + "SICK-R": 56.7, + "SICK-R-PL": 43.36, + "SICKFr": 54.27, + "STS12": 40.83, + "STS13": 52.22, + "STS14": 46.76, + "STS15": 60.64, + "STS16": 64.37, + "STS17 (ko-ko)": 7.35, + "STS17 (fr-en)": 26.82, + "STS17 (it-en)": 22.21, + "STS17 (ar-ar)": 27.13, + "STS17 (es-es)": 47.95, + "STS17 (en-tr)": 19.37, + "STS17 (en-en)": 60.3, + "STS17 (nl-en)": 18.33, + "STS17 (es-en)": 16.25, + "STS17 (en-de)": 16.62, + "STS17 (en-ar)": 11.54, "STS22 (ru)": 51.87, - "STSBenchmarkMultilingualSTS (ru)": 61.6 + "STS22 (pl-en)": 14.03, + "STS22 (fr-pl)": 39.44, + "STS22 (ar)": 26.71, + "STS22 (fr)": 50.98, + "STS22 (en)": 44.27, + "STS22 (pl)": 16.75, + "STS22 (de)": 23.32, + "STS22 (de-en)": 27.38, + "STS22 (it)": 38.44, + "STS22 (es)": 37.91, + "STS22 (zh-en)": 7.34, + "STS22 (de-pl)": -29.47, + "STS22 (tr)": 10.5, + "STS22 (es-en)": 21.73, + "STS22 (es-it)": 20.72, + "STS22 (de-fr)": 1.78, + "STS22 (zh)": 18.15, + "STSB": 17.21, + "STSBenchmark": 55.97, + "STSBenchmarkMultilingualSTS (ru)": 61.6, + "STSBenchmarkMultilingualSTS (en)": 55.97, + "STSBenchmarkMultilingualSTS (es)": 49.87, + "STSBenchmarkMultilingualSTS (nl)": 46.12, + "STSBenchmarkMultilingualSTS (pl)": 46.61, + "STSBenchmarkMultilingualSTS (zh)": 15.38, + "STSBenchmarkMultilingualSTS (fr)": 51.0, + "STSBenchmarkMultilingualSTS (it)": 48.59, + "STSBenchmarkMultilingualSTS (pt)": 45.29, + "STSBenchmarkMultilingualSTS (de)": 42.19 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "distilrubert-small-cased-conversational", + "SummEval": 29.45, + "SummEvalFr": 28.13 + }, + { + "Model": "distilrubert-small-cased-conversational", + "SummEval": 29.45, + "SummEvalFr": 28.13 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -1568,7 +7237,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "distilrubert-small-cased-conversational", + "Core17InstructionRetrieval": 0.22, + "News21InstructionRetrieval": -1.44, + "Robust04InstructionRetrieval": -0.6 + } + ] } }, "DeepPavlov__rubert-base-cased": { @@ -1576,6 +7252,7 @@ "f1": [ { "Model": "rubert-base-cased", + "BornholmBitextMining": 12.61, "Tatoeba (rus-eng)": 16.76 } ] @@ -1584,15 +7261,169 @@ "accuracy": [ { "Model": "rubert-base-cased", + "AllegroReviews": 23.05, + "AmazonCounterfactualClassification (en-ext)": 64.54, + "AmazonCounterfactualClassification (en)": 65.67, + "AmazonCounterfactualClassification (de)": 63.45, + "AmazonCounterfactualClassification (ja)": 53.38, + "AmazonPolarityClassification": 56.54, + "AmazonReviewsClassification (en)": 26.34, + "AmazonReviewsClassification (de)": 25.25, + "AmazonReviewsClassification (es)": 25.49, + "AmazonReviewsClassification (fr)": 25.07, + "AmazonReviewsClassification (ja)": 22.65, + "AmazonReviewsClassification (zh)": 24.87, + "AngryTweetsClassification": 43.11, + "Banking77Classification": 51.54, + "CBD": 52.84, + "DanishPoliticalCommentsClassification": 26.26, + "EmotionClassification": 20.01, "GeoreviewClassification": 37.22, "HeadlineClassification": 75.23, + "ImdbClassification": 54.6, "InappropriatenessClassification": 57.34, "KinopoiskClassification": 49.91, + "LccSentimentClassification": 38.0, + "MTOPDomainClassification (en)": 62.95, + "MTOPDomainClassification (de)": 53.03, + "MTOPDomainClassification (es)": 59.51, + "MTOPDomainClassification (fr)": 53.35, + "MTOPDomainClassification (hi)": 20.73, + "MTOPDomainClassification (th)": 15.17, + "MTOPIntentClassification (en)": 45.33, + "MTOPIntentClassification (de)": 42.59, + "MTOPIntentClassification (es)": 42.4, + "MTOPIntentClassification (fr)": 38.32, + "MTOPIntentClassification (hi)": 7.91, + "MTOPIntentClassification (th)": 4.64, + "MasakhaNEWSClassification (amh)": 31.36, + "MasakhaNEWSClassification (eng)": 68.88, + "MasakhaNEWSClassification (fra)": 57.51, + "MasakhaNEWSClassification (hau)": 47.08, + "MasakhaNEWSClassification (ibo)": 37.38, + "MasakhaNEWSClassification (lin)": 45.66, + "MasakhaNEWSClassification (lug)": 41.66, + "MasakhaNEWSClassification (orm)": 43.35, + "MasakhaNEWSClassification (pcm)": 78.75, + "MasakhaNEWSClassification (run)": 43.01, + "MasakhaNEWSClassification (sna)": 60.11, + "MasakhaNEWSClassification (som)": 35.82, + "MasakhaNEWSClassification (swa)": 38.66, + "MasakhaNEWSClassification (tir)": 24.45, + "MasakhaNEWSClassification (xho)": 54.61, + "MasakhaNEWSClassification (yor)": 41.27, "MassiveIntentClassification (ru)": 53.02, + "MassiveIntentClassification (ro)": 32.09, + "MassiveIntentClassification (hi)": 5.77, + "MassiveIntentClassification (ta)": 3.32, + "MassiveIntentClassification (nb)": 33.94, + "MassiveIntentClassification (lv)": 37.55, + "MassiveIntentClassification (ur)": 21.03, + "MassiveIntentClassification (es)": 34.14, + "MassiveIntentClassification (tl)": 31.63, + "MassiveIntentClassification (km)": 4.03, + "MassiveIntentClassification (is)": 32.66, + "MassiveIntentClassification (pt)": 36.75, + "MassiveIntentClassification (ar)": 21.45, + "MassiveIntentClassification (he)": 23.67, + "MassiveIntentClassification (kn)": 3.82, + "MassiveIntentClassification (af)": 31.89, + "MassiveIntentClassification (hu)": 31.87, + "MassiveIntentClassification (jv)": 31.14, + "MassiveIntentClassification (bn)": 2.93, + "MassiveIntentClassification (fa)": 29.0, + "MassiveIntentClassification (fi)": 33.55, + "MassiveIntentClassification (ko)": 9.3, + "MassiveIntentClassification (my)": 3.78, + "MassiveIntentClassification (th)": 4.77, + "MassiveIntentClassification (ja)": 30.31, + "MassiveIntentClassification (pl)": 36.98, + "MassiveIntentClassification (sq)": 35.35, + "MassiveIntentClassification (da)": 35.55, + "MassiveIntentClassification (sl)": 36.68, + "MassiveIntentClassification (en)": 40.37, + "MassiveIntentClassification (az)": 35.33, + "MassiveIntentClassification (hy)": 7.1, + "MassiveIntentClassification (sv)": 34.93, + "MassiveIntentClassification (te)": 3.63, + "MassiveIntentClassification (el)": 30.25, + "MassiveIntentClassification (nl)": 33.82, + "MassiveIntentClassification (tr)": 33.28, + "MassiveIntentClassification (sw)": 32.75, + "MassiveIntentClassification (ka)": 8.83, + "MassiveIntentClassification (id)": 35.82, + "MassiveIntentClassification (de)": 34.34, + "MassiveIntentClassification (vi)": 26.95, + "MassiveIntentClassification (mn)": 35.6, + "MassiveIntentClassification (cy)": 31.99, + "MassiveIntentClassification (am)": 2.9, + "MassiveIntentClassification (fr)": 36.45, + "MassiveIntentClassification (ml)": 2.63, + "MassiveIntentClassification (it)": 39.35, + "MassiveIntentClassification (zh-CN)": 46.24, + "MassiveIntentClassification (zh-TW)": 41.71, + "MassiveIntentClassification (ms)": 33.19, "MassiveScenarioClassification (ru)": 56.79, + "MassiveScenarioClassification (fr)": 36.73, + "MassiveScenarioClassification (fi)": 32.83, + "MassiveScenarioClassification (el)": 31.53, + "MassiveScenarioClassification (ml)": 6.86, + "MassiveScenarioClassification (ko)": 13.25, + "MassiveScenarioClassification (sq)": 33.63, + "MassiveScenarioClassification (sw)": 34.01, + "MassiveScenarioClassification (hu)": 32.14, + "MassiveScenarioClassification (tr)": 32.45, + "MassiveScenarioClassification (tl)": 31.62, + "MassiveScenarioClassification (ka)": 15.21, + "MassiveScenarioClassification (th)": 9.65, + "MassiveScenarioClassification (af)": 33.29, + "MassiveScenarioClassification (ar)": 22.05, + "MassiveScenarioClassification (pl)": 35.54, + "MassiveScenarioClassification (sl)": 37.54, + "MassiveScenarioClassification (km)": 9.39, + "MassiveScenarioClassification (kn)": 7.06, + "MassiveScenarioClassification (bn)": 6.99, + "MassiveScenarioClassification (ms)": 36.12, + "MassiveScenarioClassification (ta)": 7.3, + "MassiveScenarioClassification (ur)": 24.5, + "MassiveScenarioClassification (hy)": 13.37, + "MassiveScenarioClassification (ja)": 31.19, + "MassiveScenarioClassification (cy)": 33.38, + "MassiveScenarioClassification (fa)": 29.86, + "MassiveScenarioClassification (nb)": 31.87, + "MassiveScenarioClassification (vi)": 30.1, + "MassiveScenarioClassification (es)": 35.76, + "MassiveScenarioClassification (is)": 32.45, + "MassiveScenarioClassification (zh-CN)": 49.06, + "MassiveScenarioClassification (it)": 37.46, + "MassiveScenarioClassification (te)": 7.78, + "MassiveScenarioClassification (hi)": 9.65, + "MassiveScenarioClassification (he)": 24.11, + "MassiveScenarioClassification (am)": 8.15, + "MassiveScenarioClassification (sv)": 33.72, + "MassiveScenarioClassification (jv)": 32.34, + "MassiveScenarioClassification (da)": 34.44, + "MassiveScenarioClassification (mn)": 35.48, + "MassiveScenarioClassification (ro)": 34.4, + "MassiveScenarioClassification (id)": 35.64, + "MassiveScenarioClassification (en)": 44.48, + "MassiveScenarioClassification (pt)": 36.31, + "MassiveScenarioClassification (de)": 33.5, + "MassiveScenarioClassification (nl)": 34.67, + "MassiveScenarioClassification (lv)": 36.42, + "MassiveScenarioClassification (az)": 34.58, + "MassiveScenarioClassification (my)": 9.47, + "MassiveScenarioClassification (zh-TW)": 42.15, + "NoRecClassification": 38.16, + "NordicLangClassification": 60.75, + "PAC": 61.78, + "PolEmo2.0-IN": 42.33, + "PolEmo2.0-OUT": 34.19, "RuReviewsClassification": 50.74, "RuSciBenchGRNTIClassification": 48.03, - "RuSciBenchOECDClassification": 36.13 + "RuSciBenchOECDClassification": 36.13, + "ToxicConversationsClassification": 55.36, + "TweetSentimentExtractionClassification": 40.79 } ] }, @@ -1600,11 +7431,67 @@ "v_measure": [ { "Model": "rubert-base-cased", + "AlloProfClusteringP2P": 41.54, + "AlloProfClusteringS2S": 24.5, + "ArxivClusteringP2P": 19.12, + "ArxivClusteringS2S": 15.72, + "BiorxivClusteringP2P": 15.01, + "BiorxivClusteringS2S": 11.27, + "BlurbsClusteringP2P": 10.74, + "BlurbsClusteringS2S": 8.66, "GeoreviewClusteringP2P": 28.77, + "HALClusteringS2S": 9.48, "MLSUMClusteringP2P (ru)": 41.42, + "MLSUMClusteringP2P (de)": 22.69, + "MLSUMClusteringP2P (fr)": 31.79, + "MLSUMClusteringP2P (es)": 34.6, "MLSUMClusteringS2S (ru)": 40.52, + "MLSUMClusteringS2S (de)": 21.14, + "MLSUMClusteringS2S (fr)": 30.71, + "MLSUMClusteringS2S (es)": 33.77, + "MasakhaNEWSClusteringP2P (amh)": 40.38, + "MasakhaNEWSClusteringP2P (eng)": 7.24, + "MasakhaNEWSClusteringP2P (fra)": 24.89, + "MasakhaNEWSClusteringP2P (hau)": 4.33, + "MasakhaNEWSClusteringP2P (ibo)": 20.68, + "MasakhaNEWSClusteringP2P (lin)": 42.23, + "MasakhaNEWSClusteringP2P (lug)": 48.28, + "MasakhaNEWSClusteringP2P (orm)": 24.84, + "MasakhaNEWSClusteringP2P (pcm)": 29.98, + "MasakhaNEWSClusteringP2P (run)": 42.83, + "MasakhaNEWSClusteringP2P (sna)": 41.71, + "MasakhaNEWSClusteringP2P (som)": 26.34, + "MasakhaNEWSClusteringP2P (swa)": 3.39, + "MasakhaNEWSClusteringP2P (tir)": 42.65, + "MasakhaNEWSClusteringP2P (xho)": 22.04, + "MasakhaNEWSClusteringP2P (yor)": 21.69, + "MasakhaNEWSClusteringS2S (amh)": 40.03, + "MasakhaNEWSClusteringS2S (eng)": 18.53, + "MasakhaNEWSClusteringS2S (fra)": 23.47, + "MasakhaNEWSClusteringS2S (hau)": 9.65, + "MasakhaNEWSClusteringS2S (ibo)": 27.01, + "MasakhaNEWSClusteringS2S (lin)": 51.17, + "MasakhaNEWSClusteringS2S (lug)": 43.62, + "MasakhaNEWSClusteringS2S (orm)": 21.9, + "MasakhaNEWSClusteringS2S (pcm)": 39.29, + "MasakhaNEWSClusteringS2S (run)": 45.33, + "MasakhaNEWSClusteringS2S (sna)": 42.96, + "MasakhaNEWSClusteringS2S (som)": 25.29, + "MasakhaNEWSClusteringS2S (swa)": 10.43, + "MasakhaNEWSClusteringS2S (tir)": 42.77, + "MasakhaNEWSClusteringS2S (xho)": 25.79, + "MasakhaNEWSClusteringS2S (yor)": 24.62, + "MedrxivClusteringP2P": 19.13, + "MedrxivClusteringS2S": 16.96, + "RedditClustering": 14.16, + "RedditClusteringP2P": 18.47, "RuSciBenchGRNTIClusteringP2P": 28.29, - "RuSciBenchOECDClusteringP2P": 26.67 + "RuSciBenchOECDClusteringP2P": 26.67, + "StackExchangeClustering": 21.35, + "StackExchangeClusteringP2P": 21.64, + "TenKGnadClusteringP2P": 16.29, + "TenKGnadClusteringS2S": 10.93, + "TwentyNewsgroupsClustering": 12.36 } ] }, @@ -1612,13 +7499,51 @@ "max_ap": [ { "Model": "rubert-base-cased", + "CDSC-E": 35.01, + "FalseFriendsGermanEnglish": 48.57, "OpusparcusPC (ru)": 81.65, - "TERRa": 52.12 + "OpusparcusPC (de)": 89.13, + "OpusparcusPC (en)": 92.78, + "OpusparcusPC (fi)": 79.66, + "OpusparcusPC (fr)": 84.68, + "OpusparcusPC (sv)": 78.49, + "PSC": 64.57, + "PawsXPairClassification (de)": 49.6, + "PawsXPairClassification (en)": 46.62, + "PawsXPairClassification (es)": 49.86, + "PawsXPairClassification (fr)": 51.09, + "PawsXPairClassification (ja)": 46.04, + "PawsXPairClassification (ko)": 47.52, + "PawsXPairClassification (zh)": 52.77, + "SICK-E-PL": 47.26, + "SprintDuplicateQuestions": 21.8, + "TERRa": 52.12, + "TwitterSemEval2015": 42.31, + "TwitterURLCorpus": 55.52 }, { "Model": "rubert-base-cased", + "CDSC-E": 35.32, + "FalseFriendsGermanEnglish": 49.53, "OpusparcusPC (ru)": 81.65, - "TERRa": 53.17 + "OpusparcusPC (de)": 89.13, + "OpusparcusPC (en)": 92.78, + "OpusparcusPC (fi)": 79.66, + "OpusparcusPC (fr)": 84.68, + "OpusparcusPC (sv)": 78.49, + "PSC": 66.18, + "PawsXPairClassification (de)": 49.96, + "PawsXPairClassification (en)": 47.53, + "PawsXPairClassification (es)": 49.86, + "PawsXPairClassification (fr)": 51.09, + "PawsXPairClassification (ja)": 46.42, + "PawsXPairClassification (ko)": 47.71, + "PawsXPairClassification (zh)": 52.89, + "SICK-E-PL": 47.31, + "SprintDuplicateQuestions": 21.8, + "TERRa": 53.17, + "TwitterSemEval2015": 42.31, + "TwitterURLCorpus": 55.52 } ] }, @@ -1626,11 +7551,35 @@ "map": [ { "Model": "rubert-base-cased", - "MIRACLReranking (ru)": 13.27 + "AlloprofReranking": 29.26, + "AskUbuntuDupQuestions": 46.46, + "MindSmallReranking": 28.06, + "RuBQReranking": 41.65, + "SciDocsRR": 45.85, + "StackOverflowDupQuestions": 29.76, + "SyntecReranking": 35.08, + "T2Reranking": 53.26 }, { "Model": "rubert-base-cased", - "RuBQReranking": 41.65 + "MIRACLReranking (ru)": 13.27, + "MIRACLReranking (ar)": 2.85, + "MIRACLReranking (bn)": 2.97, + "MIRACLReranking (de)": 6.21, + "MIRACLReranking (en)": 7.53, + "MIRACLReranking (es)": 5.16, + "MIRACLReranking (fa)": 3.53, + "MIRACLReranking (fi)": 6.96, + "MIRACLReranking (fr)": 4.97, + "MIRACLReranking (hi)": 5.1, + "MIRACLReranking (id)": 5.28, + "MIRACLReranking (ja)": 4.89, + "MIRACLReranking (ko)": 5.86, + "MIRACLReranking (sw)": 4.75, + "MIRACLReranking (te)": 5.36, + "MIRACLReranking (th)": 2.72, + "MIRACLReranking (yo)": 4.67, + "MIRACLReranking (zh)": 3.95 } ] }, @@ -1638,9 +7587,143 @@ "ndcg_at_10": [ { "Model": "rubert-base-cased", + "AILACasedocs": 10.35, + "AILAStatutes": 11.53, + "ARCChallenge": 0.56, + "AlloprofRetrieval": 0.24, + "AlphaNLI": 0.56, + "AppsRetrieval": 0.04, + "ArguAna": 13.02, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 0.15, + "CmedqaRetrieval": 0.9, + "CodeFeedbackMT": 8.43, + "CodeFeedbackST": 3.66, + "CodeSearchNetCCRetrieval (python)": 3.77, + "CodeSearchNetCCRetrieval (javascript)": 8.05, + "CodeSearchNetCCRetrieval (go)": 4.52, + "CodeSearchNetCCRetrieval (ruby)": 9.34, + "CodeSearchNetCCRetrieval (java)": 5.94, + "CodeSearchNetCCRetrieval (php)": 4.64, + "CodeSearchNetRetrieval (python)": 6.47, + "CodeSearchNetRetrieval (javascript)": 2.31, + "CodeSearchNetRetrieval (go)": 2.67, + "CodeSearchNetRetrieval (ruby)": 5.26, + "CodeSearchNetRetrieval (java)": 2.8, + "CodeSearchNetRetrieval (php)": 1.9, + "CodeTransOceanContest": 5.49, + "CodeTransOceanDL": 29.16, + "CosQA": 0.14, + "CovidRetrieval": 0.07, + "DBPedia": 0.83, + "FEVER": 0.83, + "FiQA2018": 0.16, + "GerDaLIR": 0.25, + "GerDaLIRSmall": 0.62, + "GermanQuAD-Retrieval": 5.16, + "HellaSwag": 1.71, + "HotpotQA": 0.61, + "LEMBNarrativeQARetrieval": 2.92, + "LEMBQMSumRetrieval": 5.52, + "LEMBSummScreenFDRetrieval": 6.19, + "LEMBWikimQARetrieval": 15.54, + "LeCaRDv2": 26.33, + "LegalBenchConsumerContractsQA": 9.09, + "LegalBenchCorporateLobbying": 15.85, + "LegalQuAD": 3.65, + "LegalSummarization": 16.31, "MIRACLRetrieval (ru)": 0.88, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.0, + "MIRACLRetrieval (en)": 0.04, + "MIRACLRetrieval (es)": 0.0, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.05, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.0, + "MIRACLRetrieval (ja)": 0.13, + "MIRACLRetrieval (ko)": 0.22, + "MIRACLRetrieval (sw)": 0.09, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 0.99, + "MIRACLRetrieval (zh)": 0.09, + "MSMARCO": 0.12, + "MintakaRetrieval (ar)": 0.78, + "MintakaRetrieval (de)": 0.94, + "MintakaRetrieval (es)": 1.16, + "MintakaRetrieval (fr)": 1.13, + "MintakaRetrieval (hi)": 0.44, + "MintakaRetrieval (it)": 0.91, + "MintakaRetrieval (ja)": 0.62, + "MintakaRetrieval (pt)": 0.76, + "NFCorpus": 2.69, + "NQ": 0.09, + "PIQA": 1.66, + "Quail": 0.03, + "QuoraRetrieval": 44.56, + "RARbCode": 0.03, + "RARbMath": 3.54, "RiaNewsRetrieval": 5.58, - "RuBQRetrieval": 9.52 + "RuBQRetrieval": 9.52, + "SCIDOCS": 0.35, + "SIQA": 0.0, + "SciFact": 1.06, + "SciFact-PL": 0.42, + "SpartQA": 0.03, + "StackOverflowQA": 8.75, + "SyntecRetrieval": 6.26, + "SyntheticText2SQL": 1.79, + "TRECCOVID": 4.05, + "TRECCOVID-PL": 2.64, + "TempReasonL1": 0.24, + "TempReasonL2Fact": 1.06, + "TempReasonL2Pure": 0.22, + "TempReasonL3Fact": 1.01, + "TempReasonL3Pure": 2.61, + "Touche2020": 0.0, + "WinoGrande": 0.03, + "XMarket (de)": 0.96, + "XMarket (en)": 0.95, + "XMarket (es)": 0.76, + "XPQARetrieval (ara-ara)": 1.72, + "XPQARetrieval (eng-ara)": 0.9, + "XPQARetrieval (ara-eng)": 0.72, + "XPQARetrieval (deu-deu)": 8.62, + "XPQARetrieval (eng-deu)": 0.65, + "XPQARetrieval (deu-eng)": 2.13, + "XPQARetrieval (spa-spa)": 5.23, + "XPQARetrieval (eng-spa)": 0.57, + "XPQARetrieval (spa-eng)": 2.27, + "XPQARetrieval (fra-fra)": 7.82, + "XPQARetrieval (eng-fra)": 0.87, + "XPQARetrieval (fra-eng)": 2.62, + "XPQARetrieval (hin-hin)": 2.85, + "XPQARetrieval (eng-hin)": 2.81, + "XPQARetrieval (hin-eng)": 0.85, + "XPQARetrieval (ita-ita)": 15.62, + "XPQARetrieval (eng-ita)": 1.4, + "XPQARetrieval (ita-eng)": 3.26, + "XPQARetrieval (jpn-jpn)": 15.82, + "XPQARetrieval (eng-jpn)": 0.83, + "XPQARetrieval (jpn-eng)": 0.71, + "XPQARetrieval (kor-kor)": 1.9, + "XPQARetrieval (eng-kor)": 1.08, + "XPQARetrieval (kor-eng)": 0.85, + "XPQARetrieval (pol-pol)": 7.68, + "XPQARetrieval (eng-pol)": 0.82, + "XPQARetrieval (pol-eng)": 1.68, + "XPQARetrieval (por-por)": 5.3, + "XPQARetrieval (eng-por)": 0.57, + "XPQARetrieval (por-eng)": 1.63, + "XPQARetrieval (tam-tam)": 1.57, + "XPQARetrieval (eng-tam)": 1.17, + "XPQARetrieval (tam-eng)": 0.8, + "XPQARetrieval (cmn-cmn)": 18.63, + "XPQARetrieval (eng-cmn)": 1.06, + "XPQARetrieval (cmn-eng)": 1.79 } ] }, @@ -1648,15 +7731,129 @@ "cosine_spearman": [ { "Model": "rubert-base-cased", + "BIOSSES": 30.22, + "CDSC-R": 51.41, + "GermanSTSBenchmark": 36.37, + "SICK-R": 44.22, + "SICK-R-PL": 44.3, + "SICKFr": 44.03, + "STS12": 27.75, + "STS13": 39.83, + "STS14": 34.89, + "STS15": 43.05, + "STS16": 45.66, + "STS17 (nl-en)": 18.44, + "STS17 (en-ar)": 1.36, + "STS17 (ar-ar)": 30.14, + "STS17 (en-de)": 10.88, + "STS17 (es-en)": 3.39, + "STS17 (it-en)": 19.23, + "STS17 (en-tr)": -7.83, + "STS17 (fr-en)": 15.78, + "STS17 (en-en)": 56.29, + "STS17 (es-es)": 42.21, + "STS17 (ko-ko)": 13.94, + "STS22 (it)": 30.56, + "STS22 (de)": 24.22, + "STS22 (en)": 38.33, + "STS22 (es)": 36.12, + "STS22 (zh)": 32.97, + "STS22 (ar)": 22.72, + "STS22 (tr)": 12.35, + "STS22 (es-en)": 21.75, + "STS22 (zh-en)": 11.04, + "STS22 (es-it)": 20.11, + "STS22 (fr)": 44.7, + "STS22 (de-en)": 22.58, + "STS22 (de-fr)": 17.12, + "STS22 (pl)": 5.9, + "STS22 (pl-en)": 30.9, + "STS22 (fr-pl)": 39.44, + "STS22 (de-pl)": -25.81, + "STSB": 36.87, + "STSBenchmark": 39.84, + "STSBenchmarkMultilingualSTS (de)": 40.7, + "STSBenchmarkMultilingualSTS (nl)": 33.12, + "STSBenchmarkMultilingualSTS (en)": 39.84, + "STSBenchmarkMultilingualSTS (es)": 38.62, + "STSBenchmarkMultilingualSTS (fr)": 44.46, + "STSBenchmarkMultilingualSTS (pl)": 37.96, + "STSBenchmarkMultilingualSTS (pt)": 36.13, + "STSBenchmarkMultilingualSTS (it)": 35.86, + "STSBenchmarkMultilingualSTS (zh)": 36.96 + }, + { + "Model": "rubert-base-cased", + "BIOSSES": 30.22, + "CDSC-R": 51.41, + "GermanSTSBenchmark": 36.37, "RUParaPhraserSTS": 49.72, "RuSTSBenchmarkSTS": 53.95, + "SICK-R": 44.22, + "SICK-R-PL": 44.3, + "SICKFr": 44.02, + "STS12": 27.75, + "STS13": 39.83, + "STS14": 34.89, + "STS15": 43.05, + "STS16": 45.66, + "STS17 (nl-en)": 18.44, + "STS17 (en-ar)": 1.36, + "STS17 (ar-ar)": 30.13, + "STS17 (en-de)": 10.88, + "STS17 (es-en)": 3.39, + "STS17 (it-en)": 19.23, + "STS17 (en-tr)": -7.83, + "STS17 (fr-en)": 15.78, + "STS17 (en-en)": 56.29, + "STS17 (es-es)": 42.21, + "STS17 (ko-ko)": 14.2, "STS22 (ru)": 34.98, - "STSBenchmarkMultilingualSTS (ru)": 53.76 + "STS22 (it)": 30.56, + "STS22 (de)": 24.23, + "STS22 (en)": 38.33, + "STS22 (es)": 36.12, + "STS22 (zh)": 32.97, + "STS22 (ar)": 22.74, + "STS22 (tr)": 12.35, + "STS22 (es-en)": 21.75, + "STS22 (zh-en)": 11.04, + "STS22 (es-it)": 20.11, + "STS22 (fr)": 44.7, + "STS22 (de-en)": 22.58, + "STS22 (de-fr)": 17.12, + "STS22 (pl)": 5.72, + "STS22 (pl-en)": 30.9, + "STS22 (fr-pl)": 39.44, + "STS22 (de-pl)": -25.81, + "STSB": 36.87, + "STSBenchmark": 39.84, + "STSBenchmarkMultilingualSTS (ru)": 53.76, + "STSBenchmarkMultilingualSTS (de)": 40.7, + "STSBenchmarkMultilingualSTS (nl)": 33.12, + "STSBenchmarkMultilingualSTS (en)": 39.84, + "STSBenchmarkMultilingualSTS (es)": 38.62, + "STSBenchmarkMultilingualSTS (fr)": 44.46, + "STSBenchmarkMultilingualSTS (pl)": 37.96, + "STSBenchmarkMultilingualSTS (pt)": 36.13, + "STSBenchmarkMultilingualSTS (it)": 35.86, + "STSBenchmarkMultilingualSTS (zh)": 36.96 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-base-cased", + "SummEval": 29.56, + "SummEvalFr": 28.42 + }, + { + "Model": "rubert-base-cased", + "SummEval": 29.56, + "SummEvalFr": 28.42 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -1668,7 +7865,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "rubert-base-cased", + "Core17InstructionRetrieval": -3.56, + "News21InstructionRetrieval": 4.57, + "Robust04InstructionRetrieval": 1.4 + } + ] } }, "DeepPavlov__rubert-base-cased-sentence": { @@ -1676,6 +7880,7 @@ "f1": [ { "Model": "rubert-base-cased-sentence", + "BornholmBitextMining": 15.6, "Tatoeba (rus-eng)": 20.26 } ] @@ -1684,15 +7889,169 @@ "accuracy": [ { "Model": "rubert-base-cased-sentence", + "AllegroReviews": 23.97, + "AmazonCounterfactualClassification (en-ext)": 62.27, + "AmazonCounterfactualClassification (en)": 59.58, + "AmazonCounterfactualClassification (de)": 55.93, + "AmazonCounterfactualClassification (ja)": 53.95, + "AmazonPolarityClassification": 60.81, + "AmazonReviewsClassification (en)": 28.25, + "AmazonReviewsClassification (de)": 27.28, + "AmazonReviewsClassification (es)": 28.62, + "AmazonReviewsClassification (fr)": 28.85, + "AmazonReviewsClassification (ja)": 23.6, + "AmazonReviewsClassification (zh)": 26.82, + "AngryTweetsClassification": 40.25, + "Banking77Classification": 48.98, + "CBD": 44.27, + "DanishPoliticalCommentsClassification": 27.05, + "EmotionClassification": 19.64, "GeoreviewClassification": 38.05, "HeadlineClassification": 67.64, + "ImdbClassification": 57.73, "InappropriatenessClassification": 58.27, "KinopoiskClassification": 45.86, + "LccSentimentClassification": 37.0, + "MTOPDomainClassification (en)": 59.98, + "MTOPDomainClassification (de)": 49.12, + "MTOPDomainClassification (es)": 51.0, + "MTOPDomainClassification (fr)": 49.23, + "MTOPDomainClassification (hi)": 21.92, + "MTOPDomainClassification (th)": 16.94, + "MTOPIntentClassification (en)": 39.37, + "MTOPIntentClassification (de)": 36.57, + "MTOPIntentClassification (es)": 34.91, + "MTOPIntentClassification (fr)": 32.83, + "MTOPIntentClassification (hi)": 7.64, + "MTOPIntentClassification (th)": 4.67, + "MasakhaNEWSClassification (amh)": 33.46, + "MasakhaNEWSClassification (eng)": 65.83, + "MasakhaNEWSClassification (fra)": 62.13, + "MasakhaNEWSClassification (hau)": 48.6, + "MasakhaNEWSClassification (ibo)": 41.97, + "MasakhaNEWSClassification (lin)": 57.77, + "MasakhaNEWSClassification (lug)": 43.72, + "MasakhaNEWSClassification (orm)": 37.51, + "MasakhaNEWSClassification (pcm)": 82.13, + "MasakhaNEWSClassification (run)": 45.75, + "MasakhaNEWSClassification (sna)": 60.89, + "MasakhaNEWSClassification (som)": 33.84, + "MasakhaNEWSClassification (swa)": 42.46, + "MasakhaNEWSClassification (tir)": 26.88, + "MasakhaNEWSClassification (xho)": 52.29, + "MasakhaNEWSClassification (yor)": 48.93, "MassiveIntentClassification (ru)": 49.1, + "MassiveIntentClassification (af)": 30.31, + "MassiveIntentClassification (bn)": 3.39, + "MassiveIntentClassification (el)": 31.9, + "MassiveIntentClassification (cy)": 30.38, + "MassiveIntentClassification (da)": 33.35, + "MassiveIntentClassification (ro)": 31.19, + "MassiveIntentClassification (mn)": 34.67, + "MassiveIntentClassification (hi)": 6.01, + "MassiveIntentClassification (hu)": 30.56, + "MassiveIntentClassification (id)": 34.3, + "MassiveIntentClassification (km)": 4.34, + "MassiveIntentClassification (am)": 3.09, + "MassiveIntentClassification (sq)": 34.82, + "MassiveIntentClassification (fi)": 31.83, + "MassiveIntentClassification (th)": 5.14, + "MassiveIntentClassification (vi)": 26.94, + "MassiveIntentClassification (pl)": 35.34, + "MassiveIntentClassification (te)": 3.21, + "MassiveIntentClassification (pt)": 34.8, + "MassiveIntentClassification (sl)": 33.18, + "MassiveIntentClassification (hy)": 7.77, + "MassiveIntentClassification (fr)": 33.06, + "MassiveIntentClassification (he)": 22.67, + "MassiveIntentClassification (ar)": 23.08, + "MassiveIntentClassification (ta)": 3.34, + "MassiveIntentClassification (zh-CN)": 48.85, + "MassiveIntentClassification (ko)": 9.85, + "MassiveIntentClassification (is)": 29.98, + "MassiveIntentClassification (ms)": 31.07, + "MassiveIntentClassification (ml)": 2.89, + "MassiveIntentClassification (nb)": 31.28, + "MassiveIntentClassification (tl)": 31.0, + "MassiveIntentClassification (de)": 32.2, + "MassiveIntentClassification (ja)": 32.27, + "MassiveIntentClassification (fa)": 28.38, + "MassiveIntentClassification (en)": 37.12, + "MassiveIntentClassification (zh-TW)": 43.83, + "MassiveIntentClassification (nl)": 31.93, + "MassiveIntentClassification (tr)": 31.73, + "MassiveIntentClassification (es)": 31.66, + "MassiveIntentClassification (az)": 35.5, + "MassiveIntentClassification (ur)": 21.08, + "MassiveIntentClassification (my)": 4.09, + "MassiveIntentClassification (lv)": 34.72, + "MassiveIntentClassification (ka)": 9.45, + "MassiveIntentClassification (it)": 35.87, + "MassiveIntentClassification (jv)": 28.62, + "MassiveIntentClassification (sw)": 29.21, + "MassiveIntentClassification (sv)": 33.01, + "MassiveIntentClassification (kn)": 3.79, "MassiveScenarioClassification (ru)": 51.91, + "MassiveScenarioClassification (zh-CN)": 51.12, + "MassiveScenarioClassification (id)": 32.58, + "MassiveScenarioClassification (de)": 32.16, + "MassiveScenarioClassification (tl)": 31.56, + "MassiveScenarioClassification (ja)": 33.43, + "MassiveScenarioClassification (hy)": 13.76, + "MassiveScenarioClassification (th)": 9.85, + "MassiveScenarioClassification (my)": 10.63, + "MassiveScenarioClassification (fi)": 31.93, + "MassiveScenarioClassification (mn)": 33.53, + "MassiveScenarioClassification (fa)": 28.15, + "MassiveScenarioClassification (nl)": 33.1, + "MassiveScenarioClassification (ro)": 34.53, + "MassiveScenarioClassification (el)": 33.91, + "MassiveScenarioClassification (az)": 34.63, + "MassiveScenarioClassification (zh-TW)": 45.25, + "MassiveScenarioClassification (ko)": 13.87, + "MassiveScenarioClassification (am)": 7.97, + "MassiveScenarioClassification (ml)": 7.05, + "MassiveScenarioClassification (bn)": 7.62, + "MassiveScenarioClassification (it)": 35.72, + "MassiveScenarioClassification (hu)": 31.62, + "MassiveScenarioClassification (da)": 33.28, + "MassiveScenarioClassification (is)": 31.24, + "MassiveScenarioClassification (sl)": 34.1, + "MassiveScenarioClassification (sv)": 32.09, + "MassiveScenarioClassification (ms)": 32.15, + "MassiveScenarioClassification (vi)": 30.31, + "MassiveScenarioClassification (ar)": 23.6, + "MassiveScenarioClassification (fr)": 35.38, + "MassiveScenarioClassification (km)": 9.61, + "MassiveScenarioClassification (pl)": 35.88, + "MassiveScenarioClassification (ur)": 25.08, + "MassiveScenarioClassification (sq)": 35.52, + "MassiveScenarioClassification (pt)": 35.7, + "MassiveScenarioClassification (kn)": 7.82, + "MassiveScenarioClassification (lv)": 33.23, + "MassiveScenarioClassification (hi)": 9.56, + "MassiveScenarioClassification (he)": 23.09, + "MassiveScenarioClassification (te)": 7.38, + "MassiveScenarioClassification (af)": 30.26, + "MassiveScenarioClassification (cy)": 30.19, + "MassiveScenarioClassification (ka)": 16.23, + "MassiveScenarioClassification (sw)": 31.42, + "MassiveScenarioClassification (tr)": 32.19, + "MassiveScenarioClassification (es)": 32.66, + "MassiveScenarioClassification (nb)": 29.6, + "MassiveScenarioClassification (en)": 40.34, + "MassiveScenarioClassification (ta)": 6.79, + "MassiveScenarioClassification (jv)": 29.67, + "NoRecClassification": 39.12, + "NordicLangClassification": 51.39, + "PAC": 64.36, + "PolEmo2.0-IN": 43.75, + "PolEmo2.0-OUT": 34.64, "RuReviewsClassification": 58.34, "RuSciBenchGRNTIClassification": 52.18, - "RuSciBenchOECDClassification": 40.11 + "RuSciBenchOECDClassification": 40.11, + "ToxicConversationsClassification": 53.28, + "TweetSentimentExtractionClassification": 43.27 } ] }, @@ -1700,11 +8059,67 @@ "v_measure": [ { "Model": "rubert-base-cased-sentence", + "AlloProfClusteringP2P": 39.0, + "AlloProfClusteringS2S": 23.5, + "ArxivClusteringP2P": 19.4, + "ArxivClusteringS2S": 15.28, + "BiorxivClusteringP2P": 14.91, + "BiorxivClusteringS2S": 10.62, + "BlurbsClusteringP2P": 16.55, + "BlurbsClusteringS2S": 8.99, "GeoreviewClusteringP2P": 41.82, + "HALClusteringS2S": 10.36, "MLSUMClusteringP2P (ru)": 43.71, + "MLSUMClusteringP2P (de)": 27.24, + "MLSUMClusteringP2P (fr)": 29.91, + "MLSUMClusteringP2P (es)": 30.69, "MLSUMClusteringS2S (ru)": 45.94, + "MLSUMClusteringS2S (de)": 27.47, + "MLSUMClusteringS2S (fr)": 28.79, + "MLSUMClusteringS2S (es)": 30.33, + "MasakhaNEWSClusteringP2P (amh)": 40.3, + "MasakhaNEWSClusteringP2P (eng)": 37.13, + "MasakhaNEWSClusteringP2P (fra)": 42.57, + "MasakhaNEWSClusteringP2P (hau)": 13.22, + "MasakhaNEWSClusteringP2P (ibo)": 28.87, + "MasakhaNEWSClusteringP2P (lin)": 49.89, + "MasakhaNEWSClusteringP2P (lug)": 42.71, + "MasakhaNEWSClusteringP2P (orm)": 23.69, + "MasakhaNEWSClusteringP2P (pcm)": 62.14, + "MasakhaNEWSClusteringP2P (run)": 48.63, + "MasakhaNEWSClusteringP2P (sna)": 49.34, + "MasakhaNEWSClusteringP2P (som)": 25.51, + "MasakhaNEWSClusteringP2P (swa)": 16.72, + "MasakhaNEWSClusteringP2P (tir)": 43.32, + "MasakhaNEWSClusteringP2P (xho)": 29.13, + "MasakhaNEWSClusteringP2P (yor)": 26.91, + "MasakhaNEWSClusteringS2S (amh)": 43.23, + "MasakhaNEWSClusteringS2S (eng)": 17.18, + "MasakhaNEWSClusteringS2S (fra)": 21.71, + "MasakhaNEWSClusteringS2S (hau)": 8.85, + "MasakhaNEWSClusteringS2S (ibo)": 29.68, + "MasakhaNEWSClusteringS2S (lin)": 54.18, + "MasakhaNEWSClusteringS2S (lug)": 42.77, + "MasakhaNEWSClusteringS2S (orm)": 21.24, + "MasakhaNEWSClusteringS2S (pcm)": 33.99, + "MasakhaNEWSClusteringS2S (run)": 44.17, + "MasakhaNEWSClusteringS2S (sna)": 41.15, + "MasakhaNEWSClusteringS2S (som)": 24.67, + "MasakhaNEWSClusteringS2S (swa)": 6.4, + "MasakhaNEWSClusteringS2S (tir)": 43.32, + "MasakhaNEWSClusteringS2S (xho)": 29.98, + "MasakhaNEWSClusteringS2S (yor)": 26.69, + "MedrxivClusteringP2P": 17.86, + "MedrxivClusteringS2S": 16.45, + "RedditClustering": 14.72, + "RedditClusteringP2P": 23.35, "RuSciBenchGRNTIClusteringP2P": 46.29, - "RuSciBenchOECDClusteringP2P": 41.28 + "RuSciBenchOECDClusteringP2P": 41.28, + "StackExchangeClustering": 16.48, + "StackExchangeClusteringP2P": 26.54, + "TenKGnadClusteringP2P": 23.3, + "TenKGnadClusteringS2S": 8.08, + "TwentyNewsgroupsClustering": 15.85 } ] }, @@ -1712,13 +8127,51 @@ "max_ap": [ { "Model": "rubert-base-cased-sentence", + "CDSC-E": 61.23, + "FalseFriendsGermanEnglish": 48.16, "OpusparcusPC (ru)": 81.52, - "TERRa": 59.12 + "OpusparcusPC (de)": 88.51, + "OpusparcusPC (en)": 93.0, + "OpusparcusPC (fi)": 81.98, + "OpusparcusPC (fr)": 82.89, + "OpusparcusPC (sv)": 80.84, + "PSC": 59.64, + "PawsXPairClassification (de)": 51.99, + "PawsXPairClassification (en)": 53.54, + "PawsXPairClassification (es)": 51.51, + "PawsXPairClassification (fr)": 54.04, + "PawsXPairClassification (ja)": 46.79, + "PawsXPairClassification (ko)": 47.86, + "PawsXPairClassification (zh)": 54.4, + "SICK-E-PL": 60.78, + "SprintDuplicateQuestions": 47.57, + "TERRa": 59.12, + "TwitterSemEval2015": 44.14, + "TwitterURLCorpus": 70.15 }, { "Model": "rubert-base-cased-sentence", + "CDSC-E": 61.23, + "FalseFriendsGermanEnglish": 48.28, "OpusparcusPC (ru)": 81.76, - "TERRa": 59.12 + "OpusparcusPC (de)": 88.78, + "OpusparcusPC (en)": 93.0, + "OpusparcusPC (fi)": 82.1, + "OpusparcusPC (fr)": 83.27, + "OpusparcusPC (sv)": 80.84, + "PSC": 59.64, + "PawsXPairClassification (de)": 52.04, + "PawsXPairClassification (en)": 53.68, + "PawsXPairClassification (es)": 51.53, + "PawsXPairClassification (fr)": 54.1, + "PawsXPairClassification (ja)": 46.95, + "PawsXPairClassification (ko)": 47.86, + "PawsXPairClassification (zh)": 54.44, + "SICK-E-PL": 60.78, + "SprintDuplicateQuestions": 48.25, + "TERRa": 59.12, + "TwitterSemEval2015": 44.64, + "TwitterURLCorpus": 70.15 } ] }, @@ -1726,11 +8179,35 @@ "map": [ { "Model": "rubert-base-cased-sentence", - "MIRACLReranking (ru)": 13.77 + "AlloprofReranking": 33.58, + "AskUbuntuDupQuestions": 45.69, + "MindSmallReranking": 26.15, + "RuBQReranking": 39.89, + "SciDocsRR": 48.28, + "StackOverflowDupQuestions": 29.81, + "SyntecReranking": 36.0, + "T2Reranking": 53.74 }, { "Model": "rubert-base-cased-sentence", - "RuBQReranking": 39.89 + "MIRACLReranking (ru)": 13.77, + "MIRACLReranking (ar)": 3.16, + "MIRACLReranking (bn)": 3.09, + "MIRACLReranking (de)": 8.84, + "MIRACLReranking (en)": 8.53, + "MIRACLReranking (es)": 8.02, + "MIRACLReranking (fa)": 4.56, + "MIRACLReranking (fi)": 10.54, + "MIRACLReranking (fr)": 6.26, + "MIRACLReranking (hi)": 4.55, + "MIRACLReranking (id)": 7.57, + "MIRACLReranking (ja)": 4.76, + "MIRACLReranking (ko)": 3.8, + "MIRACLReranking (sw)": 8.93, + "MIRACLReranking (te)": 3.58, + "MIRACLReranking (th)": 2.07, + "MIRACLReranking (yo)": 12.73, + "MIRACLReranking (zh)": 3.48 } ] }, @@ -1738,9 +8215,143 @@ "ndcg_at_10": [ { "Model": "rubert-base-cased-sentence", + "AILACasedocs": 9.0, + "AILAStatutes": 14.13, + "ARCChallenge": 0.94, + "AlloprofRetrieval": 1.09, + "AlphaNLI": 1.74, + "AppsRetrieval": 0.29, + "ArguAna": 9.39, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 0.22, + "CmedqaRetrieval": 1.51, + "CodeFeedbackMT": 5.54, + "CodeFeedbackST": 5.55, + "CodeSearchNetCCRetrieval (python)": 4.46, + "CodeSearchNetCCRetrieval (javascript)": 7.57, + "CodeSearchNetCCRetrieval (go)": 3.93, + "CodeSearchNetCCRetrieval (ruby)": 10.55, + "CodeSearchNetCCRetrieval (java)": 5.2, + "CodeSearchNetCCRetrieval (php)": 3.92, + "CodeSearchNetRetrieval (python)": 11.06, + "CodeSearchNetRetrieval (javascript)": 6.01, + "CodeSearchNetRetrieval (go)": 7.78, + "CodeSearchNetRetrieval (ruby)": 10.14, + "CodeSearchNetRetrieval (java)": 6.09, + "CodeSearchNetRetrieval (php)": 4.98, + "CodeTransOceanContest": 9.55, + "CodeTransOceanDL": 29.35, + "CosQA": 0.88, + "CovidRetrieval": 0.07, + "DBPedia": 1.46, + "FEVER": 0.64, + "FiQA2018": 0.19, + "GerDaLIR": 0.14, + "GerDaLIRSmall": 0.6, + "GermanQuAD-Retrieval": 16.71, + "HellaSwag": 2.08, + "HotpotQA": 2.39, + "LEMBNarrativeQARetrieval": 2.64, + "LEMBQMSumRetrieval": 6.92, + "LEMBSummScreenFDRetrieval": 13.38, + "LEMBWikimQARetrieval": 11.01, + "LeCaRDv2": 22.36, + "LegalBenchConsumerContractsQA": 18.02, + "LegalBenchCorporateLobbying": 35.35, + "LegalQuAD": 2.94, + "LegalSummarization": 18.23, "MIRACLRetrieval (ru)": 1.92, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.36, + "MIRACLRetrieval (en)": 0.04, + "MIRACLRetrieval (es)": 0.15, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.73, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.27, + "MIRACLRetrieval (ja)": 0.06, + "MIRACLRetrieval (ko)": 0.52, + "MIRACLRetrieval (sw)": 1.01, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.08, + "MIRACLRetrieval (yo)": 2.03, + "MIRACLRetrieval (zh)": 0.07, + "MSMARCO": 0.2, + "MintakaRetrieval (ar)": 1.02, + "MintakaRetrieval (de)": 4.68, + "MintakaRetrieval (es)": 2.87, + "MintakaRetrieval (fr)": 4.43, + "MintakaRetrieval (hi)": 0.37, + "MintakaRetrieval (it)": 3.39, + "MintakaRetrieval (ja)": 2.12, + "MintakaRetrieval (pt)": 4.86, + "NFCorpus": 1.62, + "NQ": 0.04, + "PIQA": 1.94, + "Quail": 0.14, + "QuoraRetrieval": 48.23, + "RARbCode": 0.36, + "RARbMath": 5.57, "RiaNewsRetrieval": 6.72, - "RuBQRetrieval": 12.63 + "RuBQRetrieval": 12.63, + "SCIDOCS": 1.18, + "SIQA": 0.25, + "SciFact": 3.4, + "SciFact-PL": 1.67, + "SpartQA": 1.73, + "StackOverflowQA": 6.97, + "SyntecRetrieval": 12.78, + "SyntheticText2SQL": 9.32, + "TRECCOVID": 7.8, + "TRECCOVID-PL": 3.96, + "TempReasonL1": 0.16, + "TempReasonL2Fact": 1.73, + "TempReasonL2Pure": 0.12, + "TempReasonL3Fact": 1.79, + "TempReasonL3Pure": 3.7, + "Touche2020": 0.0, + "WinoGrande": 5.27, + "XMarket (de)": 1.2, + "XMarket (en)": 1.36, + "XMarket (es)": 1.05, + "XPQARetrieval (ara-ara)": 4.3, + "XPQARetrieval (eng-ara)": 0.87, + "XPQARetrieval (ara-eng)": 1.71, + "XPQARetrieval (deu-deu)": 12.4, + "XPQARetrieval (eng-deu)": 3.2, + "XPQARetrieval (deu-eng)": 5.85, + "XPQARetrieval (spa-spa)": 8.73, + "XPQARetrieval (eng-spa)": 2.72, + "XPQARetrieval (spa-eng)": 6.49, + "XPQARetrieval (fra-fra)": 9.02, + "XPQARetrieval (eng-fra)": 2.91, + "XPQARetrieval (fra-eng)": 4.29, + "XPQARetrieval (hin-hin)": 5.13, + "XPQARetrieval (eng-hin)": 2.55, + "XPQARetrieval (hin-eng)": 1.72, + "XPQARetrieval (ita-ita)": 17.6, + "XPQARetrieval (eng-ita)": 3.22, + "XPQARetrieval (ita-eng)": 6.54, + "XPQARetrieval (jpn-jpn)": 15.14, + "XPQARetrieval (eng-jpn)": 1.29, + "XPQARetrieval (jpn-eng)": 2.03, + "XPQARetrieval (kor-kor)": 1.99, + "XPQARetrieval (eng-kor)": 1.22, + "XPQARetrieval (kor-eng)": 2.0, + "XPQARetrieval (pol-pol)": 6.8, + "XPQARetrieval (eng-pol)": 2.06, + "XPQARetrieval (pol-eng)": 5.41, + "XPQARetrieval (por-por)": 9.19, + "XPQARetrieval (eng-por)": 1.86, + "XPQARetrieval (por-eng)": 5.75, + "XPQARetrieval (tam-tam)": 2.66, + "XPQARetrieval (eng-tam)": 2.1, + "XPQARetrieval (tam-eng)": 1.33, + "XPQARetrieval (cmn-cmn)": 15.91, + "XPQARetrieval (eng-cmn)": 1.06, + "XPQARetrieval (cmn-eng)": 4.43 } ] }, @@ -1748,15 +8359,129 @@ "cosine_spearman": [ { "Model": "rubert-base-cased-sentence", + "BIOSSES": 49.0, + "CDSC-R": 54.78, + "GermanSTSBenchmark": 50.75, + "SICK-R": 56.98, + "SICK-R-PL": 46.8, + "SICKFr": 57.97, + "STS12": 59.02, + "STS13": 46.99, + "STS14": 50.88, + "STS15": 64.58, + "STS16": 53.98, + "STS17 (en-ar)": 0.76, + "STS17 (en-de)": 24.32, + "STS17 (ko-ko)": 15.41, + "STS17 (en-en)": 67.13, + "STS17 (fr-en)": 34.08, + "STS17 (it-en)": 29.34, + "STS17 (nl-en)": 37.16, + "STS17 (en-tr)": 10.7, + "STS17 (es-en)": 30.01, + "STS17 (es-es)": 72.47, + "STS17 (ar-ar)": 45.68, + "STS22 (en)": 52.22, + "STS22 (de-en)": 35.87, + "STS22 (ar)": 24.57, + "STS22 (tr)": 14.64, + "STS22 (fr-pl)": 5.63, + "STS22 (es-en)": 44.2, + "STS22 (de-fr)": 30.19, + "STS22 (de)": 25.32, + "STS22 (es)": 39.82, + "STS22 (es-it)": 27.88, + "STS22 (zh)": 40.52, + "STS22 (fr)": 58.5, + "STS22 (zh-en)": 30.35, + "STS22 (pl-en)": 23.13, + "STS22 (pl)": 13.46, + "STS22 (de-pl)": 34.06, + "STS22 (it)": 46.45, + "STSB": 62.38, + "STSBenchmark": 55.79, + "STSBenchmarkMultilingualSTS (pl)": 47.56, + "STSBenchmarkMultilingualSTS (zh)": 61.68, + "STSBenchmarkMultilingualSTS (en)": 55.79, + "STSBenchmarkMultilingualSTS (it)": 52.36, + "STSBenchmarkMultilingualSTS (nl)": 49.51, + "STSBenchmarkMultilingualSTS (de)": 52.98, + "STSBenchmarkMultilingualSTS (es)": 56.64, + "STSBenchmarkMultilingualSTS (fr)": 55.41, + "STSBenchmarkMultilingualSTS (pt)": 51.92 + }, + { + "Model": "rubert-base-cased-sentence", + "BIOSSES": 49.0, + "CDSC-R": 54.78, + "GermanSTSBenchmark": 50.75, "RUParaPhraserSTS": 66.24, "RuSTSBenchmarkSTS": 66.03, + "SICK-R": 56.98, + "SICK-R-PL": 46.8, + "SICKFr": 57.97, + "STS12": 59.02, + "STS13": 46.99, + "STS14": 50.88, + "STS15": 64.58, + "STS16": 53.98, + "STS17 (en-ar)": 0.76, + "STS17 (en-de)": 24.32, + "STS17 (ko-ko)": 15.65, + "STS17 (en-en)": 67.13, + "STS17 (fr-en)": 34.08, + "STS17 (it-en)": 29.34, + "STS17 (nl-en)": 37.16, + "STS17 (en-tr)": 10.7, + "STS17 (es-en)": 30.01, + "STS17 (es-es)": 72.47, + "STS17 (ar-ar)": 45.67, "STS22 (ru)": 51.27, - "STSBenchmarkMultilingualSTS (ru)": 66.71 + "STS22 (en)": 52.22, + "STS22 (de-en)": 35.87, + "STS22 (ar)": 24.55, + "STS22 (tr)": 14.64, + "STS22 (fr-pl)": 5.63, + "STS22 (es-en)": 44.2, + "STS22 (de-fr)": 30.19, + "STS22 (de)": 25.32, + "STS22 (es)": 39.82, + "STS22 (es-it)": 27.88, + "STS22 (zh)": 40.52, + "STS22 (fr)": 58.5, + "STS22 (zh-en)": 30.35, + "STS22 (pl-en)": 23.13, + "STS22 (pl)": 13.37, + "STS22 (de-pl)": 34.06, + "STS22 (it)": 46.45, + "STSB": 62.38, + "STSBenchmark": 55.79, + "STSBenchmarkMultilingualSTS (ru)": 66.71, + "STSBenchmarkMultilingualSTS (pl)": 47.56, + "STSBenchmarkMultilingualSTS (zh)": 61.68, + "STSBenchmarkMultilingualSTS (en)": 55.79, + "STSBenchmarkMultilingualSTS (it)": 52.36, + "STSBenchmarkMultilingualSTS (nl)": 49.51, + "STSBenchmarkMultilingualSTS (de)": 52.98, + "STSBenchmarkMultilingualSTS (es)": 56.64, + "STSBenchmarkMultilingualSTS (fr)": 55.41, + "STSBenchmarkMultilingualSTS (pt)": 51.92 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-base-cased-sentence", + "SummEval": 30.57, + "SummEvalFr": 29.2 + }, + { + "Model": "rubert-base-cased-sentence", + "SummEval": 30.57, + "SummEvalFr": 29.2 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -1768,7 +8493,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "rubert-base-cased-sentence", + "Core17InstructionRetrieval": 1.6, + "News21InstructionRetrieval": 3.22, + "Robust04InstructionRetrieval": -4.02 + } + ] } }, "FacebookAI__xlm-roberta-base": { @@ -2392,13 +9124,13 @@ "CBD": 77.23, "DanishPoliticalCommentsClassification": 41.28, "EmotionClassification": 56.01, - "GeoreviewClassification": 53.47, - "HeadlineClassification": 85.66, + "GeoreviewClassification": 57.7, + "HeadlineClassification": 86.08, "IFlyTek": 52.7, "ImdbClassification": 95.07, - "InappropriatenessClassification": 65.29, + "InappropriatenessClassification": 67.85, "JDReview": 87.65, - "KinopoiskClassification": 64.25, + "KinopoiskClassification": 69.15, "LccSentimentClassification": 70.13, "MTOPDomainClassification (en)": 95.37, "MTOPDomainClassification (de)": 93.51, @@ -2537,9 +9269,9 @@ "PAC": 67.05, "PolEmo2.0-IN": 83.5, "PolEmo2.0-OUT": 62.55, - "RuReviewsClassification": 68.58, - "RuSciBenchGRNTIClassification": 64.56, - "RuSciBenchOECDClassification": 51.2, + "RuReviewsClassification": 72.05, + "RuSciBenchGRNTIClassification": 67.98, + "RuSciBenchOECDClassification": 54.45, "TNews": 51.99, "ToxicConversationsClassification": 68.81, "TweetSentimentExtractionClassification": 66.26, @@ -2551,7 +9283,7 @@ "v_measure": [ { "Model": "GritLM-7B", - "GeoreviewClusteringP2P": 74.06, + "GeoreviewClusteringP2P": 77.46, "MasakhaNEWSClusteringP2P (amh)": 45.1, "MasakhaNEWSClusteringP2P (eng)": 70.5, "MasakhaNEWSClusteringP2P (fra)": 73.54, @@ -2584,8 +9316,8 @@ "MasakhaNEWSClusteringS2S (tir)": 45.32, "MasakhaNEWSClusteringS2S (xho)": 28.94, "MasakhaNEWSClusteringS2S (yor)": 63.26, - "RuSciBenchGRNTIClusteringP2P": 60.01, - "RuSciBenchOECDClusteringP2P": 51.66 + "RuSciBenchGRNTIClusteringP2P": 61.57, + "RuSciBenchOECDClusteringP2P": 53.88 } ] }, @@ -2594,6 +9326,7 @@ { "Model": "GritLM-7B", "CDSC-E": 75.61, + "FalseFriendsGermanEnglish": 54.17, "OpusparcusPC (de)": 97.43, "OpusparcusPC (en)": 99.14, "OpusparcusPC (fi)": 92.05, @@ -2617,6 +9350,7 @@ { "Model": "GritLM-7B", "CDSC-E": 75.61, + "FalseFriendsGermanEnglish": 54.17, "OpusparcusPC (de)": 97.43, "OpusparcusPC (en)": 99.14, "OpusparcusPC (fi)": 92.05, @@ -2647,11 +9381,32 @@ "AskUbuntuDupQuestions": 67.37, "MMarcoReranking": 21.7, "MindSmallReranking": 31.81, - "RuBQReranking": 72.43, + "RuBQReranking": 75.75, "SciDocsRR": 86.82, "StackOverflowDupQuestions": 55.94, "SyntecReranking": 92.62, "T2Reranking": 65.64 + }, + { + "Model": "GritLM-7B", + "MIRACLReranking (ru)": 64.23, + "MIRACLReranking (ar)": 74.22, + "MIRACLReranking (bn)": 70.07, + "MIRACLReranking (de)": 54.02, + "MIRACLReranking (en)": 63.11, + "MIRACLReranking (es)": 61.47, + "MIRACLReranking (fa)": 53.69, + "MIRACLReranking (fi)": 77.58, + "MIRACLReranking (fr)": 53.51, + "MIRACLReranking (hi)": 60.69, + "MIRACLReranking (id)": 57.2, + "MIRACLReranking (ja)": 66.14, + "MIRACLReranking (ko)": 55.58, + "MIRACLReranking (sw)": 65.0, + "MIRACLReranking (te)": 70.57, + "MIRACLReranking (th)": 70.61, + "MIRACLReranking (yo)": 66.19, + "MIRACLReranking (zh)": 50.02 } ] }, @@ -2703,7 +9458,10 @@ "EcomRetrieval": 54.33, "FiQA-PL": 37.98, "FiQA2018": 59.91, + "GerDaLIR": 9.32, "GerDaLIRSmall": 20.61, + "GermanDPR": 83.09, + "GermanQuAD-Retrieval": 95.32, "HellaSwag": 39.45, "LEMBNarrativeQARetrieval": 41.45, "LEMBQMSumRetrieval": 30.36, @@ -2714,6 +9472,7 @@ "LegalBenchCorporateLobbying": 95.0, "LegalQuAD": 44.18, "LegalSummarization": 70.64, + "MIRACLRetrieval (ru)": 68.3, "MMarcoRetrieval": 76.54, "MedicalRetrieval": 55.81, "MintakaRetrieval (ar)": 25.88, @@ -2730,7 +9489,8 @@ "Quail": 11.67, "RARbCode": 84.02, "RARbMath": 82.35, - "RuBQRetrieval": 70.94, + "RiaNewsRetrieval": 82.76, + "RuBQRetrieval": 76.31, "SCIDOCS": 24.41, "SCIDOCS-PL": 18.34, "SIQA": 7.23, @@ -2751,6 +9511,9 @@ "Touche2020": 27.78, "VideoRetrieval": 53.85, "WinoGrande": 53.7, + "XMarket (de)": 25.01, + "XMarket (en)": 26.66, + "XMarket (es)": 26.2, "XPQARetrieval (ara-ara)": 45.21, "XPQARetrieval (eng-ara)": 27.34, "XPQARetrieval (ara-eng)": 39.43, @@ -2812,6 +9575,7 @@ "BIOSSES": 86.32, "BQ": 49.18, "CDSC-R": 93.38, + "GermanSTSBenchmark": 81.23, "LCQMC": 75.52, "PAWSX": 16.4, "RUParaPhraserSTS": 74.37, @@ -2835,6 +9599,8 @@ "STS17 (en-en)": 90.14, "STS17 (es-en)": 87.47, "STS17 (es-es)": 87.12, + "STS22 (ru)": 68.45, + "STS22 (en)": 68.63, "STSB": 78.12, "STSBenchmark": 85.64, "STSBenchmarkMultilingualSTS (en)": 85.65, @@ -2855,6 +9621,7 @@ "BIOSSES": 86.32, "BQ": 49.18, "CDSC-R": 93.38, + "GermanSTSBenchmark": 81.23, "LCQMC": 75.52, "PAWSX": 16.4, "RUParaPhraserSTS": 74.37, @@ -2878,6 +9645,7 @@ "STS17 (en-en)": 90.14, "STS17 (es-en)": 87.47, "STS17 (es-es)": 87.12, + "STS22 (ru)": 68.45, "STSB": 78.12, "STSBenchmark": 85.64, "STSBenchmarkMultilingualSTS (en)": 85.65, @@ -2890,6 +9658,10 @@ "STSBenchmarkMultilingualSTS (de)": 82.08, "STSBenchmarkMultilingualSTS (it)": 81.69, "STSBenchmarkMultilingualSTS (zh)": 79.73 + }, + { + "Model": "GritLM-7B", + "STS22 (en)": 68.63 } ] }, @@ -2909,8 +9681,8 @@ "accuracy": [ { "Model": "GritLM-7B", - "CEDRClassification": 42.68, - "SensitiveTopicsClassification": 28.52 + "CEDRClassification": 50.67, + "SensitiveTopicsClassification": 33.24 } ] }, @@ -4234,24 +11006,435 @@ }, "Salesforce__SFR-Embedding-Mistral": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "SFR-Embedding-Mistral", + "BornholmBitextMining": 50.24, + "Tatoeba (yid-eng)": 32.58, + "Tatoeba (heb-eng)": 82.59, + "Tatoeba (cat-eng)": 92.62, + "Tatoeba (ita-eng)": 92.07, + "Tatoeba (isl-eng)": 87.17, + "Tatoeba (awa-eng)": 67.75, + "Tatoeba (jav-eng)": 38.43, + "Tatoeba (lfn-eng)": 74.85, + "Tatoeba (spa-eng)": 98.87, + "Tatoeba (ast-eng)": 81.86, + "Tatoeba (pam-eng)": 15.44, + "Tatoeba (epo-eng)": 87.38, + "Tatoeba (mon-eng)": 38.56, + "Tatoeba (slv-eng)": 87.19, + "Tatoeba (ina-eng)": 96.25, + "Tatoeba (por-eng)": 94.44, + "Tatoeba (dsb-eng)": 66.31, + "Tatoeba (ceb-eng)": 43.93, + "Tatoeba (mhr-eng)": 16.21, + "Tatoeba (cor-eng)": 9.26, + "Tatoeba (hun-eng)": 90.48, + "Tatoeba (amh-eng)": 21.64, + "Tatoeba (ile-eng)": 84.97, + "Tatoeba (orv-eng)": 60.95, + "Tatoeba (fin-eng)": 90.56, + "Tatoeba (eus-eng)": 40.81, + "Tatoeba (hye-eng)": 63.0, + "Tatoeba (jpn-eng)": 95.32, + "Tatoeba (zsm-eng)": 94.56, + "Tatoeba (fry-eng)": 71.85, + "Tatoeba (hin-eng)": 95.25, + "Tatoeba (tat-eng)": 37.32, + "Tatoeba (ido-eng)": 77.09, + "Tatoeba (csb-eng)": 64.89, + "Tatoeba (ara-eng)": 89.16, + "Tatoeba (ces-eng)": 95.12, + "Tatoeba (urd-eng)": 83.88, + "Tatoeba (vie-eng)": 94.85, + "Tatoeba (ron-eng)": 93.29, + "Tatoeba (kzj-eng)": 12.17, + "Tatoeba (pol-eng)": 96.87, + "Tatoeba (deu-eng)": 99.47, + "Tatoeba (gla-eng)": 63.95, + "Tatoeba (uzb-eng)": 53.17, + "Tatoeba (cha-eng)": 47.21, + "Tatoeba (est-eng)": 64.2, + "Tatoeba (tuk-eng)": 42.48, + "Tatoeba (max-eng)": 65.53, + "Tatoeba (gle-eng)": 74.19, + "Tatoeba (mar-eng)": 69.32, + "Tatoeba (rus-eng)": 93.6, + "Tatoeba (tam-eng)": 72.45, + "Tatoeba (kur-eng)": 31.05, + "Tatoeba (kab-eng)": 3.37, + "Tatoeba (fao-eng)": 71.95, + "Tatoeba (cym-eng)": 72.14, + "Tatoeba (nno-eng)": 87.26, + "Tatoeba (lvs-eng)": 65.97, + "Tatoeba (arq-eng)": 50.3, + "Tatoeba (nov-eng)": 74.65, + "Tatoeba (uig-eng)": 39.11, + "Tatoeba (swe-eng)": 92.91, + "Tatoeba (wuu-eng)": 90.01, + "Tatoeba (nob-eng)": 96.83, + "Tatoeba (ukr-eng)": 94.22, + "Tatoeba (tha-eng)": 93.89, + "Tatoeba (ind-eng)": 93.7, + "Tatoeba (yue-eng)": 89.87, + "Tatoeba (glg-eng)": 91.35, + "Tatoeba (bel-eng)": 87.44, + "Tatoeba (xho-eng)": 36.47, + "Tatoeba (kor-eng)": 92.25, + "Tatoeba (dtp-eng)": 11.84, + "Tatoeba (lit-eng)": 69.19, + "Tatoeba (swh-eng)": 62.9, + "Tatoeba (bos-eng)": 91.92, + "Tatoeba (swg-eng)": 68.76, + "Tatoeba (mkd-eng)": 86.9, + "Tatoeba (lat-eng)": 88.01, + "Tatoeba (tel-eng)": 43.17, + "Tatoeba (bre-eng)": 14.57, + "Tatoeba (mal-eng)": 55.6, + "Tatoeba (tur-eng)": 94.33, + "Tatoeba (ben-eng)": 81.15, + "Tatoeba (pms-eng)": 63.17, + "Tatoeba (tzl-eng)": 51.59, + "Tatoeba (tgl-eng)": 93.37, + "Tatoeba (fra-eng)": 95.61, + "Tatoeba (sqi-eng)": 66.66, + "Tatoeba (gsw-eng)": 52.27, + "Tatoeba (arz-eng)": 70.4, + "Tatoeba (hsb-eng)": 75.51, + "Tatoeba (oci-eng)": 62.22, + "Tatoeba (ber-eng)": 7.85, + "Tatoeba (hrv-eng)": 94.38, + "Tatoeba (cbk-eng)": 81.96, + "Tatoeba (nld-eng)": 96.07, + "Tatoeba (dan-eng)": 93.81, + "Tatoeba (pes-eng)": 89.78, + "Tatoeba (aze-eng)": 76.2, + "Tatoeba (srp-eng)": 93.23, + "Tatoeba (ang-eng)": 83.46, + "Tatoeba (ell-eng)": 91.21, + "Tatoeba (khm-eng)": 36.97, + "Tatoeba (bul-eng)": 93.77, + "Tatoeba (kaz-eng)": 46.98, + "Tatoeba (kat-eng)": 60.22, + "Tatoeba (war-eng)": 43.02, + "Tatoeba (afr-eng)": 87.66, + "Tatoeba (nds-eng)": 78.34, + "Tatoeba (slk-eng)": 89.48, + "Tatoeba (cmn-eng)": 96.22 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "SFR-Embedding-Mistral", + "AllegroReviews": 57.4, + "AmazonCounterfactualClassification (en-ext)": 74.03, + "AmazonCounterfactualClassification (en)": 74.51, + "AmazonCounterfactualClassification (de)": 71.56, + "AmazonCounterfactualClassification (ja)": 74.03, + "AmazonReviewsClassification (en)": 55.43, + "AmazonReviewsClassification (de)": 54.46, + "AmazonReviewsClassification (es)": 51.63, + "AmazonReviewsClassification (fr)": 51.0, + "AmazonReviewsClassification (ja)": 50.44, + "AmazonReviewsClassification (zh)": 46.61, + "AngryTweetsClassification": 63.94, + "CBD": 71.93, + "DanishPoliticalCommentsClassification": 35.9, + "GeoreviewClassification": 57.36, + "HeadlineClassification": 87.11, + "InappropriatenessClassification": 70.64, + "KinopoiskClassification": 68.9, + "LccSentimentClassification": 63.87, + "MTOPDomainClassification (en)": 95.88, + "MTOPDomainClassification (de)": 91.7, + "MTOPDomainClassification (es)": 91.75, + "MTOPDomainClassification (fr)": 89.97, + "MTOPDomainClassification (hi)": 87.66, + "MTOPDomainClassification (th)": 84.42, + "MTOPIntentClassification (en)": 78.86, + "MTOPIntentClassification (de)": 69.41, + "MTOPIntentClassification (es)": 73.93, + "MTOPIntentClassification (fr)": 70.47, + "MTOPIntentClassification (hi)": 60.12, + "MTOPIntentClassification (th)": 62.46, + "MasakhaNEWSClassification (amh)": 53.54, + "MasakhaNEWSClassification (eng)": 87.18, + "MasakhaNEWSClassification (fra)": 83.39, + "MasakhaNEWSClassification (hau)": 77.22, + "MasakhaNEWSClassification (ibo)": 77.1, + "MasakhaNEWSClassification (lin)": 83.66, + "MasakhaNEWSClassification (lug)": 73.32, + "MasakhaNEWSClassification (orm)": 77.63, + "MasakhaNEWSClassification (pcm)": 96.46, + "MasakhaNEWSClassification (run)": 85.75, + "MasakhaNEWSClassification (sna)": 90.24, + "MasakhaNEWSClassification (som)": 65.85, + "MasakhaNEWSClassification (swa)": 79.89, + "MasakhaNEWSClassification (tir)": 33.71, + "MasakhaNEWSClassification (xho)": 88.32, + "MasakhaNEWSClassification (yor)": 85.47, + "MassiveIntentClassification (it)": 71.45, + "MassiveIntentClassification (es)": 70.96, + "MassiveIntentClassification (sv)": 70.53, + "MassiveIntentClassification (zh-CN)": 72.23, + "MassiveIntentClassification (nb)": 65.92, + "MassiveIntentClassification (nl)": 71.39, + "MassiveIntentClassification (da)": 68.55, + "MassiveIntentClassification (ur)": 57.45, + "MassiveIntentClassification (tl)": 62.7, + "MassiveIntentClassification (bn)": 58.97, + "MassiveIntentClassification (sq)": 49.97, + "MassiveIntentClassification (sl)": 63.79, + "MassiveIntentClassification (ru)": 73.86, + "MassiveIntentClassification (ms)": 66.75, + "MassiveIntentClassification (jv)": 51.45, + "MassiveIntentClassification (lv)": 51.32, + "MassiveIntentClassification (id)": 68.38, + "MassiveIntentClassification (hy)": 47.97, + "MassiveIntentClassification (sw)": 52.32, + "MassiveIntentClassification (cy)": 48.02, + "MassiveIntentClassification (hi)": 64.37, + "MassiveIntentClassification (af)": 62.53, + "MassiveIntentClassification (el)": 64.8, + "MassiveIntentClassification (is)": 53.62, + "MassiveIntentClassification (ko)": 69.6, + "MassiveIntentClassification (ka)": 46.01, + "MassiveIntentClassification (mn)": 41.87, + "MassiveIntentClassification (th)": 61.74, + "MassiveIntentClassification (ar)": 58.41, + "MassiveIntentClassification (hu)": 65.21, + "MassiveIntentClassification (ro)": 64.58, + "MassiveIntentClassification (fa)": 69.15, + "MassiveIntentClassification (vi)": 67.01, + "MassiveIntentClassification (zh-TW)": 65.92, + "MassiveIntentClassification (ml)": 42.18, + "MassiveIntentClassification (pl)": 71.17, + "MassiveIntentClassification (he)": 62.79, + "MassiveIntentClassification (km)": 40.3, + "MassiveIntentClassification (tr)": 66.87, + "MassiveIntentClassification (az)": 58.73, + "MassiveIntentClassification (ja)": 71.81, + "MassiveIntentClassification (my)": 39.62, + "MassiveIntentClassification (fi)": 64.47, + "MassiveIntentClassification (pt)": 71.78, + "MassiveIntentClassification (ta)": 45.59, + "MassiveIntentClassification (te)": 46.17, + "MassiveIntentClassification (am)": 35.64, + "MassiveIntentClassification (fr)": 71.71, + "MassiveIntentClassification (de)": 70.56, + "MassiveIntentClassification (en)": 77.05, + "MassiveIntentClassification (kn)": 46.58, + "MassiveScenarioClassification (my)": 46.43, + "MassiveScenarioClassification (mn)": 48.85, + "MassiveScenarioClassification (fa)": 74.87, + "MassiveScenarioClassification (ro)": 69.59, + "MassiveScenarioClassification (sl)": 71.38, + "MassiveScenarioClassification (lv)": 59.14, + "MassiveScenarioClassification (ms)": 73.14, + "MassiveScenarioClassification (zh-CN)": 76.77, + "MassiveScenarioClassification (ka)": 54.83, + "MassiveScenarioClassification (vi)": 72.77, + "MassiveScenarioClassification (nl)": 76.71, + "MassiveScenarioClassification (hu)": 71.21, + "MassiveScenarioClassification (de)": 77.63, + "MassiveScenarioClassification (te)": 54.0, + "MassiveScenarioClassification (en)": 79.73, + "MassiveScenarioClassification (ta)": 53.35, + "MassiveScenarioClassification (tr)": 70.87, + "MassiveScenarioClassification (nb)": 73.02, + "MassiveScenarioClassification (zh-TW)": 72.02, + "MassiveScenarioClassification (az)": 64.38, + "MassiveScenarioClassification (es)": 75.4, + "MassiveScenarioClassification (kn)": 56.38, + "MassiveScenarioClassification (pt)": 74.69, + "MassiveScenarioClassification (af)": 71.47, + "MassiveScenarioClassification (bn)": 64.78, + "MassiveScenarioClassification (hi)": 70.04, + "MassiveScenarioClassification (tl)": 71.28, + "MassiveScenarioClassification (el)": 70.39, + "MassiveScenarioClassification (id)": 74.24, + "MassiveScenarioClassification (th)": 69.72, + "MassiveScenarioClassification (pl)": 75.43, + "MassiveScenarioClassification (sq)": 59.53, + "MassiveScenarioClassification (hy)": 54.93, + "MassiveScenarioClassification (ur)": 64.5, + "MassiveScenarioClassification (ml)": 49.94, + "MassiveScenarioClassification (sv)": 77.09, + "MassiveScenarioClassification (da)": 74.31, + "MassiveScenarioClassification (am)": 42.84, + "MassiveScenarioClassification (jv)": 61.77, + "MassiveScenarioClassification (ja)": 76.23, + "MassiveScenarioClassification (km)": 48.96, + "MassiveScenarioClassification (cy)": 59.47, + "MassiveScenarioClassification (fi)": 68.89, + "MassiveScenarioClassification (ko)": 74.8, + "MassiveScenarioClassification (ar)": 67.42, + "MassiveScenarioClassification (is)": 64.42, + "MassiveScenarioClassification (sw)": 62.53, + "MassiveScenarioClassification (ru)": 77.65, + "MassiveScenarioClassification (it)": 75.4, + "MassiveScenarioClassification (he)": 66.27, + "MassiveScenarioClassification (fr)": 76.04, + "NoRecClassification": 55.04, + "NordicLangClassification": 68.8, + "PAC": 62.48, + "PolEmo2.0-IN": 84.46, + "PolEmo2.0-OUT": 54.86, + "RuReviewsClassification": 71.01, + "RuSciBenchGRNTIClassification": 68.08, + "RuSciBenchOECDClassification": 54.1 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "SFR-Embedding-Mistral", + "AlloProfClusteringP2P": 70.89, + "AlloProfClusteringS2S": 61.38, + "BlurbsClusteringP2P": 47.73, + "BlurbsClusteringS2S": 21.8, + "GeoreviewClusteringP2P": 77.21, + "HALClusteringS2S": 27.76, + "MLSUMClusteringP2P (de)": 53.27, + "MLSUMClusteringP2P (fr)": 48.16, + "MLSUMClusteringP2P (ru)": 56.1, + "MLSUMClusteringP2P (es)": 50.1, + "MLSUMClusteringS2S (de)": 52.44, + "MLSUMClusteringS2S (fr)": 47.44, + "MLSUMClusteringS2S (ru)": 54.12, + "MLSUMClusteringS2S (es)": 49.02, + "MasakhaNEWSClusteringP2P (amh)": 48.0, + "MasakhaNEWSClusteringP2P (eng)": 72.93, + "MasakhaNEWSClusteringP2P (fra)": 72.3, + "MasakhaNEWSClusteringP2P (hau)": 68.64, + "MasakhaNEWSClusteringP2P (ibo)": 67.08, + "MasakhaNEWSClusteringP2P (lin)": 79.52, + "MasakhaNEWSClusteringP2P (lug)": 53.77, + "MasakhaNEWSClusteringP2P (orm)": 55.13, + "MasakhaNEWSClusteringP2P (pcm)": 92.6, + "MasakhaNEWSClusteringP2P (run)": 61.05, + "MasakhaNEWSClusteringP2P (sna)": 79.0, + "MasakhaNEWSClusteringP2P (som)": 42.12, + "MasakhaNEWSClusteringP2P (swa)": 42.16, + "MasakhaNEWSClusteringP2P (tir)": 47.59, + "MasakhaNEWSClusteringP2P (xho)": 58.32, + "MasakhaNEWSClusteringP2P (yor)": 77.24, + "MasakhaNEWSClusteringS2S (amh)": 47.22, + "MasakhaNEWSClusteringS2S (eng)": 66.21, + "MasakhaNEWSClusteringS2S (fra)": 66.88, + "MasakhaNEWSClusteringS2S (hau)": 39.76, + "MasakhaNEWSClusteringS2S (ibo)": 62.6, + "MasakhaNEWSClusteringS2S (lin)": 80.5, + "MasakhaNEWSClusteringS2S (lug)": 51.36, + "MasakhaNEWSClusteringS2S (orm)": 31.55, + "MasakhaNEWSClusteringS2S (pcm)": 83.95, + "MasakhaNEWSClusteringS2S (run)": 59.32, + "MasakhaNEWSClusteringS2S (sna)": 53.11, + "MasakhaNEWSClusteringS2S (som)": 36.74, + "MasakhaNEWSClusteringS2S (swa)": 35.93, + "MasakhaNEWSClusteringS2S (tir)": 44.42, + "MasakhaNEWSClusteringS2S (xho)": 33.32, + "MasakhaNEWSClusteringS2S (yor)": 61.95, + "RuSciBenchGRNTIClusteringP2P": 63.05, + "RuSciBenchOECDClusteringP2P": 54.49, + "TenKGnadClusteringP2P": 55.31, + "TenKGnadClusteringS2S": 39.78 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "SFR-Embedding-Mistral", + "CDSC-E": 75.1, + "FalseFriendsGermanEnglish": 53.11, + "OpusparcusPC (de)": 97.46, + "OpusparcusPC (en)": 99.09, + "OpusparcusPC (fi)": 92.3, + "OpusparcusPC (fr)": 94.94, + "OpusparcusPC (ru)": 90.82, + "OpusparcusPC (sv)": 95.01, + "PSC": 99.63, + "PawsXPairClassification (de)": 59.59, + "PawsXPairClassification (en)": 66.91, + "PawsXPairClassification (es)": 60.97, + "PawsXPairClassification (fr)": 62.57, + "PawsXPairClassification (ja)": 52.68, + "PawsXPairClassification (ko)": 53.28, + "PawsXPairClassification (zh)": 59.15, + "SICK-E-PL": 77.4, + "TERRa": 60.65 + }, + { + "Model": "SFR-Embedding-Mistral", + "CDSC-E": 75.1, + "FalseFriendsGermanEnglish": 53.11, + "OpusparcusPC (de)": 97.46, + "OpusparcusPC (en)": 99.09, + "OpusparcusPC (fi)": 92.3, + "OpusparcusPC (fr)": 94.94, + "OpusparcusPC (ru)": 90.82, + "OpusparcusPC (sv)": 95.01, + "PSC": 99.67, + "PawsXPairClassification (de)": 60.1, + "PawsXPairClassification (en)": 67.08, + "PawsXPairClassification (es)": 61.15, + "PawsXPairClassification (fr)": 62.69, + "PawsXPairClassification (ja)": 52.81, + "PawsXPairClassification (ko)": 53.32, + "PawsXPairClassification (zh)": 59.63, + "SICK-E-PL": 77.4, + "TERRa": 60.65 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "SFR-Embedding-Mistral", + "AlloprofReranking": 79.32, + "RuBQReranking": 77.24, + "SyntecReranking": 90.75, + "T2Reranking": 66.97 + }, + { + "Model": "SFR-Embedding-Mistral", + "MIRACLReranking (ar)": 75.78, + "MIRACLReranking (bn)": 69.72, + "MIRACLReranking (de)": 54.39, + "MIRACLReranking (en)": 62.66, + "MIRACLReranking (es)": 61.76, + "MIRACLReranking (fa)": 55.27, + "MIRACLReranking (fi)": 78.11, + "MIRACLReranking (fr)": 55.93, + "MIRACLReranking (hi)": 58.09, + "MIRACLReranking (id)": 58.12, + "MIRACLReranking (ja)": 63.56, + "MIRACLReranking (ko)": 55.38, + "MIRACLReranking (ru)": 63.67, + "MIRACLReranking (sw)": 61.87, + "MIRACLReranking (te)": 74.0, + "MIRACLReranking (th)": 72.49, + "MIRACLReranking (yo)": 66.0, + "MIRACLReranking (zh)": 49.75 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "SFR-Embedding-Mistral", + "AILACasedocs": 36.67, + "AILAStatutes": 37.47, + "ARCChallenge": 20.39, + "AlloprofRetrieval": 56.74, + "AlphaNLI": 29.36, + "AppsRetrieval": 26.11, + "BSARDRetrieval": 26.28, "BrightRetrieval (sustainable_living)": 19.79, "BrightRetrieval (economics)": 17.84, "BrightRetrieval (theoremqa_theorems)": 24.32, @@ -4263,7 +11446,105 @@ "BrightRetrieval (leetcode)": 27.35, "BrightRetrieval (biology)": 19.49, "BrightRetrieval (earth_science)": 26.63, - "BrightRetrieval (robotics)": 16.7 + "BrightRetrieval (robotics)": 16.7, + "CmedqaRetrieval": 35.12, + "CodeFeedbackMT": 40.33, + "CodeFeedbackST": 78.07, + "CodeSearchNetCCRetrieval (python)": 70.38, + "CodeSearchNetCCRetrieval (javascript)": 69.09, + "CodeSearchNetCCRetrieval (go)": 62.02, + "CodeSearchNetCCRetrieval (ruby)": 65.21, + "CodeSearchNetCCRetrieval (java)": 67.75, + "CodeSearchNetCCRetrieval (php)": 52.44, + "CodeSearchNetRetrieval (python)": 92.26, + "CodeSearchNetRetrieval (javascript)": 81.47, + "CodeSearchNetRetrieval (go)": 93.28, + "CodeSearchNetRetrieval (ruby)": 85.94, + "CodeSearchNetRetrieval (java)": 84.03, + "CodeSearchNetRetrieval (php)": 83.39, + "CodeTransOceanContest": 83.52, + "CodeTransOceanDL": 32.93, + "CosQA": 34.31, + "CovidRetrieval": 73.07, + "GerDaLIR": 8.22, + "GerDaLIRSmall": 18.39, + "GermanQuAD-Retrieval": 95.22, + "HellaSwag": 37.68, + "LEMBNarrativeQARetrieval": 39.2, + "LEMBQMSumRetrieval": 29.43, + "LEMBSummScreenFDRetrieval": 77.73, + "LEMBWikimQARetrieval": 59.28, + "LeCaRDv2": 59.58, + "LegalBenchConsumerContractsQA": 79.15, + "LegalBenchCorporateLobbying": 94.83, + "LegalQuAD": 45.08, + "LegalSummarization": 71.05, + "MintakaRetrieval (ar)": 24.63, + "MintakaRetrieval (de)": 51.48, + "MintakaRetrieval (es)": 48.5, + "MintakaRetrieval (fr)": 49.62, + "MintakaRetrieval (hi)": 25.72, + "MintakaRetrieval (it)": 47.58, + "MintakaRetrieval (ja)": 33.34, + "MintakaRetrieval (pt)": 51.01, + "PIQA": 42.46, + "Quail": 8.19, + "RARbCode": 77.97, + "RARbMath": 79.36, + "RiaNewsRetrieval": 81.14, + "RuBQRetrieval": 77.16, + "SIQA": 5.96, + "SciFact-PL": 69.64, + "SpartQA": 8.81, + "StackOverflowQA": 92.28, + "SyntecRetrieval": 91.49, + "SyntheticText2SQL": 59.29, + "TRECCOVID-PL": 75.27, + "TempReasonL1": 3.59, + "TempReasonL2Fact": 40.15, + "TempReasonL2Pure": 9.64, + "TempReasonL3Fact": 31.24, + "TempReasonL3Pure": 14.14, + "WinoGrande": 45.65, + "XMarket (de)": 27.3, + "XMarket (en)": 33.95, + "XMarket (es)": 28.26, + "XPQARetrieval (ara-ara)": 47.29, + "XPQARetrieval (eng-ara)": 31.26, + "XPQARetrieval (ara-eng)": 42.44, + "XPQARetrieval (deu-deu)": 79.35, + "XPQARetrieval (eng-deu)": 49.52, + "XPQARetrieval (deu-eng)": 74.65, + "XPQARetrieval (spa-spa)": 63.0, + "XPQARetrieval (eng-spa)": 36.73, + "XPQARetrieval (spa-eng)": 60.38, + "XPQARetrieval (fra-fra)": 71.58, + "XPQARetrieval (eng-fra)": 44.35, + "XPQARetrieval (fra-eng)": 67.19, + "XPQARetrieval (hin-hin)": 74.64, + "XPQARetrieval (eng-hin)": 22.02, + "XPQARetrieval (hin-eng)": 66.84, + "XPQARetrieval (ita-ita)": 75.67, + "XPQARetrieval (eng-ita)": 33.01, + "XPQARetrieval (ita-eng)": 69.6, + "XPQARetrieval (jpn-jpn)": 73.75, + "XPQARetrieval (eng-jpn)": 43.54, + "XPQARetrieval (jpn-eng)": 70.8, + "XPQARetrieval (kor-kor)": 40.03, + "XPQARetrieval (eng-kor)": 33.38, + "XPQARetrieval (kor-eng)": 35.08, + "XPQARetrieval (pol-pol)": 48.25, + "XPQARetrieval (eng-pol)": 35.62, + "XPQARetrieval (pol-eng)": 44.53, + "XPQARetrieval (por-por)": 50.61, + "XPQARetrieval (eng-por)": 28.2, + "XPQARetrieval (por-eng)": 49.27, + "XPQARetrieval (tam-tam)": 43.19, + "XPQARetrieval (eng-tam)": 4.27, + "XPQARetrieval (tam-eng)": 22.14, + "XPQARetrieval (cmn-cmn)": 64.0, + "XPQARetrieval (eng-cmn)": 35.37, + "XPQARetrieval (cmn-eng)": 57.31 } ], "recall_at_1": [ @@ -4281,16 +11562,115 @@ ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "SFR-Embedding-Mistral", + "CDSC-R": 92.59, + "GermanSTSBenchmark": 85.74, + "RUParaPhraserSTS": 75.99, + "RuSTSBenchmarkSTS": 84.57, + "SICK-R-PL": 76.59, + "SICKFr": 81.27, + "STS22 (en)": 68.63, + "STS22 (es-it)": 75.55, + "STS22 (it)": 76.79, + "STS22 (ar)": 55.04, + "STS22 (pl-en)": 74.65, + "STS22 (fr)": 81.03, + "STS22 (de-en)": 61.61, + "STS22 (pl)": 40.47, + "STS22 (zh-en)": 72.83, + "STS22 (es)": 67.38, + "STS22 (zh)": 64.65, + "STS22 (ru)": 62.38, + "STS22 (es-en)": 77.51, + "STS22 (de-fr)": 62.9, + "STS22 (de)": 52.72, + "STS22 (tr)": 68.81, + "STS22 (de-pl)": 58.39, + "STS22 (fr-pl)": 84.52, + "STSB": 81.97, + "STSBenchmarkMultilingualSTS (pl)": 84.24, + "STSBenchmarkMultilingualSTS (ru)": 84.68, + "STSBenchmarkMultilingualSTS (nl)": 84.23, + "STSBenchmarkMultilingualSTS (en)": 89.0, + "STSBenchmarkMultilingualSTS (it)": 85.04, + "STSBenchmarkMultilingualSTS (zh)": 82.95, + "STSBenchmarkMultilingualSTS (es)": 86.69, + "STSBenchmarkMultilingualSTS (de)": 86.02, + "STSBenchmarkMultilingualSTS (pt)": 85.24, + "STSBenchmarkMultilingualSTS (fr)": 85.87 + }, + { + "Model": "SFR-Embedding-Mistral", + "CDSC-R": 92.59, + "GermanSTSBenchmark": 85.74, + "RUParaPhraserSTS": 75.99, + "RuSTSBenchmarkSTS": 84.57, + "SICK-R-PL": 76.59, + "SICKFr": 81.27, + "STS22 (en)": 68.63, + "STS22 (es-it)": 75.55, + "STS22 (it)": 76.79, + "STS22 (ar)": 55.04, + "STS22 (pl-en)": 74.65, + "STS22 (fr)": 81.03, + "STS22 (de-en)": 61.61, + "STS22 (pl)": 40.47, + "STS22 (zh-en)": 72.83, + "STS22 (es)": 67.38, + "STS22 (zh)": 64.65, + "STS22 (ru)": 62.38, + "STS22 (es-en)": 77.51, + "STS22 (de-fr)": 62.9, + "STS22 (de)": 52.72, + "STS22 (tr)": 68.81, + "STS22 (de-pl)": 58.39, + "STS22 (fr-pl)": 84.52, + "STSB": 81.97, + "STSBenchmarkMultilingualSTS (pl)": 84.24, + "STSBenchmarkMultilingualSTS (ru)": 84.68, + "STSBenchmarkMultilingualSTS (nl)": 84.23, + "STSBenchmarkMultilingualSTS (en)": 89.0, + "STSBenchmarkMultilingualSTS (it)": 85.04, + "STSBenchmarkMultilingualSTS (zh)": 82.95, + "STSBenchmarkMultilingualSTS (es)": 86.69, + "STSBenchmarkMultilingualSTS (de)": 86.02, + "STSBenchmarkMultilingualSTS (pt)": 85.24, + "STSBenchmarkMultilingualSTS (fr)": 85.87 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "SFR-Embedding-Mistral", + "SummEvalFr": 30.44 + }, + { + "Model": "SFR-Embedding-Mistral", + "SummEvalFr": 30.44 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "SFR-Embedding-Mistral", + "CEDRClassification": 51.74, + "SensitiveTopicsClassification": 34.15 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "SFR-Embedding-Mistral", + "Core17InstructionRetrieval": 4.55, + "News21InstructionRetrieval": 1.38, + "Robust04InstructionRetrieval": -5.45 + } + ] } }, "T-Systems-onsite__cross-en-de-roberta-sentence-transformer": { @@ -4426,21 +11806,292 @@ }, "ai-forever__sbert_large_mt_nlu_ru": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "sbert_large_mt_nlu_ru", + "BornholmBitextMining": 17.0, + "Tatoeba (aze-eng)": 0.49, + "Tatoeba (pes-eng)": 0.14, + "Tatoeba (kaz-eng)": 1.97, + "Tatoeba (bel-eng)": 11.51, + "Tatoeba (hye-eng)": 0.23, + "Tatoeba (cmn-eng)": 0.43, + "Tatoeba (tat-eng)": 1.93, + "Tatoeba (max-eng)": 1.41, + "Tatoeba (oci-eng)": 0.88, + "Tatoeba (lat-eng)": 0.49, + "Tatoeba (gla-eng)": 0.62, + "Tatoeba (awa-eng)": 0.17, + "Tatoeba (ron-eng)": 0.66, + "Tatoeba (ast-eng)": 6.21, + "Tatoeba (vie-eng)": 0.63, + "Tatoeba (ang-eng)": 5.98, + "Tatoeba (nno-eng)": 1.5, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (arz-eng)": 0.47, + "Tatoeba (kur-eng)": 0.14, + "Tatoeba (kzj-eng)": 0.27, + "Tatoeba (glg-eng)": 1.21, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (dan-eng)": 1.64, + "Tatoeba (jpn-eng)": 0.0, + "Tatoeba (ces-eng)": 0.14, + "Tatoeba (ind-eng)": 0.61, + "Tatoeba (hrv-eng)": 0.53, + "Tatoeba (orv-eng)": 4.82, + "Tatoeba (fra-eng)": 1.81, + "Tatoeba (arq-eng)": 0.18, + "Tatoeba (nld-eng)": 2.56, + "Tatoeba (rus-eng)": 28.16, + "Tatoeba (hin-eng)": 0.02, + "Tatoeba (bos-eng)": 1.9, + "Tatoeba (cym-eng)": 1.16, + "Tatoeba (cor-eng)": 0.4, + "Tatoeba (ido-eng)": 0.94, + "Tatoeba (cha-eng)": 4.68, + "Tatoeba (gsw-eng)": 8.64, + "Tatoeba (epo-eng)": 0.4, + "Tatoeba (nob-eng)": 1.01, + "Tatoeba (ceb-eng)": 0.27, + "Tatoeba (uzb-eng)": 1.71, + "Tatoeba (pms-eng)": 1.88, + "Tatoeba (cat-eng)": 2.71, + "Tatoeba (kab-eng)": 0.28, + "Tatoeba (heb-eng)": 0.37, + "Tatoeba (uig-eng)": 0.11, + "Tatoeba (ara-eng)": 0.07, + "Tatoeba (gle-eng)": 0.73, + "Tatoeba (zsm-eng)": 0.5, + "Tatoeba (kat-eng)": 0.38, + "Tatoeba (srp-eng)": 4.58, + "Tatoeba (dsb-eng)": 0.11, + "Tatoeba (cbk-eng)": 0.41, + "Tatoeba (nds-eng)": 3.5, + "Tatoeba (ita-eng)": 1.04, + "Tatoeba (ber-eng)": 0.68, + "Tatoeba (ile-eng)": 4.41, + "Tatoeba (tel-eng)": 0.54, + "Tatoeba (kor-eng)": 0.15, + "Tatoeba (fin-eng)": 0.2, + "Tatoeba (yue-eng)": 0.2, + "Tatoeba (fao-eng)": 1.03, + "Tatoeba (jav-eng)": 2.43, + "Tatoeba (ukr-eng)": 25.12, + "Tatoeba (tuk-eng)": 1.26, + "Tatoeba (wuu-eng)": 0.28, + "Tatoeba (swg-eng)": 0.04, + "Tatoeba (pam-eng)": 0.51, + "Tatoeba (tur-eng)": 0.21, + "Tatoeba (bre-eng)": 0.63, + "Tatoeba (hun-eng)": 0.71, + "Tatoeba (isl-eng)": 0.51, + "Tatoeba (tzl-eng)": 0.59, + "Tatoeba (mhr-eng)": 1.64, + "Tatoeba (nov-eng)": 4.41, + "Tatoeba (dtp-eng)": 0.6, + "Tatoeba (xho-eng)": 1.22, + "Tatoeba (mar-eng)": 0.05, + "Tatoeba (khm-eng)": 0.01, + "Tatoeba (slv-eng)": 0.85, + "Tatoeba (hsb-eng)": 0.45, + "Tatoeba (csb-eng)": 1.21, + "Tatoeba (mon-eng)": 1.64, + "Tatoeba (war-eng)": 0.43, + "Tatoeba (tam-eng)": 0.0, + "Tatoeba (lfn-eng)": 1.8, + "Tatoeba (spa-eng)": 1.0, + "Tatoeba (fry-eng)": 5.52, + "Tatoeba (deu-eng)": 2.42, + "Tatoeba (amh-eng)": 1.2, + "Tatoeba (ell-eng)": 0.13, + "Tatoeba (swe-eng)": 0.86, + "Tatoeba (lit-eng)": 0.21, + "Tatoeba (yid-eng)": 0.15, + "Tatoeba (eus-eng)": 0.99, + "Tatoeba (est-eng)": 0.37, + "Tatoeba (tgl-eng)": 0.27, + "Tatoeba (pol-eng)": 0.73, + "Tatoeba (mkd-eng)": 5.36, + "Tatoeba (ina-eng)": 2.96, + "Tatoeba (sqi-eng)": 0.78, + "Tatoeba (swh-eng)": 0.58, + "Tatoeba (urd-eng)": 0.03, + "Tatoeba (por-eng)": 0.73, + "Tatoeba (bul-eng)": 7.69, + "Tatoeba (lvs-eng)": 0.38, + "Tatoeba (tha-eng)": 0.18, + "Tatoeba (afr-eng)": 2.42, + "Tatoeba (slk-eng)": 0.39 + } + ] }, "Classification": { "accuracy": [ { "Model": "sbert_large_mt_nlu_ru", + "AllegroReviews": 21.21, + "AmazonCounterfactualClassification (en-ext)": 59.12, + "AmazonCounterfactualClassification (en)": 61.57, + "AmazonCounterfactualClassification (de)": 60.52, + "AmazonCounterfactualClassification (ja)": 50.22, + "AmazonPolarityClassification": 60.24, + "AmazonReviewsClassification (en)": 28.26, + "AmazonReviewsClassification (de)": 24.32, + "AmazonReviewsClassification (es)": 23.78, + "AmazonReviewsClassification (fr)": 24.55, + "AmazonReviewsClassification (ja)": 20.16, + "AmazonReviewsClassification (zh)": 20.7, + "AngryTweetsClassification": 44.35, + "Banking77Classification": 39.12, + "CBD": 49.87, + "DanishPoliticalCommentsClassification": 27.26, + "EmotionClassification": 19.28, "GeoreviewClassification": 39.67, "HeadlineClassification": 77.19, + "ImdbClassification": 56.13, "InappropriatenessClassification": 64.64, "KinopoiskClassification": 50.33, + "LccSentimentClassification": 42.0, + "MTOPDomainClassification (en)": 72.42, + "MTOPDomainClassification (de)": 42.69, + "MTOPDomainClassification (es)": 42.65, + "MTOPDomainClassification (fr)": 44.15, + "MTOPDomainClassification (hi)": 22.55, + "MTOPDomainClassification (th)": 15.97, + "MTOPIntentClassification (en)": 40.1, + "MTOPIntentClassification (de)": 23.36, + "MTOPIntentClassification (es)": 19.5, + "MTOPIntentClassification (fr)": 23.37, + "MTOPIntentClassification (hi)": 4.45, + "MTOPIntentClassification (th)": 5.56, + "MasakhaNEWSClassification (amh)": 30.24, + "MasakhaNEWSClassification (eng)": 60.19, + "MasakhaNEWSClassification (fra)": 45.24, + "MasakhaNEWSClassification (hau)": 32.75, + "MasakhaNEWSClassification (ibo)": 30.21, + "MasakhaNEWSClassification (lin)": 40.11, + "MasakhaNEWSClassification (lug)": 34.3, + "MasakhaNEWSClassification (orm)": 32.06, + "MasakhaNEWSClassification (pcm)": 79.21, + "MasakhaNEWSClassification (run)": 38.51, + "MasakhaNEWSClassification (sna)": 50.0, + "MasakhaNEWSClassification (som)": 30.85, + "MasakhaNEWSClassification (swa)": 30.17, + "MasakhaNEWSClassification (tir)": 25.4, + "MasakhaNEWSClassification (xho)": 42.83, + "MasakhaNEWSClassification (yor)": 36.13, "MassiveIntentClassification (ru)": 61.42, + "MassiveIntentClassification (ur)": 2.66, + "MassiveIntentClassification (nl)": 19.71, + "MassiveIntentClassification (ar)": 4.44, + "MassiveIntentClassification (pt)": 21.61, + "MassiveIntentClassification (tr)": 18.12, + "MassiveIntentClassification (bn)": 3.43, + "MassiveIntentClassification (ro)": 18.92, + "MassiveIntentClassification (jv)": 18.97, + "MassiveIntentClassification (af)": 16.74, + "MassiveIntentClassification (hu)": 18.03, + "MassiveIntentClassification (ja)": 4.32, + "MassiveIntentClassification (pl)": 17.96, + "MassiveIntentClassification (es)": 18.79, + "MassiveIntentClassification (fa)": 3.44, + "MassiveIntentClassification (de)": 22.32, + "MassiveIntentClassification (en)": 38.41, + "MassiveIntentClassification (lv)": 16.7, + "MassiveIntentClassification (zh-CN)": 2.12, + "MassiveIntentClassification (vi)": 19.68, + "MassiveIntentClassification (hy)": 3.28, + "MassiveIntentClassification (nb)": 18.97, + "MassiveIntentClassification (tl)": 18.93, + "MassiveIntentClassification (my)": 4.16, + "MassiveIntentClassification (zh-TW)": 3.87, + "MassiveIntentClassification (he)": 2.54, + "MassiveIntentClassification (id)": 22.3, + "MassiveIntentClassification (is)": 14.84, + "MassiveIntentClassification (hi)": 2.66, + "MassiveIntentClassification (el)": 13.64, + "MassiveIntentClassification (am)": 2.68, + "MassiveIntentClassification (sl)": 18.98, + "MassiveIntentClassification (th)": 4.06, + "MassiveIntentClassification (kn)": 3.08, + "MassiveIntentClassification (km)": 4.83, + "MassiveIntentClassification (it)": 21.19, + "MassiveIntentClassification (sv)": 18.94, + "MassiveIntentClassification (az)": 16.05, + "MassiveIntentClassification (fi)": 17.73, + "MassiveIntentClassification (ka)": 2.79, + "MassiveIntentClassification (ml)": 3.05, + "MassiveIntentClassification (ms)": 16.25, + "MassiveIntentClassification (te)": 2.37, + "MassiveIntentClassification (sw)": 18.46, + "MassiveIntentClassification (cy)": 17.52, + "MassiveIntentClassification (mn)": 22.36, + "MassiveIntentClassification (sq)": 20.08, + "MassiveIntentClassification (ta)": 2.37, + "MassiveIntentClassification (ko)": 2.4, + "MassiveIntentClassification (da)": 21.51, + "MassiveIntentClassification (fr)": 20.94, "MassiveScenarioClassification (ru)": 68.13, + "MassiveScenarioClassification (sl)": 23.28, + "MassiveScenarioClassification (sv)": 23.79, + "MassiveScenarioClassification (bn)": 8.97, + "MassiveScenarioClassification (ar)": 11.91, + "MassiveScenarioClassification (hu)": 24.64, + "MassiveScenarioClassification (ko)": 6.91, + "MassiveScenarioClassification (kn)": 7.58, + "MassiveScenarioClassification (am)": 7.24, + "MassiveScenarioClassification (ka)": 7.51, + "MassiveScenarioClassification (it)": 26.18, + "MassiveScenarioClassification (my)": 10.3, + "MassiveScenarioClassification (jv)": 27.39, + "MassiveScenarioClassification (te)": 7.8, + "MassiveScenarioClassification (fi)": 23.82, + "MassiveScenarioClassification (ja)": 9.13, + "MassiveScenarioClassification (af)": 24.34, + "MassiveScenarioClassification (pl)": 27.01, + "MassiveScenarioClassification (da)": 29.86, + "MassiveScenarioClassification (th)": 8.94, + "MassiveScenarioClassification (hy)": 9.14, + "MassiveScenarioClassification (id)": 27.32, + "MassiveScenarioClassification (nl)": 26.8, + "MassiveScenarioClassification (fa)": 6.97, + "MassiveScenarioClassification (ro)": 24.28, + "MassiveScenarioClassification (ur)": 9.68, + "MassiveScenarioClassification (tl)": 25.1, + "MassiveScenarioClassification (az)": 20.32, + "MassiveScenarioClassification (ta)": 6.95, + "MassiveScenarioClassification (el)": 20.43, + "MassiveScenarioClassification (km)": 8.92, + "MassiveScenarioClassification (nb)": 24.55, + "MassiveScenarioClassification (es)": 23.82, + "MassiveScenarioClassification (he)": 7.29, + "MassiveScenarioClassification (ms)": 23.08, + "MassiveScenarioClassification (de)": 31.89, + "MassiveScenarioClassification (sw)": 25.07, + "MassiveScenarioClassification (is)": 22.25, + "MassiveScenarioClassification (pt)": 26.39, + "MassiveScenarioClassification (zh-TW)": 9.31, + "MassiveScenarioClassification (hi)": 7.99, + "MassiveScenarioClassification (sq)": 27.3, + "MassiveScenarioClassification (vi)": 26.56, + "MassiveScenarioClassification (ml)": 7.28, + "MassiveScenarioClassification (fr)": 28.63, + "MassiveScenarioClassification (lv)": 19.48, + "MassiveScenarioClassification (mn)": 24.93, + "MassiveScenarioClassification (cy)": 22.42, + "MassiveScenarioClassification (tr)": 21.86, + "MassiveScenarioClassification (zh-CN)": 9.71, + "MassiveScenarioClassification (en)": 49.23, + "NoRecClassification": 38.42, + "NordicLangClassification": 38.2, + "PAC": 63.64, + "PolEmo2.0-IN": 41.48, + "PolEmo2.0-OUT": 33.79, "RuReviewsClassification": 58.29, "RuSciBenchGRNTIClassification": 54.19, - "RuSciBenchOECDClassification": 43.8 + "RuSciBenchOECDClassification": 43.8, + "ToxicConversationsClassification": 56.2, + "TweetSentimentExtractionClassification": 47.37 } ] }, @@ -4448,9 +12099,67 @@ "v_measure": [ { "Model": "sbert_large_mt_nlu_ru", + "AlloProfClusteringP2P": 35.78, + "AlloProfClusteringS2S": 23.21, + "ArxivClusteringP2P": 18.86, + "ArxivClusteringS2S": 13.17, + "BiorxivClusteringP2P": 11.85, + "BiorxivClusteringS2S": 7.62, + "BlurbsClusteringP2P": 10.62, + "BlurbsClusteringS2S": 8.8, "GeoreviewClusteringP2P": 57.07, + "HALClusteringS2S": 6.28, + "MLSUMClusteringP2P (de)": 14.6, + "MLSUMClusteringP2P (fr)": 25.26, + "MLSUMClusteringP2P (ru)": 50.88, + "MLSUMClusteringP2P (es)": 28.3, + "MLSUMClusteringS2S (de)": 15.56, + "MLSUMClusteringS2S (fr)": 24.85, + "MLSUMClusteringS2S (ru)": 48.17, + "MLSUMClusteringS2S (es)": 28.07, + "MasakhaNEWSClusteringP2P (amh)": 40.5, + "MasakhaNEWSClusteringP2P (eng)": 28.6, + "MasakhaNEWSClusteringP2P (fra)": 24.46, + "MasakhaNEWSClusteringP2P (hau)": 4.7, + "MasakhaNEWSClusteringP2P (ibo)": 21.98, + "MasakhaNEWSClusteringP2P (lin)": 47.61, + "MasakhaNEWSClusteringP2P (lug)": 44.57, + "MasakhaNEWSClusteringP2P (orm)": 21.68, + "MasakhaNEWSClusteringP2P (pcm)": 36.56, + "MasakhaNEWSClusteringP2P (run)": 44.46, + "MasakhaNEWSClusteringP2P (sna)": 43.14, + "MasakhaNEWSClusteringP2P (som)": 24.25, + "MasakhaNEWSClusteringP2P (swa)": 17.51, + "MasakhaNEWSClusteringP2P (tir)": 42.96, + "MasakhaNEWSClusteringP2P (xho)": 23.45, + "MasakhaNEWSClusteringP2P (yor)": 20.87, + "MasakhaNEWSClusteringS2S (amh)": 43.32, + "MasakhaNEWSClusteringS2S (eng)": 11.71, + "MasakhaNEWSClusteringS2S (fra)": 27.34, + "MasakhaNEWSClusteringS2S (hau)": 4.58, + "MasakhaNEWSClusteringS2S (ibo)": 25.82, + "MasakhaNEWSClusteringS2S (lin)": 49.46, + "MasakhaNEWSClusteringS2S (lug)": 48.05, + "MasakhaNEWSClusteringS2S (orm)": 22.57, + "MasakhaNEWSClusteringS2S (pcm)": 34.83, + "MasakhaNEWSClusteringS2S (run)": 44.33, + "MasakhaNEWSClusteringS2S (sna)": 40.15, + "MasakhaNEWSClusteringS2S (som)": 25.33, + "MasakhaNEWSClusteringS2S (swa)": 12.2, + "MasakhaNEWSClusteringS2S (tir)": 43.12, + "MasakhaNEWSClusteringS2S (xho)": 25.53, + "MasakhaNEWSClusteringS2S (yor)": 23.17, + "MedrxivClusteringP2P": 18.77, + "MedrxivClusteringS2S": 16.42, + "RedditClustering": 16.05, + "RedditClusteringP2P": 30.9, "RuSciBenchGRNTIClusteringP2P": 51.44, - "RuSciBenchOECDClusteringP2P": 45.36 + "RuSciBenchOECDClusteringP2P": 45.36, + "StackExchangeClustering": 23.38, + "StackExchangeClusteringP2P": 24.29, + "TenKGnadClusteringP2P": 13.99, + "TenKGnadClusteringS2S": 9.3, + "TwentyNewsgroupsClustering": 13.4 } ] }, @@ -4458,11 +12167,51 @@ "max_ap": [ { "Model": "sbert_large_mt_nlu_ru", - "TERRa": 51.97 + "CDSC-E": 37.2, + "FalseFriendsGermanEnglish": 47.48, + "OpusparcusPC (de)": 88.85, + "OpusparcusPC (en)": 96.31, + "OpusparcusPC (fi)": 83.7, + "OpusparcusPC (fr)": 83.31, + "OpusparcusPC (ru)": 90.3, + "OpusparcusPC (sv)": 81.9, + "PSC": 54.62, + "PawsXPairClassification (de)": 50.91, + "PawsXPairClassification (en)": 51.82, + "PawsXPairClassification (es)": 51.22, + "PawsXPairClassification (fr)": 53.01, + "PawsXPairClassification (ja)": 46.23, + "PawsXPairClassification (ko)": 47.32, + "PawsXPairClassification (zh)": 49.44, + "SICK-E-PL": 44.93, + "SprintDuplicateQuestions": 32.48, + "TERRa": 51.97, + "TwitterSemEval2015": 53.21, + "TwitterURLCorpus": 74.79 }, { "Model": "sbert_large_mt_nlu_ru", - "TERRa": 51.97 + "CDSC-E": 37.27, + "FalseFriendsGermanEnglish": 47.51, + "OpusparcusPC (de)": 88.85, + "OpusparcusPC (en)": 96.38, + "OpusparcusPC (fi)": 83.79, + "OpusparcusPC (fr)": 83.34, + "OpusparcusPC (ru)": 90.37, + "OpusparcusPC (sv)": 81.9, + "PSC": 54.94, + "PawsXPairClassification (de)": 51.15, + "PawsXPairClassification (en)": 51.91, + "PawsXPairClassification (es)": 51.28, + "PawsXPairClassification (fr)": 53.06, + "PawsXPairClassification (ja)": 46.7, + "PawsXPairClassification (ko)": 47.38, + "PawsXPairClassification (zh)": 49.7, + "SICK-E-PL": 44.97, + "SprintDuplicateQuestions": 32.92, + "TERRa": 51.97, + "TwitterSemEval2015": 53.21, + "TwitterURLCorpus": 74.79 } ] }, @@ -4470,11 +12219,35 @@ "map": [ { "Model": "sbert_large_mt_nlu_ru", - "MIRACLReranking (ru)": 24.99 + "AlloprofReranking": 31.62, + "AskUbuntuDupQuestions": 46.55, + "MindSmallReranking": 26.72, + "RuBQReranking": 56.14, + "SciDocsRR": 48.16, + "StackOverflowDupQuestions": 32.85, + "SyntecReranking": 35.97, + "T2Reranking": 50.61 }, { "Model": "sbert_large_mt_nlu_ru", - "RuBQReranking": 56.14 + "MIRACLReranking (ru)": 24.99, + "MIRACLReranking (ar)": 2.12, + "MIRACLReranking (bn)": 1.32, + "MIRACLReranking (de)": 5.55, + "MIRACLReranking (en)": 9.94, + "MIRACLReranking (es)": 6.11, + "MIRACLReranking (fa)": 3.1, + "MIRACLReranking (fi)": 8.8, + "MIRACLReranking (fr)": 4.94, + "MIRACLReranking (hi)": 4.01, + "MIRACLReranking (id)": 4.71, + "MIRACLReranking (ja)": 1.92, + "MIRACLReranking (ko)": 6.18, + "MIRACLReranking (sw)": 6.32, + "MIRACLReranking (te)": 1.8, + "MIRACLReranking (th)": 2.54, + "MIRACLReranking (yo)": 6.59, + "MIRACLReranking (zh)": 2.42 } ] }, @@ -4482,9 +12255,125 @@ "ndcg_at_10": [ { "Model": "sbert_large_mt_nlu_ru", + "AILACasedocs": 6.54, + "AILAStatutes": 11.76, + "ARCChallenge": 1.9, + "AlloprofRetrieval": 0.26, + "AlphaNLI": 0.71, + "AppsRetrieval": 0.23, + "ArguAna": 15.62, + "BSARDRetrieval": 0.13, + "ClimateFEVER": 0.16, + "CmedqaRetrieval": 0.45, + "CodeFeedbackMT": 5.6, + "CodeFeedbackST": 5.33, + "CodeSearchNetCCRetrieval (python)": 5.89, + "CodeSearchNetCCRetrieval (javascript)": 8.44, + "CodeSearchNetCCRetrieval (go)": 3.82, + "CodeSearchNetCCRetrieval (ruby)": 12.6, + "CodeSearchNetCCRetrieval (java)": 5.22, + "CodeSearchNetCCRetrieval (php)": 3.76, + "CodeSearchNetRetrieval (python)": 15.0, + "CodeSearchNetRetrieval (javascript)": 9.82, + "CodeSearchNetRetrieval (go)": 10.42, + "CodeSearchNetRetrieval (ruby)": 11.82, + "CodeSearchNetRetrieval (java)": 4.77, + "CodeSearchNetRetrieval (php)": 9.82, + "CodeTransOceanContest": 11.01, + "CodeTransOceanDL": 28.95, + "CosQA": 0.56, + "CovidRetrieval": 0.0, + "DBPedia": 0.27, + "FEVER": 0.22, + "FiQA2018": 0.52, + "GerDaLIR": 0.14, + "GerDaLIRSmall": 0.38, + "GermanQuAD-Retrieval": 4.86, + "HellaSwag": 2.87, + "HotpotQA": 1.63, + "LEMBNarrativeQARetrieval": 2.67, + "LEMBQMSumRetrieval": 6.84, + "LEMBSummScreenFDRetrieval": 6.53, + "LEMBWikimQARetrieval": 9.2, + "LeCaRDv2": 6.4, + "LegalBenchConsumerContractsQA": 15.84, + "LegalBenchCorporateLobbying": 21.07, + "LegalQuAD": 2.6, + "LegalSummarization": 20.57, "MIRACLRetrieval (ru)": 6.2, + "MintakaRetrieval (ar)": 0.25, + "MintakaRetrieval (de)": 1.1, + "MintakaRetrieval (es)": 0.56, + "MintakaRetrieval (fr)": 1.07, + "MintakaRetrieval (hi)": 0.51, + "MintakaRetrieval (it)": 0.71, + "MintakaRetrieval (ja)": 0.34, + "MintakaRetrieval (pt)": 0.94, + "NFCorpus": 2.35, + "NQ": 0.39, + "PIQA": 1.81, + "Quail": 0.13, + "QuoraRetrieval": 57.97, + "RARbCode": 0.09, + "RARbMath": 6.37, "RiaNewsRetrieval": 21.4, - "RuBQRetrieval": 29.8 + "RuBQRetrieval": 29.8, + "SCIDOCS": 0.37, + "SIQA": 0.35, + "SciFact": 1.27, + "SciFact-PL": 0.12, + "SpartQA": 0.0, + "StackOverflowQA": 8.95, + "SyntecRetrieval": 9.18, + "SyntheticText2SQL": 8.82, + "TRECCOVID": 7.16, + "TRECCOVID-PL": 1.17, + "TempReasonL1": 0.17, + "TempReasonL2Fact": 0.4, + "TempReasonL2Pure": 0.11, + "TempReasonL3Fact": 0.5, + "TempReasonL3Pure": 0.6, + "Touche2020": 1.33, + "WinoGrande": 0.71, + "XMarket (de)": 0.6, + "XMarket (en)": 0.81, + "XMarket (es)": 0.52, + "XPQARetrieval (ara-ara)": 2.35, + "XPQARetrieval (eng-ara)": 0.52, + "XPQARetrieval (ara-eng)": 2.27, + "XPQARetrieval (deu-deu)": 12.57, + "XPQARetrieval (eng-deu)": 0.8, + "XPQARetrieval (deu-eng)": 4.53, + "XPQARetrieval (spa-spa)": 5.96, + "XPQARetrieval (eng-spa)": 0.61, + "XPQARetrieval (spa-eng)": 2.01, + "XPQARetrieval (fra-fra)": 9.68, + "XPQARetrieval (eng-fra)": 1.03, + "XPQARetrieval (fra-eng)": 5.62, + "XPQARetrieval (hin-hin)": 5.47, + "XPQARetrieval (eng-hin)": 2.81, + "XPQARetrieval (hin-eng)": 1.68, + "XPQARetrieval (ita-ita)": 14.3, + "XPQARetrieval (eng-ita)": 1.23, + "XPQARetrieval (ita-eng)": 4.44, + "XPQARetrieval (jpn-jpn)": 4.2, + "XPQARetrieval (eng-jpn)": 1.04, + "XPQARetrieval (jpn-eng)": 1.47, + "XPQARetrieval (kor-kor)": 1.86, + "XPQARetrieval (eng-kor)": 0.83, + "XPQARetrieval (kor-eng)": 0.85, + "XPQARetrieval (pol-pol)": 7.34, + "XPQARetrieval (eng-pol)": 1.5, + "XPQARetrieval (pol-eng)": 2.4, + "XPQARetrieval (por-por)": 6.56, + "XPQARetrieval (eng-por)": 0.99, + "XPQARetrieval (por-eng)": 2.48, + "XPQARetrieval (tam-tam)": 1.09, + "XPQARetrieval (eng-tam)": 1.49, + "XPQARetrieval (tam-eng)": 1.06, + "XPQARetrieval (cmn-cmn)": 6.79, + "XPQARetrieval (eng-cmn)": 1.38, + "XPQARetrieval (cmn-eng)": 1.86 } ] }, @@ -4492,14 +12381,130 @@ "cosine_spearman": [ { "Model": "sbert_large_mt_nlu_ru", + "BIOSSES": 44.98, + "CDSC-R": 59.74, + "GermanSTSBenchmark": 36.44, + "SICK-R": 65.91, + "SICK-R-PL": 42.58, + "SICKFr": 53.29, + "STS12": 48.13, + "STS13": 48.04, + "STS14": 46.6, + "STS15": 65.34, + "STS16": 61.94, + "STS17 (ar-ar)": 12.08, + "STS17 (it-en)": 11.01, + "STS17 (es-es)": 39.85, + "STS17 (en-ar)": 4.47, + "STS17 (en-tr)": -6.65, + "STS17 (es-en)": 25.72, + "STS17 (en-de)": 14.55, + "STS17 (fr-en)": 17.21, + "STS17 (nl-en)": 19.39, + "STS17 (en-en)": 68.58, + "STS17 (ko-ko)": 8.05, + "STS22 (de)": 17.08, + "STS22 (de-pl)": -22.02, + "STS22 (zh-en)": 12.37, + "STS22 (pl-en)": 28.42, + "STS22 (tr)": 25.85, + "STS22 (fr)": 36.5, + "STS22 (es)": 37.89, + "STS22 (de-en)": 23.56, + "STS22 (es-en)": 23.75, + "STS22 (pl)": 5.41, + "STS22 (fr-pl)": -28.17, + "STS22 (it)": 36.74, + "STS22 (es-it)": 28.02, + "STS22 (de-fr)": 16.37, + "STS22 (zh)": 14.23, + "STS22 (ar)": 23.71, + "STS22 (en)": 51.46, + "STSB": 4.92, + "STSBenchmark": 58.81, + "STSBenchmarkMultilingualSTS (es)": 37.36, + "STSBenchmarkMultilingualSTS (zh)": 5.23, + "STSBenchmarkMultilingualSTS (it)": 38.85, + "STSBenchmarkMultilingualSTS (nl)": 37.38, + "STSBenchmarkMultilingualSTS (en)": 58.81, + "STSBenchmarkMultilingualSTS (fr)": 44.41, + "STSBenchmarkMultilingualSTS (pl)": 41.04, + "STSBenchmarkMultilingualSTS (ru)": 70.91, + "STSBenchmarkMultilingualSTS (pt)": 35.18, + "STSBenchmarkMultilingualSTS (de)": 39.48 + }, + { + "Model": "sbert_large_mt_nlu_ru", + "BIOSSES": 44.98, + "CDSC-R": 59.74, + "GermanSTSBenchmark": 36.44, "RUParaPhraserSTS": 65.17, "RuSTSBenchmarkSTS": 71.22, - "STS22 (ru)": 56.82 + "SICK-R": 65.91, + "SICK-R-PL": 42.58, + "SICKFr": 53.3, + "STS12": 48.13, + "STS13": 48.04, + "STS14": 46.6, + "STS15": 65.34, + "STS16": 61.94, + "STS17 (ar-ar)": 12.1, + "STS17 (it-en)": 11.01, + "STS17 (es-es)": 39.86, + "STS17 (en-ar)": 4.47, + "STS17 (en-tr)": -6.65, + "STS17 (es-en)": 25.72, + "STS17 (en-de)": 14.55, + "STS17 (fr-en)": 17.21, + "STS17 (nl-en)": 19.39, + "STS17 (en-en)": 68.58, + "STS17 (ko-ko)": 8.11, + "STS22 (ru)": 56.82, + "STS22 (de)": 17.08, + "STS22 (de-pl)": -22.02, + "STS22 (zh-en)": 12.37, + "STS22 (pl-en)": 28.42, + "STS22 (tr)": 25.85, + "STS22 (fr)": 36.5, + "STS22 (es)": 37.89, + "STS22 (de-en)": 23.56, + "STS22 (es-en)": 23.75, + "STS22 (pl)": 5.51, + "STS22 (fr-pl)": -28.17, + "STS22 (it)": 36.74, + "STS22 (es-it)": 28.02, + "STS22 (de-fr)": 16.37, + "STS22 (zh)": 14.22, + "STS22 (ar)": 23.73, + "STS22 (en)": 51.46, + "STSB": 4.9, + "STSBenchmark": 58.81, + "STSBenchmarkMultilingualSTS (es)": 37.36, + "STSBenchmarkMultilingualSTS (zh)": 5.26, + "STSBenchmarkMultilingualSTS (it)": 38.85, + "STSBenchmarkMultilingualSTS (nl)": 37.38, + "STSBenchmarkMultilingualSTS (en)": 58.81, + "STSBenchmarkMultilingualSTS (fr)": 44.41, + "STSBenchmarkMultilingualSTS (pl)": 41.04, + "STSBenchmarkMultilingualSTS (ru)": 70.91, + "STSBenchmarkMultilingualSTS (pt)": 35.18, + "STSBenchmarkMultilingualSTS (de)": 39.48 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "sbert_large_mt_nlu_ru", + "SummEval": 27.17, + "SummEvalFr": 30.39 + }, + { + "Model": "sbert_large_mt_nlu_ru", + "SummEval": 27.17, + "SummEvalFr": 30.39 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -4511,26 +12516,304 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "sbert_large_mt_nlu_ru", + "Core17InstructionRetrieval": 0.08, + "News21InstructionRetrieval": -0.36, + "Robust04InstructionRetrieval": -1.16 + } + ] } }, "ai-forever__sbert_large_nlu_ru": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "sbert_large_nlu_ru", + "BornholmBitextMining": 12.6, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (arq-eng)": 0.29, + "Tatoeba (cat-eng)": 0.58, + "Tatoeba (gle-eng)": 0.24, + "Tatoeba (epo-eng)": 0.5, + "Tatoeba (mon-eng)": 1.96, + "Tatoeba (ita-eng)": 0.86, + "Tatoeba (nds-eng)": 1.72, + "Tatoeba (tzl-eng)": 0.03, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (xho-eng)": 1.04, + "Tatoeba (nno-eng)": 0.65, + "Tatoeba (hye-eng)": 0.13, + "Tatoeba (awa-eng)": 0.25, + "Tatoeba (ber-eng)": 0.35, + "Tatoeba (amh-eng)": 0.6, + "Tatoeba (swg-eng)": 1.05, + "Tatoeba (uzb-eng)": 1.16, + "Tatoeba (mhr-eng)": 2.08, + "Tatoeba (lit-eng)": 0.04, + "Tatoeba (jav-eng)": 1.17, + "Tatoeba (eus-eng)": 0.33, + "Tatoeba (lfn-eng)": 1.34, + "Tatoeba (tat-eng)": 1.43, + "Tatoeba (slv-eng)": 0.47, + "Tatoeba (tha-eng)": 0.12, + "Tatoeba (ceb-eng)": 0.56, + "Tatoeba (ile-eng)": 1.75, + "Tatoeba (vie-eng)": 0.34, + "Tatoeba (ces-eng)": 0.2, + "Tatoeba (tgl-eng)": 0.1, + "Tatoeba (kzj-eng)": 0.1, + "Tatoeba (ell-eng)": 0.17, + "Tatoeba (orv-eng)": 6.42, + "Tatoeba (swe-eng)": 0.55, + "Tatoeba (bel-eng)": 17.74, + "Tatoeba (nov-eng)": 2.34, + "Tatoeba (max-eng)": 0.79, + "Tatoeba (ast-eng)": 0.56, + "Tatoeba (ara-eng)": 0.2, + "Tatoeba (dan-eng)": 0.79, + "Tatoeba (pms-eng)": 0.92, + "Tatoeba (kab-eng)": 0.1, + "Tatoeba (tur-eng)": 0.01, + "Tatoeba (nld-eng)": 1.35, + "Tatoeba (dtp-eng)": 0.21, + "Tatoeba (est-eng)": 0.21, + "Tatoeba (ind-eng)": 0.32, + "Tatoeba (pes-eng)": 0.07, + "Tatoeba (ina-eng)": 1.37, + "Tatoeba (gla-eng)": 0.13, + "Tatoeba (tel-eng)": 0.76, + "Tatoeba (pol-eng)": 0.19, + "Tatoeba (yue-eng)": 0.01, + "Tatoeba (fin-eng)": 0.07, + "Tatoeba (war-eng)": 0.22, + "Tatoeba (sqi-eng)": 0.2, + "Tatoeba (yid-eng)": 0.1, + "Tatoeba (khm-eng)": 0.44, + "Tatoeba (cmn-eng)": 0.33, + "Tatoeba (fry-eng)": 3.06, + "Tatoeba (kat-eng)": 0.61, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (zsm-eng)": 0.24, + "Tatoeba (cym-eng)": 0.32, + "Tatoeba (kaz-eng)": 2.55, + "Tatoeba (lvs-eng)": 0.4, + "Tatoeba (spa-eng)": 0.34, + "Tatoeba (hsb-eng)": 0.22, + "Tatoeba (tam-eng)": 0.22, + "Tatoeba (arz-eng)": 0.01, + "Tatoeba (kur-eng)": 0.02, + "Tatoeba (hrv-eng)": 0.28, + "Tatoeba (slk-eng)": 0.12, + "Tatoeba (kor-eng)": 0.29, + "Tatoeba (wuu-eng)": 0.35, + "Tatoeba (jpn-eng)": 0.0, + "Tatoeba (aze-eng)": 0.13, + "Tatoeba (cor-eng)": 0.19, + "Tatoeba (isl-eng)": 0.11, + "Tatoeba (bul-eng)": 11.98, + "Tatoeba (ido-eng)": 0.74, + "Tatoeba (nob-eng)": 0.73, + "Tatoeba (fra-eng)": 0.45, + "Tatoeba (bos-eng)": 0.04, + "Tatoeba (deu-eng)": 1.61, + "Tatoeba (ron-eng)": 0.37, + "Tatoeba (rus-eng)": 38.98, + "Tatoeba (ang-eng)": 5.77, + "Tatoeba (pam-eng)": 0.34, + "Tatoeba (fao-eng)": 0.63, + "Tatoeba (mal-eng)": 0.0, + "Tatoeba (dsb-eng)": 0.16, + "Tatoeba (oci-eng)": 0.63, + "Tatoeba (srp-eng)": 5.54, + "Tatoeba (lat-eng)": 0.21, + "Tatoeba (afr-eng)": 1.24, + "Tatoeba (cha-eng)": 1.05, + "Tatoeba (bre-eng)": 0.31, + "Tatoeba (hun-eng)": 0.37, + "Tatoeba (swh-eng)": 0.0, + "Tatoeba (mar-eng)": 0.03, + "Tatoeba (csb-eng)": 0.16, + "Tatoeba (tuk-eng)": 0.51, + "Tatoeba (uig-eng)": 0.08, + "Tatoeba (glg-eng)": 0.42, + "Tatoeba (heb-eng)": 0.3, + "Tatoeba (por-eng)": 0.21, + "Tatoeba (cbk-eng)": 0.0, + "Tatoeba (ukr-eng)": 35.48, + "Tatoeba (mkd-eng)": 6.83, + "Tatoeba (gsw-eng)": 3.43 + } + ] }, "Classification": { "accuracy": [ { "Model": "sbert_large_nlu_ru", + "AllegroReviews": 21.01, + "AmazonCounterfactualClassification (en-ext)": 62.03, + "AmazonCounterfactualClassification (en)": 62.37, + "AmazonCounterfactualClassification (de)": 53.43, + "AmazonCounterfactualClassification (ja)": 52.0, + "AmazonPolarityClassification": 59.33, + "AmazonReviewsClassification (en)": 27.26, + "AmazonReviewsClassification (de)": 23.78, + "AmazonReviewsClassification (es)": 23.42, + "AmazonReviewsClassification (fr)": 23.95, + "AmazonReviewsClassification (ja)": 20.12, + "AmazonReviewsClassification (zh)": 20.91, + "AngryTweetsClassification": 44.11, + "Banking77Classification": 34.79, + "CBD": 51.0, + "DanishPoliticalCommentsClassification": 26.55, + "EmotionClassification": 18.68, "GeoreviewClassification": 39.97, "HeadlineClassification": 79.26, + "ImdbClassification": 55.97, "InappropriatenessClassification": 62.52, "KinopoiskClassification": 49.51, + "LccSentimentClassification": 38.87, + "MTOPDomainClassification (en)": 69.22, + "MTOPDomainClassification (de)": 38.11, + "MTOPDomainClassification (es)": 39.46, + "MTOPDomainClassification (fr)": 38.27, + "MTOPDomainClassification (hi)": 22.61, + "MTOPDomainClassification (th)": 15.48, + "MTOPIntentClassification (en)": 37.01, + "MTOPIntentClassification (de)": 17.73, + "MTOPIntentClassification (es)": 15.75, + "MTOPIntentClassification (fr)": 16.34, + "MTOPIntentClassification (hi)": 4.66, + "MTOPIntentClassification (th)": 5.57, + "MasakhaNEWSClassification (amh)": 31.7, + "MasakhaNEWSClassification (eng)": 65.42, + "MasakhaNEWSClassification (fra)": 56.61, + "MasakhaNEWSClassification (hau)": 34.62, + "MasakhaNEWSClassification (ibo)": 30.54, + "MasakhaNEWSClassification (lin)": 50.4, + "MasakhaNEWSClassification (lug)": 30.31, + "MasakhaNEWSClassification (orm)": 32.55, + "MasakhaNEWSClassification (pcm)": 84.23, + "MasakhaNEWSClassification (run)": 32.17, + "MasakhaNEWSClassification (sna)": 47.67, + "MasakhaNEWSClassification (som)": 22.55, + "MasakhaNEWSClassification (swa)": 29.43, + "MasakhaNEWSClassification (tir)": 29.96, + "MasakhaNEWSClassification (xho)": 44.04, + "MasakhaNEWSClassification (yor)": 35.47, "MassiveIntentClassification (ru)": 61.09, + "MassiveIntentClassification (fr)": 18.21, + "MassiveIntentClassification (az)": 11.82, + "MassiveIntentClassification (bn)": 3.05, + "MassiveIntentClassification (hu)": 14.69, + "MassiveIntentClassification (cy)": 15.27, + "MassiveIntentClassification (ar)": 3.9, + "MassiveIntentClassification (sl)": 15.13, + "MassiveIntentClassification (ms)": 14.04, + "MassiveIntentClassification (en)": 36.64, + "MassiveIntentClassification (af)": 14.72, + "MassiveIntentClassification (ta)": 2.34, + "MassiveIntentClassification (nl)": 17.77, + "MassiveIntentClassification (my)": 4.45, + "MassiveIntentClassification (nb)": 14.4, + "MassiveIntentClassification (sw)": 13.76, + "MassiveIntentClassification (es)": 14.85, + "MassiveIntentClassification (ml)": 2.85, + "MassiveIntentClassification (is)": 11.27, + "MassiveIntentClassification (km)": 4.86, + "MassiveIntentClassification (pl)": 18.13, + "MassiveIntentClassification (hy)": 3.19, + "MassiveIntentClassification (hi)": 3.06, + "MassiveIntentClassification (tr)": 13.43, + "MassiveIntentClassification (zh-CN)": 2.52, + "MassiveIntentClassification (zh-TW)": 3.33, + "MassiveIntentClassification (ja)": 4.14, + "MassiveIntentClassification (de)": 21.1, + "MassiveIntentClassification (kn)": 3.07, + "MassiveIntentClassification (el)": 12.06, + "MassiveIntentClassification (th)": 3.96, + "MassiveIntentClassification (sq)": 17.11, + "MassiveIntentClassification (ko)": 2.62, + "MassiveIntentClassification (mn)": 16.16, + "MassiveIntentClassification (am)": 2.74, + "MassiveIntentClassification (id)": 17.75, + "MassiveIntentClassification (fa)": 3.42, + "MassiveIntentClassification (da)": 18.77, + "MassiveIntentClassification (it)": 17.9, + "MassiveIntentClassification (vi)": 16.58, + "MassiveIntentClassification (tl)": 16.85, + "MassiveIntentClassification (ur)": 2.59, + "MassiveIntentClassification (ro)": 15.75, + "MassiveIntentClassification (sv)": 14.53, + "MassiveIntentClassification (he)": 2.51, + "MassiveIntentClassification (lv)": 15.26, + "MassiveIntentClassification (jv)": 14.86, + "MassiveIntentClassification (ka)": 2.44, + "MassiveIntentClassification (pt)": 17.47, + "MassiveIntentClassification (fi)": 12.62, + "MassiveIntentClassification (te)": 2.18, "MassiveScenarioClassification (ru)": 67.6, + "MassiveScenarioClassification (ml)": 7.72, + "MassiveScenarioClassification (hi)": 7.63, + "MassiveScenarioClassification (fi)": 17.24, + "MassiveScenarioClassification (pl)": 26.4, + "MassiveScenarioClassification (ur)": 9.33, + "MassiveScenarioClassification (sl)": 19.84, + "MassiveScenarioClassification (nl)": 25.23, + "MassiveScenarioClassification (fa)": 6.75, + "MassiveScenarioClassification (id)": 23.56, + "MassiveScenarioClassification (de)": 29.53, + "MassiveScenarioClassification (ka)": 7.74, + "MassiveScenarioClassification (km)": 9.32, + "MassiveScenarioClassification (tr)": 18.96, + "MassiveScenarioClassification (ms)": 19.85, + "MassiveScenarioClassification (en)": 49.26, + "MassiveScenarioClassification (lv)": 18.1, + "MassiveScenarioClassification (th)": 8.9, + "MassiveScenarioClassification (am)": 7.3, + "MassiveScenarioClassification (el)": 19.95, + "MassiveScenarioClassification (is)": 17.48, + "MassiveScenarioClassification (cy)": 21.95, + "MassiveScenarioClassification (fr)": 24.94, + "MassiveScenarioClassification (az)": 18.88, + "MassiveScenarioClassification (pt)": 25.3, + "MassiveScenarioClassification (ro)": 21.17, + "MassiveScenarioClassification (ja)": 9.7, + "MassiveScenarioClassification (bn)": 8.49, + "MassiveScenarioClassification (mn)": 19.35, + "MassiveScenarioClassification (da)": 27.23, + "MassiveScenarioClassification (es)": 21.27, + "MassiveScenarioClassification (hy)": 8.86, + "MassiveScenarioClassification (vi)": 22.54, + "MassiveScenarioClassification (tl)": 24.39, + "MassiveScenarioClassification (nb)": 20.82, + "MassiveScenarioClassification (ko)": 6.71, + "MassiveScenarioClassification (kn)": 7.7, + "MassiveScenarioClassification (ta)": 7.14, + "MassiveScenarioClassification (ar)": 11.92, + "MassiveScenarioClassification (zh-TW)": 9.25, + "MassiveScenarioClassification (sq)": 22.42, + "MassiveScenarioClassification (zh-CN)": 9.3, + "MassiveScenarioClassification (he)": 8.12, + "MassiveScenarioClassification (jv)": 22.46, + "MassiveScenarioClassification (sw)": 22.07, + "MassiveScenarioClassification (my)": 10.27, + "MassiveScenarioClassification (hu)": 20.7, + "MassiveScenarioClassification (it)": 26.82, + "MassiveScenarioClassification (sv)": 19.74, + "MassiveScenarioClassification (af)": 21.73, + "MassiveScenarioClassification (te)": 7.88, + "NoRecClassification": 39.06, + "NordicLangClassification": 39.41, + "PAC": 68.93, + "PolEmo2.0-IN": 40.28, + "PolEmo2.0-OUT": 30.67, "RuReviewsClassification": 58.27, "RuSciBenchGRNTIClassification": 53.9, - "RuSciBenchOECDClassification": 43.04 + "RuSciBenchOECDClassification": 43.04, + "ToxicConversationsClassification": 57.76, + "TweetSentimentExtractionClassification": 47.21 } ] }, @@ -4538,9 +12821,67 @@ "v_measure": [ { "Model": "sbert_large_nlu_ru", + "AlloProfClusteringP2P": 39.96, + "AlloProfClusteringS2S": 23.7, + "ArxivClusteringP2P": 18.57, + "ArxivClusteringS2S": 11.83, + "BiorxivClusteringP2P": 12.51, + "BiorxivClusteringS2S": 6.79, + "BlurbsClusteringP2P": 11.42, + "BlurbsClusteringS2S": 8.6, "GeoreviewClusteringP2P": 57.12, + "HALClusteringS2S": 6.03, + "MLSUMClusteringP2P (de)": 15.09, + "MLSUMClusteringP2P (fr)": 27.16, + "MLSUMClusteringP2P (ru)": 49.13, + "MLSUMClusteringP2P (es)": 29.37, + "MLSUMClusteringS2S (de)": 15.85, + "MLSUMClusteringS2S (fr)": 27.09, + "MLSUMClusteringS2S (ru)": 49.58, + "MLSUMClusteringS2S (es)": 28.74, + "MasakhaNEWSClusteringP2P (amh)": 43.17, + "MasakhaNEWSClusteringP2P (eng)": 41.53, + "MasakhaNEWSClusteringP2P (fra)": 38.45, + "MasakhaNEWSClusteringP2P (hau)": 8.06, + "MasakhaNEWSClusteringP2P (ibo)": 21.37, + "MasakhaNEWSClusteringP2P (lin)": 50.33, + "MasakhaNEWSClusteringP2P (lug)": 43.55, + "MasakhaNEWSClusteringP2P (orm)": 20.41, + "MasakhaNEWSClusteringP2P (pcm)": 71.26, + "MasakhaNEWSClusteringP2P (run)": 41.88, + "MasakhaNEWSClusteringP2P (sna)": 46.77, + "MasakhaNEWSClusteringP2P (som)": 24.45, + "MasakhaNEWSClusteringP2P (swa)": 12.32, + "MasakhaNEWSClusteringP2P (tir)": 43.45, + "MasakhaNEWSClusteringP2P (xho)": 22.84, + "MasakhaNEWSClusteringP2P (yor)": 21.23, + "MasakhaNEWSClusteringS2S (amh)": 43.26, + "MasakhaNEWSClusteringS2S (eng)": 9.15, + "MasakhaNEWSClusteringS2S (fra)": 24.96, + "MasakhaNEWSClusteringS2S (hau)": 4.21, + "MasakhaNEWSClusteringS2S (ibo)": 26.62, + "MasakhaNEWSClusteringS2S (lin)": 51.88, + "MasakhaNEWSClusteringS2S (lug)": 45.69, + "MasakhaNEWSClusteringS2S (orm)": 21.66, + "MasakhaNEWSClusteringS2S (pcm)": 31.8, + "MasakhaNEWSClusteringS2S (run)": 44.07, + "MasakhaNEWSClusteringS2S (sna)": 52.21, + "MasakhaNEWSClusteringS2S (som)": 23.6, + "MasakhaNEWSClusteringS2S (swa)": 14.33, + "MasakhaNEWSClusteringS2S (tir)": 43.31, + "MasakhaNEWSClusteringS2S (xho)": 24.5, + "MasakhaNEWSClusteringS2S (yor)": 23.26, + "MedrxivClusteringP2P": 19.63, + "MedrxivClusteringS2S": 14.79, + "RedditClustering": 15.97, + "RedditClusteringP2P": 33.1, "RuSciBenchGRNTIClusteringP2P": 49.7, - "RuSciBenchOECDClusteringP2P": 44.48 + "RuSciBenchOECDClusteringP2P": 44.48, + "StackExchangeClustering": 21.14, + "StackExchangeClusteringP2P": 23.58, + "TenKGnadClusteringP2P": 18.55, + "TenKGnadClusteringS2S": 10.24, + "TwentyNewsgroupsClustering": 13.55 } ] }, @@ -4548,11 +12889,51 @@ "max_ap": [ { "Model": "sbert_large_nlu_ru", - "TERRa": 50.17 + "CDSC-E": 35.64, + "FalseFriendsGermanEnglish": 48.33, + "OpusparcusPC (de)": 88.54, + "OpusparcusPC (en)": 96.42, + "OpusparcusPC (fi)": 83.14, + "OpusparcusPC (fr)": 82.75, + "OpusparcusPC (ru)": 89.72, + "OpusparcusPC (sv)": 80.13, + "PSC": 57.84, + "PawsXPairClassification (de)": 50.88, + "PawsXPairClassification (en)": 50.62, + "PawsXPairClassification (es)": 51.74, + "PawsXPairClassification (fr)": 53.8, + "PawsXPairClassification (ja)": 46.11, + "PawsXPairClassification (ko)": 47.25, + "PawsXPairClassification (zh)": 48.87, + "SICK-E-PL": 44.12, + "SprintDuplicateQuestions": 15.22, + "TERRa": 50.17, + "TwitterSemEval2015": 51.4, + "TwitterURLCorpus": 73.98 }, { "Model": "sbert_large_nlu_ru", - "TERRa": 50.17 + "CDSC-E": 35.69, + "FalseFriendsGermanEnglish": 48.34, + "OpusparcusPC (de)": 88.55, + "OpusparcusPC (en)": 96.45, + "OpusparcusPC (fi)": 83.15, + "OpusparcusPC (fr)": 82.75, + "OpusparcusPC (ru)": 89.72, + "OpusparcusPC (sv)": 80.13, + "PSC": 57.84, + "PawsXPairClassification (de)": 51.01, + "PawsXPairClassification (en)": 50.64, + "PawsXPairClassification (es)": 51.74, + "PawsXPairClassification (fr)": 53.84, + "PawsXPairClassification (ja)": 46.43, + "PawsXPairClassification (ko)": 47.67, + "PawsXPairClassification (zh)": 48.93, + "SICK-E-PL": 44.15, + "SprintDuplicateQuestions": 15.34, + "TERRa": 50.17, + "TwitterSemEval2015": 51.4, + "TwitterURLCorpus": 73.98 } ] }, @@ -4560,11 +12941,35 @@ "map": [ { "Model": "sbert_large_nlu_ru", - "MIRACLReranking (ru)": 18.8 + "AlloprofReranking": 31.06, + "AskUbuntuDupQuestions": 43.65, + "MindSmallReranking": 26.86, + "RuBQReranking": 46.81, + "SciDocsRR": 44.72, + "StackOverflowDupQuestions": 29.72, + "SyntecReranking": 35.78, + "T2Reranking": 50.7 }, { "Model": "sbert_large_nlu_ru", - "RuBQReranking": 46.81 + "MIRACLReranking (ru)": 18.8, + "MIRACLReranking (ar)": 2.13, + "MIRACLReranking (bn)": 1.59, + "MIRACLReranking (de)": 3.86, + "MIRACLReranking (en)": 8.82, + "MIRACLReranking (es)": 4.97, + "MIRACLReranking (fa)": 3.24, + "MIRACLReranking (fi)": 4.84, + "MIRACLReranking (fr)": 3.15, + "MIRACLReranking (hi)": 4.13, + "MIRACLReranking (id)": 3.65, + "MIRACLReranking (ja)": 1.94, + "MIRACLReranking (ko)": 4.9, + "MIRACLReranking (sw)": 3.1, + "MIRACLReranking (te)": 2.57, + "MIRACLReranking (th)": 2.5, + "MIRACLReranking (yo)": 4.17, + "MIRACLReranking (zh)": 2.58 } ] }, @@ -4572,9 +12977,142 @@ "ndcg_at_10": [ { "Model": "sbert_large_nlu_ru", + "AILACasedocs": 12.96, + "AILAStatutes": 19.38, + "ARCChallenge": 1.8, + "AlloprofRetrieval": 0.34, + "AlphaNLI": 1.1, + "AppsRetrieval": 0.19, + "ArguAna": 17.29, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 0.03, + "CmedqaRetrieval": 0.28, + "CodeFeedbackMT": 5.2, + "CodeFeedbackST": 3.78, + "CodeSearchNetCCRetrieval (python)": 4.27, + "CodeSearchNetCCRetrieval (javascript)": 4.77, + "CodeSearchNetCCRetrieval (go)": 3.2, + "CodeSearchNetCCRetrieval (ruby)": 11.04, + "CodeSearchNetCCRetrieval (java)": 4.12, + "CodeSearchNetCCRetrieval (php)": 2.43, + "CodeSearchNetRetrieval (python)": 10.29, + "CodeSearchNetRetrieval (javascript)": 6.59, + "CodeSearchNetRetrieval (go)": 7.18, + "CodeSearchNetRetrieval (ruby)": 7.29, + "CodeSearchNetRetrieval (java)": 5.06, + "CodeSearchNetRetrieval (php)": 6.07, + "CodeTransOceanContest": 9.79, + "CodeTransOceanDL": 31.48, + "CosQA": 0.35, + "CovidRetrieval": 0.0, + "DBPedia": 0.23, + "FEVER": 0.2, + "FiQA2018": 0.33, + "GerDaLIR": 0.25, + "GerDaLIRSmall": 0.72, + "GermanQuAD-Retrieval": 5.61, + "HellaSwag": 2.48, + "HotpotQA": 0.93, + "LEMBNarrativeQARetrieval": 2.65, + "LEMBQMSumRetrieval": 7.24, + "LEMBSummScreenFDRetrieval": 7.8, + "LEMBWikimQARetrieval": 10.12, + "LeCaRDv2": 9.14, + "LegalBenchConsumerContractsQA": 12.36, + "LegalBenchCorporateLobbying": 22.42, + "LegalQuAD": 3.1, + "LegalSummarization": 15.01, "MIRACLRetrieval (ru)": 1.98, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.0, + "MIRACLRetrieval (en)": 0.02, + "MIRACLRetrieval (es)": 0.0, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.13, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.05, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.31, + "MIRACLRetrieval (sw)": 0.18, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 0.49, + "MIRACLRetrieval (zh)": 0.0, + "MintakaRetrieval (ar)": 0.26, + "MintakaRetrieval (de)": 0.85, + "MintakaRetrieval (es)": 0.9, + "MintakaRetrieval (fr)": 0.85, + "MintakaRetrieval (hi)": 0.6, + "MintakaRetrieval (it)": 0.68, + "MintakaRetrieval (ja)": 0.26, + "MintakaRetrieval (pt)": 0.99, + "NFCorpus": 1.99, + "NQ": 0.2, + "PIQA": 1.65, + "Quail": 0.19, + "QuoraRetrieval": 44.36, + "RARbCode": 0.21, + "RARbMath": 4.27, "RiaNewsRetrieval": 11.11, - "RuBQRetrieval": 12.45 + "RuBQRetrieval": 12.45, + "SCIDOCS": 0.23, + "SIQA": 0.13, + "SciFact": 1.73, + "SciFact-PL": 0.66, + "SpartQA": 0.02, + "StackOverflowQA": 6.87, + "SyntecRetrieval": 11.87, + "SyntheticText2SQL": 4.57, + "TRECCOVID": 3.91, + "TRECCOVID-PL": 1.16, + "TempReasonL1": 0.19, + "TempReasonL2Fact": 0.35, + "TempReasonL2Pure": 0.09, + "TempReasonL3Fact": 0.45, + "TempReasonL3Pure": 0.3, + "Touche2020": 0.29, + "WinoGrande": 0.38, + "XMarket (de)": 0.49, + "XMarket (en)": 0.7, + "XMarket (es)": 0.39, + "XPQARetrieval (ara-ara)": 2.35, + "XPQARetrieval (eng-ara)": 0.76, + "XPQARetrieval (ara-eng)": 2.03, + "XPQARetrieval (deu-deu)": 10.72, + "XPQARetrieval (eng-deu)": 0.63, + "XPQARetrieval (deu-eng)": 4.67, + "XPQARetrieval (spa-spa)": 4.19, + "XPQARetrieval (eng-spa)": 0.7, + "XPQARetrieval (spa-eng)": 2.37, + "XPQARetrieval (fra-fra)": 9.13, + "XPQARetrieval (eng-fra)": 0.71, + "XPQARetrieval (fra-eng)": 3.77, + "XPQARetrieval (hin-hin)": 4.85, + "XPQARetrieval (eng-hin)": 2.3, + "XPQARetrieval (hin-eng)": 0.92, + "XPQARetrieval (ita-ita)": 8.48, + "XPQARetrieval (eng-ita)": 1.0, + "XPQARetrieval (ita-eng)": 2.85, + "XPQARetrieval (jpn-jpn)": 3.62, + "XPQARetrieval (eng-jpn)": 1.13, + "XPQARetrieval (jpn-eng)": 1.32, + "XPQARetrieval (kor-kor)": 1.82, + "XPQARetrieval (eng-kor)": 0.54, + "XPQARetrieval (kor-eng)": 0.91, + "XPQARetrieval (pol-pol)": 6.15, + "XPQARetrieval (eng-pol)": 1.0, + "XPQARetrieval (pol-eng)": 1.89, + "XPQARetrieval (por-por)": 5.66, + "XPQARetrieval (eng-por)": 0.75, + "XPQARetrieval (por-eng)": 1.76, + "XPQARetrieval (tam-tam)": 1.18, + "XPQARetrieval (eng-tam)": 1.73, + "XPQARetrieval (tam-eng)": 0.76, + "XPQARetrieval (cmn-cmn)": 6.07, + "XPQARetrieval (eng-cmn)": 1.94, + "XPQARetrieval (cmn-eng)": 1.18 } ] }, @@ -4582,14 +13120,130 @@ "cosine_spearman": [ { "Model": "sbert_large_nlu_ru", + "BIOSSES": 39.69, + "CDSC-R": 58.92, + "GermanSTSBenchmark": 25.4, + "SICK-R": 58.86, + "SICK-R-PL": 39.82, + "SICKFr": 48.31, + "STS12": 31.48, + "STS13": 37.11, + "STS14": 36.65, + "STS15": 58.25, + "STS16": 58.5, + "STS17 (en-tr)": 1.31, + "STS17 (nl-en)": 13.17, + "STS17 (it-en)": 11.05, + "STS17 (fr-en)": 20.49, + "STS17 (es-es)": 44.07, + "STS17 (ko-ko)": 7.57, + "STS17 (en-ar)": 21.74, + "STS17 (en-en)": 62.5, + "STS17 (es-en)": 10.26, + "STS17 (ar-ar)": 11.58, + "STS17 (en-de)": 9.52, + "STS22 (es-it)": 29.06, + "STS22 (de)": 14.13, + "STS22 (es-en)": 13.97, + "STS22 (de-en)": 18.76, + "STS22 (pl)": 10.48, + "STS22 (ar)": 24.15, + "STS22 (fr-pl)": 50.71, + "STS22 (es)": 38.98, + "STS22 (tr)": 29.01, + "STS22 (it)": 39.38, + "STS22 (zh)": 20.71, + "STS22 (en)": 57.74, + "STS22 (pl-en)": 6.58, + "STS22 (fr)": 55.23, + "STS22 (zh-en)": 7.36, + "STS22 (de-fr)": 29.79, + "STS22 (de-pl)": -20.42, + "STSB": 4.05, + "STSBenchmark": 46.8, + "STSBenchmarkMultilingualSTS (de)": 29.29, + "STSBenchmarkMultilingualSTS (zh)": 5.4, + "STSBenchmarkMultilingualSTS (pl)": 39.01, + "STSBenchmarkMultilingualSTS (es)": 30.71, + "STSBenchmarkMultilingualSTS (pt)": 33.45, + "STSBenchmarkMultilingualSTS (en)": 46.8, + "STSBenchmarkMultilingualSTS (ru)": 58.45, + "STSBenchmarkMultilingualSTS (nl)": 33.46, + "STSBenchmarkMultilingualSTS (fr)": 37.91, + "STSBenchmarkMultilingualSTS (it)": 33.1 + }, + { + "Model": "sbert_large_nlu_ru", + "BIOSSES": 39.69, + "CDSC-R": 58.92, + "GermanSTSBenchmark": 25.4, "RUParaPhraserSTS": 62.06, "RuSTSBenchmarkSTS": 58.82, - "STS22 (ru)": 50.75 + "SICK-R": 58.86, + "SICK-R-PL": 39.82, + "SICKFr": 48.31, + "STS12": 31.48, + "STS13": 37.11, + "STS14": 36.65, + "STS15": 58.25, + "STS16": 58.5, + "STS17 (en-tr)": 1.31, + "STS17 (nl-en)": 13.17, + "STS17 (it-en)": 11.05, + "STS17 (fr-en)": 20.49, + "STS17 (es-es)": 44.07, + "STS17 (ko-ko)": 7.62, + "STS17 (en-ar)": 21.74, + "STS17 (en-en)": 62.5, + "STS17 (es-en)": 10.26, + "STS17 (ar-ar)": 12.45, + "STS17 (en-de)": 9.52, + "STS22 (ru)": 50.75, + "STS22 (es-it)": 29.06, + "STS22 (de)": 14.12, + "STS22 (es-en)": 13.97, + "STS22 (de-en)": 18.76, + "STS22 (pl)": 10.29, + "STS22 (ar)": 24.13, + "STS22 (fr-pl)": 50.71, + "STS22 (es)": 38.98, + "STS22 (tr)": 29.01, + "STS22 (it)": 39.38, + "STS22 (zh)": 20.71, + "STS22 (en)": 57.74, + "STS22 (pl-en)": 6.58, + "STS22 (fr)": 55.23, + "STS22 (zh-en)": 7.36, + "STS22 (de-fr)": 29.79, + "STS22 (de-pl)": -20.42, + "STSB": 4.09, + "STSBenchmark": 46.8, + "STSBenchmarkMultilingualSTS (de)": 29.29, + "STSBenchmarkMultilingualSTS (zh)": 5.41, + "STSBenchmarkMultilingualSTS (pl)": 39.01, + "STSBenchmarkMultilingualSTS (es)": 30.71, + "STSBenchmarkMultilingualSTS (pt)": 33.45, + "STSBenchmarkMultilingualSTS (en)": 46.8, + "STSBenchmarkMultilingualSTS (ru)": 58.46, + "STSBenchmarkMultilingualSTS (nl)": 33.46, + "STSBenchmarkMultilingualSTS (fr)": 37.91, + "STSBenchmarkMultilingualSTS (it)": 33.1 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "sbert_large_nlu_ru", + "SummEval": 28.2, + "SummEvalFr": 28.06 + }, + { + "Model": "sbert_large_nlu_ru", + "SummEval": 28.2, + "SummEvalFr": 28.06 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -4601,7 +13255,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "sbert_large_nlu_ru", + "Core17InstructionRetrieval": -2.17, + "News21InstructionRetrieval": 3.38, + "Robust04InstructionRetrieval": 0.46 + } + ] } }, "aliyun__OpenSearch-text-hybrid": { @@ -4953,6 +13614,7 @@ "ArguAna": 49.28, "CQADupstackRetrieval": 31.86, "ClimateFEVER": 13.62, + "CmedqaRetrieval": 1.34, "DBPedia": 29.91, "FEVER": 48.09, "FiQA2018": 25.14, @@ -4962,8 +13624,10 @@ "NQ": 28.5, "QuoraRetrieval": 80.42, "SCIDOCS": 15.78, - "SciFact": 68.7, + "SciFact": 68.74, + "SciFact-PL": 56.36, "TRECCOVID": 62.31, + "TRECCOVID-PL": 44.6, "Touche2020": 33.05 } ] @@ -5159,6 +13823,7 @@ "f1": [ { "Model": "LaBSE-en-ru", + "BornholmBitextMining": 37.36, "Tatoeba (rus-eng)": 93.62 } ] @@ -5167,10 +13832,57 @@ "accuracy": [ { "Model": "LaBSE-en-ru", + "AllegroReviews": 28.22, + "AmazonCounterfactualClassification (en-ext)": 76.12, + "AmazonCounterfactualClassification (en)": 76.06, + "AmazonCounterfactualClassification (de)": 52.69, + "AmazonCounterfactualClassification (ja)": 56.98, + "AmazonPolarityClassification": 68.35, + "AmazonReviewsClassification (en)": 35.53, + "AmazonReviewsClassification (de)": 29.83, + "AmazonReviewsClassification (es)": 33.68, + "AmazonReviewsClassification (fr)": 32.75, + "AmazonReviewsClassification (ja)": 20.65, + "AmazonReviewsClassification (zh)": 20.77, + "AngryTweetsClassification": 46.23, + "Banking77Classification": 69.6, + "CBD": 52.86, + "DanishPoliticalCommentsClassification": 30.4, + "EmotionClassification": 36.15, "GeoreviewClassification": 40.89, "HeadlineClassification": 68.75, + "ImdbClassification": 62.87, "InappropriatenessClassification": 58.48, "KinopoiskClassification": 49.85, + "LccSentimentClassification": 44.53, + "MTOPDomainClassification (en)": 85.6, + "MTOPDomainClassification (de)": 59.47, + "MTOPDomainClassification (es)": 61.23, + "MTOPDomainClassification (fr)": 64.84, + "MTOPDomainClassification (hi)": 20.35, + "MTOPDomainClassification (th)": 14.86, + "MTOPIntentClassification (en)": 62.39, + "MTOPIntentClassification (de)": 35.1, + "MTOPIntentClassification (es)": 42.27, + "MTOPIntentClassification (fr)": 41.03, + "MTOPIntentClassification (hi)": 4.17, + "MTOPIntentClassification (th)": 4.44, + "MasakhaNEWSClassification (amh)": 34.89, + "MasakhaNEWSClassification (eng)": 78.43, + "MasakhaNEWSClassification (fra)": 73.44, + "MasakhaNEWSClassification (hau)": 59.15, + "MasakhaNEWSClassification (ibo)": 50.46, + "MasakhaNEWSClassification (lin)": 67.09, + "MasakhaNEWSClassification (lug)": 52.91, + "MasakhaNEWSClassification (orm)": 43.51, + "MasakhaNEWSClassification (pcm)": 90.23, + "MasakhaNEWSClassification (run)": 57.14, + "MasakhaNEWSClassification (sna)": 74.61, + "MasakhaNEWSClassification (som)": 44.18, + "MasakhaNEWSClassification (swa)": 54.39, + "MasakhaNEWSClassification (tir)": 24.82, + "MasakhaNEWSClassification (xho)": 60.51, + "MasakhaNEWSClassification (yor)": 63.77, "MassiveIntentClassification (sw)": 19.98, "MassiveIntentClassification (az)": 19.52, "MassiveIntentClassification (tr)": 24.12, @@ -5273,9 +13985,16 @@ "MassiveScenarioClassification (hu)": 33.92, "MassiveScenarioClassification (ko)": 7.37, "MassiveScenarioClassification (ar)": 12.43, + "NoRecClassification": 40.21, + "NordicLangClassification": 38.84, + "PAC": 68.61, + "PolEmo2.0-IN": 55.44, + "PolEmo2.0-OUT": 33.64, "RuReviewsClassification": 58.01, "RuSciBenchGRNTIClassification": 52.8, - "RuSciBenchOECDClassification": 40.36 + "RuSciBenchOECDClassification": 40.36, + "ToxicConversationsClassification": 60.26, + "TweetSentimentExtractionClassification": 57.75 } ] }, @@ -5283,11 +14002,67 @@ "v_measure": [ { "Model": "LaBSE-en-ru", + "AlloProfClusteringP2P": 55.09, + "AlloProfClusteringS2S": 22.69, + "ArxivClusteringP2P": 31.41, + "ArxivClusteringS2S": 20.66, + "BiorxivClusteringP2P": 28.94, + "BiorxivClusteringS2S": 18.04, + "BlurbsClusteringP2P": 30.05, + "BlurbsClusteringS2S": 11.13, "GeoreviewClusteringP2P": 51.89, + "HALClusteringS2S": 13.46, "MLSUMClusteringP2P (ru)": 37.87, + "MLSUMClusteringP2P (de)": 34.6, + "MLSUMClusteringP2P (fr)": 41.44, + "MLSUMClusteringP2P (es)": 41.36, "MLSUMClusteringS2S (ru)": 41.24, + "MLSUMClusteringS2S (de)": 37.56, + "MLSUMClusteringS2S (fr)": 40.85, + "MLSUMClusteringS2S (es)": 41.08, + "MasakhaNEWSClusteringP2P (amh)": 40.17, + "MasakhaNEWSClusteringP2P (eng)": 51.39, + "MasakhaNEWSClusteringP2P (fra)": 58.64, + "MasakhaNEWSClusteringP2P (hau)": 37.52, + "MasakhaNEWSClusteringP2P (ibo)": 33.42, + "MasakhaNEWSClusteringP2P (lin)": 54.8, + "MasakhaNEWSClusteringP2P (lug)": 44.12, + "MasakhaNEWSClusteringP2P (orm)": 24.77, + "MasakhaNEWSClusteringP2P (pcm)": 67.72, + "MasakhaNEWSClusteringP2P (run)": 50.52, + "MasakhaNEWSClusteringP2P (sna)": 57.22, + "MasakhaNEWSClusteringP2P (som)": 27.57, + "MasakhaNEWSClusteringP2P (swa)": 20.27, + "MasakhaNEWSClusteringP2P (tir)": 48.36, + "MasakhaNEWSClusteringP2P (xho)": 39.56, + "MasakhaNEWSClusteringP2P (yor)": 29.12, + "MasakhaNEWSClusteringS2S (amh)": 44.07, + "MasakhaNEWSClusteringS2S (eng)": 21.34, + "MasakhaNEWSClusteringS2S (fra)": 26.33, + "MasakhaNEWSClusteringS2S (hau)": 12.88, + "MasakhaNEWSClusteringS2S (ibo)": 29.53, + "MasakhaNEWSClusteringS2S (lin)": 51.47, + "MasakhaNEWSClusteringS2S (lug)": 44.0, + "MasakhaNEWSClusteringS2S (orm)": 21.96, + "MasakhaNEWSClusteringS2S (pcm)": 69.63, + "MasakhaNEWSClusteringS2S (run)": 46.67, + "MasakhaNEWSClusteringS2S (sna)": 44.44, + "MasakhaNEWSClusteringS2S (som)": 24.45, + "MasakhaNEWSClusteringS2S (swa)": 13.6, + "MasakhaNEWSClusteringS2S (tir)": 45.41, + "MasakhaNEWSClusteringS2S (xho)": 27.49, + "MasakhaNEWSClusteringS2S (yor)": 27.71, + "MedrxivClusteringP2P": 28.51, + "MedrxivClusteringS2S": 23.45, + "RedditClustering": 25.45, + "RedditClusteringP2P": 47.49, "RuSciBenchGRNTIClusteringP2P": 47.48, - "RuSciBenchOECDClusteringP2P": 41.16 + "RuSciBenchOECDClusteringP2P": 41.16, + "StackExchangeClustering": 32.87, + "StackExchangeClusteringP2P": 27.69, + "TenKGnadClusteringP2P": 40.15, + "TenKGnadClusteringS2S": 13.25, + "TwentyNewsgroupsClustering": 21.92 } ] }, @@ -5295,13 +14070,51 @@ "max_ap": [ { "Model": "LaBSE-en-ru", + "CDSC-E": 46.83, + "FalseFriendsGermanEnglish": 45.84, "OpusparcusPC (ru)": 87.18, - "TERRa": 55.61 + "OpusparcusPC (de)": 92.16, + "OpusparcusPC (en)": 98.01, + "OpusparcusPC (fi)": 85.3, + "OpusparcusPC (fr)": 88.76, + "OpusparcusPC (sv)": 86.89, + "PSC": 88.51, + "PawsXPairClassification (de)": 49.25, + "PawsXPairClassification (en)": 54.46, + "PawsXPairClassification (es)": 49.41, + "PawsXPairClassification (fr)": 51.62, + "PawsXPairClassification (ja)": 47.43, + "PawsXPairClassification (ko)": 46.91, + "PawsXPairClassification (zh)": 50.78, + "SICK-E-PL": 46.03, + "SprintDuplicateQuestions": 88.53, + "TERRa": 55.61, + "TwitterSemEval2015": 60.61, + "TwitterURLCorpus": 84.04 }, { "Model": "LaBSE-en-ru", + "CDSC-E": 46.83, + "FalseFriendsGermanEnglish": 45.86, "OpusparcusPC (ru)": 87.18, - "TERRa": 55.61 + "OpusparcusPC (de)": 92.16, + "OpusparcusPC (en)": 98.01, + "OpusparcusPC (fi)": 85.3, + "OpusparcusPC (fr)": 88.76, + "OpusparcusPC (sv)": 86.89, + "PSC": 88.51, + "PawsXPairClassification (de)": 49.72, + "PawsXPairClassification (en)": 54.46, + "PawsXPairClassification (es)": 49.41, + "PawsXPairClassification (fr)": 51.69, + "PawsXPairClassification (ja)": 47.43, + "PawsXPairClassification (ko)": 47.05, + "PawsXPairClassification (zh)": 51.37, + "SICK-E-PL": 46.12, + "SprintDuplicateQuestions": 88.53, + "TERRa": 55.61, + "TwitterSemEval2015": 60.61, + "TwitterURLCorpus": 84.04 } ] }, @@ -5309,11 +14122,35 @@ "map": [ { "Model": "LaBSE-en-ru", - "MIRACLReranking (ru)": 28.86 + "AlloprofReranking": 42.29, + "AskUbuntuDupQuestions": 53.02, + "MindSmallReranking": 29.22, + "RuBQReranking": 54.83, + "SciDocsRR": 67.31, + "StackOverflowDupQuestions": 42.36, + "SyntecReranking": 53.7, + "T2Reranking": 57.1 }, { "Model": "LaBSE-en-ru", - "RuBQReranking": 54.83 + "MIRACLReranking (ru)": 28.86, + "MIRACLReranking (ar)": 3.36, + "MIRACLReranking (bn)": 2.99, + "MIRACLReranking (de)": 18.12, + "MIRACLReranking (en)": 23.94, + "MIRACLReranking (es)": 18.08, + "MIRACLReranking (fa)": 3.51, + "MIRACLReranking (fi)": 30.5, + "MIRACLReranking (fr)": 18.36, + "MIRACLReranking (hi)": 2.65, + "MIRACLReranking (id)": 15.56, + "MIRACLReranking (ja)": 2.73, + "MIRACLReranking (ko)": 4.15, + "MIRACLReranking (sw)": 18.93, + "MIRACLReranking (te)": 3.04, + "MIRACLReranking (th)": 2.32, + "MIRACLReranking (yo)": 40.52, + "MIRACLReranking (zh)": 4.15 } ] }, @@ -5321,9 +14158,143 @@ "ndcg_at_10": [ { "Model": "LaBSE-en-ru", + "AILACasedocs": 22.41, + "AILAStatutes": 15.94, + "ARCChallenge": 3.46, + "AlloprofRetrieval": 10.73, + "AlphaNLI": 12.55, + "AppsRetrieval": 2.26, + "ArguAna": 31.91, + "BSARDRetrieval": 1.61, + "ClimateFEVER": 3.16, + "CmedqaRetrieval": 1.55, + "CodeFeedbackMT": 24.65, + "CodeFeedbackST": 39.86, + "CodeSearchNetCCRetrieval (python)": 29.56, + "CodeSearchNetCCRetrieval (javascript)": 39.74, + "CodeSearchNetCCRetrieval (go)": 27.72, + "CodeSearchNetCCRetrieval (ruby)": 36.38, + "CodeSearchNetCCRetrieval (java)": 33.75, + "CodeSearchNetCCRetrieval (php)": 24.25, + "CodeSearchNetRetrieval (python)": 59.24, + "CodeSearchNetRetrieval (javascript)": 45.1, + "CodeSearchNetRetrieval (go)": 49.06, + "CodeSearchNetRetrieval (ruby)": 49.67, + "CodeSearchNetRetrieval (java)": 33.58, + "CodeSearchNetRetrieval (php)": 44.36, + "CodeTransOceanContest": 32.93, + "CodeTransOceanDL": 32.37, + "CosQA": 8.76, + "CovidRetrieval": 0.35, + "DBPedia": 13.51, + "FEVER": 8.6, + "FiQA2018": 6.8, + "GerDaLIR": 0.92, + "GerDaLIRSmall": 2.24, + "GermanQuAD-Retrieval": 64.84, + "HellaSwag": 5.57, + "HotpotQA": 17.02, + "LEMBNarrativeQARetrieval": 11.1, + "LEMBQMSumRetrieval": 18.32, + "LEMBSummScreenFDRetrieval": 45.33, + "LEMBWikimQARetrieval": 25.1, + "LeCaRDv2": 12.13, + "LegalBenchConsumerContractsQA": 56.69, + "LegalBenchCorporateLobbying": 66.24, + "LegalQuAD": 12.78, + "LegalSummarization": 52.49, "MIRACLRetrieval (ru)": 10.58, + "MIRACLRetrieval (ar)": 0.04, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 4.18, + "MIRACLRetrieval (en)": 4.0, + "MIRACLRetrieval (es)": 2.34, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 11.13, + "MIRACLRetrieval (fr)": 4.4, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 2.75, + "MIRACLRetrieval (ja)": 0.04, + "MIRACLRetrieval (ko)": 2.06, + "MIRACLRetrieval (sw)": 7.82, + "MIRACLRetrieval (te)": 0.04, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 16.31, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 6.78, + "MintakaRetrieval (ar)": 0.48, + "MintakaRetrieval (de)": 15.01, + "MintakaRetrieval (es)": 14.71, + "MintakaRetrieval (fr)": 14.4, + "MintakaRetrieval (hi)": 0.88, + "MintakaRetrieval (it)": 15.28, + "MintakaRetrieval (ja)": 0.97, + "MintakaRetrieval (pt)": 14.97, + "NFCorpus": 12.59, + "NQ": 7.01, + "PIQA": 6.47, + "Quail": 1.55, + "QuoraRetrieval": 76.76, + "RARbCode": 2.11, + "RARbMath": 27.24, "RiaNewsRetrieval": 34.73, - "RuBQRetrieval": 29.03 + "RuBQRetrieval": 29.03, + "SCIDOCS": 4.96, + "SIQA": 1.05, + "SciFact": 33.78, + "SciFact-PL": 22.71, + "SpartQA": 1.52, + "StackOverflowQA": 38.16, + "SyntecRetrieval": 32.97, + "SyntheticText2SQL": 39.52, + "TRECCOVID": 14.9, + "TRECCOVID-PL": 8.43, + "TempReasonL1": 1.65, + "TempReasonL2Fact": 7.07, + "TempReasonL2Pure": 0.1, + "TempReasonL3Fact": 8.34, + "TempReasonL3Pure": 4.69, + "Touche2020": 4.16, + "WinoGrande": 53.29, + "XMarket (de)": 1.71, + "XMarket (en)": 2.38, + "XMarket (es)": 1.98, + "XPQARetrieval (ara-ara)": 4.65, + "XPQARetrieval (eng-ara)": 4.38, + "XPQARetrieval (ara-eng)": 6.93, + "XPQARetrieval (deu-deu)": 32.76, + "XPQARetrieval (eng-deu)": 15.1, + "XPQARetrieval (deu-eng)": 29.14, + "XPQARetrieval (spa-spa)": 26.94, + "XPQARetrieval (eng-spa)": 15.15, + "XPQARetrieval (spa-eng)": 24.44, + "XPQARetrieval (fra-fra)": 34.15, + "XPQARetrieval (eng-fra)": 13.19, + "XPQARetrieval (fra-eng)": 31.59, + "XPQARetrieval (hin-hin)": 7.03, + "XPQARetrieval (eng-hin)": 6.64, + "XPQARetrieval (hin-eng)": 7.09, + "XPQARetrieval (ita-ita)": 38.17, + "XPQARetrieval (eng-ita)": 15.56, + "XPQARetrieval (ita-eng)": 30.28, + "XPQARetrieval (jpn-jpn)": 6.77, + "XPQARetrieval (eng-jpn)": 3.35, + "XPQARetrieval (jpn-eng)": 8.17, + "XPQARetrieval (kor-kor)": 2.68, + "XPQARetrieval (eng-kor)": 4.24, + "XPQARetrieval (kor-eng)": 2.89, + "XPQARetrieval (pol-pol)": 17.92, + "XPQARetrieval (eng-pol)": 9.24, + "XPQARetrieval (pol-eng)": 15.74, + "XPQARetrieval (por-por)": 20.3, + "XPQARetrieval (eng-por)": 10.69, + "XPQARetrieval (por-eng)": 19.59, + "XPQARetrieval (tam-tam)": 3.01, + "XPQARetrieval (eng-tam)": 5.01, + "XPQARetrieval (tam-eng)": 2.93, + "XPQARetrieval (cmn-cmn)": 14.24, + "XPQARetrieval (eng-cmn)": 7.1, + "XPQARetrieval (cmn-eng)": 12.57 } ] }, @@ -5331,8 +14302,66 @@ "cosine_spearman": [ { "Model": "LaBSE-en-ru", + "BIOSSES": 78.82, + "CDSC-R": 70.29, + "GermanSTSBenchmark": 51.6, + "SICK-R": 69.13, + "SICK-R-PL": 47.05, + "SICKFr": 60.31, + "STS12": 65.16, + "STS13": 67.66, + "STS14": 63.32, + "STS15": 76.21, + "STS16": 73.27, + "STS17 (en-en)": 79.48, + "STS17 (en-ar)": 1.85, + "STS17 (en-tr)": 30.01, + "STS17 (fr-en)": 49.54, + "STS17 (ar-ar)": 15.84, + "STS17 (es-es)": 68.88, + "STS17 (es-en)": 46.6, + "STS17 (it-en)": 48.58, + "STS17 (en-de)": 45.81, + "STS17 (nl-en)": 42.33, + "STS17 (ko-ko)": 10.13, + "STSB": 7.69, + "STSBenchmark": 71.91, + "STSBenchmarkMultilingualSTS (de)": 54.55, + "STSBenchmarkMultilingualSTS (it)": 54.52, + "STSBenchmarkMultilingualSTS (en)": 71.91, + "STSBenchmarkMultilingualSTS (pl)": 51.02, + "STSBenchmarkMultilingualSTS (fr)": 58.4, + "STSBenchmarkMultilingualSTS (es)": 53.05, + "STSBenchmarkMultilingualSTS (pt)": 51.64, + "STSBenchmarkMultilingualSTS (nl)": 52.8, + "STSBenchmarkMultilingualSTS (zh)": 11.39 + }, + { + "Model": "LaBSE-en-ru", + "BIOSSES": 78.82, + "CDSC-R": 70.29, + "GermanSTSBenchmark": 51.6, "RUParaPhraserSTS": 65.87, "RuSTSBenchmarkSTS": 73.32, + "SICK-R": 69.13, + "SICK-R-PL": 47.05, + "SICKFr": 60.32, + "STS12": 65.16, + "STS13": 67.66, + "STS14": 63.32, + "STS15": 76.21, + "STS16": 73.27, + "STS17 (en-en)": 79.48, + "STS17 (en-ar)": 1.85, + "STS17 (en-tr)": 30.01, + "STS17 (fr-en)": 49.54, + "STS17 (ar-ar)": 15.51, + "STS17 (es-es)": 68.87, + "STS17 (es-en)": 46.6, + "STS17 (it-en)": 48.58, + "STS17 (en-de)": 45.81, + "STS17 (nl-en)": 42.33, + "STS17 (ko-ko)": 9.85, "STS22 (de)": 38.9, "STS22 (en)": 59.47, "STS22 (pl-en)": 58.73, @@ -5351,12 +14380,34 @@ "STS22 (zh-en)": 24.98, "STS22 (ar)": 31.85, "STS22 (zh)": 35.1, - "STSBenchmarkMultilingualSTS (ru)": 73.02 + "STSB": 7.53, + "STSBenchmark": 71.91, + "STSBenchmarkMultilingualSTS (ru)": 73.02, + "STSBenchmarkMultilingualSTS (de)": 54.55, + "STSBenchmarkMultilingualSTS (it)": 54.52, + "STSBenchmarkMultilingualSTS (en)": 71.91, + "STSBenchmarkMultilingualSTS (pl)": 51.02, + "STSBenchmarkMultilingualSTS (fr)": 58.4, + "STSBenchmarkMultilingualSTS (es)": 53.05, + "STSBenchmarkMultilingualSTS (pt)": 51.64, + "STSBenchmarkMultilingualSTS (nl)": 52.8, + "STSBenchmarkMultilingualSTS (zh)": 11.31 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LaBSE-en-ru", + "SummEval": 30.58, + "SummEvalFr": 31.63 + }, + { + "Model": "LaBSE-en-ru", + "SummEval": 30.58, + "SummEvalFr": 31.63 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -5368,26 +14419,304 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "LaBSE-en-ru", + "Core17InstructionRetrieval": -1.6, + "News21InstructionRetrieval": -0.39, + "Robust04InstructionRetrieval": -9.07 + } + ] } }, "cointegrated__rubert-tiny": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "rubert-tiny", + "BornholmBitextMining": 22.12, + "Tatoeba (aze-eng)": 0.54, + "Tatoeba (mar-eng)": 0.01, + "Tatoeba (kab-eng)": 0.14, + "Tatoeba (ile-eng)": 10.55, + "Tatoeba (ell-eng)": 0.2, + "Tatoeba (slv-eng)": 1.62, + "Tatoeba (dan-eng)": 3.21, + "Tatoeba (slk-eng)": 0.76, + "Tatoeba (ceb-eng)": 1.07, + "Tatoeba (swg-eng)": 2.2, + "Tatoeba (war-eng)": 0.97, + "Tatoeba (eus-eng)": 0.86, + "Tatoeba (ido-eng)": 2.59, + "Tatoeba (arz-eng)": 0.07, + "Tatoeba (kur-eng)": 1.73, + "Tatoeba (nds-eng)": 3.17, + "Tatoeba (awa-eng)": 0.11, + "Tatoeba (heb-eng)": 0.43, + "Tatoeba (fao-eng)": 2.21, + "Tatoeba (cmn-eng)": 0.6, + "Tatoeba (cym-eng)": 0.76, + "Tatoeba (hun-eng)": 0.36, + "Tatoeba (tel-eng)": 0.06, + "Tatoeba (amh-eng)": 0.61, + "Tatoeba (ita-eng)": 3.99, + "Tatoeba (nno-eng)": 1.65, + "Tatoeba (uzb-eng)": 2.94, + "Tatoeba (sqi-eng)": 1.38, + "Tatoeba (pms-eng)": 1.81, + "Tatoeba (ben-eng)": 0.15, + "Tatoeba (nov-eng)": 18.1, + "Tatoeba (ast-eng)": 5.05, + "Tatoeba (cha-eng)": 2.63, + "Tatoeba (wuu-eng)": 0.17, + "Tatoeba (dsb-eng)": 1.4, + "Tatoeba (kat-eng)": 0.2, + "Tatoeba (tam-eng)": 0.36, + "Tatoeba (khm-eng)": 0.03, + "Tatoeba (afr-eng)": 3.2, + "Tatoeba (epo-eng)": 1.58, + "Tatoeba (fra-eng)": 5.04, + "Tatoeba (tha-eng)": 0.19, + "Tatoeba (swe-eng)": 2.77, + "Tatoeba (tzl-eng)": 1.78, + "Tatoeba (isl-eng)": 0.83, + "Tatoeba (bel-eng)": 9.88, + "Tatoeba (max-eng)": 3.1, + "Tatoeba (rus-eng)": 78.56, + "Tatoeba (ang-eng)": 6.76, + "Tatoeba (kaz-eng)": 3.22, + "Tatoeba (ces-eng)": 0.8, + "Tatoeba (cbk-eng)": 2.38, + "Tatoeba (gle-eng)": 0.38, + "Tatoeba (kor-eng)": 0.25, + "Tatoeba (cor-eng)": 0.58, + "Tatoeba (hsb-eng)": 1.71, + "Tatoeba (hin-eng)": 0.03, + "Tatoeba (xho-eng)": 1.25, + "Tatoeba (orv-eng)": 5.51, + "Tatoeba (oci-eng)": 1.91, + "Tatoeba (fin-eng)": 0.4, + "Tatoeba (ukr-eng)": 20.11, + "Tatoeba (nld-eng)": 4.39, + "Tatoeba (mhr-eng)": 1.99, + "Tatoeba (ind-eng)": 1.02, + "Tatoeba (tuk-eng)": 1.84, + "Tatoeba (fry-eng)": 6.18, + "Tatoeba (tur-eng)": 0.44, + "Tatoeba (mon-eng)": 1.34, + "Tatoeba (vie-eng)": 0.89, + "Tatoeba (ara-eng)": 0.0, + "Tatoeba (lat-eng)": 3.23, + "Tatoeba (ber-eng)": 0.66, + "Tatoeba (glg-eng)": 2.41, + "Tatoeba (est-eng)": 0.86, + "Tatoeba (arq-eng)": 0.17, + "Tatoeba (bos-eng)": 2.31, + "Tatoeba (jav-eng)": 1.71, + "Tatoeba (tat-eng)": 1.82, + "Tatoeba (swh-eng)": 1.04, + "Tatoeba (mkd-eng)": 10.82, + "Tatoeba (ron-eng)": 2.69, + "Tatoeba (uig-eng)": 0.2, + "Tatoeba (dtp-eng)": 0.46, + "Tatoeba (pam-eng)": 0.4, + "Tatoeba (srp-eng)": 4.8, + "Tatoeba (hye-eng)": 0.0, + "Tatoeba (yid-eng)": 0.15, + "Tatoeba (cat-eng)": 3.31, + "Tatoeba (bul-eng)": 19.67, + "Tatoeba (zsm-eng)": 1.1, + "Tatoeba (lvs-eng)": 0.65, + "Tatoeba (gsw-eng)": 4.01, + "Tatoeba (lfn-eng)": 4.36, + "Tatoeba (por-eng)": 2.54, + "Tatoeba (nob-eng)": 2.32, + "Tatoeba (mal-eng)": 0.01, + "Tatoeba (hrv-eng)": 1.97, + "Tatoeba (pol-eng)": 0.92, + "Tatoeba (yue-eng)": 0.0, + "Tatoeba (kzj-eng)": 0.45, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (gla-eng)": 0.79, + "Tatoeba (deu-eng)": 2.04, + "Tatoeba (spa-eng)": 3.18, + "Tatoeba (lit-eng)": 0.71, + "Tatoeba (tgl-eng)": 0.79, + "Tatoeba (pes-eng)": 0.0, + "Tatoeba (csb-eng)": 0.86, + "Tatoeba (ina-eng)": 8.61, + "Tatoeba (bre-eng)": 0.56, + "Tatoeba (jpn-eng)": 0.0 + } + ] }, "Classification": { "accuracy": [ { "Model": "rubert-tiny", + "AllegroReviews": 23.22, + "AmazonCounterfactualClassification (en-ext)": 67.74, + "AmazonCounterfactualClassification (en)": 69.09, + "AmazonCounterfactualClassification (de)": 66.01, + "AmazonCounterfactualClassification (ja)": 50.63, + "AmazonPolarityClassification": 62.69, + "AmazonReviewsClassification (en)": 30.32, + "AmazonReviewsClassification (de)": 24.9, + "AmazonReviewsClassification (es)": 26.09, + "AmazonReviewsClassification (fr)": 26.03, + "AmazonReviewsClassification (ja)": 21.16, + "AmazonReviewsClassification (zh)": 21.09, + "AngryTweetsClassification": 41.09, + "Banking77Classification": 61.03, + "CBD": 50.55, + "DanishPoliticalCommentsClassification": 27.63, + "EmotionClassification": 27.94, "GeoreviewClassification": 33.45, "HeadlineClassification": 57.65, + "ImdbClassification": 60.52, "InappropriatenessClassification": 54.5, "KinopoiskClassification": 41.36, + "LccSentimentClassification": 36.2, + "MTOPDomainClassification (en)": 81.03, + "MTOPDomainClassification (de)": 59.95, + "MTOPDomainClassification (es)": 62.76, + "MTOPDomainClassification (fr)": 58.29, + "MTOPDomainClassification (hi)": 23.3, + "MTOPDomainClassification (th)": 16.71, + "MTOPIntentClassification (en)": 61.7, + "MTOPIntentClassification (de)": 45.91, + "MTOPIntentClassification (es)": 47.12, + "MTOPIntentClassification (fr)": 42.63, + "MTOPIntentClassification (hi)": 4.95, + "MTOPIntentClassification (th)": 4.81, + "MasakhaNEWSClassification (amh)": 31.52, + "MasakhaNEWSClassification (eng)": 69.47, + "MasakhaNEWSClassification (fra)": 55.52, + "MasakhaNEWSClassification (hau)": 50.35, + "MasakhaNEWSClassification (ibo)": 39.18, + "MasakhaNEWSClassification (lin)": 50.91, + "MasakhaNEWSClassification (lug)": 44.89, + "MasakhaNEWSClassification (orm)": 44.55, + "MasakhaNEWSClassification (pcm)": 82.43, + "MasakhaNEWSClassification (run)": 42.58, + "MasakhaNEWSClassification (sna)": 63.06, + "MasakhaNEWSClassification (som)": 30.78, + "MasakhaNEWSClassification (swa)": 37.92, + "MasakhaNEWSClassification (tir)": 26.18, + "MasakhaNEWSClassification (xho)": 55.25, + "MasakhaNEWSClassification (yor)": 44.04, "MassiveIntentClassification (ru)": 50.1, + "MassiveIntentClassification (el)": 11.82, + "MassiveIntentClassification (it)": 43.42, + "MassiveIntentClassification (zh-CN)": 7.35, + "MassiveIntentClassification (az)": 35.1, + "MassiveIntentClassification (nb)": 37.0, + "MassiveIntentClassification (te)": 2.86, + "MassiveIntentClassification (pt)": 39.64, + "MassiveIntentClassification (sw)": 37.25, + "MassiveIntentClassification (sl)": 39.27, + "MassiveIntentClassification (ja)": 7.35, + "MassiveIntentClassification (ka)": 2.94, + "MassiveIntentClassification (bn)": 3.54, + "MassiveIntentClassification (ta)": 3.05, + "MassiveIntentClassification (tr)": 38.14, + "MassiveIntentClassification (ko)": 2.66, + "MassiveIntentClassification (fr)": 38.64, + "MassiveIntentClassification (kn)": 3.81, + "MassiveIntentClassification (ms)": 38.99, + "MassiveIntentClassification (tl)": 37.39, + "MassiveIntentClassification (vi)": 30.18, + "MassiveIntentClassification (lv)": 28.27, + "MassiveIntentClassification (ro)": 37.64, + "MassiveIntentClassification (my)": 4.21, + "MassiveIntentClassification (sq)": 38.01, + "MassiveIntentClassification (es)": 38.18, + "MassiveIntentClassification (af)": 37.64, + "MassiveIntentClassification (th)": 4.54, + "MassiveIntentClassification (zh-TW)": 9.27, + "MassiveIntentClassification (id)": 39.87, + "MassiveIntentClassification (he)": 2.73, + "MassiveIntentClassification (mn)": 37.02, + "MassiveIntentClassification (hi)": 3.71, + "MassiveIntentClassification (hy)": 3.49, + "MassiveIntentClassification (hu)": 35.29, + "MassiveIntentClassification (cy)": 35.89, + "MassiveIntentClassification (km)": 4.66, + "MassiveIntentClassification (sv)": 38.81, + "MassiveIntentClassification (da)": 40.76, + "MassiveIntentClassification (en)": 53.87, + "MassiveIntentClassification (is)": 32.71, + "MassiveIntentClassification (nl)": 38.91, + "MassiveIntentClassification (ur)": 3.45, + "MassiveIntentClassification (pl)": 33.66, + "MassiveIntentClassification (ar)": 5.6, + "MassiveIntentClassification (am)": 2.96, + "MassiveIntentClassification (fa)": 3.31, + "MassiveIntentClassification (ml)": 2.49, + "MassiveIntentClassification (de)": 38.61, + "MassiveIntentClassification (jv)": 36.44, + "MassiveIntentClassification (fi)": 39.21, "MassiveScenarioClassification (ru)": 52.15, + "MassiveScenarioClassification (ko)": 7.26, + "MassiveScenarioClassification (lv)": 29.87, + "MassiveScenarioClassification (hu)": 36.51, + "MassiveScenarioClassification (es)": 39.99, + "MassiveScenarioClassification (az)": 35.95, + "MassiveScenarioClassification (te)": 7.69, + "MassiveScenarioClassification (tl)": 38.89, + "MassiveScenarioClassification (el)": 19.12, + "MassiveScenarioClassification (zh-CN)": 12.03, + "MassiveScenarioClassification (pt)": 39.54, + "MassiveScenarioClassification (ml)": 7.32, + "MassiveScenarioClassification (fi)": 38.52, + "MassiveScenarioClassification (fr)": 40.47, + "MassiveScenarioClassification (de)": 41.26, + "MassiveScenarioClassification (km)": 9.8, + "MassiveScenarioClassification (ms)": 41.21, + "MassiveScenarioClassification (sl)": 39.95, + "MassiveScenarioClassification (mn)": 33.93, + "MassiveScenarioClassification (sw)": 39.17, + "MassiveScenarioClassification (id)": 40.86, + "MassiveScenarioClassification (th)": 9.37, + "MassiveScenarioClassification (vi)": 33.37, + "MassiveScenarioClassification (ro)": 40.6, + "MassiveScenarioClassification (af)": 38.1, + "MassiveScenarioClassification (my)": 9.76, + "MassiveScenarioClassification (kn)": 7.92, + "MassiveScenarioClassification (ka)": 6.98, + "MassiveScenarioClassification (sv)": 37.58, + "MassiveScenarioClassification (he)": 7.87, + "MassiveScenarioClassification (ar)": 11.34, + "MassiveScenarioClassification (fa)": 7.05, + "MassiveScenarioClassification (tr)": 38.96, + "MassiveScenarioClassification (bn)": 7.71, + "MassiveScenarioClassification (am)": 8.08, + "MassiveScenarioClassification (en)": 58.9, + "MassiveScenarioClassification (nl)": 39.06, + "MassiveScenarioClassification (is)": 34.57, + "MassiveScenarioClassification (hy)": 9.29, + "MassiveScenarioClassification (da)": 41.12, + "MassiveScenarioClassification (sq)": 39.18, + "MassiveScenarioClassification (it)": 44.54, + "MassiveScenarioClassification (jv)": 38.75, + "MassiveScenarioClassification (zh-TW)": 14.03, + "MassiveScenarioClassification (ur)": 8.54, + "MassiveScenarioClassification (pl)": 34.41, + "MassiveScenarioClassification (nb)": 37.43, + "MassiveScenarioClassification (hi)": 8.51, + "MassiveScenarioClassification (ja)": 11.31, + "MassiveScenarioClassification (ta)": 6.91, + "MassiveScenarioClassification (cy)": 38.09, + "NoRecClassification": 39.16, + "NordicLangClassification": 54.8, + "PAC": 61.59, + "PolEmo2.0-IN": 41.09, + "PolEmo2.0-OUT": 33.81, "RuReviewsClassification": 49.56, "RuSciBenchGRNTIClassification": 35.71, - "RuSciBenchOECDClassification": 26.51 + "RuSciBenchOECDClassification": 26.51, + "ToxicConversationsClassification": 59.95, + "TweetSentimentExtractionClassification": 50.65 } ] }, @@ -5395,9 +14724,67 @@ "v_measure": [ { "Model": "rubert-tiny", + "AlloProfClusteringP2P": 42.77, + "AlloProfClusteringS2S": 24.76, + "ArxivClusteringP2P": 22.47, + "ArxivClusteringS2S": 15.5, + "BiorxivClusteringP2P": 19.69, + "BiorxivClusteringS2S": 11.72, + "BlurbsClusteringP2P": 9.69, + "BlurbsClusteringS2S": 8.78, "GeoreviewClusteringP2P": 34.4, + "HALClusteringS2S": 7.39, + "MLSUMClusteringP2P (de)": 10.31, + "MLSUMClusteringP2P (fr)": 27.42, + "MLSUMClusteringP2P (ru)": 43.77, + "MLSUMClusteringP2P (es)": 30.69, + "MLSUMClusteringS2S (de)": 9.7, + "MLSUMClusteringS2S (fr)": 26.52, + "MLSUMClusteringS2S (ru)": 43.04, + "MLSUMClusteringS2S (es)": 30.54, + "MasakhaNEWSClusteringP2P (amh)": 40.23, + "MasakhaNEWSClusteringP2P (eng)": 44.77, + "MasakhaNEWSClusteringP2P (fra)": 34.35, + "MasakhaNEWSClusteringP2P (hau)": 9.03, + "MasakhaNEWSClusteringP2P (ibo)": 21.9, + "MasakhaNEWSClusteringP2P (lin)": 47.93, + "MasakhaNEWSClusteringP2P (lug)": 47.18, + "MasakhaNEWSClusteringP2P (orm)": 26.34, + "MasakhaNEWSClusteringP2P (pcm)": 51.64, + "MasakhaNEWSClusteringP2P (run)": 44.15, + "MasakhaNEWSClusteringP2P (sna)": 43.87, + "MasakhaNEWSClusteringP2P (som)": 23.83, + "MasakhaNEWSClusteringP2P (swa)": 6.81, + "MasakhaNEWSClusteringP2P (tir)": 44.47, + "MasakhaNEWSClusteringP2P (xho)": 33.89, + "MasakhaNEWSClusteringP2P (yor)": 23.12, + "MasakhaNEWSClusteringS2S (amh)": 40.03, + "MasakhaNEWSClusteringS2S (eng)": 17.24, + "MasakhaNEWSClusteringS2S (fra)": 20.82, + "MasakhaNEWSClusteringS2S (hau)": 9.8, + "MasakhaNEWSClusteringS2S (ibo)": 25.65, + "MasakhaNEWSClusteringS2S (lin)": 50.3, + "MasakhaNEWSClusteringS2S (lug)": 42.74, + "MasakhaNEWSClusteringS2S (orm)": 21.84, + "MasakhaNEWSClusteringS2S (pcm)": 38.52, + "MasakhaNEWSClusteringS2S (run)": 44.78, + "MasakhaNEWSClusteringS2S (sna)": 44.25, + "MasakhaNEWSClusteringS2S (som)": 26.25, + "MasakhaNEWSClusteringS2S (swa)": 13.15, + "MasakhaNEWSClusteringS2S (tir)": 42.57, + "MasakhaNEWSClusteringS2S (xho)": 23.88, + "MasakhaNEWSClusteringS2S (yor)": 21.71, + "MedrxivClusteringP2P": 21.25, + "MedrxivClusteringS2S": 17.78, + "RedditClustering": 16.23, + "RedditClusteringP2P": 31.54, "RuSciBenchGRNTIClusteringP2P": 29.89, - "RuSciBenchOECDClusteringP2P": 27.98 + "RuSciBenchOECDClusteringP2P": 27.98, + "StackExchangeClustering": 31.47, + "StackExchangeClusteringP2P": 27.14, + "TenKGnadClusteringP2P": 15.1, + "TenKGnadClusteringS2S": 8.62, + "TwentyNewsgroupsClustering": 15.43 } ] }, @@ -5405,11 +14792,54 @@ "max_ap": [ { "Model": "rubert-tiny", - "TERRa": 51.06 + "CDSC-E": 43.2, + "FalseFriendsGermanEnglish": 48.27, + "OpusparcusPC (de)": 90.13, + "OpusparcusPC (en)": 96.89, + "OpusparcusPC (fi)": 84.26, + "OpusparcusPC (fr)": 86.19, + "OpusparcusPC (ru)": 82.95, + "OpusparcusPC (sv)": 81.52, + "PSC": 57.47, + "PawsXPairClassification (de)": 48.32, + "PawsXPairClassification (en)": 44.09, + "PawsXPairClassification (es)": 47.03, + "PawsXPairClassification (fr)": 49.15, + "PawsXPairClassification (ja)": 48.29, + "PawsXPairClassification (ko)": 46.45, + "PawsXPairClassification (zh)": 50.89, + "SICK-E-PL": 45.51, + "SprintDuplicateQuestions": 53.27, + "TwitterSemEval2015": 54.51, + "TwitterURLCorpus": 71.97 + }, + { + "Model": "rubert-tiny", + "CDSC-E": 44.08, + "FalseFriendsGermanEnglish": 48.41, + "OpusparcusPC (de)": 90.6, + "OpusparcusPC (en)": 96.89, + "OpusparcusPC (fi)": 84.97, + "OpusparcusPC (fr)": 86.61, + "OpusparcusPC (ru)": 83.43, + "OpusparcusPC (sv)": 82.2, + "PSC": 59.31, + "PawsXPairClassification (de)": 48.32, + "PawsXPairClassification (en)": 44.24, + "PawsXPairClassification (es)": 47.16, + "PawsXPairClassification (fr)": 49.4, + "PawsXPairClassification (ja)": 48.29, + "PawsXPairClassification (ko)": 46.68, + "PawsXPairClassification (zh)": 50.97, + "SICK-E-PL": 45.58, + "SprintDuplicateQuestions": 53.27, + "TERRa": 52.85, + "TwitterSemEval2015": 54.51, + "TwitterURLCorpus": 71.97 }, { "Model": "rubert-tiny", - "TERRa": 52.85 + "TERRa": 51.06 } ] }, @@ -5417,7 +14847,35 @@ "map": [ { "Model": "rubert-tiny", - "RuBQReranking": 35.44 + "AlloprofReranking": 31.42, + "AskUbuntuDupQuestions": 47.47, + "MindSmallReranking": 25.98, + "RuBQReranking": 35.44, + "SciDocsRR": 51.43, + "StackOverflowDupQuestions": 35.42, + "SyntecReranking": 42.77, + "T2Reranking": 55.86 + }, + { + "Model": "rubert-tiny", + "MIRACLReranking (ar)": 1.96, + "MIRACLReranking (bn)": 1.68, + "MIRACLReranking (de)": 6.36, + "MIRACLReranking (en)": 9.34, + "MIRACLReranking (es)": 6.58, + "MIRACLReranking (fa)": 3.45, + "MIRACLReranking (fi)": 8.24, + "MIRACLReranking (fr)": 4.62, + "MIRACLReranking (hi)": 4.05, + "MIRACLReranking (id)": 5.85, + "MIRACLReranking (ja)": 2.12, + "MIRACLReranking (ko)": 4.88, + "MIRACLReranking (ru)": 7.08, + "MIRACLReranking (sw)": 6.62, + "MIRACLReranking (te)": 2.5, + "MIRACLReranking (th)": 2.02, + "MIRACLReranking (yo)": 8.39, + "MIRACLReranking (zh)": 2.27 } ] }, @@ -5425,13 +14883,256 @@ "ndcg_at_10": [ { "Model": "rubert-tiny", + "AILACasedocs": 11.96, + "AILAStatutes": 12.64, + "ARCChallenge": 2.13, + "AlloprofRetrieval": 1.39, + "AlphaNLI": 1.48, + "AppsRetrieval": 0.27, + "ArguAna": 24.16, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 1.5, + "CmedqaRetrieval": 0.25, + "CodeFeedbackMT": 13.47, + "CodeFeedbackST": 8.09, + "CodeSearchNetCCRetrieval (python)": 11.11, + "CodeSearchNetCCRetrieval (javascript)": 20.43, + "CodeSearchNetCCRetrieval (go)": 10.31, + "CodeSearchNetCCRetrieval (ruby)": 21.66, + "CodeSearchNetCCRetrieval (java)": 15.11, + "CodeSearchNetCCRetrieval (php)": 11.37, + "CodeSearchNetRetrieval (python)": 20.82, + "CodeSearchNetRetrieval (javascript)": 9.78, + "CodeSearchNetRetrieval (go)": 13.54, + "CodeSearchNetRetrieval (ruby)": 17.34, + "CodeSearchNetRetrieval (java)": 10.23, + "CodeSearchNetRetrieval (php)": 9.12, + "CodeTransOceanContest": 13.36, + "CodeTransOceanDL": 33.44, + "CosQA": 3.73, + "CovidRetrieval": 0.0, + "DBPedia": 1.56, + "FEVER": 0.7, + "FiQA2018": 1.92, + "GerDaLIR": 0.23, + "GerDaLIRSmall": 0.62, + "GermanQuAD-Retrieval": 19.33, + "HellaSwag": 3.74, + "HotpotQA": 2.19, + "LEMBNarrativeQARetrieval": 2.46, + "LEMBQMSumRetrieval": 8.72, + "LEMBSummScreenFDRetrieval": 5.39, + "LEMBWikimQARetrieval": 12.42, + "LeCaRDv2": 11.58, + "LegalBenchConsumerContractsQA": 26.36, + "LegalBenchCorporateLobbying": 53.56, + "LegalQuAD": 5.11, + "LegalSummarization": 38.02, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.03, + "MIRACLRetrieval (en)": 0.06, + "MIRACLRetrieval (es)": 0.15, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.77, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.19, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.5, + "MIRACLRetrieval (ru)": 0.58, + "MIRACLRetrieval (sw)": 0.83, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.02, + "MIRACLRetrieval (yo)": 0.25, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 1.27, + "MintakaRetrieval (ar)": 0.3, + "MintakaRetrieval (de)": 1.07, + "MintakaRetrieval (es)": 1.17, + "MintakaRetrieval (fr)": 1.4, + "MintakaRetrieval (hi)": 0.62, + "MintakaRetrieval (it)": 1.68, + "MintakaRetrieval (ja)": 0.44, + "MintakaRetrieval (pt)": 1.61, + "NFCorpus": 2.82, + "NQ": 0.75, + "PIQA": 3.51, + "Quail": 0.41, + "QuoraRetrieval": 58.8, + "RARbCode": 0.63, + "RARbMath": 4.68, "RiaNewsRetrieval": 0.79, - "RuBQRetrieval": 3.24 + "RuBQRetrieval": 3.24, + "SCIDOCS": 0.86, + "SIQA": 0.23, + "SciFact": 12.55, + "SciFact-PL": 3.63, + "SpartQA": 3.34, + "StackOverflowQA": 16.66, + "SyntecRetrieval": 17.8, + "SyntheticText2SQL": 4.72, + "TRECCOVID": 9.47, + "TRECCOVID-PL": 5.75, + "TempReasonL1": 0.16, + "TempReasonL2Fact": 0.77, + "TempReasonL2Pure": 0.05, + "TempReasonL3Fact": 1.66, + "TempReasonL3Pure": 2.69, + "Touche2020": 2.36, + "WinoGrande": 2.34, + "XMarket (de)": 1.35, + "XMarket (en)": 1.77, + "XMarket (es)": 1.45, + "XPQARetrieval (ara-ara)": 2.46, + "XPQARetrieval (eng-ara)": 0.35, + "XPQARetrieval (ara-eng)": 1.44, + "XPQARetrieval (deu-deu)": 15.49, + "XPQARetrieval (eng-deu)": 1.11, + "XPQARetrieval (deu-eng)": 5.18, + "XPQARetrieval (spa-spa)": 13.85, + "XPQARetrieval (eng-spa)": 0.99, + "XPQARetrieval (spa-eng)": 5.73, + "XPQARetrieval (fra-fra)": 17.15, + "XPQARetrieval (eng-fra)": 1.39, + "XPQARetrieval (fra-eng)": 7.88, + "XPQARetrieval (hin-hin)": 4.78, + "XPQARetrieval (eng-hin)": 1.99, + "XPQARetrieval (hin-eng)": 1.22, + "XPQARetrieval (ita-ita)": 25.22, + "XPQARetrieval (eng-ita)": 2.02, + "XPQARetrieval (ita-eng)": 8.16, + "XPQARetrieval (jpn-jpn)": 3.57, + "XPQARetrieval (eng-jpn)": 0.42, + "XPQARetrieval (jpn-eng)": 1.29, + "XPQARetrieval (kor-kor)": 1.75, + "XPQARetrieval (eng-kor)": 1.26, + "XPQARetrieval (kor-eng)": 1.48, + "XPQARetrieval (pol-pol)": 8.06, + "XPQARetrieval (eng-pol)": 0.75, + "XPQARetrieval (pol-eng)": 4.61, + "XPQARetrieval (por-por)": 11.81, + "XPQARetrieval (eng-por)": 1.03, + "XPQARetrieval (por-eng)": 3.32, + "XPQARetrieval (tam-tam)": 1.94, + "XPQARetrieval (eng-tam)": 1.11, + "XPQARetrieval (tam-eng)": 0.93, + "XPQARetrieval (cmn-cmn)": 5.96, + "XPQARetrieval (eng-cmn)": 0.81, + "XPQARetrieval (cmn-eng)": 1.56 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "rubert-tiny", + "BIOSSES": 58.0, + "CDSC-R": 64.0, + "GermanSTSBenchmark": 48.0, + "SICK-R": 60.39, + "SICK-R-PL": 43.68, + "SICKFr": 54.08, + "STS12": 49.66, + "STS13": 63.4, + "STS14": 54.88, + "STS15": 67.32, + "STS16": 66.87, + "STS17 (nl-en)": 10.21, + "STS17 (ar-ar)": 13.35, + "STS17 (es-en)": 14.43, + "STS17 (en-tr)": -0.72, + "STS17 (fr-en)": 11.82, + "STS17 (en-de)": 2.33, + "STS17 (it-en)": 7.59, + "STS17 (en-en)": 67.66, + "STS17 (en-ar)": 20.94, + "STS17 (ko-ko)": 10.74, + "STS17 (es-es)": 62.45, + "STS22 (tr)": 20.65, + "STS22 (es-it)": 32.31, + "STS22 (en)": 52.63, + "STS22 (ar)": 18.64, + "STS22 (pl)": 8.46, + "STS22 (it)": 46.79, + "STS22 (fr-pl)": -5.63, + "STS22 (es)": 47.32, + "STS22 (de-fr)": 10.26, + "STS22 (fr)": 45.86, + "STS22 (pl-en)": 16.24, + "STS22 (de-en)": 22.47, + "STS22 (zh-en)": 1.05, + "STS22 (de)": 18.79, + "STS22 (zh)": 26.99, + "STS22 (de-pl)": -5.36, + "STS22 (es-en)": 27.71, + "STSB": 21.79, + "STSBenchmark": 55.86, + "STSBenchmarkMultilingualSTS (pl)": 49.04, + "STSBenchmarkMultilingualSTS (nl)": 46.82, + "STSBenchmarkMultilingualSTS (de)": 49.2, + "STSBenchmarkMultilingualSTS (fr)": 52.85, + "STSBenchmarkMultilingualSTS (en)": 55.86, + "STSBenchmarkMultilingualSTS (pt)": 44.54, + "STSBenchmarkMultilingualSTS (es)": 49.77, + "STSBenchmarkMultilingualSTS (zh)": 19.6, + "STSBenchmarkMultilingualSTS (it)": 49.68, + "STSBenchmarkMultilingualSTS (ru)": 58.23 + }, + { + "Model": "rubert-tiny", + "BIOSSES": 58.0, + "CDSC-R": 64.0, + "GermanSTSBenchmark": 48.0, + "SICK-R": 60.39, + "SICK-R-PL": 43.68, + "SICKFr": 54.08, + "STS12": 49.66, + "STS13": 63.4, + "STS14": 54.88, + "STS15": 67.32, + "STS16": 66.87, + "STS17 (nl-en)": 10.21, + "STS17 (ar-ar)": 13.18, + "STS17 (es-en)": 14.43, + "STS17 (en-tr)": -0.72, + "STS17 (fr-en)": 11.82, + "STS17 (en-de)": 2.33, + "STS17 (it-en)": 7.59, + "STS17 (en-en)": 67.66, + "STS17 (en-ar)": 20.94, + "STS17 (ko-ko)": 10.98, + "STS17 (es-es)": 62.45, + "STS22 (tr)": 20.65, + "STS22 (es-it)": 32.31, + "STS22 (en)": 52.63, + "STS22 (ar)": 18.64, + "STS22 (pl)": 8.57, + "STS22 (it)": 46.79, + "STS22 (fr-pl)": -5.63, + "STS22 (es)": 47.32, + "STS22 (de-fr)": 10.26, + "STS22 (fr)": 45.86, + "STS22 (pl-en)": 16.24, + "STS22 (de-en)": 22.47, + "STS22 (zh-en)": 1.05, + "STS22 (de)": 18.79, + "STS22 (zh)": 26.99, + "STS22 (de-pl)": -5.36, + "STS22 (es-en)": 27.71, + "STSB": 21.79, + "STSBenchmark": 55.86, + "STSBenchmarkMultilingualSTS (pl)": 49.04, + "STSBenchmarkMultilingualSTS (nl)": 46.82, + "STSBenchmarkMultilingualSTS (de)": 49.2, + "STSBenchmarkMultilingualSTS (fr)": 52.85, + "STSBenchmarkMultilingualSTS (en)": 55.86, + "STSBenchmarkMultilingualSTS (pt)": 44.54, + "STSBenchmarkMultilingualSTS (es)": 49.77, + "STSBenchmarkMultilingualSTS (zh)": 19.62, + "STSBenchmarkMultilingualSTS (it)": 49.68, + "STSBenchmarkMultilingualSTS (ru)": 58.23 + }, { "Model": "rubert-tiny", "RUParaPhraserSTS": 53.41, @@ -5441,7 +15142,18 @@ ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-tiny", + "SummEval": 29.54, + "SummEvalFr": 28.57 + }, + { + "Model": "rubert-tiny", + "SummEval": 29.54, + "SummEvalFr": 28.57 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -5453,26 +15165,304 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "rubert-tiny", + "Core17InstructionRetrieval": 0.41, + "News21InstructionRetrieval": 1.04, + "Robust04InstructionRetrieval": -4.12 + } + ] } }, "cointegrated__rubert-tiny2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "rubert-tiny2", + "BornholmBitextMining": 37.66, + "Tatoeba (sqi-eng)": 3.46, + "Tatoeba (orv-eng)": 9.61, + "Tatoeba (swe-eng)": 4.08, + "Tatoeba (deu-eng)": 3.94, + "Tatoeba (nov-eng)": 23.41, + "Tatoeba (isl-eng)": 3.1, + "Tatoeba (ina-eng)": 12.44, + "Tatoeba (yid-eng)": 0.02, + "Tatoeba (pes-eng)": 0.7, + "Tatoeba (fra-eng)": 6.21, + "Tatoeba (ben-eng)": 0.01, + "Tatoeba (pam-eng)": 2.4, + "Tatoeba (epo-eng)": 2.98, + "Tatoeba (kat-eng)": 1.0, + "Tatoeba (nno-eng)": 4.35, + "Tatoeba (gsw-eng)": 9.92, + "Tatoeba (glg-eng)": 4.34, + "Tatoeba (mkd-eng)": 13.4, + "Tatoeba (bre-eng)": 2.27, + "Tatoeba (max-eng)": 7.3, + "Tatoeba (fry-eng)": 10.42, + "Tatoeba (jav-eng)": 2.29, + "Tatoeba (ron-eng)": 4.44, + "Tatoeba (ara-eng)": 0.28, + "Tatoeba (ast-eng)": 12.51, + "Tatoeba (hye-eng)": 0.54, + "Tatoeba (kab-eng)": 0.64, + "Tatoeba (gle-eng)": 2.76, + "Tatoeba (dsb-eng)": 2.13, + "Tatoeba (hun-eng)": 2.03, + "Tatoeba (yue-eng)": 0.36, + "Tatoeba (mon-eng)": 7.39, + "Tatoeba (dtp-eng)": 1.49, + "Tatoeba (por-eng)": 4.5, + "Tatoeba (kor-eng)": 0.35, + "Tatoeba (amh-eng)": 0.65, + "Tatoeba (mhr-eng)": 6.0, + "Tatoeba (fin-eng)": 1.28, + "Tatoeba (hin-eng)": 0.03, + "Tatoeba (kur-eng)": 3.96, + "Tatoeba (gla-eng)": 2.06, + "Tatoeba (bel-eng)": 16.75, + "Tatoeba (heb-eng)": 0.58, + "Tatoeba (jpn-eng)": 0.26, + "Tatoeba (vie-eng)": 3.96, + "Tatoeba (dan-eng)": 6.31, + "Tatoeba (ita-eng)": 7.38, + "Tatoeba (ang-eng)": 16.64, + "Tatoeba (wuu-eng)": 0.14, + "Tatoeba (tgl-eng)": 2.5, + "Tatoeba (nld-eng)": 9.18, + "Tatoeba (tam-eng)": 0.02, + "Tatoeba (ceb-eng)": 2.55, + "Tatoeba (zsm-eng)": 2.78, + "Tatoeba (hrv-eng)": 5.06, + "Tatoeba (lvs-eng)": 1.93, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (lit-eng)": 1.35, + "Tatoeba (spa-eng)": 5.9, + "Tatoeba (pol-eng)": 2.75, + "Tatoeba (arz-eng)": 0.03, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (ido-eng)": 5.98, + "Tatoeba (hsb-eng)": 2.48, + "Tatoeba (ces-eng)": 1.68, + "Tatoeba (ukr-eng)": 25.99, + "Tatoeba (tzl-eng)": 16.56, + "Tatoeba (war-eng)": 2.07, + "Tatoeba (cha-eng)": 8.89, + "Tatoeba (slv-eng)": 3.44, + "Tatoeba (tha-eng)": 0.3, + "Tatoeba (ber-eng)": 3.6, + "Tatoeba (cbk-eng)": 3.55, + "Tatoeba (tat-eng)": 6.52, + "Tatoeba (nob-eng)": 4.68, + "Tatoeba (cmn-eng)": 0.71, + "Tatoeba (uig-eng)": 0.42, + "Tatoeba (bul-eng)": 30.57, + "Tatoeba (ile-eng)": 13.8, + "Tatoeba (lat-eng)": 6.13, + "Tatoeba (cym-eng)": 3.09, + "Tatoeba (cat-eng)": 7.52, + "Tatoeba (kaz-eng)": 8.08, + "Tatoeba (awa-eng)": 0.45, + "Tatoeba (pms-eng)": 6.26, + "Tatoeba (swg-eng)": 10.32, + "Tatoeba (lfn-eng)": 7.11, + "Tatoeba (uzb-eng)": 4.59, + "Tatoeba (cor-eng)": 3.04, + "Tatoeba (slk-eng)": 2.62, + "Tatoeba (nds-eng)": 6.88, + "Tatoeba (csb-eng)": 4.88, + "Tatoeba (tur-eng)": 1.67, + "Tatoeba (bos-eng)": 6.19, + "Tatoeba (eus-eng)": 3.9, + "Tatoeba (kzj-eng)": 1.39, + "Tatoeba (afr-eng)": 5.01, + "Tatoeba (swh-eng)": 3.76, + "Tatoeba (ell-eng)": 0.44, + "Tatoeba (khm-eng)": 0.0, + "Tatoeba (est-eng)": 2.19, + "Tatoeba (ind-eng)": 2.26, + "Tatoeba (mar-eng)": 0.02, + "Tatoeba (tel-eng)": 0.4, + "Tatoeba (aze-eng)": 2.91, + "Tatoeba (srp-eng)": 8.91, + "Tatoeba (oci-eng)": 4.32, + "Tatoeba (fao-eng)": 4.85, + "Tatoeba (arq-eng)": 0.3, + "Tatoeba (rus-eng)": 82.92, + "Tatoeba (tuk-eng)": 3.56, + "Tatoeba (xho-eng)": 5.52 + } + ] }, "Classification": { "accuracy": [ { "Model": "rubert-tiny2", + "AllegroReviews": 23.03, + "AmazonCounterfactualClassification (en-ext)": 66.64, + "AmazonCounterfactualClassification (en)": 67.42, + "AmazonCounterfactualClassification (de)": 55.34, + "AmazonCounterfactualClassification (ja)": 56.48, + "AmazonPolarityClassification": 63.8, + "AmazonReviewsClassification (en)": 33.02, + "AmazonReviewsClassification (de)": 25.63, + "AmazonReviewsClassification (es)": 27.9, + "AmazonReviewsClassification (fr)": 26.79, + "AmazonReviewsClassification (ja)": 21.02, + "AmazonReviewsClassification (zh)": 20.64, + "AngryTweetsClassification": 43.84, + "Banking77Classification": 50.97, + "CBD": 52.68, + "DanishPoliticalCommentsClassification": 27.9, + "EmotionClassification": 24.77, "GeoreviewClassification": 39.64, "HeadlineClassification": 74.19, + "ImdbClassification": 60.71, "InappropriatenessClassification": 58.57, "KinopoiskClassification": 49.06, + "LccSentimentClassification": 38.73, + "MTOPDomainClassification (en)": 76.85, + "MTOPDomainClassification (de)": 54.64, + "MTOPDomainClassification (es)": 55.03, + "MTOPDomainClassification (fr)": 55.17, + "MTOPDomainClassification (hi)": 23.34, + "MTOPDomainClassification (th)": 15.88, + "MTOPIntentClassification (en)": 41.31, + "MTOPIntentClassification (de)": 27.44, + "MTOPIntentClassification (es)": 29.09, + "MTOPIntentClassification (fr)": 28.69, + "MTOPIntentClassification (hi)": 3.77, + "MTOPIntentClassification (th)": 4.54, + "MasakhaNEWSClassification (amh)": 31.12, + "MasakhaNEWSClassification (eng)": 68.91, + "MasakhaNEWSClassification (fra)": 39.67, + "MasakhaNEWSClassification (hau)": 37.58, + "MasakhaNEWSClassification (ibo)": 30.44, + "MasakhaNEWSClassification (lin)": 46.8, + "MasakhaNEWSClassification (lug)": 41.61, + "MasakhaNEWSClassification (orm)": 42.58, + "MasakhaNEWSClassification (pcm)": 83.44, + "MasakhaNEWSClassification (run)": 38.76, + "MasakhaNEWSClassification (sna)": 55.72, + "MasakhaNEWSClassification (som)": 32.69, + "MasakhaNEWSClassification (swa)": 29.05, + "MasakhaNEWSClassification (tir)": 24.26, + "MasakhaNEWSClassification (xho)": 41.72, + "MasakhaNEWSClassification (yor)": 35.84, "MassiveIntentClassification (ru)": 50.83, + "MassiveIntentClassification (cy)": 25.27, + "MassiveIntentClassification (hi)": 2.78, + "MassiveIntentClassification (te)": 1.79, + "MassiveIntentClassification (fi)": 26.26, + "MassiveIntentClassification (hy)": 2.42, + "MassiveIntentClassification (es)": 27.46, + "MassiveIntentClassification (en)": 44.53, + "MassiveIntentClassification (it)": 31.83, + "MassiveIntentClassification (bn)": 2.99, + "MassiveIntentClassification (fr)": 27.01, + "MassiveIntentClassification (nl)": 28.88, + "MassiveIntentClassification (pl)": 23.3, + "MassiveIntentClassification (jv)": 25.99, + "MassiveIntentClassification (ka)": 2.71, + "MassiveIntentClassification (id)": 29.35, + "MassiveIntentClassification (nb)": 25.68, + "MassiveIntentClassification (mn)": 22.97, + "MassiveIntentClassification (ko)": 2.32, + "MassiveIntentClassification (da)": 28.28, + "MassiveIntentClassification (tl)": 27.22, + "MassiveIntentClassification (km)": 4.96, + "MassiveIntentClassification (hu)": 23.69, + "MassiveIntentClassification (ar)": 3.9, + "MassiveIntentClassification (de)": 27.96, + "MassiveIntentClassification (af)": 23.46, + "MassiveIntentClassification (kn)": 2.44, + "MassiveIntentClassification (pt)": 28.77, + "MassiveIntentClassification (sw)": 23.95, + "MassiveIntentClassification (he)": 2.01, + "MassiveIntentClassification (lv)": 18.07, + "MassiveIntentClassification (ml)": 2.51, + "MassiveIntentClassification (el)": 9.85, + "MassiveIntentClassification (sq)": 28.1, + "MassiveIntentClassification (vi)": 19.48, + "MassiveIntentClassification (am)": 2.54, + "MassiveIntentClassification (my)": 3.54, + "MassiveIntentClassification (ms)": 25.64, + "MassiveIntentClassification (zh-TW)": 6.41, + "MassiveIntentClassification (is)": 20.61, + "MassiveIntentClassification (sl)": 25.4, + "MassiveIntentClassification (tr)": 23.04, + "MassiveIntentClassification (th)": 3.79, + "MassiveIntentClassification (ur)": 2.54, + "MassiveIntentClassification (sv)": 27.48, + "MassiveIntentClassification (ja)": 4.77, + "MassiveIntentClassification (ro)": 27.57, + "MassiveIntentClassification (fa)": 3.22, + "MassiveIntentClassification (az)": 20.28, + "MassiveIntentClassification (zh-CN)": 5.02, + "MassiveIntentClassification (ta)": 1.58, "MassiveScenarioClassification (ru)": 59.15, + "MassiveScenarioClassification (zh-CN)": 10.73, + "MassiveScenarioClassification (de)": 35.28, + "MassiveScenarioClassification (cy)": 33.91, + "MassiveScenarioClassification (te)": 6.52, + "MassiveScenarioClassification (tl)": 33.49, + "MassiveScenarioClassification (sw)": 32.49, + "MassiveScenarioClassification (my)": 10.86, + "MassiveScenarioClassification (ms)": 33.05, + "MassiveScenarioClassification (nb)": 31.09, + "MassiveScenarioClassification (zh-TW)": 11.59, + "MassiveScenarioClassification (fr)": 34.68, + "MassiveScenarioClassification (da)": 35.77, + "MassiveScenarioClassification (is)": 26.28, + "MassiveScenarioClassification (fa)": 6.71, + "MassiveScenarioClassification (ro)": 34.52, + "MassiveScenarioClassification (pt)": 33.75, + "MassiveScenarioClassification (sv)": 30.99, + "MassiveScenarioClassification (it)": 37.19, + "MassiveScenarioClassification (es)": 33.12, + "MassiveScenarioClassification (lv)": 25.09, + "MassiveScenarioClassification (ko)": 6.43, + "MassiveScenarioClassification (ml)": 6.61, + "MassiveScenarioClassification (km)": 9.07, + "MassiveScenarioClassification (bn)": 8.09, + "MassiveScenarioClassification (el)": 17.02, + "MassiveScenarioClassification (kn)": 7.28, + "MassiveScenarioClassification (ka)": 6.64, + "MassiveScenarioClassification (fi)": 31.31, + "MassiveScenarioClassification (vi)": 25.7, + "MassiveScenarioClassification (sq)": 35.62, + "MassiveScenarioClassification (ar)": 11.78, + "MassiveScenarioClassification (hy)": 8.09, + "MassiveScenarioClassification (sl)": 30.45, + "MassiveScenarioClassification (th)": 7.79, + "MassiveScenarioClassification (az)": 27.56, + "MassiveScenarioClassification (pl)": 27.65, + "MassiveScenarioClassification (ta)": 6.88, + "MassiveScenarioClassification (jv)": 32.17, + "MassiveScenarioClassification (nl)": 31.82, + "MassiveScenarioClassification (id)": 33.72, + "MassiveScenarioClassification (ur)": 8.49, + "MassiveScenarioClassification (af)": 29.85, + "MassiveScenarioClassification (en)": 55.36, + "MassiveScenarioClassification (hu)": 31.34, + "MassiveScenarioClassification (ja)": 9.53, + "MassiveScenarioClassification (am)": 7.66, + "MassiveScenarioClassification (mn)": 24.63, + "MassiveScenarioClassification (hi)": 7.29, + "MassiveScenarioClassification (he)": 7.83, + "MassiveScenarioClassification (tr)": 30.63, + "NoRecClassification": 40.82, + "NordicLangClassification": 46.52, + "PAC": 62.19, + "PolEmo2.0-IN": 44.18, + "PolEmo2.0-OUT": 31.56, "RuReviewsClassification": 56.99, "RuSciBenchGRNTIClassification": 45.63, - "RuSciBenchOECDClassification": 35.48 + "RuSciBenchOECDClassification": 35.48, + "ToxicConversationsClassification": 57.44, + "TweetSentimentExtractionClassification": 50.37 } ] }, @@ -5480,9 +15470,67 @@ "v_measure": [ { "Model": "rubert-tiny2", + "AlloProfClusteringP2P": 36.88, + "AlloProfClusteringS2S": 21.97, + "ArxivClusteringP2P": 23.06, + "ArxivClusteringS2S": 15.44, + "BiorxivClusteringP2P": 21.46, + "BiorxivClusteringS2S": 12.43, + "BlurbsClusteringP2P": 12.5, + "BlurbsClusteringS2S": 9.41, "GeoreviewClusteringP2P": 41.58, + "HALClusteringS2S": 5.81, + "MLSUMClusteringP2P (de)": 17.92, + "MLSUMClusteringP2P (fr)": 29.01, + "MLSUMClusteringP2P (ru)": 51.63, + "MLSUMClusteringP2P (es)": 33.53, + "MLSUMClusteringS2S (de)": 18.67, + "MLSUMClusteringS2S (fr)": 28.05, + "MLSUMClusteringS2S (ru)": 47.72, + "MLSUMClusteringS2S (es)": 32.34, + "MasakhaNEWSClusteringP2P (amh)": 40.65, + "MasakhaNEWSClusteringP2P (eng)": 23.62, + "MasakhaNEWSClusteringP2P (fra)": 26.3, + "MasakhaNEWSClusteringP2P (hau)": 16.93, + "MasakhaNEWSClusteringP2P (ibo)": 22.81, + "MasakhaNEWSClusteringP2P (lin)": 45.22, + "MasakhaNEWSClusteringP2P (lug)": 44.31, + "MasakhaNEWSClusteringP2P (orm)": 24.05, + "MasakhaNEWSClusteringP2P (pcm)": 50.73, + "MasakhaNEWSClusteringP2P (run)": 46.44, + "MasakhaNEWSClusteringP2P (sna)": 40.81, + "MasakhaNEWSClusteringP2P (som)": 23.35, + "MasakhaNEWSClusteringP2P (swa)": 8.7, + "MasakhaNEWSClusteringP2P (tir)": 43.15, + "MasakhaNEWSClusteringP2P (xho)": 21.63, + "MasakhaNEWSClusteringP2P (yor)": 23.88, + "MasakhaNEWSClusteringS2S (amh)": 45.44, + "MasakhaNEWSClusteringS2S (eng)": 25.98, + "MasakhaNEWSClusteringS2S (fra)": 30.07, + "MasakhaNEWSClusteringS2S (hau)": 10.62, + "MasakhaNEWSClusteringS2S (ibo)": 36.2, + "MasakhaNEWSClusteringS2S (lin)": 50.06, + "MasakhaNEWSClusteringS2S (lug)": 42.19, + "MasakhaNEWSClusteringS2S (orm)": 26.05, + "MasakhaNEWSClusteringS2S (pcm)": 46.52, + "MasakhaNEWSClusteringS2S (run)": 44.3, + "MasakhaNEWSClusteringS2S (sna)": 48.29, + "MasakhaNEWSClusteringS2S (som)": 24.56, + "MasakhaNEWSClusteringS2S (swa)": 15.48, + "MasakhaNEWSClusteringS2S (tir)": 46.56, + "MasakhaNEWSClusteringS2S (xho)": 26.57, + "MasakhaNEWSClusteringS2S (yor)": 26.75, + "MedrxivClusteringP2P": 22.56, + "MedrxivClusteringS2S": 18.65, + "RedditClustering": 17.72, + "RedditClusteringP2P": 32.99, "RuSciBenchGRNTIClusteringP2P": 39.78, - "RuSciBenchOECDClusteringP2P": 35.98 + "RuSciBenchOECDClusteringP2P": 35.98, + "StackExchangeClustering": 29.01, + "StackExchangeClusteringP2P": 25.71, + "TenKGnadClusteringP2P": 11.05, + "TenKGnadClusteringS2S": 8.74, + "TwentyNewsgroupsClustering": 15.63 } ] }, @@ -5490,11 +15538,51 @@ "max_ap": [ { "Model": "rubert-tiny2", - "TERRa": 51.87 + "CDSC-E": 45.08, + "FalseFriendsGermanEnglish": 46.98, + "OpusparcusPC (de)": 91.7, + "OpusparcusPC (en)": 97.3, + "OpusparcusPC (fi)": 86.73, + "OpusparcusPC (fr)": 86.53, + "OpusparcusPC (ru)": 85.11, + "OpusparcusPC (sv)": 85.1, + "PSC": 70.84, + "PawsXPairClassification (de)": 48.65, + "PawsXPairClassification (en)": 45.47, + "PawsXPairClassification (es)": 47.56, + "PawsXPairClassification (fr)": 49.3, + "PawsXPairClassification (ja)": 48.24, + "PawsXPairClassification (ko)": 47.0, + "PawsXPairClassification (zh)": 51.8, + "SICK-E-PL": 47.84, + "SprintDuplicateQuestions": 70.8, + "TERRa": 51.87, + "TwitterSemEval2015": 55.49, + "TwitterURLCorpus": 77.98 }, { "Model": "rubert-tiny2", - "TERRa": 51.87 + "CDSC-E": 45.22, + "FalseFriendsGermanEnglish": 46.98, + "OpusparcusPC (de)": 91.7, + "OpusparcusPC (en)": 97.34, + "OpusparcusPC (fi)": 86.73, + "OpusparcusPC (fr)": 86.53, + "OpusparcusPC (ru)": 85.24, + "OpusparcusPC (sv)": 85.1, + "PSC": 71.22, + "PawsXPairClassification (de)": 48.67, + "PawsXPairClassification (en)": 45.47, + "PawsXPairClassification (es)": 47.61, + "PawsXPairClassification (fr)": 49.3, + "PawsXPairClassification (ja)": 48.41, + "PawsXPairClassification (ko)": 47.48, + "PawsXPairClassification (zh)": 51.83, + "SICK-E-PL": 47.87, + "SprintDuplicateQuestions": 71.16, + "TERRa": 51.87, + "TwitterSemEval2015": 55.49, + "TwitterURLCorpus": 77.98 } ] }, @@ -5502,11 +15590,35 @@ "map": [ { "Model": "rubert-tiny2", - "MIRACLReranking (ru)": 15.81 + "AlloprofReranking": 27.94, + "AskUbuntuDupQuestions": 48.11, + "MindSmallReranking": 26.74, + "RuBQReranking": 46.09, + "SciDocsRR": 54.67, + "StackOverflowDupQuestions": 36.0, + "SyntecReranking": 38.15, + "T2Reranking": 53.19 }, { "Model": "rubert-tiny2", - "RuBQReranking": 46.09 + "MIRACLReranking (ru)": 15.81, + "MIRACLReranking (ar)": 1.94, + "MIRACLReranking (bn)": 1.56, + "MIRACLReranking (de)": 7.23, + "MIRACLReranking (en)": 14.0, + "MIRACLReranking (es)": 7.49, + "MIRACLReranking (fa)": 3.65, + "MIRACLReranking (fi)": 14.18, + "MIRACLReranking (fr)": 5.5, + "MIRACLReranking (hi)": 3.61, + "MIRACLReranking (id)": 8.22, + "MIRACLReranking (ja)": 1.99, + "MIRACLReranking (ko)": 5.1, + "MIRACLReranking (sw)": 9.07, + "MIRACLReranking (te)": 4.07, + "MIRACLReranking (th)": 1.93, + "MIRACLReranking (yo)": 12.97, + "MIRACLReranking (zh)": 2.64 } ] }, @@ -5514,9 +15626,143 @@ "ndcg_at_10": [ { "Model": "rubert-tiny2", + "AILACasedocs": 16.05, + "AILAStatutes": 13.81, + "ARCChallenge": 2.93, + "AlloprofRetrieval": 0.55, + "AlphaNLI": 6.83, + "AppsRetrieval": 0.44, + "ArguAna": 27.66, + "BSARDRetrieval": 0.15, + "ClimateFEVER": 2.08, + "CmedqaRetrieval": 1.09, + "CodeFeedbackMT": 26.04, + "CodeFeedbackST": 20.27, + "CodeSearchNetCCRetrieval (python)": 19.13, + "CodeSearchNetCCRetrieval (javascript)": 25.81, + "CodeSearchNetCCRetrieval (go)": 13.02, + "CodeSearchNetCCRetrieval (ruby)": 28.35, + "CodeSearchNetCCRetrieval (java)": 15.62, + "CodeSearchNetCCRetrieval (php)": 16.02, + "CodeSearchNetRetrieval (python)": 34.54, + "CodeSearchNetRetrieval (javascript)": 16.11, + "CodeSearchNetRetrieval (go)": 22.91, + "CodeSearchNetRetrieval (ruby)": 26.85, + "CodeSearchNetRetrieval (java)": 16.8, + "CodeSearchNetRetrieval (php)": 18.93, + "CodeTransOceanContest": 19.41, + "CodeTransOceanDL": 34.02, + "CosQA": 4.48, + "CovidRetrieval": 0.1, + "DBPedia": 2.66, + "FEVER": 3.01, + "FiQA2018": 2.42, + "GerDaLIR": 0.13, + "GerDaLIRSmall": 0.41, + "GermanQuAD-Retrieval": 16.79, + "HellaSwag": 5.48, + "HotpotQA": 8.85, + "LEMBNarrativeQARetrieval": 4.78, + "LEMBQMSumRetrieval": 16.95, + "LEMBSummScreenFDRetrieval": 19.52, + "LEMBWikimQARetrieval": 14.17, + "LeCaRDv2": 11.28, + "LegalBenchConsumerContractsQA": 39.51, + "LegalBenchCorporateLobbying": 60.19, + "LegalQuAD": 4.11, + "LegalSummarization": 34.19, "MIRACLRetrieval (ru)": 1.89, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.19, + "MIRACLRetrieval (en)": 0.68, + "MIRACLRetrieval (es)": 0.0, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.62, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.12, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.97, + "MIRACLRetrieval (sw)": 0.76, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 1.94, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 1.13, + "MintakaRetrieval (ar)": 0.45, + "MintakaRetrieval (de)": 3.56, + "MintakaRetrieval (es)": 2.95, + "MintakaRetrieval (fr)": 3.07, + "MintakaRetrieval (hi)": 1.03, + "MintakaRetrieval (it)": 4.3, + "MintakaRetrieval (ja)": 0.93, + "MintakaRetrieval (pt)": 4.42, + "NFCorpus": 5.03, + "NQ": 0.62, + "PIQA": 3.01, + "Quail": 0.41, + "QuoraRetrieval": 66.99, + "RARbCode": 1.44, + "RARbMath": 10.72, "RiaNewsRetrieval": 13.92, - "RuBQRetrieval": 10.87 + "RuBQRetrieval": 10.87, + "SCIDOCS": 1.18, + "SIQA": 0.65, + "SciFact": 20.19, + "SciFact-PL": 7.73, + "SpartQA": 6.28, + "StackOverflowQA": 19.97, + "SyntecRetrieval": 14.22, + "SyntheticText2SQL": 19.8, + "TRECCOVID": 8.67, + "TRECCOVID-PL": 4.61, + "TempReasonL1": 0.9, + "TempReasonL2Fact": 3.59, + "TempReasonL2Pure": 0.03, + "TempReasonL3Fact": 4.76, + "TempReasonL3Pure": 4.66, + "Touche2020": 0.91, + "WinoGrande": 1.42, + "XMarket (de)": 0.98, + "XMarket (en)": 1.17, + "XMarket (es)": 0.93, + "XPQARetrieval (ara-ara)": 3.67, + "XPQARetrieval (eng-ara)": 1.24, + "XPQARetrieval (ara-eng)": 4.28, + "XPQARetrieval (deu-deu)": 20.93, + "XPQARetrieval (eng-deu)": 2.09, + "XPQARetrieval (deu-eng)": 8.32, + "XPQARetrieval (spa-spa)": 17.06, + "XPQARetrieval (eng-spa)": 2.7, + "XPQARetrieval (spa-eng)": 10.16, + "XPQARetrieval (fra-fra)": 21.91, + "XPQARetrieval (eng-fra)": 2.93, + "XPQARetrieval (fra-eng)": 10.07, + "XPQARetrieval (hin-hin)": 5.61, + "XPQARetrieval (eng-hin)": 3.34, + "XPQARetrieval (hin-eng)": 3.96, + "XPQARetrieval (ita-ita)": 29.66, + "XPQARetrieval (eng-ita)": 3.76, + "XPQARetrieval (ita-eng)": 12.15, + "XPQARetrieval (jpn-jpn)": 4.65, + "XPQARetrieval (eng-jpn)": 1.38, + "XPQARetrieval (jpn-eng)": 4.31, + "XPQARetrieval (kor-kor)": 2.23, + "XPQARetrieval (eng-kor)": 2.19, + "XPQARetrieval (kor-eng)": 2.06, + "XPQARetrieval (pol-pol)": 12.18, + "XPQARetrieval (eng-pol)": 2.14, + "XPQARetrieval (pol-eng)": 7.82, + "XPQARetrieval (por-por)": 11.97, + "XPQARetrieval (eng-por)": 2.25, + "XPQARetrieval (por-eng)": 6.15, + "XPQARetrieval (tam-tam)": 2.57, + "XPQARetrieval (eng-tam)": 2.2, + "XPQARetrieval (tam-eng)": 2.44, + "XPQARetrieval (cmn-cmn)": 12.1, + "XPQARetrieval (eng-cmn)": 3.64, + "XPQARetrieval (cmn-eng)": 5.93 } ] }, @@ -5524,14 +15770,131 @@ "cosine_spearman": [ { "Model": "rubert-tiny2", + "BIOSSES": 71.48, + "CDSC-R": 67.95, + "GermanSTSBenchmark": 54.76, + "SICK-R": 61.07, + "SICK-R-PL": 48.45, + "SICKFr": 55.65, + "STS12": 65.12, + "STS13": 59.97, + "STS14": 60.28, + "STS15": 74.38, + "STS16": 69.19, + "STS17 (ar-ar)": 17.31, + "STS17 (es-en)": 17.23, + "STS17 (en-ar)": 13.41, + "STS17 (en-tr)": 14.88, + "STS17 (en-de)": 27.75, + "STS17 (es-es)": 66.85, + "STS17 (nl-en)": 30.93, + "STS17 (ko-ko)": 10.32, + "STS17 (en-en)": 73.66, + "STS17 (it-en)": 28.0, + "STS17 (fr-en)": 26.87, + "STS22 (ru)": 54.38, + "STS22 (pl-en)": 26.05, + "STS22 (es-it)": 27.11, + "STS22 (de)": 14.59, + "STS22 (de-pl)": 1.2, + "STS22 (es)": 44.49, + "STS22 (pl)": 9.38, + "STS22 (fr)": 42.82, + "STS22 (ar)": 21.36, + "STS22 (de-en)": 16.12, + "STS22 (fr-pl)": 28.17, + "STS22 (en)": 49.25, + "STS22 (es-en)": 20.13, + "STS22 (de-fr)": 13.24, + "STS22 (zh-en)": 27.07, + "STS22 (zh)": 35.12, + "STS22 (tr)": 20.52, + "STS22 (it)": 43.84, + "STSB": 16.68, + "STSBenchmark": 63.02, + "STSBenchmarkMultilingualSTS (pt)": 49.26, + "STSBenchmarkMultilingualSTS (fr)": 56.61, + "STSBenchmarkMultilingualSTS (nl)": 51.52, + "STSBenchmarkMultilingualSTS (zh)": 16.6, + "STSBenchmarkMultilingualSTS (ru)": 69.29, + "STSBenchmarkMultilingualSTS (it)": 53.15, + "STSBenchmarkMultilingualSTS (es)": 53.78, + "STSBenchmarkMultilingualSTS (pl)": 52.1, + "STSBenchmarkMultilingualSTS (de)": 56.97, + "STSBenchmarkMultilingualSTS (en)": 63.02 + }, + { + "Model": "rubert-tiny2", + "BIOSSES": 71.48, + "CDSC-R": 67.95, + "GermanSTSBenchmark": 54.77, "RUParaPhraserSTS": 65.14, "RuSTSBenchmarkSTS": 69.43, - "STS22 (ru)": 50.23 + "SICK-R": 61.07, + "SICK-R-PL": 48.45, + "SICKFr": 55.65, + "STS12": 65.12, + "STS13": 59.97, + "STS14": 60.28, + "STS15": 74.38, + "STS16": 69.19, + "STS17 (ar-ar)": 16.79, + "STS17 (es-en)": 17.23, + "STS17 (en-ar)": 13.41, + "STS17 (en-tr)": 14.88, + "STS17 (en-de)": 27.75, + "STS17 (es-es)": 66.86, + "STS17 (nl-en)": 30.93, + "STS17 (ko-ko)": 10.4, + "STS17 (en-en)": 73.66, + "STS17 (it-en)": 28.0, + "STS17 (fr-en)": 26.87, + "STS22 (ru)": 54.38, + "STS22 (pl-en)": 26.05, + "STS22 (es-it)": 27.11, + "STS22 (de)": 14.6, + "STS22 (de-pl)": 1.2, + "STS22 (es)": 44.49, + "STS22 (pl)": 9.27, + "STS22 (fr)": 42.82, + "STS22 (ar)": 21.38, + "STS22 (de-en)": 16.12, + "STS22 (fr-pl)": 28.17, + "STS22 (en)": 49.25, + "STS22 (es-en)": 20.13, + "STS22 (de-fr)": 13.24, + "STS22 (zh-en)": 27.07, + "STS22 (zh)": 35.12, + "STS22 (tr)": 20.52, + "STS22 (it)": 43.84, + "STSB": 16.6, + "STSBenchmark": 63.02, + "STSBenchmarkMultilingualSTS (pt)": 49.26, + "STSBenchmarkMultilingualSTS (fr)": 56.61, + "STSBenchmarkMultilingualSTS (nl)": 51.52, + "STSBenchmarkMultilingualSTS (zh)": 16.54, + "STSBenchmarkMultilingualSTS (ru)": 69.29, + "STSBenchmarkMultilingualSTS (it)": 53.15, + "STSBenchmarkMultilingualSTS (es)": 53.78, + "STSBenchmarkMultilingualSTS (pl)": 52.1, + "STSBenchmarkMultilingualSTS (de)": 56.97, + "STSBenchmarkMultilingualSTS (en)": 63.02 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-tiny2", + "SummEval": 28.46, + "SummEvalFr": 32.52 + }, + { + "Model": "rubert-tiny2", + "SummEval": 28.46, + "SummEvalFr": 32.52 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -5543,7 +15906,14 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "rubert-tiny2", + "Core17InstructionRetrieval": 0.88, + "News21InstructionRetrieval": 1.39, + "Robust04InstructionRetrieval": -2.54 + } + ] } }, "dangvantuan__sentence-camembert-base": { @@ -5893,6 +16263,7 @@ "f1": [ { "Model": "USER-base", + "BornholmBitextMining": 34.8, "Tatoeba (rus-eng)": 90.2 } ] @@ -5901,15 +16272,169 @@ "accuracy": [ { "Model": "USER-base", - "GeoreviewClassification": 47.23, - "HeadlineClassification": 74.88, - "InappropriatenessClassification": 61.94, - "KinopoiskClassification": 55.69, - "MassiveIntentClassification (ru)": 65.57, - "MassiveScenarioClassification (ru)": 68.33, - "RuReviewsClassification": 66.44, + "AllegroReviews": 25.6, + "AmazonCounterfactualClassification (en-ext)": 67.92, + "AmazonCounterfactualClassification (en)": 68.37, + "AmazonCounterfactualClassification (de)": 55.33, + "AmazonCounterfactualClassification (ja)": 49.24, + "AmazonPolarityClassification": 66.69, + "AmazonReviewsClassification (en)": 35.19, + "AmazonReviewsClassification (de)": 26.36, + "AmazonReviewsClassification (es)": 27.0, + "AmazonReviewsClassification (fr)": 27.62, + "AmazonReviewsClassification (ja)": 20.55, + "AmazonReviewsClassification (zh)": 22.46, + "AngryTweetsClassification": 46.11, + "Banking77Classification": 65.5, + "CBD": 51.2, + "DanishPoliticalCommentsClassification": 28.0, + "EmotionClassification": 27.16, + "GeoreviewClassification": 46.88, + "HeadlineClassification": 75.0, + "ImdbClassification": 58.56, + "InappropriatenessClassification": 61.83, + "KinopoiskClassification": 56.03, + "LccSentimentClassification": 37.6, + "MTOPDomainClassification (en)": 81.46, + "MTOPDomainClassification (de)": 61.34, + "MTOPDomainClassification (es)": 61.05, + "MTOPDomainClassification (fr)": 59.72, + "MTOPDomainClassification (hi)": 25.63, + "MTOPDomainClassification (th)": 25.03, + "MTOPIntentClassification (en)": 50.21, + "MTOPIntentClassification (de)": 36.4, + "MTOPIntentClassification (es)": 34.85, + "MTOPIntentClassification (fr)": 35.07, + "MTOPIntentClassification (hi)": 4.62, + "MTOPIntentClassification (th)": 8.12, + "MasakhaNEWSClassification (amh)": 29.65, + "MasakhaNEWSClassification (eng)": 72.13, + "MasakhaNEWSClassification (fra)": 67.44, + "MasakhaNEWSClassification (hau)": 54.95, + "MasakhaNEWSClassification (ibo)": 47.59, + "MasakhaNEWSClassification (lin)": 61.09, + "MasakhaNEWSClassification (lug)": 45.61, + "MasakhaNEWSClassification (orm)": 47.05, + "MasakhaNEWSClassification (pcm)": 91.74, + "MasakhaNEWSClassification (run)": 53.07, + "MasakhaNEWSClassification (sna)": 69.43, + "MasakhaNEWSClassification (som)": 40.95, + "MasakhaNEWSClassification (swa)": 44.94, + "MasakhaNEWSClassification (tir)": 24.49, + "MasakhaNEWSClassification (xho)": 58.69, + "MasakhaNEWSClassification (yor)": 57.91, + "MassiveIntentClassification (ru)": 65.92, + "MassiveIntentClassification (hu)": 30.6, + "MassiveIntentClassification (sv)": 35.62, + "MassiveIntentClassification (hy)": 7.88, + "MassiveIntentClassification (te)": 5.31, + "MassiveIntentClassification (af)": 30.74, + "MassiveIntentClassification (ms)": 32.88, + "MassiveIntentClassification (en)": 50.95, + "MassiveIntentClassification (az)": 34.76, + "MassiveIntentClassification (km)": 7.48, + "MassiveIntentClassification (it)": 39.85, + "MassiveIntentClassification (id)": 35.62, + "MassiveIntentClassification (fa)": 12.65, + "MassiveIntentClassification (th)": 6.54, + "MassiveIntentClassification (sl)": 36.21, + "MassiveIntentClassification (ur)": 8.46, + "MassiveIntentClassification (fi)": 32.53, + "MassiveIntentClassification (he)": 4.71, + "MassiveIntentClassification (el)": 19.77, + "MassiveIntentClassification (ka)": 5.46, + "MassiveIntentClassification (am)": 5.65, + "MassiveIntentClassification (is)": 32.58, + "MassiveIntentClassification (zh-CN)": 14.69, + "MassiveIntentClassification (ar)": 14.28, + "MassiveIntentClassification (ml)": 6.04, + "MassiveIntentClassification (lv)": 34.91, + "MassiveIntentClassification (zh-TW)": 12.74, + "MassiveIntentClassification (nb)": 33.1, + "MassiveIntentClassification (ta)": 5.97, + "MassiveIntentClassification (ja)": 12.32, + "MassiveIntentClassification (ko)": 11.02, + "MassiveIntentClassification (tl)": 33.3, + "MassiveIntentClassification (pl)": 36.95, + "MassiveIntentClassification (de)": 33.92, + "MassiveIntentClassification (my)": 7.19, + "MassiveIntentClassification (jv)": 31.16, + "MassiveIntentClassification (kn)": 5.36, + "MassiveIntentClassification (vi)": 23.92, + "MassiveIntentClassification (hi)": 4.28, + "MassiveIntentClassification (da)": 35.68, + "MassiveIntentClassification (mn)": 30.97, + "MassiveIntentClassification (nl)": 33.41, + "MassiveIntentClassification (es)": 34.06, + "MassiveIntentClassification (sq)": 38.32, + "MassiveIntentClassification (pt)": 35.15, + "MassiveIntentClassification (fr)": 35.14, + "MassiveIntentClassification (cy)": 34.48, + "MassiveIntentClassification (bn)": 6.14, + "MassiveIntentClassification (ro)": 34.63, + "MassiveIntentClassification (tr)": 32.85, + "MassiveIntentClassification (sw)": 34.69, + "MassiveScenarioClassification (ru)": 69.06, + "MassiveScenarioClassification (fr)": 42.95, + "MassiveScenarioClassification (az)": 39.16, + "MassiveScenarioClassification (zh-TW)": 19.39, + "MassiveScenarioClassification (sw)": 40.11, + "MassiveScenarioClassification (ar)": 20.44, + "MassiveScenarioClassification (tl)": 38.91, + "MassiveScenarioClassification (zh-CN)": 20.84, + "MassiveScenarioClassification (sv)": 40.71, + "MassiveScenarioClassification (bn)": 11.01, + "MassiveScenarioClassification (nl)": 39.77, + "MassiveScenarioClassification (km)": 13.46, + "MassiveScenarioClassification (te)": 9.99, + "MassiveScenarioClassification (da)": 42.54, + "MassiveScenarioClassification (el)": 27.39, + "MassiveScenarioClassification (my)": 12.72, + "MassiveScenarioClassification (mn)": 35.46, + "MassiveScenarioClassification (es)": 40.44, + "MassiveScenarioClassification (hu)": 38.84, + "MassiveScenarioClassification (cy)": 37.02, + "MassiveScenarioClassification (kn)": 11.12, + "MassiveScenarioClassification (hy)": 13.67, + "MassiveScenarioClassification (is)": 38.64, + "MassiveScenarioClassification (pl)": 43.88, + "MassiveScenarioClassification (nb)": 37.15, + "MassiveScenarioClassification (lv)": 39.17, + "MassiveScenarioClassification (vi)": 31.5, + "MassiveScenarioClassification (en)": 59.93, + "MassiveScenarioClassification (id)": 41.48, + "MassiveScenarioClassification (ro)": 41.77, + "MassiveScenarioClassification (fi)": 37.31, + "MassiveScenarioClassification (jv)": 37.93, + "MassiveScenarioClassification (ko)": 15.33, + "MassiveScenarioClassification (sq)": 43.78, + "MassiveScenarioClassification (ta)": 11.1, + "MassiveScenarioClassification (ms)": 39.48, + "MassiveScenarioClassification (tr)": 37.28, + "MassiveScenarioClassification (af)": 37.03, + "MassiveScenarioClassification (fa)": 17.22, + "MassiveScenarioClassification (ka)": 10.49, + "MassiveScenarioClassification (sl)": 39.28, + "MassiveScenarioClassification (it)": 45.96, + "MassiveScenarioClassification (he)": 10.44, + "MassiveScenarioClassification (hi)": 8.93, + "MassiveScenarioClassification (ja)": 17.52, + "MassiveScenarioClassification (pt)": 40.92, + "MassiveScenarioClassification (th)": 11.93, + "MassiveScenarioClassification (ml)": 10.56, + "MassiveScenarioClassification (ur)": 14.4, + "MassiveScenarioClassification (de)": 42.37, + "MassiveScenarioClassification (am)": 11.75, + "NoRecClassification": 40.97, + "NordicLangClassification": 45.21, + "PAC": 70.41, + "PolEmo2.0-IN": 43.77, + "PolEmo2.0-OUT": 23.18, + "RuReviewsClassification": 65.48, "RuSciBenchGRNTIClassification": 55.55, - "RuSciBenchOECDClassification": 43.28 + "RuSciBenchOECDClassification": 43.28, + "ToxicConversationsClassification": 62.4, + "TweetSentimentExtractionClassification": 55.35 } ] }, @@ -5917,11 +16442,67 @@ "v_measure": [ { "Model": "USER-base", - "GeoreviewClusteringP2P": 64.16, + "AlloProfClusteringP2P": 44.92, + "AlloProfClusteringS2S": 23.19, + "ArxivClusteringP2P": 24.06, + "ArxivClusteringS2S": 14.62, + "BiorxivClusteringP2P": 19.57, + "BiorxivClusteringS2S": 10.96, + "BlurbsClusteringP2P": 15.08, + "BlurbsClusteringS2S": 9.12, + "GeoreviewClusteringP2P": 63.41, + "HALClusteringS2S": 8.99, "MLSUMClusteringP2P (ru)": 48.09, + "MLSUMClusteringP2P (de)": 22.41, + "MLSUMClusteringP2P (fr)": 33.8, + "MLSUMClusteringP2P (es)": 34.12, "MLSUMClusteringS2S (ru)": 45.73, - "RuSciBenchGRNTIClusteringP2P": 51.38, - "RuSciBenchOECDClusteringP2P": 44.73 + "MLSUMClusteringS2S (de)": 23.53, + "MLSUMClusteringS2S (fr)": 32.41, + "MLSUMClusteringS2S (es)": 33.42, + "MasakhaNEWSClusteringP2P (amh)": 41.86, + "MasakhaNEWSClusteringP2P (eng)": 51.77, + "MasakhaNEWSClusteringP2P (fra)": 46.27, + "MasakhaNEWSClusteringP2P (hau)": 29.94, + "MasakhaNEWSClusteringP2P (ibo)": 32.63, + "MasakhaNEWSClusteringP2P (lin)": 53.65, + "MasakhaNEWSClusteringP2P (lug)": 52.56, + "MasakhaNEWSClusteringP2P (orm)": 27.51, + "MasakhaNEWSClusteringP2P (pcm)": 85.52, + "MasakhaNEWSClusteringP2P (run)": 52.92, + "MasakhaNEWSClusteringP2P (sna)": 52.22, + "MasakhaNEWSClusteringP2P (som)": 30.37, + "MasakhaNEWSClusteringP2P (swa)": 22.23, + "MasakhaNEWSClusteringP2P (tir)": 43.83, + "MasakhaNEWSClusteringP2P (xho)": 27.21, + "MasakhaNEWSClusteringP2P (yor)": 26.66, + "MasakhaNEWSClusteringS2S (amh)": 40.86, + "MasakhaNEWSClusteringS2S (eng)": 20.37, + "MasakhaNEWSClusteringS2S (fra)": 25.39, + "MasakhaNEWSClusteringS2S (hau)": 15.37, + "MasakhaNEWSClusteringS2S (ibo)": 34.49, + "MasakhaNEWSClusteringS2S (lin)": 43.66, + "MasakhaNEWSClusteringS2S (lug)": 41.45, + "MasakhaNEWSClusteringS2S (orm)": 25.63, + "MasakhaNEWSClusteringS2S (pcm)": 63.41, + "MasakhaNEWSClusteringS2S (run)": 49.77, + "MasakhaNEWSClusteringS2S (sna)": 43.07, + "MasakhaNEWSClusteringS2S (som)": 26.34, + "MasakhaNEWSClusteringS2S (swa)": 15.66, + "MasakhaNEWSClusteringS2S (tir)": 42.96, + "MasakhaNEWSClusteringS2S (xho)": 27.54, + "MasakhaNEWSClusteringS2S (yor)": 29.03, + "MedrxivClusteringP2P": 23.8, + "MedrxivClusteringS2S": 19.43, + "RedditClustering": 26.95, + "RedditClusteringP2P": 44.11, + "RuSciBenchGRNTIClusteringP2P": 51.5, + "RuSciBenchOECDClusteringP2P": 44.87, + "StackExchangeClustering": 34.21, + "StackExchangeClusteringP2P": 26.54, + "TenKGnadClusteringP2P": 29.7, + "TenKGnadClusteringS2S": 10.51, + "TwentyNewsgroupsClustering": 21.41 } ] }, @@ -5929,13 +16510,51 @@ "max_ap": [ { "Model": "USER-base", + "CDSC-E": 51.61, + "FalseFriendsGermanEnglish": 46.84, "OpusparcusPC (ru)": 91.65, - "TERRa": 60.02 + "OpusparcusPC (de)": 90.82, + "OpusparcusPC (en)": 97.49, + "OpusparcusPC (fi)": 84.61, + "OpusparcusPC (fr)": 88.32, + "OpusparcusPC (sv)": 84.08, + "PSC": 88.54, + "PawsXPairClassification (de)": 52.51, + "PawsXPairClassification (en)": 61.15, + "PawsXPairClassification (es)": 54.72, + "PawsXPairClassification (fr)": 56.35, + "PawsXPairClassification (ja)": 49.39, + "PawsXPairClassification (ko)": 51.82, + "PawsXPairClassification (zh)": 54.65, + "SICK-E-PL": 54.43, + "SprintDuplicateQuestions": 83.25, + "TERRa": 59.76, + "TwitterSemEval2015": 61.72, + "TwitterURLCorpus": 81.61 }, { "Model": "USER-base", + "CDSC-E": 51.93, + "FalseFriendsGermanEnglish": 46.84, "OpusparcusPC (ru)": 91.65, - "TERRa": 60.11 + "OpusparcusPC (de)": 90.82, + "OpusparcusPC (en)": 97.49, + "OpusparcusPC (fi)": 84.61, + "OpusparcusPC (fr)": 88.32, + "OpusparcusPC (sv)": 84.11, + "PSC": 88.71, + "PawsXPairClassification (de)": 52.74, + "PawsXPairClassification (en)": 61.15, + "PawsXPairClassification (es)": 54.75, + "PawsXPairClassification (fr)": 56.39, + "PawsXPairClassification (ja)": 49.39, + "PawsXPairClassification (ko)": 51.82, + "PawsXPairClassification (zh)": 54.88, + "SICK-E-PL": 54.43, + "SprintDuplicateQuestions": 83.25, + "TERRa": 59.76, + "TwitterSemEval2015": 61.72, + "TwitterURLCorpus": 81.61 } ] }, @@ -5943,11 +16562,35 @@ "map": [ { "Model": "USER-base", - "MIRACLReranking (ru)": 46.75 + "AlloprofReranking": 38.67, + "AskUbuntuDupQuestions": 50.31, + "MindSmallReranking": 28.0, + "RuBQReranking": 64.42, + "SciDocsRR": 59.74, + "StackOverflowDupQuestions": 38.95, + "SyntecReranking": 52.11, + "T2Reranking": 54.18 }, { "Model": "USER-base", - "RuBQReranking": 64.42 + "MIRACLReranking (ru)": 46.75, + "MIRACLReranking (ar)": 4.51, + "MIRACLReranking (bn)": 4.54, + "MIRACLReranking (de)": 14.58, + "MIRACLReranking (en)": 35.41, + "MIRACLReranking (es)": 20.5, + "MIRACLReranking (fa)": 5.79, + "MIRACLReranking (fi)": 29.37, + "MIRACLReranking (fr)": 15.83, + "MIRACLReranking (hi)": 5.04, + "MIRACLReranking (id)": 18.1, + "MIRACLReranking (ja)": 3.31, + "MIRACLReranking (ko)": 5.26, + "MIRACLReranking (sw)": 22.13, + "MIRACLReranking (te)": 4.89, + "MIRACLReranking (th)": 4.26, + "MIRACLReranking (yo)": 34.77, + "MIRACLReranking (zh)": 2.96 } ] }, @@ -5955,9 +16598,143 @@ "ndcg_at_10": [ { "Model": "USER-base", - "MIRACLRetrieval (ru)": 35.22, - "RiaNewsRetrieval": 77.83, - "RuBQRetrieval": 56.86 + "AILACasedocs": 17.26, + "AILAStatutes": 10.27, + "ARCChallenge": 4.32, + "AlloprofRetrieval": 4.53, + "AlphaNLI": 17.69, + "AppsRetrieval": 1.06, + "ArguAna": 39.93, + "BSARDRetrieval": 0.5, + "ClimateFEVER": 8.04, + "CmedqaRetrieval": 0.43, + "CodeFeedbackMT": 11.83, + "CodeFeedbackST": 27.39, + "CodeSearchNetCCRetrieval (python)": 15.45, + "CodeSearchNetCCRetrieval (javascript)": 22.88, + "CodeSearchNetCCRetrieval (go)": 13.48, + "CodeSearchNetCCRetrieval (ruby)": 25.8, + "CodeSearchNetCCRetrieval (java)": 16.81, + "CodeSearchNetCCRetrieval (php)": 9.08, + "CodeSearchNetRetrieval (python)": 44.22, + "CodeSearchNetRetrieval (javascript)": 30.73, + "CodeSearchNetRetrieval (go)": 45.46, + "CodeSearchNetRetrieval (ruby)": 37.01, + "CodeSearchNetRetrieval (java)": 17.15, + "CodeSearchNetRetrieval (php)": 23.3, + "CodeTransOceanContest": 18.25, + "CodeTransOceanDL": 26.66, + "CosQA": 2.89, + "CovidRetrieval": 0.0, + "DBPedia": 16.39, + "FEVER": 35.62, + "FiQA2018": 8.47, + "GerDaLIR": 0.41, + "GerDaLIRSmall": 1.14, + "GermanQuAD-Retrieval": 54.92, + "HellaSwag": 9.64, + "HotpotQA": 24.5, + "LEMBNarrativeQARetrieval": 11.11, + "LEMBQMSumRetrieval": 13.38, + "LEMBSummScreenFDRetrieval": 36.32, + "LEMBWikimQARetrieval": 32.33, + "LeCaRDv2": 7.32, + "LegalBenchConsumerContractsQA": 43.78, + "LegalBenchCorporateLobbying": 79.29, + "LegalQuAD": 3.57, + "LegalSummarization": 47.35, + "MIRACLRetrieval (ru)": 33.84, + "MIRACLRetrieval (ar)": 0.06, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 1.53, + "MIRACLRetrieval (en)": 13.77, + "MIRACLRetrieval (es)": 1.22, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 6.42, + "MIRACLRetrieval (fr)": 2.55, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 2.15, + "MIRACLRetrieval (ja)": 0.17, + "MIRACLRetrieval (ko)": 0.94, + "MIRACLRetrieval (sw)": 7.38, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.16, + "MIRACLRetrieval (yo)": 14.33, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 12.58, + "MintakaRetrieval (ar)": 0.54, + "MintakaRetrieval (de)": 7.82, + "MintakaRetrieval (es)": 10.59, + "MintakaRetrieval (fr)": 9.65, + "MintakaRetrieval (hi)": 0.88, + "MintakaRetrieval (it)": 10.93, + "MintakaRetrieval (ja)": 0.95, + "MintakaRetrieval (pt)": 11.38, + "NFCorpus": 16.6, + "NQ": 15.21, + "PIQA": 10.26, + "Quail": 2.06, + "QuoraRetrieval": 76.33, + "RARbCode": 0.55, + "RARbMath": 22.48, + "RiaNewsRetrieval": 77.72, + "RuBQRetrieval": 56.86, + "SCIDOCS": 6.36, + "SIQA": 0.83, + "SciFact": 35.61, + "SciFact-PL": 23.27, + "SpartQA": 2.99, + "StackOverflowQA": 25.25, + "SyntecRetrieval": 36.67, + "SyntheticText2SQL": 29.15, + "TRECCOVID": 20.17, + "TRECCOVID-PL": 4.99, + "TempReasonL1": 1.21, + "TempReasonL2Fact": 9.85, + "TempReasonL2Pure": 0.18, + "TempReasonL3Fact": 11.14, + "TempReasonL3Pure": 6.49, + "Touche2020": 12.97, + "WinoGrande": 44.54, + "XMarket (de)": 4.03, + "XMarket (en)": 5.25, + "XMarket (es)": 5.15, + "XPQARetrieval (ara-ara)": 4.77, + "XPQARetrieval (eng-ara)": 2.29, + "XPQARetrieval (ara-eng)": 6.71, + "XPQARetrieval (deu-deu)": 32.64, + "XPQARetrieval (eng-deu)": 5.29, + "XPQARetrieval (deu-eng)": 17.68, + "XPQARetrieval (spa-spa)": 26.81, + "XPQARetrieval (eng-spa)": 6.1, + "XPQARetrieval (spa-eng)": 16.67, + "XPQARetrieval (fra-fra)": 33.11, + "XPQARetrieval (eng-fra)": 6.89, + "XPQARetrieval (fra-eng)": 23.99, + "XPQARetrieval (hin-hin)": 6.78, + "XPQARetrieval (eng-hin)": 3.72, + "XPQARetrieval (hin-eng)": 5.56, + "XPQARetrieval (ita-ita)": 39.67, + "XPQARetrieval (eng-ita)": 6.36, + "XPQARetrieval (ita-eng)": 18.65, + "XPQARetrieval (jpn-jpn)": 12.33, + "XPQARetrieval (eng-jpn)": 3.85, + "XPQARetrieval (jpn-eng)": 10.44, + "XPQARetrieval (kor-kor)": 5.99, + "XPQARetrieval (eng-kor)": 5.45, + "XPQARetrieval (kor-eng)": 4.52, + "XPQARetrieval (pol-pol)": 16.48, + "XPQARetrieval (eng-pol)": 5.39, + "XPQARetrieval (pol-eng)": 12.87, + "XPQARetrieval (por-por)": 20.89, + "XPQARetrieval (eng-por)": 4.53, + "XPQARetrieval (por-eng)": 15.26, + "XPQARetrieval (tam-tam)": 3.3, + "XPQARetrieval (eng-tam)": 2.59, + "XPQARetrieval (tam-eng)": 3.13, + "XPQARetrieval (cmn-cmn)": 7.42, + "XPQARetrieval (eng-cmn)": 1.75, + "XPQARetrieval (cmn-eng)": 6.27 } ] }, @@ -5965,34 +16742,152 @@ "cosine_spearman": [ { "Model": "USER-base", - "RUParaPhraserSTS": 73.56, - "RuSTSBenchmarkSTS": 82.26, - "STS22 (ru)": 63.39, - "STSBenchmarkMultilingualSTS (ru)": 81.81 + "BIOSSES": 77.26, + "CDSC-R": 74.23, + "GermanSTSBenchmark": 60.5, + "RUParaPhraserSTS": 73.07, + "RuSTSBenchmarkSTS": 81.91, + "SICK-R": 68.34, + "SICK-R-PL": 54.01, + "SICKFr": 60.8, + "STS12": 66.31, + "STS13": 68.77, + "STS14": 67.14, + "STS15": 79.88, + "STS16": 73.39, + "STS17 (fr-en)": 32.17, + "STS17 (nl-en)": 39.12, + "STS17 (it-en)": 24.74, + "STS17 (en-ar)": 5.67, + "STS17 (ko-ko)": 21.1, + "STS17 (en-tr)": 14.19, + "STS17 (en-en)": 81.88, + "STS17 (en-de)": 25.18, + "STS17 (es-en)": 29.2, + "STS17 (es-es)": 68.94, + "STS17 (ar-ar)": 32.72, + "STS22 (ru)": 68.06, + "STS22 (pl)": 24.85, + "STS22 (fr)": 64.37, + "STS22 (fr-pl)": 39.44, + "STS22 (pl-en)": 66.72, + "STS22 (tr)": 45.0, + "STS22 (de-pl)": 4.65, + "STS22 (en)": 61.64, + "STS22 (de-en)": 46.23, + "STS22 (de-fr)": 44.72, + "STS22 (es-en)": 60.25, + "STS22 (ar)": 15.45, + "STS22 (de)": 18.13, + "STS22 (it)": 50.8, + "STS22 (zh)": 15.1, + "STS22 (es-it)": 53.02, + "STS22 (es)": 56.85, + "STS22 (zh-en)": 26.88, + "STSB": 21.81, + "STSBenchmark": 76.03, + "STSBenchmarkMultilingualSTS (ru)": 81.81, + "STSBenchmarkMultilingualSTS (es)": 59.76, + "STSBenchmarkMultilingualSTS (en)": 76.03, + "STSBenchmarkMultilingualSTS (de)": 62.5, + "STSBenchmarkMultilingualSTS (nl)": 56.71, + "STSBenchmarkMultilingualSTS (zh)": 21.78, + "STSBenchmarkMultilingualSTS (pl)": 57.09, + "STSBenchmarkMultilingualSTS (pt)": 55.18, + "STSBenchmarkMultilingualSTS (it)": 57.98, + "STSBenchmarkMultilingualSTS (fr)": 61.91 }, { "Model": "USER-base", - "RUParaPhraserSTS": 73.56, - "RuSTSBenchmarkSTS": 82.26, - "STS22 (ru)": 63.39, - "STSBenchmarkMultilingualSTS (ru)": 81.81 + "BIOSSES": 77.26, + "CDSC-R": 74.23, + "GermanSTSBenchmark": 60.5, + "RUParaPhraserSTS": 73.07, + "RuSTSBenchmarkSTS": 81.91, + "SICK-R": 68.34, + "SICK-R-PL": 54.01, + "SICKFr": 60.8, + "STS12": 66.31, + "STS13": 68.77, + "STS14": 67.14, + "STS15": 79.88, + "STS16": 73.39, + "STS17 (fr-en)": 32.17, + "STS17 (nl-en)": 39.12, + "STS17 (it-en)": 24.74, + "STS17 (en-ar)": 5.67, + "STS17 (ko-ko)": 21.1, + "STS17 (en-tr)": 14.19, + "STS17 (en-en)": 81.88, + "STS17 (en-de)": 25.18, + "STS17 (es-en)": 29.2, + "STS17 (es-es)": 68.94, + "STS17 (ar-ar)": 32.74, + "STS22 (ru)": 68.06, + "STS22 (pl)": 24.74, + "STS22 (fr)": 64.37, + "STS22 (fr-pl)": 39.44, + "STS22 (pl-en)": 66.72, + "STS22 (tr)": 45.0, + "STS22 (de-pl)": 4.65, + "STS22 (en)": 61.64, + "STS22 (de-en)": 46.23, + "STS22 (de-fr)": 44.72, + "STS22 (es-en)": 60.25, + "STS22 (ar)": 15.45, + "STS22 (de)": 18.13, + "STS22 (it)": 50.79, + "STS22 (zh)": 15.1, + "STS22 (es-it)": 53.02, + "STS22 (es)": 56.85, + "STS22 (zh-en)": 26.88, + "STSB": 21.81, + "STSBenchmark": 76.03, + "STSBenchmarkMultilingualSTS (ru)": 81.81, + "STSBenchmarkMultilingualSTS (es)": 59.75, + "STSBenchmarkMultilingualSTS (en)": 76.03, + "STSBenchmarkMultilingualSTS (de)": 62.5, + "STSBenchmarkMultilingualSTS (nl)": 56.71, + "STSBenchmarkMultilingualSTS (zh)": 21.78, + "STSBenchmarkMultilingualSTS (pl)": 57.09, + "STSBenchmarkMultilingualSTS (pt)": 55.18, + "STSBenchmarkMultilingualSTS (it)": 57.98, + "STSBenchmarkMultilingualSTS (fr)": 61.91 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "USER-base", + "SummEval": 32.46, + "SummEvalFr": 31.02 + }, + { + "Model": "USER-base", + "SummEval": 32.46, + "SummEvalFr": 31.02 + } + ] }, "MultilabelClassification": { "accuracy": [ { "Model": "USER-base", - "CEDRClassification": 46.47, - "SensitiveTopicsClassification": 27.5 + "CEDRClassification": 46.78, + "SensitiveTopicsClassification": 28.65 } ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "USER-base", + "Core17InstructionRetrieval": -1.42, + "News21InstructionRetrieval": 1.51, + "Robust04InstructionRetrieval": -5.96 + } + ] } }, "deepvk__USER-bge-m3": { @@ -6000,6 +16895,7 @@ "f1": [ { "Model": "USER-bge-m3", + "BornholmBitextMining": 42.44, "Tatoeba (rus-eng)": 93.52 } ] @@ -6008,15 +16904,169 @@ "accuracy": [ { "Model": "USER-bge-m3", + "AllegroReviews": 48.2, + "AmazonCounterfactualClassification (en-ext)": 73.79, + "AmazonCounterfactualClassification (en)": 74.21, + "AmazonCounterfactualClassification (de)": 67.93, + "AmazonCounterfactualClassification (ja)": 52.86, + "AmazonPolarityClassification": 93.62, + "AmazonReviewsClassification (en)": 48.69, + "AmazonReviewsClassification (de)": 45.56, + "AmazonReviewsClassification (es)": 45.35, + "AmazonReviewsClassification (fr)": 45.16, + "AmazonReviewsClassification (ja)": 22.27, + "AmazonReviewsClassification (zh)": 21.58, + "AngryTweetsClassification": 56.91, + "Banking77Classification": 81.15, + "CBD": 64.42, + "DanishPoliticalCommentsClassification": 37.99, + "EmotionClassification": 50.25, "GeoreviewClassification": 50.98, "HeadlineClassification": 70.09, + "ImdbClassification": 90.88, "InappropriatenessClassification": 60.76, "KinopoiskClassification": 63.33, + "LccSentimentClassification": 63.0, + "MTOPDomainClassification (en)": 92.44, + "MTOPDomainClassification (de)": 86.55, + "MTOPDomainClassification (es)": 89.57, + "MTOPDomainClassification (fr)": 85.44, + "MTOPDomainClassification (hi)": 21.23, + "MTOPDomainClassification (th)": 16.21, + "MTOPIntentClassification (en)": 65.61, + "MTOPIntentClassification (de)": 60.79, + "MTOPIntentClassification (es)": 65.95, + "MTOPIntentClassification (fr)": 58.59, + "MTOPIntentClassification (hi)": 4.41, + "MTOPIntentClassification (th)": 5.32, + "MasakhaNEWSClassification (amh)": 39.23, + "MasakhaNEWSClassification (eng)": 79.93, + "MasakhaNEWSClassification (fra)": 77.54, + "MasakhaNEWSClassification (hau)": 76.7, + "MasakhaNEWSClassification (ibo)": 61.15, + "MasakhaNEWSClassification (lin)": 72.11, + "MasakhaNEWSClassification (lug)": 65.52, + "MasakhaNEWSClassification (orm)": 75.63, + "MasakhaNEWSClassification (pcm)": 91.51, + "MasakhaNEWSClassification (run)": 76.06, + "MasakhaNEWSClassification (sna)": 84.77, + "MasakhaNEWSClassification (som)": 62.55, + "MasakhaNEWSClassification (swa)": 71.85, + "MasakhaNEWSClassification (tir)": 29.45, + "MasakhaNEWSClassification (xho)": 77.74, + "MasakhaNEWSClassification (yor)": 79.29, "MassiveIntentClassification (ru)": 68.85, + "MassiveIntentClassification (zh-CN)": 3.03, + "MassiveIntentClassification (vi)": 34.1, + "MassiveIntentClassification (pl)": 58.31, + "MassiveIntentClassification (pt)": 62.67, + "MassiveIntentClassification (mn)": 40.26, + "MassiveIntentClassification (ms)": 54.77, + "MassiveIntentClassification (sq)": 51.26, + "MassiveIntentClassification (ro)": 58.32, + "MassiveIntentClassification (ml)": 2.74, + "MassiveIntentClassification (tr)": 53.8, + "MassiveIntentClassification (nb)": 61.11, + "MassiveIntentClassification (zh-TW)": 4.62, + "MassiveIntentClassification (is)": 45.99, + "MassiveIntentClassification (fr)": 64.25, + "MassiveIntentClassification (ka)": 7.12, + "MassiveIntentClassification (ko)": 2.91, + "MassiveIntentClassification (ar)": 5.53, + "MassiveIntentClassification (cy)": 38.19, + "MassiveIntentClassification (am)": 2.37, + "MassiveIntentClassification (hu)": 40.52, + "MassiveIntentClassification (hy)": 3.18, + "MassiveIntentClassification (de)": 60.56, + "MassiveIntentClassification (ta)": 2.97, + "MassiveIntentClassification (km)": 5.39, + "MassiveIntentClassification (my)": 4.28, + "MassiveIntentClassification (es)": 63.18, + "MassiveIntentClassification (ur)": 3.34, + "MassiveIntentClassification (sv)": 61.69, + "MassiveIntentClassification (en)": 70.67, + "MassiveIntentClassification (te)": 2.17, + "MassiveIntentClassification (af)": 53.29, + "MassiveIntentClassification (az)": 44.21, + "MassiveIntentClassification (el)": 27.53, + "MassiveIntentClassification (jv)": 42.42, + "MassiveIntentClassification (lv)": 45.12, + "MassiveIntentClassification (sw)": 44.4, + "MassiveIntentClassification (fa)": 3.65, + "MassiveIntentClassification (it)": 63.58, + "MassiveIntentClassification (fi)": 56.34, + "MassiveIntentClassification (da)": 62.24, + "MassiveIntentClassification (he)": 3.06, + "MassiveIntentClassification (id)": 58.18, + "MassiveIntentClassification (kn)": 3.71, + "MassiveIntentClassification (th)": 4.07, + "MassiveIntentClassification (tl)": 47.04, + "MassiveIntentClassification (hi)": 3.78, + "MassiveIntentClassification (ja)": 2.44, + "MassiveIntentClassification (sl)": 56.11, + "MassiveIntentClassification (bn)": 3.05, + "MassiveIntentClassification (nl)": 59.3, "MassiveScenarioClassification (ru)": 72.9, + "MassiveScenarioClassification (de)": 67.22, + "MassiveScenarioClassification (fr)": 69.41, + "MassiveScenarioClassification (id)": 63.48, + "MassiveScenarioClassification (nl)": 65.78, + "MassiveScenarioClassification (cy)": 43.85, + "MassiveScenarioClassification (my)": 9.36, + "MassiveScenarioClassification (ms)": 60.81, + "MassiveScenarioClassification (lv)": 50.58, + "MassiveScenarioClassification (sq)": 59.86, + "MassiveScenarioClassification (ro)": 65.69, + "MassiveScenarioClassification (sv)": 68.53, + "MassiveScenarioClassification (tl)": 56.47, + "MassiveScenarioClassification (ur)": 9.41, + "MassiveScenarioClassification (zh-CN)": 7.85, + "MassiveScenarioClassification (jv)": 50.85, + "MassiveScenarioClassification (hi)": 7.6, + "MassiveScenarioClassification (pl)": 61.86, + "MassiveScenarioClassification (ar)": 12.98, + "MassiveScenarioClassification (vi)": 38.48, + "MassiveScenarioClassification (is)": 54.05, + "MassiveScenarioClassification (hy)": 9.5, + "MassiveScenarioClassification (th)": 9.23, + "MassiveScenarioClassification (te)": 7.39, + "MassiveScenarioClassification (ta)": 7.62, + "MassiveScenarioClassification (fi)": 59.7, + "MassiveScenarioClassification (am)": 6.71, + "MassiveScenarioClassification (fa)": 8.4, + "MassiveScenarioClassification (az)": 50.34, + "MassiveScenarioClassification (el)": 34.63, + "MassiveScenarioClassification (ja)": 7.51, + "MassiveScenarioClassification (es)": 67.88, + "MassiveScenarioClassification (he)": 7.7, + "MassiveScenarioClassification (kn)": 8.32, + "MassiveScenarioClassification (af)": 60.7, + "MassiveScenarioClassification (pt)": 65.38, + "MassiveScenarioClassification (sl)": 61.13, + "MassiveScenarioClassification (da)": 69.64, + "MassiveScenarioClassification (nb)": 67.6, + "MassiveScenarioClassification (tr)": 60.02, + "MassiveScenarioClassification (km)": 9.61, + "MassiveScenarioClassification (it)": 67.92, + "MassiveScenarioClassification (ko)": 6.81, + "MassiveScenarioClassification (en)": 75.85, + "MassiveScenarioClassification (mn)": 45.93, + "MassiveScenarioClassification (sw)": 52.36, + "MassiveScenarioClassification (ml)": 7.55, + "MassiveScenarioClassification (zh-TW)": 8.71, + "MassiveScenarioClassification (hu)": 47.43, + "MassiveScenarioClassification (ka)": 13.81, + "MassiveScenarioClassification (bn)": 8.65, + "NoRecClassification": 54.06, + "NordicLangClassification": 40.53, + "PAC": 67.58, + "PolEmo2.0-IN": 75.48, + "PolEmo2.0-OUT": 50.0, "RuReviewsClassification": 68.52, "RuSciBenchGRNTIClassification": 57.67, - "RuSciBenchOECDClassification": 44.2 + "RuSciBenchOECDClassification": 44.2, + "ToxicConversationsClassification": 69.19, + "TweetSentimentExtractionClassification": 63.88 } ] }, @@ -6024,9 +17074,67 @@ "v_measure": [ { "Model": "USER-bge-m3", - "GeoreviewClusteringP2P": 62.79, + "AlloProfClusteringP2P": 58.28, + "AlloProfClusteringS2S": 37.82, + "ArxivClusteringP2P": 39.68, + "ArxivClusteringS2S": 26.76, + "BiorxivClusteringP2P": 33.43, + "BiorxivClusteringS2S": 25.01, + "BlurbsClusteringP2P": 38.7, + "BlurbsClusteringS2S": 15.58, + "GeoreviewClusteringP2P": 62.94, + "HALClusteringS2S": 23.72, + "MLSUMClusteringP2P (de)": 42.31, + "MLSUMClusteringP2P (fr)": 43.76, + "MLSUMClusteringP2P (ru)": 45.48, + "MLSUMClusteringP2P (es)": 46.2, + "MLSUMClusteringS2S (de)": 40.27, + "MLSUMClusteringS2S (fr)": 43.92, + "MLSUMClusteringS2S (ru)": 45.53, + "MLSUMClusteringS2S (es)": 45.94, + "MasakhaNEWSClusteringP2P (amh)": 42.38, + "MasakhaNEWSClusteringP2P (eng)": 55.13, + "MasakhaNEWSClusteringP2P (fra)": 48.26, + "MasakhaNEWSClusteringP2P (hau)": 61.96, + "MasakhaNEWSClusteringP2P (ibo)": 49.7, + "MasakhaNEWSClusteringP2P (lin)": 60.62, + "MasakhaNEWSClusteringP2P (lug)": 53.66, + "MasakhaNEWSClusteringP2P (orm)": 30.62, + "MasakhaNEWSClusteringP2P (pcm)": 73.49, + "MasakhaNEWSClusteringP2P (run)": 51.27, + "MasakhaNEWSClusteringP2P (sna)": 58.8, + "MasakhaNEWSClusteringP2P (som)": 38.46, + "MasakhaNEWSClusteringP2P (swa)": 27.52, + "MasakhaNEWSClusteringP2P (tir)": 43.84, + "MasakhaNEWSClusteringP2P (xho)": 40.83, + "MasakhaNEWSClusteringP2P (yor)": 31.35, + "MasakhaNEWSClusteringS2S (amh)": 45.04, + "MasakhaNEWSClusteringS2S (eng)": 21.37, + "MasakhaNEWSClusteringS2S (fra)": 37.36, + "MasakhaNEWSClusteringS2S (hau)": 31.24, + "MasakhaNEWSClusteringS2S (ibo)": 35.78, + "MasakhaNEWSClusteringS2S (lin)": 47.57, + "MasakhaNEWSClusteringS2S (lug)": 43.93, + "MasakhaNEWSClusteringS2S (orm)": 24.82, + "MasakhaNEWSClusteringS2S (pcm)": 74.85, + "MasakhaNEWSClusteringS2S (run)": 47.56, + "MasakhaNEWSClusteringS2S (sna)": 46.45, + "MasakhaNEWSClusteringS2S (som)": 30.07, + "MasakhaNEWSClusteringS2S (swa)": 27.98, + "MasakhaNEWSClusteringS2S (tir)": 43.77, + "MasakhaNEWSClusteringS2S (xho)": 33.9, + "MasakhaNEWSClusteringS2S (yor)": 27.7, + "MedrxivClusteringP2P": 30.53, + "MedrxivClusteringS2S": 27.04, + "RedditClustering": 46.3, + "RedditClusteringP2P": 56.92, "RuSciBenchGRNTIClusteringP2P": 53.11, - "RuSciBenchOECDClusteringP2P": 44.93 + "RuSciBenchOECDClusteringP2P": 44.93, + "StackExchangeClustering": 55.22, + "StackExchangeClusteringP2P": 32.11, + "TenKGnadClusteringP2P": 42.3, + "TenKGnadClusteringS2S": 22.82, + "TwentyNewsgroupsClustering": 40.51 } ] }, @@ -6034,13 +17142,51 @@ "max_ap": [ { "Model": "USER-bge-m3", + "CDSC-E": 73.41, + "FalseFriendsGermanEnglish": 50.01, "OpusparcusPC (ru)": 90.73, - "TERRa": 64.99 + "OpusparcusPC (de)": 94.76, + "OpusparcusPC (en)": 98.87, + "OpusparcusPC (fi)": 89.88, + "OpusparcusPC (fr)": 91.78, + "OpusparcusPC (sv)": 92.17, + "PSC": 99.13, + "PawsXPairClassification (de)": 57.78, + "PawsXPairClassification (en)": 66.16, + "PawsXPairClassification (es)": 58.62, + "PawsXPairClassification (fr)": 60.62, + "PawsXPairClassification (ja)": 48.05, + "PawsXPairClassification (ko)": 48.83, + "PawsXPairClassification (zh)": 48.72, + "SICK-E-PL": 70.91, + "SprintDuplicateQuestions": 97.24, + "TERRa": 64.99, + "TwitterSemEval2015": 71.7, + "TwitterURLCorpus": 85.71 }, { "Model": "USER-bge-m3", + "CDSC-E": 73.41, + "FalseFriendsGermanEnglish": 50.01, "OpusparcusPC (ru)": 90.73, - "TERRa": 65.07 + "OpusparcusPC (de)": 94.76, + "OpusparcusPC (en)": 98.88, + "OpusparcusPC (fi)": 89.9, + "OpusparcusPC (fr)": 91.78, + "OpusparcusPC (sv)": 92.17, + "PSC": 99.13, + "PawsXPairClassification (de)": 58.27, + "PawsXPairClassification (en)": 66.22, + "PawsXPairClassification (es)": 58.68, + "PawsXPairClassification (fr)": 60.71, + "PawsXPairClassification (ja)": 48.4, + "PawsXPairClassification (ko)": 48.83, + "PawsXPairClassification (zh)": 49.02, + "SICK-E-PL": 70.91, + "SprintDuplicateQuestions": 97.24, + "TERRa": 65.07, + "TwitterSemEval2015": 71.7, + "TwitterURLCorpus": 85.71 } ] }, @@ -6048,11 +17194,18 @@ "map": [ { "Model": "USER-bge-m3", - "MIRACLReranking (ru)": 64.35 + "AlloprofReranking": 73.25, + "AskUbuntuDupQuestions": 61.89, + "MindSmallReranking": 31.09, + "RuBQReranking": 73.08, + "SciDocsRR": 78.23, + "StackOverflowDupQuestions": 51.19, + "SyntecReranking": 85.83, + "T2Reranking": 56.97 }, { "Model": "USER-bge-m3", - "RuBQReranking": 73.08 + "MIRACLReranking (ru)": 64.35 } ] }, @@ -6060,9 +17213,113 @@ "ndcg_at_10": [ { "Model": "USER-bge-m3", + "AILACasedocs": 34.69, + "AILAStatutes": 26.69, + "AlloprofRetrieval": 46.17, + "AppsRetrieval": 14.17, + "ArguAna": 49.95, + "BSARDRetrieval": 16.57, + "ClimateFEVER": 25.9, + "CmedqaRetrieval": 1.26, + "CodeFeedbackMT": 46.59, + "CodeFeedbackST": 69.42, + "CodeSearchNetCCRetrieval (python)": 57.2, + "CodeSearchNetCCRetrieval (javascript)": 56.58, + "CodeSearchNetCCRetrieval (go)": 45.59, + "CodeSearchNetCCRetrieval (ruby)": 56.2, + "CodeSearchNetCCRetrieval (java)": 51.14, + "CodeSearchNetCCRetrieval (php)": 40.48, + "CodeSearchNetRetrieval (python)": 83.58, + "CodeSearchNetRetrieval (javascript)": 70.26, + "CodeSearchNetRetrieval (go)": 85.71, + "CodeSearchNetRetrieval (ruby)": 72.3, + "CodeSearchNetRetrieval (java)": 63.08, + "CodeSearchNetRetrieval (php)": 76.48, + "CodeTransOceanContest": 65.55, + "CodeTransOceanDL": 29.16, + "CosQA": 28.36, + "CovidRetrieval": 0.12, + "DBPedia": 38.22, + "FEVER": 73.08, + "FiQA2018": 40.8, + "GerDaLIR": 11.12, + "GerDaLIRSmall": 25.79, + "GermanQuAD-Retrieval": 94.78, + "HotpotQA": 64.32, + "LEMBNarrativeQARetrieval": 45.78, + "LEMBQMSumRetrieval": 37.22, + "LEMBSummScreenFDRetrieval": 93.67, + "LEMBWikimQARetrieval": 78.06, + "LeCaRDv2": 14.3, + "LegalBenchConsumerContractsQA": 78.5, + "LegalBenchCorporateLobbying": 90.59, + "LegalQuAD": 45.86, + "LegalSummarization": 62.78, "MIRACLRetrieval (ru)": 67.33, + "MintakaRetrieval (ar)": 0.51, + "MintakaRetrieval (de)": 24.22, + "MintakaRetrieval (es)": 23.68, + "MintakaRetrieval (fr)": 24.43, + "MintakaRetrieval (hi)": 0.87, + "MintakaRetrieval (it)": 24.29, + "MintakaRetrieval (ja)": 1.52, + "MintakaRetrieval (pt)": 24.31, + "NFCorpus": 31.29, + "NQ": 58.9, + "QuoraRetrieval": 87.95, "RiaNewsRetrieval": 83.53, - "RuBQRetrieval": 70.03 + "RuBQRetrieval": 70.03, + "SCIDOCS": 16.15, + "SciFact": 62.43, + "SciFact-PL": 52.67, + "SpartQA": 7.58, + "StackOverflowQA": 79.29, + "SyntecRetrieval": 83.99, + "SyntheticText2SQL": 45.44, + "TRECCOVID": 51.02, + "TRECCOVID-PL": 38.22, + "TempReasonL1": 0.9, + "Touche2020": 18.96, + "WinoGrande": 32.79, + "XMarket (de)": 11.45, + "XMarket (en)": 12.49, + "XMarket (es)": 14.29, + "XPQARetrieval (ara-ara)": 6.08, + "XPQARetrieval (eng-ara)": 4.76, + "XPQARetrieval (ara-eng)": 10.47, + "XPQARetrieval (deu-deu)": 69.1, + "XPQARetrieval (eng-deu)": 42.06, + "XPQARetrieval (deu-eng)": 60.33, + "XPQARetrieval (spa-spa)": 57.54, + "XPQARetrieval (eng-spa)": 36.34, + "XPQARetrieval (spa-eng)": 49.39, + "XPQARetrieval (fra-fra)": 65.52, + "XPQARetrieval (eng-fra)": 37.81, + "XPQARetrieval (fra-eng)": 56.03, + "XPQARetrieval (hin-hin)": 8.22, + "XPQARetrieval (eng-hin)": 6.53, + "XPQARetrieval (hin-eng)": 8.65, + "XPQARetrieval (ita-ita)": 71.13, + "XPQARetrieval (eng-ita)": 37.85, + "XPQARetrieval (ita-eng)": 57.46, + "XPQARetrieval (jpn-jpn)": 16.58, + "XPQARetrieval (eng-jpn)": 8.46, + "XPQARetrieval (jpn-eng)": 16.92, + "XPQARetrieval (kor-kor)": 6.5, + "XPQARetrieval (eng-kor)": 10.44, + "XPQARetrieval (kor-eng)": 7.53, + "XPQARetrieval (pol-pol)": 41.18, + "XPQARetrieval (eng-pol)": 25.85, + "XPQARetrieval (pol-eng)": 35.92, + "XPQARetrieval (por-por)": 43.92, + "XPQARetrieval (eng-por)": 26.94, + "XPQARetrieval (por-eng)": 39.11, + "XPQARetrieval (tam-tam)": 3.6, + "XPQARetrieval (eng-tam)": 5.88, + "XPQARetrieval (tam-eng)": 5.11, + "XPQARetrieval (cmn-cmn)": 13.98, + "XPQARetrieval (eng-cmn)": 6.93, + "XPQARetrieval (cmn-eng)": 14.13 } ] }, @@ -6070,27 +17327,149 @@ "cosine_spearman": [ { "Model": "USER-bge-m3", + "BIOSSES": 82.26, + "CDSC-R": 87.11, + "GermanSTSBenchmark": 79.99, + "SICK-R": 81.0, + "SICK-R-PL": 68.1, + "SICKFr": 77.52, + "STS12": 81.09, + "STS13": 83.85, + "STS14": 82.11, + "STS15": 89.0, + "STS16": 85.8, + "STS17 (en-de)": 79.44, + "STS17 (es-en)": 73.1, + "STS17 (en-tr)": 64.37, + "STS17 (nl-en)": 73.16, + "STS17 (it-en)": 75.26, + "STS17 (en-en)": 87.18, + "STS17 (fr-en)": 75.33, + "STS17 (en-ar)": 1.36, + "STS17 (es-es)": 85.32, + "STS17 (ar-ar)": 19.32, + "STS17 (ko-ko)": 9.41, + "STS22 (ru)": 70.55, + "STS22 (pl-en)": 74.88, + "STS22 (tr)": 66.46, + "STS22 (zh-en)": 17.82, + "STS22 (es-en)": 79.25, + "STS22 (fr)": 82.53, + "STS22 (es)": 70.65, + "STS22 (zh)": 37.88, + "STS22 (de-pl)": 58.48, + "STS22 (de)": 61.46, + "STS22 (de-en)": 58.35, + "STS22 (it)": 81.45, + "STS22 (ar)": 33.34, + "STS22 (fr-pl)": 84.52, + "STS22 (es-it)": 74.77, + "STS22 (pl)": 40.59, + "STS22 (en)": 68.08, + "STS22 (de-fr)": 61.09, + "STSB": 0.87, + "STSBenchmark": 86.85, + "STSBenchmarkMultilingualSTS (it)": 81.62, + "STSBenchmarkMultilingualSTS (de)": 80.64, + "STSBenchmarkMultilingualSTS (es)": 81.15, + "STSBenchmarkMultilingualSTS (nl)": 78.03, + "STSBenchmarkMultilingualSTS (pl)": 73.68, + "STSBenchmarkMultilingualSTS (pt)": 79.64, + "STSBenchmarkMultilingualSTS (fr)": 81.74, + "STSBenchmarkMultilingualSTS (en)": 86.85, + "STSBenchmarkMultilingualSTS (zh)": 1.42 + }, + { + "Model": "USER-bge-m3", + "BIOSSES": 82.26, + "CDSC-R": 87.11, + "GermanSTSBenchmark": 79.99, "RUParaPhraserSTS": 76.36, "RuSTSBenchmarkSTS": 83.35, - "STS22 (ru)": 66.42, - "STSBenchmarkMultilingualSTS (ru)": 82.96 + "SICK-R": 81.0, + "SICK-R-PL": 68.1, + "SICKFr": 77.53, + "STS12": 81.1, + "STS13": 83.85, + "STS14": 82.11, + "STS15": 89.0, + "STS16": 85.8, + "STS17 (en-de)": 79.44, + "STS17 (es-en)": 73.1, + "STS17 (en-tr)": 64.37, + "STS17 (nl-en)": 73.16, + "STS17 (it-en)": 75.26, + "STS17 (en-en)": 87.18, + "STS17 (fr-en)": 75.33, + "STS17 (en-ar)": 1.36, + "STS17 (es-es)": 85.32, + "STS17 (ar-ar)": 19.24, + "STS17 (ko-ko)": 9.22, + "STS22 (ru)": 70.55, + "STS22 (pl-en)": 74.88, + "STS22 (tr)": 66.46, + "STS22 (zh-en)": 17.82, + "STS22 (es-en)": 79.25, + "STS22 (fr)": 82.53, + "STS22 (es)": 70.65, + "STS22 (zh)": 37.88, + "STS22 (de-pl)": 58.48, + "STS22 (de)": 61.47, + "STS22 (de-en)": 58.35, + "STS22 (it)": 81.45, + "STS22 (ar)": 33.36, + "STS22 (fr-pl)": 84.52, + "STS22 (es-it)": 74.77, + "STS22 (pl)": 40.49, + "STS22 (en)": 68.08, + "STS22 (de-fr)": 61.09, + "STSB": 4.61, + "STSBenchmark": 86.85, + "STSBenchmarkMultilingualSTS (ru)": 82.96, + "STSBenchmarkMultilingualSTS (it)": 81.62, + "STSBenchmarkMultilingualSTS (de)": 80.64, + "STSBenchmarkMultilingualSTS (es)": 81.15, + "STSBenchmarkMultilingualSTS (nl)": 78.03, + "STSBenchmarkMultilingualSTS (pl)": 73.68, + "STSBenchmarkMultilingualSTS (pt)": 79.65, + "STSBenchmarkMultilingualSTS (fr)": 81.74, + "STSBenchmarkMultilingualSTS (en)": 86.85, + "STSBenchmarkMultilingualSTS (zh)": 5.27 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "USER-bge-m3", + "SummEval": 31.82, + "SummEvalFr": 29.94 + }, + { + "Model": "USER-bge-m3", + "SummEval": 31.82, + "SummEvalFr": 29.94 + } + ] }, "MultilabelClassification": { "accuracy": [ { "Model": "USER-bge-m3", "CEDRClassification": 45.48, - "SensitiveTopicsClassification": 26.29 + "SensitiveTopicsClassification": 26.88 } ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "USER-bge-m3", + "Core17InstructionRetrieval": 0.04, + "News21InstructionRetrieval": -0.93, + "Robust04InstructionRetrieval": -7.07 + } + ] } }, "deepvk__deberta-v1-base": { @@ -6098,6 +17477,7 @@ "f1": [ { "Model": "deberta-v1-base", + "BornholmBitextMining": 14.11, "Tatoeba (rus-eng)": 13.21 } ] @@ -6106,15 +17486,169 @@ "accuracy": [ { "Model": "deberta-v1-base", + "AllegroReviews": 23.34, + "AmazonCounterfactualClassification (en-ext)": 65.06, + "AmazonCounterfactualClassification (en)": 64.33, + "AmazonCounterfactualClassification (de)": 58.92, + "AmazonCounterfactualClassification (ja)": 56.77, + "AmazonPolarityClassification": 57.15, + "AmazonReviewsClassification (en)": 26.81, + "AmazonReviewsClassification (de)": 24.26, + "AmazonReviewsClassification (es)": 24.58, + "AmazonReviewsClassification (fr)": 25.23, + "AmazonReviewsClassification (ja)": 21.08, + "AmazonReviewsClassification (zh)": 23.14, + "AngryTweetsClassification": 42.98, + "Banking77Classification": 54.06, + "CBD": 50.33, + "DanishPoliticalCommentsClassification": 28.36, + "EmotionClassification": 20.53, "GeoreviewClassification": 40.19, "HeadlineClassification": 78.75, + "ImdbClassification": 52.62, "InappropriatenessClassification": 61.33, "KinopoiskClassification": 48.78, + "LccSentimentClassification": 40.87, + "MTOPDomainClassification (en)": 66.39, + "MTOPDomainClassification (de)": 49.69, + "MTOPDomainClassification (es)": 56.75, + "MTOPDomainClassification (fr)": 49.63, + "MTOPDomainClassification (hi)": 32.99, + "MTOPDomainClassification (th)": 33.1, + "MTOPIntentClassification (en)": 52.32, + "MTOPIntentClassification (de)": 39.95, + "MTOPIntentClassification (es)": 42.92, + "MTOPIntentClassification (fr)": 36.76, + "MTOPIntentClassification (hi)": 15.57, + "MTOPIntentClassification (th)": 19.52, + "MasakhaNEWSClassification (amh)": 32.34, + "MasakhaNEWSClassification (eng)": 58.89, + "MasakhaNEWSClassification (fra)": 48.89, + "MasakhaNEWSClassification (hau)": 48.87, + "MasakhaNEWSClassification (ibo)": 41.56, + "MasakhaNEWSClassification (lin)": 48.97, + "MasakhaNEWSClassification (lug)": 45.87, + "MasakhaNEWSClassification (orm)": 46.12, + "MasakhaNEWSClassification (pcm)": 74.79, + "MasakhaNEWSClassification (run)": 39.16, + "MasakhaNEWSClassification (sna)": 63.93, + "MasakhaNEWSClassification (som)": 29.29, + "MasakhaNEWSClassification (swa)": 37.96, + "MasakhaNEWSClassification (tir)": 22.76, + "MasakhaNEWSClassification (xho)": 59.43, + "MasakhaNEWSClassification (yor)": 45.74, "MassiveIntentClassification (ru)": 61.32, + "MassiveIntentClassification (nb)": 31.01, + "MassiveIntentClassification (sw)": 33.17, + "MassiveIntentClassification (zh-CN)": 26.9, + "MassiveIntentClassification (ko)": 23.74, + "MassiveIntentClassification (ar)": 22.52, + "MassiveIntentClassification (te)": 12.72, + "MassiveIntentClassification (mn)": 29.22, + "MassiveIntentClassification (id)": 35.91, + "MassiveIntentClassification (is)": 29.9, + "MassiveIntentClassification (ml)": 17.03, + "MassiveIntentClassification (ka)": 15.38, + "MassiveIntentClassification (af)": 31.58, + "MassiveIntentClassification (tl)": 32.28, + "MassiveIntentClassification (nl)": 33.04, + "MassiveIntentClassification (zh-TW)": 24.87, + "MassiveIntentClassification (hy)": 14.31, + "MassiveIntentClassification (it)": 35.84, + "MassiveIntentClassification (es)": 30.09, + "MassiveIntentClassification (he)": 15.96, + "MassiveIntentClassification (lv)": 31.19, + "MassiveIntentClassification (sq)": 34.43, + "MassiveIntentClassification (ur)": 15.68, + "MassiveIntentClassification (bn)": 13.67, + "MassiveIntentClassification (my)": 11.9, + "MassiveIntentClassification (sl)": 33.48, + "MassiveIntentClassification (en)": 44.81, + "MassiveIntentClassification (fr)": 31.26, + "MassiveIntentClassification (hi)": 13.06, + "MassiveIntentClassification (am)": 15.42, + "MassiveIntentClassification (jv)": 32.3, + "MassiveIntentClassification (cy)": 34.92, + "MassiveIntentClassification (fa)": 22.87, + "MassiveIntentClassification (ta)": 12.99, + "MassiveIntentClassification (hu)": 29.21, + "MassiveIntentClassification (vi)": 30.6, + "MassiveIntentClassification (pt)": 32.59, + "MassiveIntentClassification (da)": 33.38, + "MassiveIntentClassification (tr)": 28.04, + "MassiveIntentClassification (fi)": 30.73, + "MassiveIntentClassification (th)": 16.83, + "MassiveIntentClassification (ja)": 20.97, + "MassiveIntentClassification (sv)": 31.97, + "MassiveIntentClassification (ro)": 28.66, + "MassiveIntentClassification (az)": 31.04, + "MassiveIntentClassification (de)": 31.68, + "MassiveIntentClassification (km)": 15.76, + "MassiveIntentClassification (ms)": 34.09, + "MassiveIntentClassification (kn)": 12.66, + "MassiveIntentClassification (pl)": 33.5, + "MassiveIntentClassification (el)": 23.2, "MassiveScenarioClassification (ru)": 64.71, + "MassiveScenarioClassification (ko)": 25.21, + "MassiveScenarioClassification (vi)": 30.39, + "MassiveScenarioClassification (da)": 32.6, + "MassiveScenarioClassification (te)": 15.72, + "MassiveScenarioClassification (lv)": 29.84, + "MassiveScenarioClassification (ja)": 21.65, + "MassiveScenarioClassification (nb)": 28.7, + "MassiveScenarioClassification (sl)": 34.81, + "MassiveScenarioClassification (bn)": 16.64, + "MassiveScenarioClassification (sq)": 32.41, + "MassiveScenarioClassification (ka)": 16.96, + "MassiveScenarioClassification (pt)": 29.57, + "MassiveScenarioClassification (he)": 17.7, + "MassiveScenarioClassification (jv)": 33.16, + "MassiveScenarioClassification (tr)": 28.1, + "MassiveScenarioClassification (es)": 29.32, + "MassiveScenarioClassification (fa)": 24.25, + "MassiveScenarioClassification (mn)": 30.46, + "MassiveScenarioClassification (ar)": 24.92, + "MassiveScenarioClassification (sv)": 31.37, + "MassiveScenarioClassification (hy)": 17.57, + "MassiveScenarioClassification (zh-CN)": 29.22, + "MassiveScenarioClassification (az)": 31.36, + "MassiveScenarioClassification (de)": 32.3, + "MassiveScenarioClassification (it)": 35.66, + "MassiveScenarioClassification (km)": 20.19, + "MassiveScenarioClassification (en)": 45.39, + "MassiveScenarioClassification (pl)": 32.46, + "MassiveScenarioClassification (th)": 20.26, + "MassiveScenarioClassification (nl)": 32.45, + "MassiveScenarioClassification (is)": 28.84, + "MassiveScenarioClassification (am)": 17.93, + "MassiveScenarioClassification (my)": 15.23, + "MassiveScenarioClassification (ur)": 19.66, + "MassiveScenarioClassification (ms)": 33.73, + "MassiveScenarioClassification (kn)": 16.33, + "MassiveScenarioClassification (hi)": 16.47, + "MassiveScenarioClassification (ml)": 18.71, + "MassiveScenarioClassification (sw)": 35.19, + "MassiveScenarioClassification (hu)": 29.7, + "MassiveScenarioClassification (af)": 31.98, + "MassiveScenarioClassification (cy)": 35.27, + "MassiveScenarioClassification (fi)": 29.71, + "MassiveScenarioClassification (tl)": 31.7, + "MassiveScenarioClassification (fr)": 29.73, + "MassiveScenarioClassification (id)": 35.47, + "MassiveScenarioClassification (zh-TW)": 26.79, + "MassiveScenarioClassification (ro)": 28.91, + "MassiveScenarioClassification (ta)": 16.85, + "MassiveScenarioClassification (el)": 24.88, + "NoRecClassification": 38.22, + "NordicLangClassification": 57.91, + "PAC": 62.79, + "PolEmo2.0-IN": 40.42, + "PolEmo2.0-OUT": 30.22, "RuReviewsClassification": 55.66, "RuSciBenchGRNTIClassification": 53.53, - "RuSciBenchOECDClassification": 41.34 + "RuSciBenchOECDClassification": 41.34, + "ToxicConversationsClassification": 56.61, + "TweetSentimentExtractionClassification": 44.62 } ] }, @@ -6122,11 +17656,67 @@ "v_measure": [ { "Model": "deberta-v1-base", + "AlloProfClusteringP2P": 32.16, + "AlloProfClusteringS2S": 22.88, + "ArxivClusteringP2P": 16.05, + "ArxivClusteringS2S": 10.67, + "BiorxivClusteringP2P": 11.67, + "BiorxivClusteringS2S": 5.98, + "BlurbsClusteringP2P": 8.62, + "BlurbsClusteringS2S": 7.93, "GeoreviewClusteringP2P": 58.79, + "HALClusteringS2S": 3.26, "MLSUMClusteringP2P (ru)": 47.33, + "MLSUMClusteringP2P (de)": 6.45, + "MLSUMClusteringP2P (fr)": 24.07, + "MLSUMClusteringP2P (es)": 27.72, "MLSUMClusteringS2S (ru)": 44.6, + "MLSUMClusteringS2S (de)": 6.0, + "MLSUMClusteringS2S (fr)": 22.71, + "MLSUMClusteringS2S (es)": 27.16, + "MasakhaNEWSClusteringP2P (amh)": 40.75, + "MasakhaNEWSClusteringP2P (eng)": 17.6, + "MasakhaNEWSClusteringP2P (fra)": 25.68, + "MasakhaNEWSClusteringP2P (hau)": 9.55, + "MasakhaNEWSClusteringP2P (ibo)": 25.62, + "MasakhaNEWSClusteringP2P (lin)": 47.81, + "MasakhaNEWSClusteringP2P (lug)": 49.21, + "MasakhaNEWSClusteringP2P (orm)": 25.11, + "MasakhaNEWSClusteringP2P (pcm)": 61.04, + "MasakhaNEWSClusteringP2P (run)": 41.38, + "MasakhaNEWSClusteringP2P (sna)": 44.57, + "MasakhaNEWSClusteringP2P (som)": 24.65, + "MasakhaNEWSClusteringP2P (swa)": 10.56, + "MasakhaNEWSClusteringP2P (tir)": 41.9, + "MasakhaNEWSClusteringP2P (xho)": 29.12, + "MasakhaNEWSClusteringP2P (yor)": 21.53, + "MasakhaNEWSClusteringS2S (amh)": 41.49, + "MasakhaNEWSClusteringS2S (eng)": 7.12, + "MasakhaNEWSClusteringS2S (fra)": 30.34, + "MasakhaNEWSClusteringS2S (hau)": 9.86, + "MasakhaNEWSClusteringS2S (ibo)": 23.11, + "MasakhaNEWSClusteringS2S (lin)": 47.24, + "MasakhaNEWSClusteringS2S (lug)": 43.53, + "MasakhaNEWSClusteringS2S (orm)": 21.42, + "MasakhaNEWSClusteringS2S (pcm)": 32.22, + "MasakhaNEWSClusteringS2S (run)": 44.58, + "MasakhaNEWSClusteringS2S (sna)": 41.15, + "MasakhaNEWSClusteringS2S (som)": 27.5, + "MasakhaNEWSClusteringS2S (swa)": 13.0, + "MasakhaNEWSClusteringS2S (tir)": 42.49, + "MasakhaNEWSClusteringS2S (xho)": 23.83, + "MasakhaNEWSClusteringS2S (yor)": 21.0, + "MedrxivClusteringP2P": 18.12, + "MedrxivClusteringS2S": 15.2, + "RedditClustering": 12.77, + "RedditClusteringP2P": 21.5, "RuSciBenchGRNTIClusteringP2P": 36.66, - "RuSciBenchOECDClusteringP2P": 33.31 + "RuSciBenchOECDClusteringP2P": 33.31, + "StackExchangeClustering": 20.03, + "StackExchangeClusteringP2P": 20.05, + "TenKGnadClusteringP2P": 11.94, + "TenKGnadClusteringS2S": 5.7, + "TwentyNewsgroupsClustering": 11.68 } ] }, @@ -6134,13 +17724,51 @@ "max_ap": [ { "Model": "deberta-v1-base", + "CDSC-E": 43.0, + "FalseFriendsGermanEnglish": 49.28, "OpusparcusPC (ru)": 83.31, - "TERRa": 53.78 + "OpusparcusPC (de)": 87.3, + "OpusparcusPC (en)": 93.71, + "OpusparcusPC (fi)": 77.56, + "OpusparcusPC (fr)": 85.28, + "OpusparcusPC (sv)": 77.47, + "PSC": 55.97, + "PawsXPairClassification (de)": 51.36, + "PawsXPairClassification (en)": 51.62, + "PawsXPairClassification (es)": 51.5, + "PawsXPairClassification (fr)": 54.75, + "PawsXPairClassification (ja)": 49.06, + "PawsXPairClassification (ko)": 49.3, + "PawsXPairClassification (zh)": 54.11, + "SICK-E-PL": 43.35, + "SprintDuplicateQuestions": 35.51, + "TERRa": 53.78, + "TwitterSemEval2015": 45.73, + "TwitterURLCorpus": 64.83 }, { "Model": "deberta-v1-base", + "CDSC-E": 43.0, + "FalseFriendsGermanEnglish": 49.33, "OpusparcusPC (ru)": 83.69, - "TERRa": 56.49 + "OpusparcusPC (de)": 87.56, + "OpusparcusPC (en)": 93.83, + "OpusparcusPC (fi)": 77.72, + "OpusparcusPC (fr)": 85.53, + "OpusparcusPC (sv)": 77.85, + "PSC": 58.6, + "PawsXPairClassification (de)": 51.6, + "PawsXPairClassification (en)": 51.62, + "PawsXPairClassification (es)": 51.67, + "PawsXPairClassification (fr)": 54.82, + "PawsXPairClassification (ja)": 49.29, + "PawsXPairClassification (ko)": 49.52, + "PawsXPairClassification (zh)": 54.14, + "SICK-E-PL": 43.35, + "SprintDuplicateQuestions": 35.51, + "TERRa": 56.49, + "TwitterSemEval2015": 45.73, + "TwitterURLCorpus": 64.83 } ] }, @@ -6148,7 +17776,35 @@ "map": [ { "Model": "deberta-v1-base", - "RuBQReranking": 34.01 + "AlloprofReranking": 27.81, + "AskUbuntuDupQuestions": 43.92, + "MindSmallReranking": 27.66, + "RuBQReranking": 34.01, + "SciDocsRR": 43.56, + "StackOverflowDupQuestions": 31.42, + "SyntecReranking": 33.19, + "T2Reranking": 50.29 + }, + { + "Model": "deberta-v1-base", + "MIRACLReranking (ar)": 2.44, + "MIRACLReranking (bn)": 3.57, + "MIRACLReranking (de)": 3.51, + "MIRACLReranking (en)": 4.73, + "MIRACLReranking (es)": 4.5, + "MIRACLReranking (fa)": 3.87, + "MIRACLReranking (fi)": 5.18, + "MIRACLReranking (fr)": 4.07, + "MIRACLReranking (hi)": 4.35, + "MIRACLReranking (id)": 4.58, + "MIRACLReranking (ja)": 3.08, + "MIRACLReranking (ko)": 4.3, + "MIRACLReranking (ru)": 13.36, + "MIRACLReranking (sw)": 4.92, + "MIRACLReranking (te)": 2.02, + "MIRACLReranking (th)": 3.48, + "MIRACLReranking (yo)": 5.55, + "MIRACLReranking (zh)": 2.03 } ] }, @@ -6156,8 +17812,142 @@ "ndcg_at_10": [ { "Model": "deberta-v1-base", + "AILACasedocs": 6.61, + "AILAStatutes": 9.71, + "ARCChallenge": 1.06, + "AlloprofRetrieval": 0.12, + "AlphaNLI": 0.64, + "AppsRetrieval": 0.05, + "ArguAna": 12.39, + "BSARDRetrieval": 0.0, + "ClimateFEVER": 0.03, + "CmedqaRetrieval": 0.27, + "CodeFeedbackMT": 3.15, + "CodeFeedbackST": 2.06, + "CodeSearchNetCCRetrieval (python)": 2.74, + "CodeSearchNetCCRetrieval (javascript)": 11.29, + "CodeSearchNetCCRetrieval (go)": 7.66, + "CodeSearchNetCCRetrieval (ruby)": 10.7, + "CodeSearchNetCCRetrieval (java)": 10.38, + "CodeSearchNetCCRetrieval (php)": 4.16, + "CodeSearchNetRetrieval (python)": 7.02, + "CodeSearchNetRetrieval (javascript)": 4.17, + "CodeSearchNetRetrieval (go)": 7.74, + "CodeSearchNetRetrieval (ruby)": 4.92, + "CodeSearchNetRetrieval (java)": 6.66, + "CodeSearchNetRetrieval (php)": 2.69, + "CodeTransOceanContest": 5.93, + "CodeTransOceanDL": 28.11, + "CosQA": 0.16, + "CovidRetrieval": 0.0, + "DBPedia": 0.16, + "FEVER": 0.1, + "FiQA2018": 0.32, + "GerDaLIR": 0.14, + "GerDaLIRSmall": 0.41, + "GermanQuAD-Retrieval": 2.67, + "HellaSwag": 1.89, + "HotpotQA": 0.35, + "LEMBNarrativeQARetrieval": 2.01, + "LEMBQMSumRetrieval": 5.27, + "LEMBSummScreenFDRetrieval": 3.95, + "LEMBWikimQARetrieval": 7.79, + "LeCaRDv2": 22.77, + "LegalBenchConsumerContractsQA": 8.18, + "LegalBenchCorporateLobbying": 10.45, + "LegalQuAD": 2.11, + "LegalSummarization": 13.98, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 0.0, + "MIRACLRetrieval (en)": 0.0, + "MIRACLRetrieval (es)": 0.0, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 0.24, + "MIRACLRetrieval (fr)": 0.0, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 0.0, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.0, + "MIRACLRetrieval (ru)": 3.02, + "MIRACLRetrieval (sw)": 0.71, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.0, + "MIRACLRetrieval (yo)": 0.36, + "MIRACLRetrieval (zh)": 0.0, + "MintakaRetrieval (ar)": 0.62, + "MintakaRetrieval (de)": 0.69, + "MintakaRetrieval (es)": 0.75, + "MintakaRetrieval (fr)": 1.02, + "MintakaRetrieval (hi)": 0.66, + "MintakaRetrieval (it)": 0.85, + "MintakaRetrieval (ja)": 0.46, + "MintakaRetrieval (pt)": 0.61, + "NFCorpus": 1.59, + "NQ": 0.06, + "PIQA": 1.59, + "Quail": 0.04, + "QuoraRetrieval": 49.21, + "RARbCode": 0.0, + "RARbMath": 4.04, "RiaNewsRetrieval": 4.84, - "RuBQRetrieval": 10.15 + "RuBQRetrieval": 10.15, + "SCIDOCS": 0.18, + "SIQA": 0.03, + "SciFact": 0.35, + "SciFact-PL": 0.14, + "SpartQA": 0.28, + "StackOverflowQA": 6.63, + "SyntecRetrieval": 5.82, + "SyntheticText2SQL": 0.76, + "TRECCOVID": 3.56, + "TRECCOVID-PL": 2.91, + "TempReasonL1": 0.16, + "TempReasonL2Fact": 0.84, + "TempReasonL2Pure": 0.04, + "TempReasonL3Fact": 0.93, + "TempReasonL3Pure": 0.52, + "Touche2020": 0.0, + "WinoGrande": 0.03, + "XMarket (de)": 0.49, + "XMarket (en)": 0.65, + "XMarket (es)": 0.3, + "XPQARetrieval (ara-ara)": 2.45, + "XPQARetrieval (eng-ara)": 0.52, + "XPQARetrieval (ara-eng)": 0.85, + "XPQARetrieval (deu-deu)": 8.48, + "XPQARetrieval (eng-deu)": 0.46, + "XPQARetrieval (deu-eng)": 2.26, + "XPQARetrieval (spa-spa)": 4.58, + "XPQARetrieval (eng-spa)": 0.66, + "XPQARetrieval (spa-eng)": 1.32, + "XPQARetrieval (fra-fra)": 8.72, + "XPQARetrieval (eng-fra)": 0.55, + "XPQARetrieval (fra-eng)": 1.91, + "XPQARetrieval (hin-hin)": 14.72, + "XPQARetrieval (eng-hin)": 5.7, + "XPQARetrieval (hin-eng)": 0.66, + "XPQARetrieval (ita-ita)": 12.1, + "XPQARetrieval (eng-ita)": 0.92, + "XPQARetrieval (ita-eng)": 1.8, + "XPQARetrieval (jpn-jpn)": 5.58, + "XPQARetrieval (eng-jpn)": 0.47, + "XPQARetrieval (jpn-eng)": 0.46, + "XPQARetrieval (kor-kor)": 3.8, + "XPQARetrieval (eng-kor)": 0.88, + "XPQARetrieval (kor-eng)": 0.75, + "XPQARetrieval (pol-pol)": 4.63, + "XPQARetrieval (eng-pol)": 0.7, + "XPQARetrieval (pol-eng)": 0.63, + "XPQARetrieval (por-por)": 3.99, + "XPQARetrieval (eng-por)": 0.6, + "XPQARetrieval (por-eng)": 0.91, + "XPQARetrieval (tam-tam)": 1.97, + "XPQARetrieval (eng-tam)": 0.77, + "XPQARetrieval (tam-eng)": 0.48, + "XPQARetrieval (cmn-cmn)": 5.04, + "XPQARetrieval (eng-cmn)": 0.33, + "XPQARetrieval (cmn-eng)": 0.49 } ] }, @@ -6165,15 +17955,129 @@ "cosine_spearman": [ { "Model": "deberta-v1-base", + "BIOSSES": 36.15, + "CDSC-R": 62.16, + "GermanSTSBenchmark": 31.4, + "SICK-R": 49.94, + "SICK-R-PL": 36.4, + "SICKFr": 50.14, + "STS12": 44.76, + "STS13": 46.65, + "STS14": 38.15, + "STS15": 54.85, + "STS16": 49.15, + "STS17 (en-en)": 60.31, + "STS17 (es-es)": 53.94, + "STS17 (en-ar)": 1.36, + "STS17 (nl-en)": 17.23, + "STS17 (en-de)": 10.74, + "STS17 (it-en)": 15.7, + "STS17 (fr-en)": 11.55, + "STS17 (ar-ar)": 44.8, + "STS17 (es-en)": 5.34, + "STS17 (en-tr)": -1.09, + "STS17 (ko-ko)": 32.38, + "STS22 (it)": 32.04, + "STS22 (de-pl)": -14.13, + "STS22 (fr-pl)": 16.9, + "STS22 (ar)": 28.79, + "STS22 (de-fr)": -1.08, + "STS22 (pl-en)": 16.21, + "STS22 (zh-en)": 19.26, + "STS22 (en)": 44.81, + "STS22 (de)": 9.44, + "STS22 (es-it)": 24.56, + "STS22 (es)": 38.62, + "STS22 (pl)": 13.21, + "STS22 (es-en)": 21.02, + "STS22 (de-en)": 28.79, + "STS22 (fr)": 20.41, + "STS22 (zh)": 12.51, + "STS22 (tr)": 9.4, + "STSB": 24.8, + "STSBenchmark": 41.32, + "STSBenchmarkMultilingualSTS (it)": 37.39, + "STSBenchmarkMultilingualSTS (pl)": 38.12, + "STSBenchmarkMultilingualSTS (pt)": 28.76, + "STSBenchmarkMultilingualSTS (de)": 35.64, + "STSBenchmarkMultilingualSTS (nl)": 40.24, + "STSBenchmarkMultilingualSTS (zh)": 24.09, + "STSBenchmarkMultilingualSTS (es)": 38.78, + "STSBenchmarkMultilingualSTS (en)": 41.32, + "STSBenchmarkMultilingualSTS (fr)": 41.16 + }, + { + "Model": "deberta-v1-base", + "BIOSSES": 36.15, + "CDSC-R": 62.16, + "GermanSTSBenchmark": 31.4, "RUParaPhraserSTS": 54.03, "RuSTSBenchmarkSTS": 58.47, + "SICK-R": 49.94, + "SICK-R-PL": 36.4, + "SICKFr": 50.14, + "STS12": 44.76, + "STS13": 46.65, + "STS14": 38.15, + "STS15": 54.85, + "STS16": 49.15, + "STS17 (en-en)": 60.31, + "STS17 (es-es)": 53.95, + "STS17 (en-ar)": 1.36, + "STS17 (nl-en)": 17.23, + "STS17 (en-de)": 10.74, + "STS17 (it-en)": 15.7, + "STS17 (fr-en)": 11.55, + "STS17 (ar-ar)": 44.8, + "STS17 (es-en)": 5.34, + "STS17 (en-tr)": -1.09, + "STS17 (ko-ko)": 32.38, "STS22 (ru)": 47.67, - "STSBenchmarkMultilingualSTS (ru)": 58.45 + "STS22 (it)": 32.04, + "STS22 (de-pl)": -14.13, + "STS22 (fr-pl)": 16.9, + "STS22 (ar)": 28.76, + "STS22 (de-fr)": -1.08, + "STS22 (pl-en)": 16.21, + "STS22 (zh-en)": 19.26, + "STS22 (en)": 44.81, + "STS22 (de)": 9.45, + "STS22 (es-it)": 24.56, + "STS22 (es)": 38.62, + "STS22 (pl)": 13.11, + "STS22 (es-en)": 21.02, + "STS22 (de-en)": 28.79, + "STS22 (fr)": 20.41, + "STS22 (zh)": 12.51, + "STS22 (tr)": 9.4, + "STSB": 24.8, + "STSBenchmark": 41.32, + "STSBenchmarkMultilingualSTS (ru)": 58.45, + "STSBenchmarkMultilingualSTS (it)": 37.39, + "STSBenchmarkMultilingualSTS (pl)": 38.13, + "STSBenchmarkMultilingualSTS (pt)": 28.76, + "STSBenchmarkMultilingualSTS (de)": 35.64, + "STSBenchmarkMultilingualSTS (nl)": 40.24, + "STSBenchmarkMultilingualSTS (zh)": 24.09, + "STSBenchmarkMultilingualSTS (es)": 38.78, + "STSBenchmarkMultilingualSTS (en)": 41.32, + "STSBenchmarkMultilingualSTS (fr)": 41.16 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "deberta-v1-base", + "SummEval": 28.91, + "SummEvalFr": 28.45 + }, + { + "Model": "deberta-v1-base", + "SummEval": 28.91, + "SummEvalFr": 28.43 + } + ] }, "MultilabelClassification": { "accuracy": [ @@ -6185,47 +18089,676 @@ ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "deberta-v1-base", + "Core17InstructionRetrieval": -4.16, + "News21InstructionRetrieval": -0.79, + "Robust04InstructionRetrieval": -2.16 + } + ] } }, "dwzhu__e5-base-4k": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "e5-base-4k", + "BornholmBitextMining": 37.44, + "Tatoeba (cym-eng)": 6.6, + "Tatoeba (kzj-eng)": 4.21, + "Tatoeba (ita-eng)": 17.65, + "Tatoeba (gla-eng)": 2.55, + "Tatoeba (hsb-eng)": 5.04, + "Tatoeba (cat-eng)": 17.25, + "Tatoeba (ina-eng)": 32.22, + "Tatoeba (deu-eng)": 24.09, + "Tatoeba (kab-eng)": 0.8, + "Tatoeba (fin-eng)": 2.98, + "Tatoeba (awa-eng)": 0.09, + "Tatoeba (kaz-eng)": 0.43, + "Tatoeba (yue-eng)": 1.35, + "Tatoeba (wuu-eng)": 1.25, + "Tatoeba (orv-eng)": 0.22, + "Tatoeba (ell-eng)": 0.57, + "Tatoeba (spa-eng)": 21.17, + "Tatoeba (dsb-eng)": 5.42, + "Tatoeba (glg-eng)": 19.87, + "Tatoeba (aze-eng)": 3.55, + "Tatoeba (ben-eng)": 0.01, + "Tatoeba (slv-eng)": 5.9, + "Tatoeba (ceb-eng)": 4.75, + "Tatoeba (khm-eng)": 0.17, + "Tatoeba (cha-eng)": 14.24, + "Tatoeba (lat-eng)": 7.74, + "Tatoeba (swh-eng)": 5.56, + "Tatoeba (pms-eng)": 12.14, + "Tatoeba (lit-eng)": 2.8, + "Tatoeba (hun-eng)": 3.16, + "Tatoeba (pol-eng)": 5.18, + "Tatoeba (bel-eng)": 1.62, + "Tatoeba (fra-eng)": 24.72, + "Tatoeba (cor-eng)": 2.78, + "Tatoeba (ukr-eng)": 1.09, + "Tatoeba (nld-eng)": 14.96, + "Tatoeba (cmn-eng)": 1.66, + "Tatoeba (xho-eng)": 3.12, + "Tatoeba (jav-eng)": 4.89, + "Tatoeba (tha-eng)": 0.67, + "Tatoeba (zsm-eng)": 7.68, + "Tatoeba (mar-eng)": 0.05, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (ang-eng)": 15.17, + "Tatoeba (mhr-eng)": 0.14, + "Tatoeba (kat-eng)": 0.31, + "Tatoeba (eus-eng)": 4.35, + "Tatoeba (bre-eng)": 3.8, + "Tatoeba (hye-eng)": 0.4, + "Tatoeba (kor-eng)": 0.81, + "Tatoeba (afr-eng)": 7.69, + "Tatoeba (oci-eng)": 11.11, + "Tatoeba (mon-eng)": 1.34, + "Tatoeba (swg-eng)": 14.58, + "Tatoeba (amh-eng)": 0.65, + "Tatoeba (ido-eng)": 15.55, + "Tatoeba (isl-eng)": 3.54, + "Tatoeba (gle-eng)": 3.15, + "Tatoeba (pam-eng)": 4.17, + "Tatoeba (est-eng)": 3.4, + "Tatoeba (srp-eng)": 2.59, + "Tatoeba (lfn-eng)": 13.48, + "Tatoeba (pes-eng)": 0.34, + "Tatoeba (nob-eng)": 12.85, + "Tatoeba (heb-eng)": 0.65, + "Tatoeba (mal-eng)": 0.0, + "Tatoeba (gsw-eng)": 16.05, + "Tatoeba (ber-eng)": 4.23, + "Tatoeba (uig-eng)": 0.36, + "Tatoeba (uzb-eng)": 1.78, + "Tatoeba (cbk-eng)": 14.44, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (ron-eng)": 11.04, + "Tatoeba (tam-eng)": 0.84, + "Tatoeba (tur-eng)": 3.5, + "Tatoeba (ara-eng)": 0.37, + "Tatoeba (max-eng)": 11.27, + "Tatoeba (slk-eng)": 5.34, + "Tatoeba (tat-eng)": 1.06, + "Tatoeba (ind-eng)": 6.76, + "Tatoeba (hrv-eng)": 6.57, + "Tatoeba (dan-eng)": 11.31, + "Tatoeba (arq-eng)": 0.51, + "Tatoeba (mkd-eng)": 0.24, + "Tatoeba (ces-eng)": 4.11, + "Tatoeba (tgl-eng)": 4.54, + "Tatoeba (fao-eng)": 7.05, + "Tatoeba (tzl-eng)": 14.85, + "Tatoeba (bul-eng)": 1.4, + "Tatoeba (kur-eng)": 7.91, + "Tatoeba (fry-eng)": 18.03, + "Tatoeba (por-eng)": 20.03, + "Tatoeba (sqi-eng)": 5.52, + "Tatoeba (epo-eng)": 11.31, + "Tatoeba (arz-eng)": 0.42, + "Tatoeba (lvs-eng)": 3.57, + "Tatoeba (war-eng)": 6.97, + "Tatoeba (tel-eng)": 0.53, + "Tatoeba (csb-eng)": 7.24, + "Tatoeba (swe-eng)": 10.78, + "Tatoeba (nov-eng)": 25.6, + "Tatoeba (ast-eng)": 17.6, + "Tatoeba (ile-eng)": 20.4, + "Tatoeba (yid-eng)": 0.24, + "Tatoeba (rus-eng)": 0.83, + "Tatoeba (nds-eng)": 12.48, + "Tatoeba (jpn-eng)": 0.92, + "Tatoeba (bos-eng)": 7.11, + "Tatoeba (vie-eng)": 4.39, + "Tatoeba (tuk-eng)": 2.82, + "Tatoeba (dtp-eng)": 3.12, + "Tatoeba (nno-eng)": 8.52 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-base-4k", + "AllegroReviews": 25.0, + "AmazonCounterfactualClassification (en-ext)": 77.13, + "AmazonCounterfactualClassification (en)": 75.42, + "AmazonCounterfactualClassification (de)": 56.3, + "AmazonCounterfactualClassification (ja)": 59.45, + "AmazonReviewsClassification (en)": 42.61, + "AmazonReviewsClassification (de)": 28.9, + "AmazonReviewsClassification (es)": 33.41, + "AmazonReviewsClassification (fr)": 29.98, + "AmazonReviewsClassification (ja)": 22.06, + "AmazonReviewsClassification (zh)": 23.31, + "AngryTweetsClassification": 45.42, + "CBD": 54.25, + "DanishPoliticalCommentsClassification": 28.73, + "GeoreviewClassification": 29.03, + "HeadlineClassification": 34.62, + "InappropriatenessClassification": 52.6, + "KinopoiskClassification": 38.3, + "LccSentimentClassification": 36.73, + "MTOPDomainClassification (en)": 91.79, + "MTOPDomainClassification (de)": 76.14, + "MTOPDomainClassification (es)": 79.22, + "MTOPDomainClassification (fr)": 80.24, + "MTOPDomainClassification (hi)": 37.21, + "MTOPDomainClassification (th)": 17.87, + "MTOPIntentClassification (en)": 71.07, + "MTOPIntentClassification (de)": 53.67, + "MTOPIntentClassification (es)": 53.94, + "MTOPIntentClassification (fr)": 51.56, + "MTOPIntentClassification (hi)": 23.09, + "MTOPIntentClassification (th)": 4.97, + "MasakhaNEWSClassification (amh)": 34.41, + "MasakhaNEWSClassification (eng)": 78.88, + "MasakhaNEWSClassification (fra)": 78.41, + "MasakhaNEWSClassification (hau)": 65.27, + "MasakhaNEWSClassification (ibo)": 60.33, + "MasakhaNEWSClassification (lin)": 74.57, + "MasakhaNEWSClassification (lug)": 59.1, + "MasakhaNEWSClassification (orm)": 60.98, + "MasakhaNEWSClassification (pcm)": 93.05, + "MasakhaNEWSClassification (run)": 66.71, + "MasakhaNEWSClassification (sna)": 78.75, + "MasakhaNEWSClassification (som)": 56.26, + "MasakhaNEWSClassification (swa)": 58.05, + "MasakhaNEWSClassification (tir)": 24.45, + "MasakhaNEWSClassification (xho)": 62.12, + "MasakhaNEWSClassification (yor)": 66.86, + "MassiveIntentClassification (pl)": 48.05, + "MassiveIntentClassification (it)": 51.27, + "MassiveIntentClassification (ms)": 46.17, + "MassiveIntentClassification (ka)": 16.05, + "MassiveIntentClassification (id)": 48.44, + "MassiveIntentClassification (zh-CN)": 24.16, + "MassiveIntentClassification (ko)": 22.7, + "MassiveIntentClassification (zh-TW)": 22.92, + "MassiveIntentClassification (tl)": 48.06, + "MassiveIntentClassification (te)": 2.89, + "MassiveIntentClassification (bn)": 21.63, + "MassiveIntentClassification (kn)": 3.69, + "MassiveIntentClassification (jv)": 44.23, + "MassiveIntentClassification (en)": 68.79, + "MassiveIntentClassification (th)": 14.7, + "MassiveIntentClassification (af)": 46.12, + "MassiveIntentClassification (sv)": 48.32, + "MassiveIntentClassification (de)": 52.16, + "MassiveIntentClassification (km)": 4.44, + "MassiveIntentClassification (sq)": 48.83, + "MassiveIntentClassification (mn)": 32.05, + "MassiveIntentClassification (hy)": 13.87, + "MassiveIntentClassification (ru)": 40.19, + "MassiveIntentClassification (ja)": 35.46, + "MassiveIntentClassification (lv)": 46.85, + "MassiveIntentClassification (is)": 42.95, + "MassiveIntentClassification (nl)": 48.78, + "MassiveIntentClassification (ur)": 22.85, + "MassiveIntentClassification (ro)": 47.9, + "MassiveIntentClassification (az)": 45.89, + "MassiveIntentClassification (ta)": 15.46, + "MassiveIntentClassification (he)": 25.69, + "MassiveIntentClassification (sl)": 46.21, + "MassiveIntentClassification (fr)": 51.28, + "MassiveIntentClassification (ml)": 2.99, + "MassiveIntentClassification (hi)": 21.33, + "MassiveIntentClassification (fa)": 33.16, + "MassiveIntentClassification (es)": 50.58, + "MassiveIntentClassification (sw)": 42.91, + "MassiveIntentClassification (nb)": 47.51, + "MassiveIntentClassification (hu)": 45.44, + "MassiveIntentClassification (my)": 3.77, + "MassiveIntentClassification (fi)": 46.94, + "MassiveIntentClassification (el)": 37.56, + "MassiveIntentClassification (pt)": 52.64, + "MassiveIntentClassification (vi)": 42.19, + "MassiveIntentClassification (ar)": 27.37, + "MassiveIntentClassification (da)": 50.82, + "MassiveIntentClassification (tr)": 48.14, + "MassiveIntentClassification (cy)": 41.94, + "MassiveIntentClassification (am)": 3.14, + "MassiveScenarioClassification (tr)": 49.88, + "MassiveScenarioClassification (nl)": 53.28, + "MassiveScenarioClassification (ru)": 40.39, + "MassiveScenarioClassification (es)": 56.93, + "MassiveScenarioClassification (bn)": 25.42, + "MassiveScenarioClassification (en)": 74.89, + "MassiveScenarioClassification (ms)": 50.69, + "MassiveScenarioClassification (az)": 48.41, + "MassiveScenarioClassification (hy)": 17.81, + "MassiveScenarioClassification (ro)": 54.43, + "MassiveScenarioClassification (it)": 56.36, + "MassiveScenarioClassification (he)": 24.88, + "MassiveScenarioClassification (pl)": 50.65, + "MassiveScenarioClassification (th)": 23.86, + "MassiveScenarioClassification (ar)": 30.17, + "MassiveScenarioClassification (da)": 54.18, + "MassiveScenarioClassification (ta)": 19.69, + "MassiveScenarioClassification (km)": 9.53, + "MassiveScenarioClassification (is)": 46.28, + "MassiveScenarioClassification (id)": 51.86, + "MassiveScenarioClassification (ja)": 42.15, + "MassiveScenarioClassification (ko)": 24.97, + "MassiveScenarioClassification (mn)": 32.55, + "MassiveScenarioClassification (my)": 9.53, + "MassiveScenarioClassification (kn)": 8.33, + "MassiveScenarioClassification (fa)": 32.18, + "MassiveScenarioClassification (tl)": 52.71, + "MassiveScenarioClassification (vi)": 41.53, + "MassiveScenarioClassification (lv)": 47.51, + "MassiveScenarioClassification (de)": 58.81, + "MassiveScenarioClassification (sq)": 52.08, + "MassiveScenarioClassification (zh-TW)": 29.49, + "MassiveScenarioClassification (af)": 50.27, + "MassiveScenarioClassification (te)": 7.42, + "MassiveScenarioClassification (zh-CN)": 31.79, + "MassiveScenarioClassification (fi)": 48.38, + "MassiveScenarioClassification (ka)": 20.37, + "MassiveScenarioClassification (sl)": 48.83, + "MassiveScenarioClassification (fr)": 57.84, + "MassiveScenarioClassification (sw)": 45.52, + "MassiveScenarioClassification (cy)": 43.49, + "MassiveScenarioClassification (nb)": 52.14, + "MassiveScenarioClassification (el)": 42.01, + "MassiveScenarioClassification (am)": 7.88, + "MassiveScenarioClassification (jv)": 47.79, + "MassiveScenarioClassification (ml)": 6.72, + "MassiveScenarioClassification (ur)": 27.49, + "MassiveScenarioClassification (hu)": 49.43, + "MassiveScenarioClassification (hi)": 23.89, + "MassiveScenarioClassification (sv)": 51.18, + "MassiveScenarioClassification (pt)": 56.74, + "NoRecClassification": 39.24, + "NordicLangClassification": 59.99, + "PAC": 65.23, + "PolEmo2.0-IN": 44.43, + "PolEmo2.0-OUT": 28.2, + "RuReviewsClassification": 44.29, + "RuSciBenchGRNTIClassification": 17.9, + "RuSciBenchOECDClassification": 14.1 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "e5-base-4k", + "AlloProfClusteringP2P": 58.85, + "AlloProfClusteringS2S": 31.56, + "BlurbsClusteringP2P": 26.39, + "BlurbsClusteringS2S": 9.82, + "GeoreviewClusteringP2P": 16.68, + "HALClusteringS2S": 21.16, + "MLSUMClusteringP2P (de)": 39.77, + "MLSUMClusteringP2P (fr)": 43.92, + "MLSUMClusteringP2P (ru)": 24.68, + "MLSUMClusteringP2P (es)": 46.46, + "MLSUMClusteringS2S (de)": 41.62, + "MLSUMClusteringS2S (fr)": 43.87, + "MLSUMClusteringS2S (ru)": 23.86, + "MLSUMClusteringS2S (es)": 47.17, + "MasakhaNEWSClusteringP2P (amh)": 40.56, + "MasakhaNEWSClusteringP2P (eng)": 64.2, + "MasakhaNEWSClusteringP2P (fra)": 64.85, + "MasakhaNEWSClusteringP2P (hau)": 43.08, + "MasakhaNEWSClusteringP2P (ibo)": 37.42, + "MasakhaNEWSClusteringP2P (lin)": 61.0, + "MasakhaNEWSClusteringP2P (lug)": 48.71, + "MasakhaNEWSClusteringP2P (orm)": 26.61, + "MasakhaNEWSClusteringP2P (pcm)": 87.61, + "MasakhaNEWSClusteringP2P (run)": 54.56, + "MasakhaNEWSClusteringP2P (sna)": 52.73, + "MasakhaNEWSClusteringP2P (som)": 34.27, + "MasakhaNEWSClusteringP2P (swa)": 23.29, + "MasakhaNEWSClusteringP2P (tir)": 42.26, + "MasakhaNEWSClusteringP2P (xho)": 30.04, + "MasakhaNEWSClusteringP2P (yor)": 26.99, + "MasakhaNEWSClusteringS2S (amh)": 43.55, + "MasakhaNEWSClusteringS2S (eng)": 45.61, + "MasakhaNEWSClusteringS2S (fra)": 43.42, + "MasakhaNEWSClusteringS2S (hau)": 14.98, + "MasakhaNEWSClusteringS2S (ibo)": 34.4, + "MasakhaNEWSClusteringS2S (lin)": 47.29, + "MasakhaNEWSClusteringS2S (lug)": 43.89, + "MasakhaNEWSClusteringS2S (orm)": 26.09, + "MasakhaNEWSClusteringS2S (pcm)": 62.8, + "MasakhaNEWSClusteringS2S (run)": 46.22, + "MasakhaNEWSClusteringS2S (sna)": 49.67, + "MasakhaNEWSClusteringS2S (som)": 30.8, + "MasakhaNEWSClusteringS2S (swa)": 16.16, + "MasakhaNEWSClusteringS2S (tir)": 49.22, + "MasakhaNEWSClusteringS2S (xho)": 27.73, + "MasakhaNEWSClusteringS2S (yor)": 29.45, + "RuSciBenchGRNTIClusteringP2P": 15.63, + "RuSciBenchOECDClusteringP2P": 13.86, + "TenKGnadClusteringP2P": 36.97, + "TenKGnadClusteringS2S": 19.89 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "e5-base-4k", + "CDSC-E": 55.27, + "FalseFriendsGermanEnglish": 47.99, + "OpusparcusPC (de)": 91.41, + "OpusparcusPC (en)": 98.49, + "OpusparcusPC (fi)": 87.15, + "OpusparcusPC (fr)": 87.64, + "OpusparcusPC (ru)": 78.8, + "OpusparcusPC (sv)": 85.15, + "PSC": 95.48, + "PawsXPairClassification (de)": 52.25, + "PawsXPairClassification (en)": 55.42, + "PawsXPairClassification (es)": 54.44, + "PawsXPairClassification (fr)": 55.38, + "PawsXPairClassification (ja)": 49.52, + "PawsXPairClassification (ko)": 53.39, + "PawsXPairClassification (zh)": 52.8, + "SICK-E-PL": 48.65, + "TERRa": 47.66 + }, + { + "Model": "e5-base-4k", + "CDSC-E": 55.27, + "FalseFriendsGermanEnglish": 48.49, + "OpusparcusPC (de)": 91.69, + "OpusparcusPC (en)": 98.49, + "OpusparcusPC (fi)": 87.19, + "OpusparcusPC (fr)": 87.77, + "OpusparcusPC (ru)": 78.8, + "OpusparcusPC (sv)": 85.15, + "PSC": 95.48, + "PawsXPairClassification (de)": 52.57, + "PawsXPairClassification (en)": 55.42, + "PawsXPairClassification (es)": 54.44, + "PawsXPairClassification (fr)": 55.38, + "PawsXPairClassification (ja)": 49.99, + "PawsXPairClassification (ko)": 53.39, + "PawsXPairClassification (zh)": 52.92, + "SICK-E-PL": 48.72, + "TERRa": 50.83 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-base-4k", + "AlloprofReranking": 58.33, + "RuBQReranking": 30.66, + "SyntecReranking": 66.58, + "T2Reranking": 58.61 + }, + { + "Model": "e5-base-4k", + "MIRACLReranking (ar)": 4.39, + "MIRACLReranking (bn)": 5.48, + "MIRACLReranking (de)": 20.91, + "MIRACLReranking (en)": 50.59, + "MIRACLReranking (es)": 31.16, + "MIRACLReranking (fa)": 7.34, + "MIRACLReranking (fi)": 35.17, + "MIRACLReranking (fr)": 22.29, + "MIRACLReranking (hi)": 8.13, + "MIRACLReranking (id)": 18.96, + "MIRACLReranking (ja)": 9.21, + "MIRACLReranking (ko)": 10.34, + "MIRACLReranking (ru)": 8.03, + "MIRACLReranking (sw)": 23.58, + "MIRACLReranking (te)": 1.48, + "MIRACLReranking (th)": 3.42, + "MIRACLReranking (yo)": 46.31, + "MIRACLReranking (zh)": 8.16 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "e5-base-4k", - "LEMBNarrativeQARetrieval": 30.35, - "LEMBQMSumRetrieval": 35.6, - "LEMBSummScreenFDRetrieval": 95.23, - "LEMBWikimQARetrieval": 69.19 + "AILACasedocs": 33.09, + "AILAStatutes": 21.14, + "ARCChallenge": 7.69, + "AlloprofRetrieval": 29.3, + "AlphaNLI": 15.44, + "AppsRetrieval": 9.64, + "BSARDRetrieval": 8.77, + "CmedqaRetrieval": 2.97, + "CodeFeedbackMT": 50.26, + "CodeFeedbackST": 68.77, + "CodeSearchNetCCRetrieval (python)": 61.3, + "CodeSearchNetCCRetrieval (javascript)": 58.62, + "CodeSearchNetCCRetrieval (go)": 43.79, + "CodeSearchNetCCRetrieval (ruby)": 55.57, + "CodeSearchNetCCRetrieval (java)": 56.55, + "CodeSearchNetCCRetrieval (php)": 44.61, + "CodeSearchNetRetrieval (python)": 81.0, + "CodeSearchNetRetrieval (javascript)": 62.39, + "CodeSearchNetRetrieval (go)": 85.89, + "CodeSearchNetRetrieval (ruby)": 70.06, + "CodeSearchNetRetrieval (java)": 68.1, + "CodeSearchNetRetrieval (php)": 76.27, + "CodeTransOceanContest": 66.52, + "CodeTransOceanDL": 21.23, + "CosQA": 27.05, + "CovidRetrieval": 4.76, + "GerDaLIR": 4.99, + "GerDaLIRSmall": 12.6, + "GermanQuAD-Retrieval": 79.75, + "HellaSwag": 23.04, + "LEMBNarrativeQARetrieval": 30.03, + "LEMBQMSumRetrieval": 31.27, + "LEMBSummScreenFDRetrieval": 93.87, + "LEMBWikimQARetrieval": 68.88, + "LeCaRDv2": 23.17, + "LegalBenchConsumerContractsQA": 71.44, + "LegalBenchCorporateLobbying": 90.17, + "LegalQuAD": 26.9, + "LegalSummarization": 56.85, + "MIRACLRetrieval (ar)": 0.02, + "MIRACLRetrieval (bn)": 0.04, + "MIRACLRetrieval (de)": 7.87, + "MIRACLRetrieval (en)": 38.19, + "MIRACLRetrieval (es)": 13.31, + "MIRACLRetrieval (fa)": 0.13, + "MIRACLRetrieval (fi)": 15.01, + "MIRACLRetrieval (fr)": 6.84, + "MIRACLRetrieval (hi)": 0.27, + "MIRACLRetrieval (id)": 7.37, + "MIRACLRetrieval (ja)": 0.71, + "MIRACLRetrieval (ko)": 2.23, + "MIRACLRetrieval (ru)": 0.89, + "MIRACLRetrieval (sw)": 12.56, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.5, + "MIRACLRetrieval (yo)": 28.13, + "MIRACLRetrieval (zh)": 0.16, + "MintakaRetrieval (ar)": 1.52, + "MintakaRetrieval (de)": 14.29, + "MintakaRetrieval (es)": 14.17, + "MintakaRetrieval (fr)": 13.58, + "MintakaRetrieval (hi)": 3.77, + "MintakaRetrieval (it)": 12.69, + "MintakaRetrieval (ja)": 7.43, + "MintakaRetrieval (pt)": 13.59, + "PIQA": 21.78, + "Quail": 4.21, + "RARbCode": 43.05, + "RARbMath": 60.52, + "RiaNewsRetrieval": 2.94, + "RuBQRetrieval": 3.12, + "SIQA": 2.94, + "SciFact-PL": 32.21, + "SpartQA": 3.54, + "StackOverflowQA": 79.92, + "SyntecRetrieval": 60.84, + "SyntheticText2SQL": 46.72, + "TRECCOVID-PL": 16.5, + "TempReasonL1": 1.47, + "TempReasonL2Fact": 21.62, + "TempReasonL2Pure": 2.09, + "TempReasonL3Fact": 19.08, + "TempReasonL3Pure": 9.6, + "WinoGrande": 37.33, + "XMarket (de)": 6.28, + "XMarket (en)": 9.09, + "XMarket (es)": 8.74, + "XPQARetrieval (ara-ara)": 7.66, + "XPQARetrieval (eng-ara)": 3.29, + "XPQARetrieval (ara-eng)": 8.24, + "XPQARetrieval (deu-deu)": 55.65, + "XPQARetrieval (eng-deu)": 13.61, + "XPQARetrieval (deu-eng)": 29.81, + "XPQARetrieval (spa-spa)": 41.97, + "XPQARetrieval (eng-spa)": 12.56, + "XPQARetrieval (spa-eng)": 23.27, + "XPQARetrieval (fra-fra)": 49.52, + "XPQARetrieval (eng-fra)": 15.6, + "XPQARetrieval (fra-eng)": 30.23, + "XPQARetrieval (hin-hin)": 24.77, + "XPQARetrieval (eng-hin)": 7.98, + "XPQARetrieval (hin-eng)": 7.88, + "XPQARetrieval (ita-ita)": 51.7, + "XPQARetrieval (eng-ita)": 11.12, + "XPQARetrieval (ita-eng)": 26.41, + "XPQARetrieval (jpn-jpn)": 33.68, + "XPQARetrieval (eng-jpn)": 4.41, + "XPQARetrieval (jpn-eng)": 16.85, + "XPQARetrieval (kor-kor)": 8.61, + "XPQARetrieval (eng-kor)": 7.06, + "XPQARetrieval (kor-eng)": 7.1, + "XPQARetrieval (pol-pol)": 32.38, + "XPQARetrieval (eng-pol)": 11.33, + "XPQARetrieval (pol-eng)": 18.59, + "XPQARetrieval (por-por)": 35.01, + "XPQARetrieval (eng-por)": 8.38, + "XPQARetrieval (por-eng)": 22.92, + "XPQARetrieval (tam-tam)": 5.89, + "XPQARetrieval (eng-tam)": 3.29, + "XPQARetrieval (tam-eng)": 3.85, + "XPQARetrieval (cmn-cmn)": 19.17, + "XPQARetrieval (eng-cmn)": 4.99, + "XPQARetrieval (cmn-eng)": 9.77 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base-4k", + "CDSC-R": 84.98, + "GermanSTSBenchmark": 61.53, + "RUParaPhraserSTS": 48.39, + "RuSTSBenchmarkSTS": 55.63, + "SICK-R-PL": 56.01, + "SICKFr": 62.07, + "STS22 (it)": 70.79, + "STS22 (ru)": 24.97, + "STS22 (de-pl)": 16.84, + "STS22 (es)": 61.84, + "STS22 (de)": 31.41, + "STS22 (es-en)": 61.41, + "STS22 (es-it)": 55.06, + "STS22 (ar)": 34.56, + "STS22 (fr-pl)": 61.98, + "STS22 (tr)": 48.3, + "STS22 (fr)": 72.42, + "STS22 (de-en)": 48.14, + "STS22 (pl-en)": 55.43, + "STS22 (de-fr)": 47.22, + "STS22 (zh-en)": 33.19, + "STS22 (pl)": 21.75, + "STS22 (en)": 61.92, + "STS22 (zh)": 52.55, + "STSB": 30.49, + "STSBenchmarkMultilingualSTS (zh)": 31.46, + "STSBenchmarkMultilingualSTS (fr)": 65.61, + "STSBenchmarkMultilingualSTS (en)": 82.93, + "STSBenchmarkMultilingualSTS (nl)": 63.39, + "STSBenchmarkMultilingualSTS (es)": 66.75, + "STSBenchmarkMultilingualSTS (ru)": 56.1, + "STSBenchmarkMultilingualSTS (de)": 62.41, + "STSBenchmarkMultilingualSTS (pt)": 63.81, + "STSBenchmarkMultilingualSTS (it)": 64.35, + "STSBenchmarkMultilingualSTS (pl)": 57.73 + }, + { + "Model": "e5-base-4k", + "CDSC-R": 84.98, + "GermanSTSBenchmark": 61.53, + "RUParaPhraserSTS": 48.39, + "RuSTSBenchmarkSTS": 55.64, + "SICK-R-PL": 56.01, + "SICKFr": 62.07, + "STS22 (it)": 70.79, + "STS22 (ru)": 24.96, + "STS22 (de-pl)": 16.84, + "STS22 (es)": 61.84, + "STS22 (de)": 31.42, + "STS22 (es-en)": 61.41, + "STS22 (es-it)": 55.06, + "STS22 (ar)": 34.55, + "STS22 (fr-pl)": 61.98, + "STS22 (tr)": 48.3, + "STS22 (fr)": 72.42, + "STS22 (de-en)": 48.14, + "STS22 (pl-en)": 55.43, + "STS22 (de-fr)": 47.22, + "STS22 (zh-en)": 33.19, + "STS22 (pl)": 21.86, + "STS22 (en)": 61.92, + "STS22 (zh)": 52.55, + "STSB": 30.48, + "STSBenchmarkMultilingualSTS (zh)": 31.46, + "STSBenchmarkMultilingualSTS (fr)": 65.61, + "STSBenchmarkMultilingualSTS (en)": 82.93, + "STSBenchmarkMultilingualSTS (nl)": 63.39, + "STSBenchmarkMultilingualSTS (es)": 66.75, + "STSBenchmarkMultilingualSTS (ru)": 56.1, + "STSBenchmarkMultilingualSTS (de)": 62.41, + "STSBenchmarkMultilingualSTS (pt)": 63.81, + "STSBenchmarkMultilingualSTS (it)": 64.35, + "STSBenchmarkMultilingualSTS (pl)": 57.73 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base-4k", + "SummEvalFr": 29.55 + }, + { + "Model": "e5-base-4k", + "SummEvalFr": 29.55 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-base-4k", + "CEDRClassification": 33.96, + "SensitiveTopicsClassification": 17.86 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "e5-base-4k", + "Core17InstructionRetrieval": -2.14, + "News21InstructionRetrieval": -0.43, + "Robust04InstructionRetrieval": -6.2 + } + ] } }, "elastic__elser-v2": { @@ -8105,7 +20638,119 @@ "f1": [ { "Model": "e5-base", - "BornholmBitextMining": 40.09 + "BornholmBitextMining": 40.09, + "Tatoeba (ast-eng)": 16.87, + "Tatoeba (tzl-eng)": 15.98, + "Tatoeba (csb-eng)": 8.62, + "Tatoeba (est-eng)": 3.75, + "Tatoeba (nov-eng)": 34.73, + "Tatoeba (por-eng)": 30.46, + "Tatoeba (eus-eng)": 7.28, + "Tatoeba (hun-eng)": 5.8, + "Tatoeba (xho-eng)": 4.37, + "Tatoeba (fra-eng)": 33.47, + "Tatoeba (rus-eng)": 0.3, + "Tatoeba (pam-eng)": 5.21, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (nld-eng)": 18.58, + "Tatoeba (pol-eng)": 7.24, + "Tatoeba (kzj-eng)": 5.03, + "Tatoeba (tam-eng)": 0.0, + "Tatoeba (kaz-eng)": 0.73, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (yid-eng)": 0.05, + "Tatoeba (max-eng)": 11.72, + "Tatoeba (nob-eng)": 15.27, + "Tatoeba (slk-eng)": 6.67, + "Tatoeba (hye-eng)": 0.67, + "Tatoeba (nds-eng)": 16.31, + "Tatoeba (khm-eng)": 0.28, + "Tatoeba (tat-eng)": 0.68, + "Tatoeba (tgl-eng)": 6.8, + "Tatoeba (lit-eng)": 3.32, + "Tatoeba (spa-eng)": 30.23, + "Tatoeba (cha-eng)": 14.81, + "Tatoeba (gle-eng)": 3.74, + "Tatoeba (deu-eng)": 24.57, + "Tatoeba (arq-eng)": 0.48, + "Tatoeba (kur-eng)": 7.76, + "Tatoeba (cmn-eng)": 2.06, + "Tatoeba (jpn-eng)": 0.24, + "Tatoeba (glg-eng)": 25.31, + "Tatoeba (vie-eng)": 5.91, + "Tatoeba (swe-eng)": 11.32, + "Tatoeba (heb-eng)": 0.61, + "Tatoeba (war-eng)": 6.66, + "Tatoeba (zsm-eng)": 9.47, + "Tatoeba (ina-eng)": 36.55, + "Tatoeba (pes-eng)": 0.6, + "Tatoeba (dsb-eng)": 6.67, + "Tatoeba (dan-eng)": 15.22, + "Tatoeba (cbk-eng)": 18.11, + "Tatoeba (fin-eng)": 4.66, + "Tatoeba (cat-eng)": 20.57, + "Tatoeba (afr-eng)": 8.9, + "Tatoeba (yue-eng)": 1.15, + "Tatoeba (bel-eng)": 1.63, + "Tatoeba (orv-eng)": 0.01, + "Tatoeba (kor-eng)": 1.31, + "Tatoeba (hrv-eng)": 9.04, + "Tatoeba (srp-eng)": 4.41, + "Tatoeba (ron-eng)": 14.49, + "Tatoeba (mon-eng)": 1.46, + "Tatoeba (ceb-eng)": 6.24, + "Tatoeba (ile-eng)": 27.93, + "Tatoeba (tur-eng)": 4.92, + "Tatoeba (ber-eng)": 5.41, + "Tatoeba (uig-eng)": 0.5, + "Tatoeba (gla-eng)": 2.87, + "Tatoeba (awa-eng)": 0.03, + "Tatoeba (uzb-eng)": 4.12, + "Tatoeba (slv-eng)": 6.94, + "Tatoeba (bul-eng)": 0.77, + "Tatoeba (lfn-eng)": 19.71, + "Tatoeba (bos-eng)": 11.19, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (cym-eng)": 6.44, + "Tatoeba (fao-eng)": 9.59, + "Tatoeba (bre-eng)": 4.45, + "Tatoeba (fry-eng)": 18.26, + "Tatoeba (sqi-eng)": 7.02, + "Tatoeba (ita-eng)": 22.75, + "Tatoeba (kat-eng)": 0.81, + "Tatoeba (ces-eng)": 5.56, + "Tatoeba (oci-eng)": 14.39, + "Tatoeba (hsb-eng)": 5.43, + "Tatoeba (mkd-eng)": 0.19, + "Tatoeba (ara-eng)": 0.39, + "Tatoeba (ell-eng)": 0.5, + "Tatoeba (mhr-eng)": 0.11, + "Tatoeba (isl-eng)": 5.16, + "Tatoeba (ang-eng)": 17.07, + "Tatoeba (ido-eng)": 22.37, + "Tatoeba (lvs-eng)": 5.23, + "Tatoeba (cor-eng)": 2.91, + "Tatoeba (kab-eng)": 1.2, + "Tatoeba (arz-eng)": 0.45, + "Tatoeba (swh-eng)": 6.96, + "Tatoeba (ind-eng)": 8.47, + "Tatoeba (jav-eng)": 6.0, + "Tatoeba (ukr-eng)": 1.0, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (tel-eng)": 0.02, + "Tatoeba (epo-eng)": 14.41, + "Tatoeba (aze-eng)": 4.7, + "Tatoeba (tha-eng)": 1.22, + "Tatoeba (tuk-eng)": 4.64, + "Tatoeba (dtp-eng)": 3.63, + "Tatoeba (lat-eng)": 10.81, + "Tatoeba (wuu-eng)": 1.36, + "Tatoeba (pms-eng)": 14.19, + "Tatoeba (nno-eng)": 10.62, + "Tatoeba (swg-eng)": 14.61, + "Tatoeba (mar-eng)": 0.01, + "Tatoeba (gsw-eng)": 16.17 } ] }, @@ -8113,105 +20758,1256 @@ "accuracy": [ { "Model": "e5-base", + "AllegroReviews": 25.57, + "AmazonCounterfactualClassification (en-ext)": 79.76, + "AmazonCounterfactualClassification (en)": 78.16, + "AmazonCounterfactualClassification (de)": 55.84, + "AmazonCounterfactualClassification (ja)": 61.0, + "AmazonReviewsClassification (en)": 42.32, + "AmazonReviewsClassification (de)": 28.15, + "AmazonReviewsClassification (es)": 33.78, + "AmazonReviewsClassification (fr)": 30.98, + "AmazonReviewsClassification (ja)": 21.86, + "AmazonReviewsClassification (zh)": 22.48, "AngryTweetsClassification": 45.06, + "CBD": 50.95, "DKHateClassification": 58.51, "DanishPoliticalCommentsClassification": 28.43, + "GeoreviewClassification": 27.81, + "HeadlineClassification": 29.8, + "InappropriatenessClassification": 51.95, + "KinopoiskClassification": 34.33, "LccSentimentClassification": 37.47, - "MassiveIntentClassification (da)": 44.25, - "MassiveIntentClassification (nb)": 41.57, + "MTOPDomainClassification (en)": 90.44, + "MTOPDomainClassification (de)": 75.4, + "MTOPDomainClassification (es)": 77.44, + "MTOPDomainClassification (fr)": 79.77, + "MTOPDomainClassification (hi)": 33.01, + "MTOPDomainClassification (th)": 15.24, + "MTOPIntentClassification (en)": 60.07, + "MTOPIntentClassification (de)": 46.75, + "MTOPIntentClassification (es)": 44.19, + "MTOPIntentClassification (fr)": 40.11, + "MTOPIntentClassification (hi)": 11.91, + "MTOPIntentClassification (th)": 5.26, + "MasakhaNEWSClassification (amh)": 34.36, + "MasakhaNEWSClassification (eng)": 76.82, + "MasakhaNEWSClassification (fra)": 75.0, + "MasakhaNEWSClassification (hau)": 61.32, + "MasakhaNEWSClassification (ibo)": 58.13, + "MasakhaNEWSClassification (lin)": 73.77, + "MasakhaNEWSClassification (lug)": 55.38, + "MasakhaNEWSClassification (orm)": 58.46, + "MasakhaNEWSClassification (pcm)": 91.64, + "MasakhaNEWSClassification (run)": 61.89, + "MasakhaNEWSClassification (sna)": 74.5, + "MasakhaNEWSClassification (som)": 50.88, + "MasakhaNEWSClassification (swa)": 54.96, + "MasakhaNEWSClassification (tir)": 28.57, + "MasakhaNEWSClassification (xho)": 60.2, + "MasakhaNEWSClassification (yor)": 67.69, + "MassiveIntentClassification (mn)": 18.38, + "MassiveIntentClassification (cy)": 37.34, + "MassiveIntentClassification (pl)": 40.23, + "MassiveIntentClassification (zh-TW)": 18.05, + "MassiveIntentClassification (ro)": 43.84, + "MassiveIntentClassification (sq)": 43.14, + "MassiveIntentClassification (hy)": 7.64, + "MassiveIntentClassification (zh-CN)": 18.45, + "MassiveIntentClassification (hi)": 11.95, + "MassiveIntentClassification (fr)": 47.51, + "MassiveIntentClassification (th)": 10.45, + "MassiveIntentClassification (fi)": 41.0, + "MassiveIntentClassification (af)": 40.46, + "MassiveIntentClassification (sw)": 39.67, + "MassiveIntentClassification (he)": 18.66, + "MassiveIntentClassification (az)": 34.85, + "MassiveIntentClassification (ka)": 9.79, + "MassiveIntentClassification (ru)": 21.3, "MassiveIntentClassification (sv)": 41.34, - "MassiveScenarioClassification (da)": 52.99, + "MassiveIntentClassification (am)": 2.64, + "MassiveIntentClassification (ar)": 19.69, + "MassiveIntentClassification (ml)": 2.72, + "MassiveIntentClassification (jv)": 38.65, + "MassiveIntentClassification (vi)": 36.42, + "MassiveIntentClassification (es)": 45.51, + "MassiveIntentClassification (ja)": 21.63, + "MassiveIntentClassification (en)": 66.65, + "MassiveIntentClassification (fa)": 24.17, + "MassiveIntentClassification (pt)": 48.65, + "MassiveIntentClassification (lv)": 41.61, + "MassiveIntentClassification (sl)": 40.01, + "MassiveIntentClassification (nb)": 41.57, + "MassiveIntentClassification (id)": 42.07, + "MassiveIntentClassification (bn)": 14.29, + "MassiveIntentClassification (nl)": 42.0, + "MassiveIntentClassification (it)": 45.7, + "MassiveIntentClassification (my)": 3.5, + "MassiveIntentClassification (da)": 44.25, + "MassiveIntentClassification (ta)": 9.46, + "MassiveIntentClassification (is)": 36.7, + "MassiveIntentClassification (tr)": 40.9, + "MassiveIntentClassification (km)": 4.41, + "MassiveIntentClassification (de)": 45.41, + "MassiveIntentClassification (hu)": 39.2, + "MassiveIntentClassification (kn)": 3.1, + "MassiveIntentClassification (tl)": 41.07, + "MassiveIntentClassification (ms)": 38.71, + "MassiveIntentClassification (te)": 2.29, + "MassiveIntentClassification (el)": 21.94, + "MassiveIntentClassification (ko)": 18.8, + "MassiveIntentClassification (ur)": 14.28, + "MassiveScenarioClassification (id)": 48.83, + "MassiveScenarioClassification (ka)": 16.56, + "MassiveScenarioClassification (ru)": 27.27, + "MassiveScenarioClassification (lv)": 46.2, + "MassiveScenarioClassification (jv)": 46.01, + "MassiveScenarioClassification (sw)": 47.12, + "MassiveScenarioClassification (ur)": 21.3, + "MassiveScenarioClassification (th)": 19.11, + "MassiveScenarioClassification (pt)": 56.73, + "MassiveScenarioClassification (sl)": 46.15, + "MassiveScenarioClassification (km)": 8.75, + "MassiveScenarioClassification (te)": 6.83, + "MassiveScenarioClassification (my)": 10.36, + "MassiveScenarioClassification (ta)": 15.15, + "MassiveScenarioClassification (fi)": 45.22, + "MassiveScenarioClassification (kn)": 7.87, "MassiveScenarioClassification (nb)": 50.33, + "MassiveScenarioClassification (am)": 7.21, + "MassiveScenarioClassification (hi)": 16.29, + "MassiveScenarioClassification (pl)": 47.8, + "MassiveScenarioClassification (it)": 54.55, + "MassiveScenarioClassification (ko)": 25.82, + "MassiveScenarioClassification (tr)": 48.74, + "MassiveScenarioClassification (ar)": 27.66, + "MassiveScenarioClassification (he)": 23.55, "MassiveScenarioClassification (sv)": 50.0, + "MassiveScenarioClassification (el)": 29.77, + "MassiveScenarioClassification (es)": 54.38, + "MassiveScenarioClassification (hu)": 46.8, + "MassiveScenarioClassification (af)": 51.01, + "MassiveScenarioClassification (zh-TW)": 25.6, + "MassiveScenarioClassification (ro)": 53.32, + "MassiveScenarioClassification (cy)": 44.14, + "MassiveScenarioClassification (hy)": 14.18, + "MassiveScenarioClassification (is)": 46.12, + "MassiveScenarioClassification (bn)": 18.33, + "MassiveScenarioClassification (da)": 52.99, + "MassiveScenarioClassification (fr)": 57.6, + "MassiveScenarioClassification (ms)": 48.26, + "MassiveScenarioClassification (sq)": 50.04, + "MassiveScenarioClassification (nl)": 51.15, + "MassiveScenarioClassification (vi)": 42.46, + "MassiveScenarioClassification (tl)": 50.06, + "MassiveScenarioClassification (en)": 71.52, + "MassiveScenarioClassification (ja)": 27.89, + "MassiveScenarioClassification (zh-CN)": 25.12, + "MassiveScenarioClassification (fa)": 30.11, + "MassiveScenarioClassification (ml)": 6.99, + "MassiveScenarioClassification (mn)": 25.07, + "MassiveScenarioClassification (de)": 56.41, + "MassiveScenarioClassification (az)": 42.58, "NoRecClassification": 42.0, "NordicLangClassification": 59.34, "NorwegianParliament": 57.42, + "PAC": 62.64, + "PolEmo2.0-IN": 41.04, + "PolEmo2.0-OUT": 22.04, + "RuReviewsClassification": 41.31, + "RuSciBenchGRNTIClassification": 10.71, + "RuSciBenchOECDClassification": 9.01, "ScalaDaClassification": 50.08, "ScalaNbClassification": 50.18 } ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "e5-base", + "AlloProfClusteringP2P": 59.49, + "AlloProfClusteringS2S": 36.98, + "BlurbsClusteringP2P": 27.05, + "BlurbsClusteringS2S": 11.1, + "GeoreviewClusteringP2P": 17.46, + "HALClusteringS2S": 22.25, + "MLSUMClusteringP2P (de)": 39.08, + "MLSUMClusteringP2P (fr)": 40.92, + "MLSUMClusteringP2P (ru)": 19.41, + "MLSUMClusteringP2P (es)": 42.61, + "MLSUMClusteringS2S (de)": 39.48, + "MLSUMClusteringS2S (fr)": 40.94, + "MLSUMClusteringS2S (ru)": 19.13, + "MLSUMClusteringS2S (es)": 42.12, + "MasakhaNEWSClusteringP2P (amh)": 41.08, + "MasakhaNEWSClusteringP2P (eng)": 41.62, + "MasakhaNEWSClusteringP2P (fra)": 53.43, + "MasakhaNEWSClusteringP2P (hau)": 43.06, + "MasakhaNEWSClusteringP2P (ibo)": 34.51, + "MasakhaNEWSClusteringP2P (lin)": 62.39, + "MasakhaNEWSClusteringP2P (lug)": 60.71, + "MasakhaNEWSClusteringP2P (orm)": 28.2, + "MasakhaNEWSClusteringP2P (pcm)": 73.86, + "MasakhaNEWSClusteringP2P (run)": 55.76, + "MasakhaNEWSClusteringP2P (sna)": 55.4, + "MasakhaNEWSClusteringP2P (som)": 38.72, + "MasakhaNEWSClusteringP2P (swa)": 25.09, + "MasakhaNEWSClusteringP2P (tir)": 42.6, + "MasakhaNEWSClusteringP2P (xho)": 29.35, + "MasakhaNEWSClusteringP2P (yor)": 41.38, + "MasakhaNEWSClusteringS2S (amh)": 44.43, + "MasakhaNEWSClusteringS2S (eng)": 48.11, + "MasakhaNEWSClusteringS2S (fra)": 35.92, + "MasakhaNEWSClusteringS2S (hau)": 17.05, + "MasakhaNEWSClusteringS2S (ibo)": 34.08, + "MasakhaNEWSClusteringS2S (lin)": 43.73, + "MasakhaNEWSClusteringS2S (lug)": 48.53, + "MasakhaNEWSClusteringS2S (orm)": 25.17, + "MasakhaNEWSClusteringS2S (pcm)": 67.45, + "MasakhaNEWSClusteringS2S (run)": 53.55, + "MasakhaNEWSClusteringS2S (sna)": 47.02, + "MasakhaNEWSClusteringS2S (som)": 30.17, + "MasakhaNEWSClusteringS2S (swa)": 18.21, + "MasakhaNEWSClusteringS2S (tir)": 42.48, + "MasakhaNEWSClusteringS2S (xho)": 24.68, + "MasakhaNEWSClusteringS2S (yor)": 30.24, + "RuSciBenchGRNTIClusteringP2P": 14.34, + "RuSciBenchOECDClusteringP2P": 12.36, + "TenKGnadClusteringP2P": 41.9, + "TenKGnadClusteringS2S": 19.9 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "e5-base", + "CDSC-E": 50.26, + "FalseFriendsGermanEnglish": 47.82, + "OpusparcusPC (de)": 92.43, + "OpusparcusPC (en)": 98.55, + "OpusparcusPC (fi)": 86.98, + "OpusparcusPC (fr)": 87.93, + "OpusparcusPC (ru)": 80.41, + "OpusparcusPC (sv)": 84.1, + "PSC": 94.63, + "PawsXPairClassification (de)": 51.53, + "PawsXPairClassification (en)": 60.53, + "PawsXPairClassification (es)": 54.26, + "PawsXPairClassification (fr)": 55.75, + "PawsXPairClassification (ja)": 48.41, + "PawsXPairClassification (ko)": 51.15, + "PawsXPairClassification (zh)": 53.9, + "SICK-E-PL": 47.25, + "TERRa": 46.42 + }, + { + "Model": "e5-base", + "CDSC-E": 50.26, + "FalseFriendsGermanEnglish": 47.9, + "OpusparcusPC (de)": 92.43, + "OpusparcusPC (en)": 98.55, + "OpusparcusPC (fi)": 86.99, + "OpusparcusPC (fr)": 87.93, + "OpusparcusPC (ru)": 80.45, + "OpusparcusPC (sv)": 84.12, + "PSC": 94.63, + "PawsXPairClassification (de)": 51.9, + "PawsXPairClassification (en)": 60.53, + "PawsXPairClassification (es)": 54.29, + "PawsXPairClassification (fr)": 55.85, + "PawsXPairClassification (ja)": 48.41, + "PawsXPairClassification (ko)": 51.33, + "PawsXPairClassification (zh)": 53.91, + "SICK-E-PL": 47.25, + "TERRa": 46.42 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-base", + "AlloprofReranking": 62.92, + "RuBQReranking": 21.8, + "SyntecReranking": 69.01, + "T2Reranking": 57.43 + }, + { + "Model": "e5-base", + "MIRACLReranking (ar)": 4.57, + "MIRACLReranking (bn)": 9.14, + "MIRACLReranking (de)": 21.47, + "MIRACLReranking (en)": 54.98, + "MIRACLReranking (es)": 37.56, + "MIRACLReranking (fa)": 6.97, + "MIRACLReranking (fi)": 39.52, + "MIRACLReranking (fr)": 30.93, + "MIRACLReranking (hi)": 5.55, + "MIRACLReranking (id)": 25.9, + "MIRACLReranking (ja)": 8.26, + "MIRACLReranking (ko)": 11.11, + "MIRACLReranking (ru)": 6.67, + "MIRACLReranking (sw)": 33.98, + "MIRACLReranking (te)": 2.08, + "MIRACLReranking (th)": 3.21, + "MIRACLReranking (yo)": 51.95, + "MIRACLReranking (zh)": 7.82 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "e5-base", + "AILACasedocs": 23.83, + "AILAStatutes": 17.72, + "ARCChallenge": 7.44, + "AlloprofRetrieval": 29.4, + "AlphaNLI": 13.03, + "AppsRetrieval": 9.31, + "BSARDRetrieval": 6.07, + "CmedqaRetrieval": 2.57, + "CodeFeedbackMT": 44.23, + "CodeFeedbackST": 69.49, + "CodeSearchNetCCRetrieval (python)": 61.67, + "CodeSearchNetCCRetrieval (javascript)": 58.13, + "CodeSearchNetCCRetrieval (go)": 39.41, + "CodeSearchNetCCRetrieval (ruby)": 56.05, + "CodeSearchNetCCRetrieval (java)": 53.01, + "CodeSearchNetCCRetrieval (php)": 40.64, + "CodeSearchNetRetrieval (python)": 82.35, + "CodeSearchNetRetrieval (javascript)": 64.36, + "CodeSearchNetRetrieval (go)": 76.52, + "CodeSearchNetRetrieval (ruby)": 73.49, + "CodeSearchNetRetrieval (java)": 69.23, + "CodeSearchNetRetrieval (php)": 70.34, + "CodeTransOceanContest": 59.1, + "CodeTransOceanDL": 28.57, + "CosQA": 31.45, + "CovidRetrieval": 1.64, + "GerDaLIR": 1.11, + "GerDaLIRSmall": 2.91, + "GermanQuAD-Retrieval": 76.71, + "HellaSwag": 23.88, "LEMBNarrativeQARetrieval": 25.31, "LEMBQMSumRetrieval": 23.83, "LEMBSummScreenFDRetrieval": 74.67, - "LEMBWikimQARetrieval": 55.85 + "LEMBWikimQARetrieval": 55.85, + "LeCaRDv2": 12.43, + "LegalBenchConsumerContractsQA": 72.62, + "LegalBenchCorporateLobbying": 89.47, + "LegalQuAD": 22.76, + "LegalSummarization": 54.14, + "MIRACLRetrieval (ar)": 0.03, + "MIRACLRetrieval (bn)": 0.14, + "MIRACLRetrieval (de)": 11.93, + "MIRACLRetrieval (en)": 46.32, + "MIRACLRetrieval (es)": 26.44, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 22.27, + "MIRACLRetrieval (fr)": 20.84, + "MIRACLRetrieval (hi)": 0.35, + "MIRACLRetrieval (id)": 16.56, + "MIRACLRetrieval (ja)": 0.68, + "MIRACLRetrieval (ko)": 2.76, + "MIRACLRetrieval (ru)": 0.34, + "MIRACLRetrieval (sw)": 24.95, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.41, + "MIRACLRetrieval (yo)": 49.82, + "MIRACLRetrieval (zh)": 0.1, + "MintakaRetrieval (ar)": 2.49, + "MintakaRetrieval (de)": 16.63, + "MintakaRetrieval (es)": 18.39, + "MintakaRetrieval (fr)": 19.89, + "MintakaRetrieval (hi)": 2.33, + "MintakaRetrieval (it)": 16.9, + "MintakaRetrieval (ja)": 4.95, + "MintakaRetrieval (pt)": 17.27, + "PIQA": 22.1, + "Quail": 4.04, + "RARbCode": 42.52, + "RARbMath": 66.63, + "RiaNewsRetrieval": 2.03, + "RuBQRetrieval": 1.74, + "SIQA": 2.79, + "SciFact-PL": 39.16, + "SpartQA": 3.17, + "StackOverflowQA": 81.66, + "SyntecRetrieval": 62.09, + "SyntheticText2SQL": 50.99, + "TRECCOVID-PL": 30.17, + "TempReasonL1": 1.53, + "TempReasonL2Fact": 32.0, + "TempReasonL2Pure": 1.64, + "TempReasonL3Fact": 27.08, + "TempReasonL3Pure": 7.2, + "WinoGrande": 51.98, + "XMarket (de)": 16.14, + "XMarket (en)": 32.8, + "XMarket (es)": 19.03, + "XPQARetrieval (ara-ara)": 7.85, + "XPQARetrieval (eng-ara)": 4.12, + "XPQARetrieval (ara-eng)": 8.5, + "XPQARetrieval (deu-deu)": 56.34, + "XPQARetrieval (eng-deu)": 12.75, + "XPQARetrieval (deu-eng)": 27.96, + "XPQARetrieval (spa-spa)": 40.63, + "XPQARetrieval (eng-spa)": 13.49, + "XPQARetrieval (spa-eng)": 22.02, + "XPQARetrieval (fra-fra)": 47.54, + "XPQARetrieval (eng-fra)": 16.18, + "XPQARetrieval (fra-eng)": 29.25, + "XPQARetrieval (hin-hin)": 18.96, + "XPQARetrieval (eng-hin)": 5.17, + "XPQARetrieval (hin-eng)": 7.48, + "XPQARetrieval (ita-ita)": 50.51, + "XPQARetrieval (eng-ita)": 11.44, + "XPQARetrieval (ita-eng)": 22.3, + "XPQARetrieval (jpn-jpn)": 29.51, + "XPQARetrieval (eng-jpn)": 4.82, + "XPQARetrieval (jpn-eng)": 16.17, + "XPQARetrieval (kor-kor)": 10.63, + "XPQARetrieval (eng-kor)": 8.31, + "XPQARetrieval (kor-eng)": 7.63, + "XPQARetrieval (pol-pol)": 30.5, + "XPQARetrieval (eng-pol)": 11.44, + "XPQARetrieval (pol-eng)": 17.32, + "XPQARetrieval (por-por)": 34.15, + "XPQARetrieval (eng-por)": 10.74, + "XPQARetrieval (por-eng)": 20.65, + "XPQARetrieval (tam-tam)": 10.09, + "XPQARetrieval (eng-tam)": 4.81, + "XPQARetrieval (tam-eng)": 3.6, + "XPQARetrieval (cmn-cmn)": 20.81, + "XPQARetrieval (eng-cmn)": 6.24, + "XPQARetrieval (cmn-eng)": 12.33 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base", + "CDSC-R": 83.36, + "GermanSTSBenchmark": 65.51, + "RUParaPhraserSTS": 39.12, + "RuSTSBenchmarkSTS": 51.35, + "SICK-R-PL": 56.69, + "SICKFr": 69.46, + "STS22 (de)": 35.15, + "STS22 (de-en)": 49.21, + "STS22 (zh-en)": 32.75, + "STS22 (en)": 63.74, + "STS22 (es)": 58.71, + "STS22 (es-en)": 67.35, + "STS22 (pl-en)": 60.4, + "STS22 (zh)": 43.29, + "STS22 (de-pl)": 45.33, + "STS22 (pl)": 30.13, + "STS22 (de-fr)": 55.99, + "STS22 (it)": 70.51, + "STS22 (ru)": 8.56, + "STS22 (es-it)": 55.56, + "STS22 (fr-pl)": 50.71, + "STS22 (fr)": 77.27, + "STS22 (tr)": 51.02, + "STS22 (ar)": 30.46, + "STSB": 33.72, + "STSBenchmarkMultilingualSTS (zh)": 35.42, + "STSBenchmarkMultilingualSTS (pt)": 70.26, + "STSBenchmarkMultilingualSTS (nl)": 66.67, + "STSBenchmarkMultilingualSTS (pl)": 62.15, + "STSBenchmarkMultilingualSTS (es)": 72.18, + "STSBenchmarkMultilingualSTS (en)": 86.35, + "STSBenchmarkMultilingualSTS (it)": 67.99, + "STSBenchmarkMultilingualSTS (fr)": 71.44, + "STSBenchmarkMultilingualSTS (de)": 66.2, + "STSBenchmarkMultilingualSTS (ru)": 51.32 + }, + { + "Model": "e5-base", + "CDSC-R": 83.36, + "GermanSTSBenchmark": 65.51, + "RUParaPhraserSTS": 39.12, + "RuSTSBenchmarkSTS": 51.35, + "SICK-R-PL": 56.69, + "SICKFr": 69.46, + "STS22 (de)": 35.13, + "STS22 (de-en)": 49.21, + "STS22 (zh-en)": 32.75, + "STS22 (en)": 63.74, + "STS22 (es)": 58.71, + "STS22 (es-en)": 67.35, + "STS22 (pl-en)": 60.4, + "STS22 (zh)": 43.29, + "STS22 (de-pl)": 45.33, + "STS22 (pl)": 30.31, + "STS22 (de-fr)": 55.99, + "STS22 (it)": 70.51, + "STS22 (ru)": 8.56, + "STS22 (es-it)": 55.56, + "STS22 (fr-pl)": 50.71, + "STS22 (fr)": 77.27, + "STS22 (tr)": 51.02, + "STS22 (ar)": 30.48, + "STSB": 33.71, + "STSBenchmarkMultilingualSTS (zh)": 35.4, + "STSBenchmarkMultilingualSTS (pt)": 70.26, + "STSBenchmarkMultilingualSTS (nl)": 66.67, + "STSBenchmarkMultilingualSTS (pl)": 62.16, + "STSBenchmarkMultilingualSTS (es)": 72.18, + "STSBenchmarkMultilingualSTS (en)": 86.35, + "STSBenchmarkMultilingualSTS (it)": 67.99, + "STSBenchmarkMultilingualSTS (fr)": 71.44, + "STSBenchmarkMultilingualSTS (de)": 66.2, + "STSBenchmarkMultilingualSTS (ru)": 51.32 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base", + "SummEvalFr": 30.06 + }, + { + "Model": "e5-base", + "SummEvalFr": 30.06 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-base", + "CEDRClassification": 33.32, + "SensitiveTopicsClassification": 17.5 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "e5-base", + "Core17InstructionRetrieval": -2.41, + "News21InstructionRetrieval": -2.17, + "Robust04InstructionRetrieval": -5.89 + } + ] } }, "intfloat__e5-base-v2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "e5-base-v2", + "BornholmBitextMining": 38.49, + "Tatoeba (kor-eng)": 1.23, + "Tatoeba (kab-eng)": 1.42, + "Tatoeba (pes-eng)": 0.52, + "Tatoeba (afr-eng)": 10.45, + "Tatoeba (fao-eng)": 11.7, + "Tatoeba (cor-eng)": 3.76, + "Tatoeba (oci-eng)": 15.52, + "Tatoeba (khm-eng)": 0.47, + "Tatoeba (max-eng)": 13.15, + "Tatoeba (bul-eng)": 1.84, + "Tatoeba (arz-eng)": 0.53, + "Tatoeba (fra-eng)": 34.76, + "Tatoeba (nov-eng)": 37.07, + "Tatoeba (kaz-eng)": 1.15, + "Tatoeba (tha-eng)": 1.03, + "Tatoeba (yue-eng)": 2.48, + "Tatoeba (mon-eng)": 2.62, + "Tatoeba (lvs-eng)": 5.15, + "Tatoeba (slv-eng)": 8.13, + "Tatoeba (mar-eng)": 0.28, + "Tatoeba (ind-eng)": 8.38, + "Tatoeba (por-eng)": 30.56, + "Tatoeba (ron-eng)": 16.3, + "Tatoeba (fry-eng)": 21.29, + "Tatoeba (csb-eng)": 7.91, + "Tatoeba (glg-eng)": 26.68, + "Tatoeba (spa-eng)": 31.62, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (dsb-eng)": 7.33, + "Tatoeba (lat-eng)": 11.03, + "Tatoeba (gsw-eng)": 17.85, + "Tatoeba (amh-eng)": 0.68, + "Tatoeba (gle-eng)": 3.36, + "Tatoeba (pms-eng)": 16.13, + "Tatoeba (cmn-eng)": 3.15, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (ara-eng)": 0.51, + "Tatoeba (kur-eng)": 8.55, + "Tatoeba (tam-eng)": 0.01, + "Tatoeba (nob-eng)": 15.99, + "Tatoeba (ces-eng)": 6.83, + "Tatoeba (isl-eng)": 6.57, + "Tatoeba (ita-eng)": 23.44, + "Tatoeba (nno-eng)": 12.53, + "Tatoeba (lfn-eng)": 21.16, + "Tatoeba (tzl-eng)": 16.48, + "Tatoeba (ido-eng)": 21.92, + "Tatoeba (ast-eng)": 25.63, + "Tatoeba (eus-eng)": 8.28, + "Tatoeba (cbk-eng)": 20.37, + "Tatoeba (wuu-eng)": 1.92, + "Tatoeba (ell-eng)": 0.66, + "Tatoeba (xho-eng)": 3.03, + "Tatoeba (kzj-eng)": 5.49, + "Tatoeba (hrv-eng)": 9.74, + "Tatoeba (dtp-eng)": 3.36, + "Tatoeba (hye-eng)": 0.47, + "Tatoeba (ben-eng)": 0.13, + "Tatoeba (ceb-eng)": 6.11, + "Tatoeba (bos-eng)": 12.34, + "Tatoeba (sqi-eng)": 7.49, + "Tatoeba (tgl-eng)": 6.46, + "Tatoeba (ang-eng)": 22.42, + "Tatoeba (urd-eng)": 0.27, + "Tatoeba (pam-eng)": 6.15, + "Tatoeba (ile-eng)": 29.23, + "Tatoeba (arq-eng)": 0.89, + "Tatoeba (bel-eng)": 2.67, + "Tatoeba (swg-eng)": 12.1, + "Tatoeba (kat-eng)": 0.26, + "Tatoeba (swe-eng)": 13.87, + "Tatoeba (hsb-eng)": 6.97, + "Tatoeba (mhr-eng)": 0.46, + "Tatoeba (rus-eng)": 2.58, + "Tatoeba (aze-eng)": 4.82, + "Tatoeba (pol-eng)": 7.65, + "Tatoeba (tuk-eng)": 5.0, + "Tatoeba (lit-eng)": 3.48, + "Tatoeba (yid-eng)": 0.49, + "Tatoeba (zsm-eng)": 9.91, + "Tatoeba (jav-eng)": 5.96, + "Tatoeba (uzb-eng)": 3.94, + "Tatoeba (cym-eng)": 6.76, + "Tatoeba (bre-eng)": 5.12, + "Tatoeba (ber-eng)": 5.43, + "Tatoeba (fin-eng)": 5.15, + "Tatoeba (uig-eng)": 0.5, + "Tatoeba (nds-eng)": 18.57, + "Tatoeba (dan-eng)": 16.58, + "Tatoeba (slk-eng)": 7.19, + "Tatoeba (heb-eng)": 0.91, + "Tatoeba (jpn-eng)": 1.72, + "Tatoeba (mkd-eng)": 0.66, + "Tatoeba (orv-eng)": 0.21, + "Tatoeba (swh-eng)": 8.51, + "Tatoeba (tur-eng)": 5.49, + "Tatoeba (epo-eng)": 16.56, + "Tatoeba (hun-eng)": 6.96, + "Tatoeba (gla-eng)": 2.99, + "Tatoeba (srp-eng)": 4.86, + "Tatoeba (awa-eng)": 0.03, + "Tatoeba (tel-eng)": 0.36, + "Tatoeba (cha-eng)": 18.54, + "Tatoeba (war-eng)": 7.47, + "Tatoeba (est-eng)": 4.68, + "Tatoeba (cat-eng)": 23.42, + "Tatoeba (ukr-eng)": 1.4, + "Tatoeba (tat-eng)": 1.03, + "Tatoeba (deu-eng)": 33.61, + "Tatoeba (nld-eng)": 21.17, + "Tatoeba (ina-eng)": 41.66, + "Tatoeba (vie-eng)": 6.37 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-base-v2", + "AllegroReviews": 26.17, + "AmazonCounterfactualClassification (en-ext)": 76.15, + "AmazonCounterfactualClassification (en)": 75.51, + "AmazonCounterfactualClassification (de)": 56.61, + "AmazonCounterfactualClassification (ja)": 58.58, + "AmazonReviewsClassification (en)": 47.82, + "AmazonReviewsClassification (de)": 30.12, + "AmazonReviewsClassification (es)": 36.78, + "AmazonReviewsClassification (fr)": 32.32, + "AmazonReviewsClassification (ja)": 22.65, + "AmazonReviewsClassification (zh)": 23.28, + "AngryTweetsClassification": 46.32, + "CBD": 52.74, + "DanishPoliticalCommentsClassification": 28.84, + "GeoreviewClassification": 30.9, + "HeadlineClassification": 41.74, + "InappropriatenessClassification": 53.27, + "KinopoiskClassification": 34.67, + "LccSentimentClassification": 38.07, + "MTOPDomainClassification (en)": 92.23, + "MTOPDomainClassification (de)": 77.51, + "MTOPDomainClassification (es)": 78.32, + "MTOPDomainClassification (fr)": 80.17, + "MTOPDomainClassification (hi)": 41.05, + "MTOPDomainClassification (th)": 16.63, + "MTOPIntentClassification (en)": 62.41, + "MTOPIntentClassification (de)": 47.19, + "MTOPIntentClassification (es)": 44.66, + "MTOPIntentClassification (fr)": 40.64, + "MTOPIntentClassification (hi)": 17.59, + "MTOPIntentClassification (th)": 4.73, + "MasakhaNEWSClassification (amh)": 33.91, + "MasakhaNEWSClassification (eng)": 77.48, + "MasakhaNEWSClassification (fra)": 75.36, + "MasakhaNEWSClassification (hau)": 62.34, + "MasakhaNEWSClassification (ibo)": 60.51, + "MasakhaNEWSClassification (lin)": 74.57, + "MasakhaNEWSClassification (lug)": 57.53, + "MasakhaNEWSClassification (orm)": 61.17, + "MasakhaNEWSClassification (pcm)": 91.9, + "MasakhaNEWSClassification (run)": 64.84, + "MasakhaNEWSClassification (sna)": 75.56, + "MasakhaNEWSClassification (som)": 52.24, + "MasakhaNEWSClassification (swa)": 57.71, + "MasakhaNEWSClassification (tir)": 24.15, + "MasakhaNEWSClassification (xho)": 63.84, + "MasakhaNEWSClassification (yor)": 70.58, + "MassiveIntentClassification (ta)": 11.31, + "MassiveIntentClassification (ml)": 2.79, + "MassiveIntentClassification (fi)": 41.47, + "MassiveIntentClassification (zh-TW)": 20.56, + "MassiveIntentClassification (hy)": 11.83, + "MassiveIntentClassification (es)": 46.34, + "MassiveIntentClassification (ms)": 40.24, + "MassiveIntentClassification (nl)": 42.8, + "MassiveIntentClassification (my)": 3.73, + "MassiveIntentClassification (he)": 20.9, + "MassiveIntentClassification (mn)": 26.6, + "MassiveIntentClassification (lv)": 41.59, + "MassiveIntentClassification (hu)": 39.35, + "MassiveIntentClassification (en)": 67.43, + "MassiveIntentClassification (ar)": 22.84, + "MassiveIntentClassification (hi)": 19.05, + "MassiveIntentClassification (sw)": 37.56, + "MassiveIntentClassification (it)": 45.97, + "MassiveIntentClassification (te)": 2.32, + "MassiveIntentClassification (jv)": 37.57, + "MassiveIntentClassification (pt)": 48.51, + "MassiveIntentClassification (sv)": 41.41, + "MassiveIntentClassification (bn)": 19.72, + "MassiveIntentClassification (fa)": 29.84, + "MassiveIntentClassification (sq)": 42.6, + "MassiveIntentClassification (ko)": 19.32, + "MassiveIntentClassification (az)": 37.86, + "MassiveIntentClassification (is)": 35.64, + "MassiveIntentClassification (el)": 33.89, + "MassiveIntentClassification (de)": 46.68, + "MassiveIntentClassification (km)": 4.65, + "MassiveIntentClassification (af)": 39.96, + "MassiveIntentClassification (cy)": 36.9, + "MassiveIntentClassification (nb)": 42.31, + "MassiveIntentClassification (ur)": 21.12, + "MassiveIntentClassification (ro)": 44.32, + "MassiveIntentClassification (da)": 44.0, + "MassiveIntentClassification (kn)": 3.43, + "MassiveIntentClassification (ka)": 11.85, + "MassiveIntentClassification (pl)": 40.34, + "MassiveIntentClassification (fr)": 45.88, + "MassiveIntentClassification (ja)": 33.31, + "MassiveIntentClassification (zh-CN)": 22.87, + "MassiveIntentClassification (ru)": 36.82, + "MassiveIntentClassification (id)": 41.48, + "MassiveIntentClassification (vi)": 34.44, + "MassiveIntentClassification (sl)": 40.43, + "MassiveIntentClassification (th)": 11.62, + "MassiveIntentClassification (tr)": 42.46, + "MassiveIntentClassification (tl)": 41.48, + "MassiveIntentClassification (am)": 2.57, + "MassiveScenarioClassification (jv)": 45.5, + "MassiveScenarioClassification (fa)": 33.21, + "MassiveScenarioClassification (en)": 72.73, + "MassiveScenarioClassification (te)": 7.51, + "MassiveScenarioClassification (mn)": 31.27, + "MassiveScenarioClassification (hy)": 17.6, + "MassiveScenarioClassification (sv)": 51.62, + "MassiveScenarioClassification (el)": 42.78, + "MassiveScenarioClassification (fr)": 56.43, + "MassiveScenarioClassification (ta)": 18.76, + "MassiveScenarioClassification (ro)": 53.73, + "MassiveScenarioClassification (ko)": 24.11, + "MassiveScenarioClassification (it)": 55.31, + "MassiveScenarioClassification (bn)": 25.07, + "MassiveScenarioClassification (ka)": 17.99, + "MassiveScenarioClassification (pt)": 56.15, + "MassiveScenarioClassification (sl)": 46.26, + "MassiveScenarioClassification (km)": 9.97, + "MassiveScenarioClassification (sw)": 45.06, + "MassiveScenarioClassification (hu)": 48.21, + "MassiveScenarioClassification (pl)": 49.18, + "MassiveScenarioClassification (de)": 57.53, + "MassiveScenarioClassification (kn)": 8.25, + "MassiveScenarioClassification (lv)": 45.8, + "MassiveScenarioClassification (ml)": 7.41, + "MassiveScenarioClassification (ar)": 29.99, + "MassiveScenarioClassification (zh-TW)": 29.47, + "MassiveScenarioClassification (ru)": 42.08, + "MassiveScenarioClassification (hi)": 24.44, + "MassiveScenarioClassification (fi)": 47.17, + "MassiveScenarioClassification (nl)": 51.53, + "MassiveScenarioClassification (zh-CN)": 32.08, + "MassiveScenarioClassification (tr)": 48.92, + "MassiveScenarioClassification (vi)": 40.52, + "MassiveScenarioClassification (ur)": 29.73, + "MassiveScenarioClassification (he)": 24.27, + "MassiveScenarioClassification (cy)": 42.85, + "MassiveScenarioClassification (am)": 7.48, + "MassiveScenarioClassification (ms)": 50.7, + "MassiveScenarioClassification (az)": 46.28, + "MassiveScenarioClassification (id)": 49.71, + "MassiveScenarioClassification (is)": 45.13, + "MassiveScenarioClassification (da)": 52.67, + "MassiveScenarioClassification (th)": 20.51, + "MassiveScenarioClassification (nb)": 50.89, + "MassiveScenarioClassification (tl)": 49.67, + "MassiveScenarioClassification (es)": 55.92, + "MassiveScenarioClassification (sq)": 50.57, + "MassiveScenarioClassification (ja)": 41.65, + "MassiveScenarioClassification (my)": 10.56, + "MassiveScenarioClassification (af)": 49.81, + "NoRecClassification": 41.2, + "NordicLangClassification": 60.79, + "PAC": 68.06, + "PolEmo2.0-IN": 42.48, + "PolEmo2.0-OUT": 21.05, + "RuReviewsClassification": 47.13, + "RuSciBenchGRNTIClassification": 20.87, + "RuSciBenchOECDClassification": 16.49, + "ToxicConversationsClassification": 65.87 + } + ] }, "Clustering": { "v_measure": [ { "Model": "e5-base-v2", + "AlloProfClusteringP2P": 58.55, + "AlloProfClusteringS2S": 36.35, "BiorxivClusteringP2P": 37.12, "BiorxivClusteringS2S": 33.41, + "BlurbsClusteringP2P": 29.09, + "BlurbsClusteringS2S": 12.41, + "GeoreviewClusteringP2P": 23.27, + "HALClusteringS2S": 22.33, + "MLSUMClusteringP2P (de)": 39.19, + "MLSUMClusteringP2P (fr)": 42.38, + "MLSUMClusteringP2P (ru)": 24.28, + "MLSUMClusteringP2P (es)": 42.72, + "MLSUMClusteringS2S (de)": 38.13, + "MLSUMClusteringS2S (fr)": 42.2, + "MLSUMClusteringS2S (ru)": 21.66, + "MLSUMClusteringS2S (es)": 42.13, + "MasakhaNEWSClusteringP2P (amh)": 40.74, + "MasakhaNEWSClusteringP2P (eng)": 53.59, + "MasakhaNEWSClusteringP2P (fra)": 50.56, + "MasakhaNEWSClusteringP2P (hau)": 44.87, + "MasakhaNEWSClusteringP2P (ibo)": 38.96, + "MasakhaNEWSClusteringP2P (lin)": 62.06, + "MasakhaNEWSClusteringP2P (lug)": 53.37, + "MasakhaNEWSClusteringP2P (orm)": 30.93, + "MasakhaNEWSClusteringP2P (pcm)": 70.3, + "MasakhaNEWSClusteringP2P (run)": 51.59, + "MasakhaNEWSClusteringP2P (sna)": 50.4, + "MasakhaNEWSClusteringP2P (som)": 32.39, + "MasakhaNEWSClusteringP2P (swa)": 24.14, + "MasakhaNEWSClusteringP2P (tir)": 43.19, + "MasakhaNEWSClusteringP2P (xho)": 31.76, + "MasakhaNEWSClusteringP2P (yor)": 42.67, + "MasakhaNEWSClusteringS2S (amh)": 41.94, + "MasakhaNEWSClusteringS2S (eng)": 53.89, + "MasakhaNEWSClusteringS2S (fra)": 47.26, + "MasakhaNEWSClusteringS2S (hau)": 24.73, + "MasakhaNEWSClusteringS2S (ibo)": 37.06, + "MasakhaNEWSClusteringS2S (lin)": 64.92, + "MasakhaNEWSClusteringS2S (lug)": 46.85, + "MasakhaNEWSClusteringS2S (orm)": 29.17, + "MasakhaNEWSClusteringS2S (pcm)": 63.68, + "MasakhaNEWSClusteringS2S (run)": 54.79, + "MasakhaNEWSClusteringS2S (sna)": 45.56, + "MasakhaNEWSClusteringS2S (som)": 28.23, + "MasakhaNEWSClusteringS2S (swa)": 9.13, + "MasakhaNEWSClusteringS2S (tir)": 48.64, + "MasakhaNEWSClusteringS2S (xho)": 27.14, + "MasakhaNEWSClusteringS2S (yor)": 34.75, "MedrxivClusteringP2P": 31.82, "MedrxivClusteringS2S": 29.68, "RedditClustering": 56.54, "RedditClusteringP2P": 63.23, + "RuSciBenchGRNTIClusteringP2P": 18.28, + "RuSciBenchOECDClusteringP2P": 16.16, "StackExchangeClustering": 64.6, "StackExchangeClusteringP2P": 33.02, + "TenKGnadClusteringP2P": 42.2, + "TenKGnadClusteringS2S": 24.86, "TwentyNewsgroupsClustering": 49.86 } ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "e5-base-v2", + "CDSC-E": 56.82, + "FalseFriendsGermanEnglish": 47.34, + "OpusparcusPC (de)": 91.87, + "OpusparcusPC (en)": 98.58, + "OpusparcusPC (fi)": 86.94, + "OpusparcusPC (fr)": 88.17, + "OpusparcusPC (ru)": 79.74, + "OpusparcusPC (sv)": 84.7, + "PSC": 96.95, + "PawsXPairClassification (de)": 50.76, + "PawsXPairClassification (en)": 57.62, + "PawsXPairClassification (es)": 52.74, + "PawsXPairClassification (fr)": 53.73, + "PawsXPairClassification (ja)": 48.4, + "PawsXPairClassification (ko)": 50.0, + "PawsXPairClassification (zh)": 52.5, + "SICK-E-PL": 50.72, + "SprintDuplicateQuestions": 94.26, + "TERRa": 48.81, + "TwitterURLCorpus": 86.66 + }, + { + "Model": "e5-base-v2", + "CDSC-E": 56.82, + "FalseFriendsGermanEnglish": 47.36, + "OpusparcusPC (de)": 91.87, + "OpusparcusPC (en)": 98.59, + "OpusparcusPC (fi)": 86.94, + "OpusparcusPC (fr)": 88.17, + "OpusparcusPC (ru)": 79.74, + "OpusparcusPC (sv)": 84.72, + "PSC": 96.95, + "PawsXPairClassification (de)": 50.99, + "PawsXPairClassification (en)": 57.62, + "PawsXPairClassification (es)": 52.76, + "PawsXPairClassification (fr)": 53.76, + "PawsXPairClassification (ja)": 48.61, + "PawsXPairClassification (ko)": 50.03, + "PawsXPairClassification (zh)": 52.63, + "SICK-E-PL": 50.72, + "SprintDuplicateQuestions": 94.26, + "TERRa": 49.06, + "TwitterURLCorpus": 86.66 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-base-v2", + "AlloprofReranking": 65.41, + "RuBQReranking": 45.35, + "SyntecReranking": 75.91, + "T2Reranking": 60.5 + }, + { + "Model": "e5-base-v2", + "MIRACLReranking (ar)": 8.62, + "MIRACLReranking (bn)": 8.23, + "MIRACLReranking (de)": 25.63, + "MIRACLReranking (en)": 57.65, + "MIRACLReranking (es)": 38.47, + "MIRACLReranking (fa)": 9.28, + "MIRACLReranking (fi)": 38.49, + "MIRACLReranking (fr)": 31.33, + "MIRACLReranking (hi)": 11.91, + "MIRACLReranking (id)": 24.01, + "MIRACLReranking (ja)": 14.12, + "MIRACLReranking (ko)": 9.68, + "MIRACLReranking (ru)": 16.0, + "MIRACLReranking (sw)": 33.36, + "MIRACLReranking (te)": 2.83, + "MIRACLReranking (th)": 3.61, + "MIRACLReranking (yo)": 54.71, + "MIRACLReranking (zh)": 12.9 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "e5-base-v2", + "AILACasedocs": 27.17, + "AILAStatutes": 19.61, + "ARCChallenge": 10.01, + "AlloprofRetrieval": 31.86, + "AlphaNLI": 21.73, + "AppsRetrieval": 11.52, + "ArguAna": 44.57, + "BSARDRetrieval": 11.7, + "CmedqaRetrieval": 3.49, + "CodeFeedbackMT": 41.56, + "CodeFeedbackST": 74.52, + "CodeSearchNetCCRetrieval (python)": 64.84, + "CodeSearchNetCCRetrieval (javascript)": 63.02, + "CodeSearchNetCCRetrieval (go)": 42.29, + "CodeSearchNetCCRetrieval (ruby)": 61.06, + "CodeSearchNetCCRetrieval (java)": 61.35, + "CodeSearchNetCCRetrieval (php)": 48.67, + "CodeSearchNetRetrieval (python)": 88.66, + "CodeSearchNetRetrieval (javascript)": 72.32, + "CodeSearchNetRetrieval (go)": 93.92, + "CodeSearchNetRetrieval (ruby)": 79.43, + "CodeSearchNetRetrieval (java)": 76.68, + "CodeSearchNetRetrieval (php)": 82.86, + "CodeTransOceanContest": 62.5, + "CodeTransOceanDL": 21.87, + "CosQA": 32.59, + "CovidRetrieval": 14.88, + "GerDaLIR": 1.66, + "GerDaLIRSmall": 4.09, + "GermanQuAD-Retrieval": 82.98, + "HellaSwag": 25.48, + "LEMBNarrativeQARetrieval": 25.32, + "LEMBQMSumRetrieval": 23.86, + "LEMBSummScreenFDRetrieval": 74.66, + "LEMBWikimQARetrieval": 55.97, + "LeCaRDv2": 20.53, + "LegalBenchConsumerContractsQA": 71.92, + "LegalBenchCorporateLobbying": 91.92, + "LegalQuAD": 22.32, + "LegalSummarization": 58.72, + "MIRACLRetrieval (ar)": 0.34, + "MIRACLRetrieval (bn)": 0.2, + "MIRACLRetrieval (de)": 14.8, + "MIRACLRetrieval (en)": 49.53, + "MIRACLRetrieval (es)": 25.84, + "MIRACLRetrieval (fa)": 0.26, + "MIRACLRetrieval (fi)": 23.1, + "MIRACLRetrieval (fr)": 19.91, + "MIRACLRetrieval (hi)": 1.02, + "MIRACLRetrieval (id)": 14.33, + "MIRACLRetrieval (ja)": 2.7, + "MIRACLRetrieval (ko)": 2.98, + "MIRACLRetrieval (ru)": 3.39, + "MIRACLRetrieval (sw)": 24.52, + "MIRACLRetrieval (te)": 0.08, + "MIRACLRetrieval (th)": 0.28, + "MIRACLRetrieval (yo)": 48.59, + "MIRACLRetrieval (zh)": 0.61, + "MintakaRetrieval (ar)": 6.1, + "MintakaRetrieval (de)": 23.04, + "MintakaRetrieval (es)": 21.95, + "MintakaRetrieval (fr)": 23.51, + "MintakaRetrieval (hi)": 7.02, + "MintakaRetrieval (it)": 20.37, + "MintakaRetrieval (ja)": 10.84, + "MintakaRetrieval (pt)": 23.03, + "PIQA": 27.71, + "Quail": 4.94, + "RARbCode": 54.47, + "RARbMath": 67.76, + "RiaNewsRetrieval": 14.82, + "RuBQRetrieval": 16.23, + "SCIDOCS": 18.68, + "SIQA": 4.37, + "SciFact-PL": 42.16, + "SpartQA": 7.84, + "StackOverflowQA": 87.85, + "SyntecRetrieval": 67.34, + "SyntheticText2SQL": 51.88, + "TRECCOVID": 69.63, + "TRECCOVID-PL": 20.13, + "TempReasonL1": 1.74, + "TempReasonL2Fact": 37.62, + "TempReasonL2Pure": 2.92, + "TempReasonL3Fact": 32.63, + "TempReasonL3Pure": 10.25, + "WinoGrande": 46.99, + "XMarket (de)": 14.27, + "XMarket (en)": 29.85, + "XMarket (es)": 17.87, + "XPQARetrieval (ara-ara)": 13.74, + "XPQARetrieval (eng-ara)": 4.02, + "XPQARetrieval (ara-eng)": 9.62, + "XPQARetrieval (deu-deu)": 56.5, + "XPQARetrieval (eng-deu)": 15.21, + "XPQARetrieval (deu-eng)": 29.85, + "XPQARetrieval (spa-spa)": 46.27, + "XPQARetrieval (eng-spa)": 13.06, + "XPQARetrieval (spa-eng)": 24.73, + "XPQARetrieval (fra-fra)": 53.68, + "XPQARetrieval (eng-fra)": 18.07, + "XPQARetrieval (fra-eng)": 30.23, + "XPQARetrieval (hin-hin)": 34.7, + "XPQARetrieval (eng-hin)": 8.68, + "XPQARetrieval (hin-eng)": 8.87, + "XPQARetrieval (ita-ita)": 57.76, + "XPQARetrieval (eng-ita)": 12.66, + "XPQARetrieval (ita-eng)": 26.19, + "XPQARetrieval (jpn-jpn)": 44.02, + "XPQARetrieval (eng-jpn)": 6.34, + "XPQARetrieval (jpn-eng)": 18.04, + "XPQARetrieval (kor-kor)": 16.11, + "XPQARetrieval (eng-kor)": 8.23, + "XPQARetrieval (kor-eng)": 7.56, + "XPQARetrieval (pol-pol)": 34.92, + "XPQARetrieval (eng-pol)": 13.06, + "XPQARetrieval (pol-eng)": 18.95, + "XPQARetrieval (por-por)": 37.84, + "XPQARetrieval (eng-por)": 10.58, + "XPQARetrieval (por-eng)": 23.87, + "XPQARetrieval (tam-tam)": 12.57, + "XPQARetrieval (eng-tam)": 4.59, + "XPQARetrieval (tam-eng)": 4.58, + "XPQARetrieval (cmn-cmn)": 25.63, + "XPQARetrieval (eng-cmn)": 7.63, + "XPQARetrieval (cmn-eng)": 11.54 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base-v2", + "CDSC-R": 85.88, + "GermanSTSBenchmark": 68.61, + "RUParaPhraserSTS": 54.96, + "RuSTSBenchmarkSTS": 64.35, + "SICK-R": 78.4, + "SICK-R-PL": 58.0, + "SICKFr": 68.97, + "STS12": 73.49, + "STS13": 83.0, + "STS14": 80.45, + "STS15": 88.18, + "STS17 (en-en)": 88.89, + "STS17 (nl-en)": 42.58, + "STS17 (en-tr)": -7.22, + "STS17 (es-en)": 44.73, + "STS17 (en-de)": 41.4, + "STS17 (fr-en)": 51.76, + "STS17 (es-es)": 80.35, + "STS17 (it-en)": 27.04, + "STS17 (ar-ar)": 54.21, + "STS17 (en-ar)": -6.22, + "STS17 (ko-ko)": 44.35, + "STS22 (fr-pl)": 50.71, + "STS22 (en)": 66.51, + "STS22 (zh)": 49.26, + "STS22 (ar)": 28.74, + "STS22 (fr)": 76.93, + "STS22 (de-fr)": 58.23, + "STS22 (pl)": 31.46, + "STS22 (es-en)": 65.16, + "STS22 (tr)": 52.41, + "STS22 (de-en)": 50.28, + "STS22 (ru)": 21.62, + "STS22 (de-pl)": 26.82, + "STS22 (it)": 68.41, + "STS22 (zh-en)": 37.18, + "STS22 (pl-en)": 61.96, + "STS22 (es-it)": 64.1, + "STS22 (de)": 29.17, + "STS22 (es)": 61.48, + "STSB": 34.59, + "STSBenchmark": 85.48, + "STSBenchmarkMultilingualSTS (en)": 85.48, + "STSBenchmarkMultilingualSTS (fr)": 71.25, + "STSBenchmarkMultilingualSTS (zh)": 35.53, + "STSBenchmarkMultilingualSTS (pl)": 63.06, + "STSBenchmarkMultilingualSTS (es)": 72.75, + "STSBenchmarkMultilingualSTS (nl)": 67.07, + "STSBenchmarkMultilingualSTS (pt)": 69.5, + "STSBenchmarkMultilingualSTS (ru)": 64.43, + "STSBenchmarkMultilingualSTS (it)": 69.5, + "STSBenchmarkMultilingualSTS (de)": 68.9 + }, + { + "Model": "e5-base-v2", + "CDSC-R": 85.88, + "GermanSTSBenchmark": 68.61, + "RUParaPhraserSTS": 54.96, + "RuSTSBenchmarkSTS": 64.35, + "SICK-R": 78.4, + "SICK-R-PL": 58.0, + "SICKFr": 68.97, + "STS12": 73.49, + "STS13": 83.0, + "STS14": 80.45, + "STS15": 88.18, + "STS17 (en-en)": 88.89, + "STS17 (nl-en)": 42.58, + "STS17 (en-tr)": -7.22, + "STS17 (es-en)": 44.73, + "STS17 (en-de)": 41.4, + "STS17 (fr-en)": 51.76, + "STS17 (es-es)": 80.35, + "STS17 (it-en)": 27.04, + "STS17 (ar-ar)": 54.21, + "STS17 (en-ar)": -6.22, + "STS17 (ko-ko)": 44.35, + "STS22 (fr-pl)": 50.71, + "STS22 (en)": 66.51, + "STS22 (zh)": 49.26, + "STS22 (ar)": 28.72, + "STS22 (fr)": 76.93, + "STS22 (de-fr)": 58.23, + "STS22 (pl)": 31.37, + "STS22 (es-en)": 65.16, + "STS22 (tr)": 52.41, + "STS22 (de-en)": 50.28, + "STS22 (ru)": 21.62, + "STS22 (de-pl)": 26.82, + "STS22 (it)": 68.41, + "STS22 (zh-en)": 37.18, + "STS22 (pl-en)": 61.96, + "STS22 (es-it)": 64.1, + "STS22 (de)": 29.16, + "STS22 (es)": 61.48, + "STSB": 34.59, + "STSBenchmark": 85.48, + "STSBenchmarkMultilingualSTS (en)": 85.48, + "STSBenchmarkMultilingualSTS (fr)": 71.25, + "STSBenchmarkMultilingualSTS (zh)": 35.52, + "STSBenchmarkMultilingualSTS (pl)": 63.06, + "STSBenchmarkMultilingualSTS (es)": 72.75, + "STSBenchmarkMultilingualSTS (nl)": 67.07, + "STSBenchmarkMultilingualSTS (pt)": 69.5, + "STSBenchmarkMultilingualSTS (ru)": 64.43, + "STSBenchmarkMultilingualSTS (it)": 69.5, + "STSBenchmarkMultilingualSTS (de)": 68.9 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-base-v2", + "SummEvalFr": 31.87 + }, + { + "Model": "e5-base-v2", + "SummEvalFr": 31.87 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-base-v2", + "CEDRClassification": 34.11, + "SensitiveTopicsClassification": 18.14 + } + ] }, "InstructionRetrieval": { "p-MRR": [ { "Model": "e5-base-v2", - "Core17InstructionRetrieval": -2.9, - "News21InstructionRetrieval": -2.0, - "Robust04InstructionRetrieval": -6.73 + "Core17InstructionRetrieval": -2.92, + "News21InstructionRetrieval": -2.08, + "Robust04InstructionRetrieval": -6.81 } ] } @@ -8221,7 +22017,119 @@ "f1": [ { "Model": "e5-large", - "BornholmBitextMining": 40.15 + "BornholmBitextMining": 40.15, + "Tatoeba (pam-eng)": 6.43, + "Tatoeba (kab-eng)": 1.32, + "Tatoeba (tam-eng)": 0.42, + "Tatoeba (cmn-eng)": 2.83, + "Tatoeba (xho-eng)": 2.65, + "Tatoeba (ita-eng)": 29.93, + "Tatoeba (vie-eng)": 7.01, + "Tatoeba (fry-eng)": 19.73, + "Tatoeba (wuu-eng)": 1.46, + "Tatoeba (hin-eng)": 0.0, + "Tatoeba (bel-eng)": 1.67, + "Tatoeba (est-eng)": 3.57, + "Tatoeba (gla-eng)": 3.37, + "Tatoeba (kzj-eng)": 5.68, + "Tatoeba (srp-eng)": 4.41, + "Tatoeba (uzb-eng)": 3.84, + "Tatoeba (nds-eng)": 20.18, + "Tatoeba (ber-eng)": 5.52, + "Tatoeba (fra-eng)": 44.31, + "Tatoeba (slk-eng)": 8.08, + "Tatoeba (lvs-eng)": 5.02, + "Tatoeba (amh-eng)": 0.08, + "Tatoeba (kaz-eng)": 0.95, + "Tatoeba (ina-eng)": 45.6, + "Tatoeba (dan-eng)": 18.87, + "Tatoeba (ell-eng)": 0.6, + "Tatoeba (lat-eng)": 13.19, + "Tatoeba (kur-eng)": 10.07, + "Tatoeba (deu-eng)": 39.83, + "Tatoeba (zsm-eng)": 10.44, + "Tatoeba (ang-eng)": 15.06, + "Tatoeba (swh-eng)": 6.72, + "Tatoeba (heb-eng)": 0.47, + "Tatoeba (nob-eng)": 19.22, + "Tatoeba (hrv-eng)": 11.06, + "Tatoeba (aze-eng)": 5.52, + "Tatoeba (csb-eng)": 11.53, + "Tatoeba (tha-eng)": 1.4, + "Tatoeba (nno-eng)": 13.17, + "Tatoeba (rus-eng)": 0.33, + "Tatoeba (fin-eng)": 6.29, + "Tatoeba (slv-eng)": 9.23, + "Tatoeba (uig-eng)": 0.6, + "Tatoeba (gle-eng)": 3.77, + "Tatoeba (ces-eng)": 6.27, + "Tatoeba (isl-eng)": 6.28, + "Tatoeba (mar-eng)": 0.14, + "Tatoeba (swe-eng)": 15.61, + "Tatoeba (cor-eng)": 2.95, + "Tatoeba (ceb-eng)": 7.57, + "Tatoeba (arq-eng)": 0.61, + "Tatoeba (bre-eng)": 4.9, + "Tatoeba (tat-eng)": 1.18, + "Tatoeba (hun-eng)": 6.26, + "Tatoeba (jpn-eng)": 0.74, + "Tatoeba (bos-eng)": 13.26, + "Tatoeba (yid-eng)": 0.31, + "Tatoeba (kat-eng)": 0.59, + "Tatoeba (pms-eng)": 16.27, + "Tatoeba (dsb-eng)": 7.25, + "Tatoeba (dtp-eng)": 3.64, + "Tatoeba (mkd-eng)": 0.22, + "Tatoeba (hye-eng)": 0.4, + "Tatoeba (tzl-eng)": 22.2, + "Tatoeba (lit-eng)": 3.4, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (ile-eng)": 34.31, + "Tatoeba (lfn-eng)": 24.09, + "Tatoeba (max-eng)": 12.62, + "Tatoeba (tel-eng)": 0.43, + "Tatoeba (swg-eng)": 16.76, + "Tatoeba (oci-eng)": 16.23, + "Tatoeba (fao-eng)": 10.97, + "Tatoeba (ara-eng)": 0.31, + "Tatoeba (ind-eng)": 9.53, + "Tatoeba (cbk-eng)": 24.06, + "Tatoeba (tuk-eng)": 4.12, + "Tatoeba (ido-eng)": 26.18, + "Tatoeba (spa-eng)": 40.6, + "Tatoeba (afr-eng)": 11.53, + "Tatoeba (mhr-eng)": 0.01, + "Tatoeba (jav-eng)": 7.79, + "Tatoeba (cym-eng)": 8.04, + "Tatoeba (mon-eng)": 2.03, + "Tatoeba (bul-eng)": 0.5, + "Tatoeba (yue-eng)": 1.76, + "Tatoeba (ukr-eng)": 0.83, + "Tatoeba (eus-eng)": 8.28, + "Tatoeba (urd-eng)": 0.11, + "Tatoeba (tgl-eng)": 6.7, + "Tatoeba (ast-eng)": 25.64, + "Tatoeba (mal-eng)": 0.3, + "Tatoeba (tur-eng)": 6.06, + "Tatoeba (sqi-eng)": 7.94, + "Tatoeba (khm-eng)": 0.42, + "Tatoeba (gsw-eng)": 17.47, + "Tatoeba (por-eng)": 38.89, + "Tatoeba (kor-eng)": 1.44, + "Tatoeba (arz-eng)": 0.21, + "Tatoeba (epo-eng)": 17.29, + "Tatoeba (nld-eng)": 22.82, + "Tatoeba (orv-eng)": 0.1, + "Tatoeba (hsb-eng)": 6.93, + "Tatoeba (nov-eng)": 34.03, + "Tatoeba (glg-eng)": 29.58, + "Tatoeba (cha-eng)": 20.74, + "Tatoeba (ron-eng)": 14.73, + "Tatoeba (cat-eng)": 25.7, + "Tatoeba (pol-eng)": 8.06, + "Tatoeba (awa-eng)": 0.76, + "Tatoeba (war-eng)": 7.61, + "Tatoeba (pes-eng)": 0.8 } ] }, @@ -8229,89 +22137,1248 @@ "accuracy": [ { "Model": "e5-large", + "AllegroReviews": 25.75, + "AmazonCounterfactualClassification (en-ext)": 76.67, + "AmazonCounterfactualClassification (en)": 75.07, + "AmazonCounterfactualClassification (de)": 53.79, + "AmazonCounterfactualClassification (ja)": 57.11, + "AmazonReviewsClassification (en)": 41.52, + "AmazonReviewsClassification (de)": 29.28, + "AmazonReviewsClassification (es)": 34.19, + "AmazonReviewsClassification (fr)": 32.4, + "AmazonReviewsClassification (ja)": 22.62, + "AmazonReviewsClassification (zh)": 23.0, "AngryTweetsClassification": 46.14, + "CBD": 47.88, "DKHateClassification": 58.72, "DanishPoliticalCommentsClassification": 28.67, + "GeoreviewClassification": 28.65, + "HeadlineClassification": 29.77, + "InappropriatenessClassification": 52.5, + "KinopoiskClassification": 35.86, "LccSentimentClassification": 42.13, - "MassiveIntentClassification (da)": 42.29, - "MassiveIntentClassification (nb)": 40.63, + "MTOPDomainClassification (en)": 92.07, + "MTOPDomainClassification (de)": 76.63, + "MTOPDomainClassification (es)": 80.4, + "MTOPDomainClassification (fr)": 78.82, + "MTOPDomainClassification (hi)": 39.36, + "MTOPDomainClassification (th)": 15.72, + "MTOPIntentClassification (en)": 61.22, + "MTOPIntentClassification (de)": 43.21, + "MTOPIntentClassification (es)": 42.85, + "MTOPIntentClassification (fr)": 37.12, + "MTOPIntentClassification (hi)": 16.58, + "MTOPIntentClassification (th)": 4.55, + "MasakhaNEWSClassification (amh)": 34.97, + "MasakhaNEWSClassification (eng)": 79.34, + "MasakhaNEWSClassification (fra)": 77.49, + "MasakhaNEWSClassification (hau)": 67.19, + "MasakhaNEWSClassification (ibo)": 59.51, + "MasakhaNEWSClassification (lin)": 73.66, + "MasakhaNEWSClassification (lug)": 60.45, + "MasakhaNEWSClassification (orm)": 64.31, + "MasakhaNEWSClassification (pcm)": 90.85, + "MasakhaNEWSClassification (run)": 66.15, + "MasakhaNEWSClassification (sna)": 74.77, + "MasakhaNEWSClassification (som)": 51.09, + "MasakhaNEWSClassification (swa)": 57.31, + "MasakhaNEWSClassification (tir)": 24.52, + "MasakhaNEWSClassification (xho)": 64.65, + "MasakhaNEWSClassification (yor)": 70.56, + "MassiveIntentClassification (bn)": 19.59, + "MassiveIntentClassification (lv)": 41.56, + "MassiveIntentClassification (ko)": 19.9, + "MassiveIntentClassification (fr)": 47.2, + "MassiveIntentClassification (zh-CN)": 19.53, + "MassiveIntentClassification (af)": 38.87, "MassiveIntentClassification (sv)": 40.69, - "MassiveScenarioClassification (da)": 52.95, + "MassiveIntentClassification (ml)": 2.4, + "MassiveIntentClassification (tr)": 40.61, + "MassiveIntentClassification (ro)": 42.2, + "MassiveIntentClassification (ar)": 19.0, + "MassiveIntentClassification (az)": 39.86, + "MassiveIntentClassification (ka)": 10.95, + "MassiveIntentClassification (km)": 4.71, + "MassiveIntentClassification (ur)": 13.5, + "MassiveIntentClassification (kn)": 2.8, + "MassiveIntentClassification (mn)": 18.67, + "MassiveIntentClassification (es)": 45.1, + "MassiveIntentClassification (hu)": 38.79, + "MassiveIntentClassification (is)": 35.21, + "MassiveIntentClassification (am)": 2.55, + "MassiveIntentClassification (jv)": 38.55, + "MassiveIntentClassification (th)": 11.37, + "MassiveIntentClassification (pl)": 39.06, + "MassiveIntentClassification (hi)": 17.3, + "MassiveIntentClassification (nl)": 40.28, + "MassiveIntentClassification (he)": 19.98, + "MassiveIntentClassification (sl)": 40.13, + "MassiveIntentClassification (ta)": 10.87, + "MassiveIntentClassification (cy)": 36.04, + "MassiveIntentClassification (it)": 45.25, + "MassiveIntentClassification (te)": 2.58, + "MassiveIntentClassification (vi)": 37.57, + "MassiveIntentClassification (nb)": 40.63, + "MassiveIntentClassification (sq)": 42.14, + "MassiveIntentClassification (my)": 3.72, + "MassiveIntentClassification (de)": 44.57, + "MassiveIntentClassification (ms)": 38.31, + "MassiveIntentClassification (pt)": 46.8, + "MassiveIntentClassification (en)": 67.77, + "MassiveIntentClassification (fa)": 25.28, + "MassiveIntentClassification (sw)": 39.86, + "MassiveIntentClassification (ja)": 25.24, + "MassiveIntentClassification (id)": 41.65, + "MassiveIntentClassification (zh-TW)": 19.52, + "MassiveIntentClassification (ru)": 22.51, + "MassiveIntentClassification (tl)": 40.76, + "MassiveIntentClassification (el)": 25.9, + "MassiveIntentClassification (fi)": 40.87, + "MassiveIntentClassification (hy)": 9.33, + "MassiveIntentClassification (da)": 42.29, + "MassiveScenarioClassification (km)": 8.73, + "MassiveScenarioClassification (hu)": 46.67, + "MassiveScenarioClassification (lv)": 47.37, + "MassiveScenarioClassification (ml)": 6.8, + "MassiveScenarioClassification (vi)": 42.45, + "MassiveScenarioClassification (fr)": 57.57, + "MassiveScenarioClassification (tl)": 52.61, + "MassiveScenarioClassification (my)": 10.65, "MassiveScenarioClassification (nb)": 51.91, + "MassiveScenarioClassification (fa)": 30.19, + "MassiveScenarioClassification (fi)": 48.25, + "MassiveScenarioClassification (ro)": 53.29, + "MassiveScenarioClassification (cy)": 44.51, + "MassiveScenarioClassification (is)": 45.03, + "MassiveScenarioClassification (ms)": 49.02, + "MassiveScenarioClassification (am)": 7.76, + "MassiveScenarioClassification (ru)": 26.94, + "MassiveScenarioClassification (az)": 47.25, + "MassiveScenarioClassification (hi)": 22.69, + "MassiveScenarioClassification (da)": 52.95, + "MassiveScenarioClassification (ja)": 30.28, + "MassiveScenarioClassification (en)": 71.99, "MassiveScenarioClassification (sv)": 50.97, + "MassiveScenarioClassification (ur)": 20.58, + "MassiveScenarioClassification (el)": 35.32, + "MassiveScenarioClassification (es)": 55.79, + "MassiveScenarioClassification (pt)": 57.43, + "MassiveScenarioClassification (th)": 20.16, + "MassiveScenarioClassification (zh-TW)": 27.76, + "MassiveScenarioClassification (ar)": 27.2, + "MassiveScenarioClassification (ka)": 17.42, + "MassiveScenarioClassification (sw)": 46.97, + "MassiveScenarioClassification (ta)": 17.1, + "MassiveScenarioClassification (id)": 48.89, + "MassiveScenarioClassification (sl)": 47.11, + "MassiveScenarioClassification (kn)": 7.98, + "MassiveScenarioClassification (ko)": 27.82, + "MassiveScenarioClassification (af)": 49.65, + "MassiveScenarioClassification (hy)": 16.0, + "MassiveScenarioClassification (he)": 23.14, + "MassiveScenarioClassification (te)": 7.01, + "MassiveScenarioClassification (bn)": 26.41, + "MassiveScenarioClassification (sq)": 51.61, + "MassiveScenarioClassification (it)": 55.68, + "MassiveScenarioClassification (tr)": 48.86, + "MassiveScenarioClassification (zh-CN)": 28.01, + "MassiveScenarioClassification (de)": 59.26, + "MassiveScenarioClassification (nl)": 52.19, + "MassiveScenarioClassification (mn)": 26.77, + "MassiveScenarioClassification (pl)": 48.0, + "MassiveScenarioClassification (jv)": 47.57, "NoRecClassification": 41.83, "NordicLangClassification": 58.3, "NorwegianParliament": 57.26, + "PAC": 67.56, + "PolEmo2.0-IN": 43.98, + "PolEmo2.0-OUT": 23.54, + "RuReviewsClassification": 43.2, + "RuSciBenchGRNTIClassification": 14.83, + "RuSciBenchOECDClassification": 11.64, "ScalaDaClassification": 49.9, "ScalaNbClassification": 50.13 } ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "e5-large", + "AlloProfClusteringP2P": 58.4, + "AlloProfClusteringS2S": 37.22, + "BlurbsClusteringP2P": 31.08, + "BlurbsClusteringS2S": 11.45, + "GeoreviewClusteringP2P": 18.26, + "HALClusteringS2S": 23.44, + "MLSUMClusteringP2P (de)": 36.9, + "MLSUMClusteringP2P (fr)": 42.92, + "MLSUMClusteringP2P (ru)": 21.22, + "MLSUMClusteringP2P (es)": 42.73, + "MLSUMClusteringS2S (de)": 39.73, + "MLSUMClusteringS2S (fr)": 42.99, + "MLSUMClusteringS2S (ru)": 21.36, + "MLSUMClusteringS2S (es)": 43.02, + "MasakhaNEWSClusteringP2P (amh)": 41.03, + "MasakhaNEWSClusteringP2P (eng)": 56.6, + "MasakhaNEWSClusteringP2P (fra)": 59.73, + "MasakhaNEWSClusteringP2P (hau)": 32.38, + "MasakhaNEWSClusteringP2P (ibo)": 35.88, + "MasakhaNEWSClusteringP2P (lin)": 46.97, + "MasakhaNEWSClusteringP2P (lug)": 61.55, + "MasakhaNEWSClusteringP2P (orm)": 33.01, + "MasakhaNEWSClusteringP2P (pcm)": 74.24, + "MasakhaNEWSClusteringP2P (run)": 55.65, + "MasakhaNEWSClusteringP2P (sna)": 48.59, + "MasakhaNEWSClusteringP2P (som)": 38.98, + "MasakhaNEWSClusteringP2P (swa)": 25.93, + "MasakhaNEWSClusteringP2P (tir)": 43.52, + "MasakhaNEWSClusteringP2P (xho)": 29.18, + "MasakhaNEWSClusteringP2P (yor)": 30.58, + "MasakhaNEWSClusteringS2S (amh)": 45.44, + "MasakhaNEWSClusteringS2S (eng)": 51.0, + "MasakhaNEWSClusteringS2S (fra)": 33.53, + "MasakhaNEWSClusteringS2S (hau)": 16.31, + "MasakhaNEWSClusteringS2S (ibo)": 42.6, + "MasakhaNEWSClusteringS2S (lin)": 41.71, + "MasakhaNEWSClusteringS2S (lug)": 45.92, + "MasakhaNEWSClusteringS2S (orm)": 24.84, + "MasakhaNEWSClusteringS2S (pcm)": 65.43, + "MasakhaNEWSClusteringS2S (run)": 45.69, + "MasakhaNEWSClusteringS2S (sna)": 44.39, + "MasakhaNEWSClusteringS2S (som)": 29.42, + "MasakhaNEWSClusteringS2S (swa)": 17.13, + "MasakhaNEWSClusteringS2S (tir)": 44.01, + "MasakhaNEWSClusteringS2S (xho)": 31.38, + "MasakhaNEWSClusteringS2S (yor)": 40.1, + "RuSciBenchGRNTIClusteringP2P": 14.85, + "RuSciBenchOECDClusteringP2P": 13.66, + "TenKGnadClusteringP2P": 43.22, + "TenKGnadClusteringS2S": 21.2 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "e5-large", + "CDSC-E": 69.58, + "FalseFriendsGermanEnglish": 47.55, + "OpusparcusPC (de)": 91.79, + "OpusparcusPC (en)": 98.74, + "OpusparcusPC (fi)": 86.87, + "OpusparcusPC (fr)": 88.26, + "OpusparcusPC (ru)": 81.03, + "OpusparcusPC (sv)": 84.1, + "PSC": 96.15, + "PawsXPairClassification (de)": 51.97, + "PawsXPairClassification (en)": 64.65, + "PawsXPairClassification (es)": 53.65, + "PawsXPairClassification (fr)": 55.63, + "PawsXPairClassification (ja)": 48.78, + "PawsXPairClassification (ko)": 51.23, + "PawsXPairClassification (zh)": 52.84, + "SICK-E-PL": 56.09, + "TERRa": 47.37 + }, + { + "Model": "e5-large", + "CDSC-E": 69.58, + "FalseFriendsGermanEnglish": 47.55, + "OpusparcusPC (de)": 91.79, + "OpusparcusPC (en)": 98.74, + "OpusparcusPC (fi)": 86.89, + "OpusparcusPC (fr)": 88.26, + "OpusparcusPC (ru)": 81.05, + "OpusparcusPC (sv)": 84.1, + "PSC": 96.15, + "PawsXPairClassification (de)": 52.46, + "PawsXPairClassification (en)": 64.66, + "PawsXPairClassification (es)": 53.65, + "PawsXPairClassification (fr)": 55.72, + "PawsXPairClassification (ja)": 48.92, + "PawsXPairClassification (ko)": 51.36, + "PawsXPairClassification (zh)": 52.93, + "SICK-E-PL": 56.09, + "TERRa": 47.37 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-large", + "AlloprofReranking": 61.93, + "RuBQReranking": 30.83, + "SyntecReranking": 75.95, + "T2Reranking": 59.72 + }, + { + "Model": "e5-large", + "MIRACLReranking (ar)": 6.68, + "MIRACLReranking (bn)": 9.74, + "MIRACLReranking (de)": 27.94, + "MIRACLReranking (en)": 55.79, + "MIRACLReranking (es)": 44.12, + "MIRACLReranking (fa)": 8.37, + "MIRACLReranking (fi)": 42.68, + "MIRACLReranking (fr)": 37.76, + "MIRACLReranking (hi)": 8.17, + "MIRACLReranking (id)": 28.76, + "MIRACLReranking (ja)": 10.84, + "MIRACLReranking (ko)": 13.09, + "MIRACLReranking (ru)": 10.52, + "MIRACLReranking (sw)": 34.27, + "MIRACLReranking (te)": 1.19, + "MIRACLReranking (th)": 3.31, + "MIRACLReranking (yo)": 58.1, + "MIRACLReranking (zh)": 10.4 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "e5-large", + "AILACasedocs": 26.91, + "AILAStatutes": 22.9, + "ARCChallenge": 8.01, + "AlloprofRetrieval": 33.73, + "AlphaNLI": 13.3, + "AppsRetrieval": 9.18, + "BSARDRetrieval": 11.87, + "CmedqaRetrieval": 2.58, + "CodeFeedbackMT": 45.83, + "CodeFeedbackST": 73.29, + "CodeSearchNetCCRetrieval (python)": 63.21, + "CodeSearchNetCCRetrieval (javascript)": 60.51, + "CodeSearchNetCCRetrieval (go)": 42.42, + "CodeSearchNetCCRetrieval (ruby)": 57.91, + "CodeSearchNetCCRetrieval (java)": 55.47, + "CodeSearchNetCCRetrieval (php)": 44.14, + "CodeSearchNetRetrieval (python)": 83.25, + "CodeSearchNetRetrieval (javascript)": 64.28, + "CodeSearchNetRetrieval (go)": 80.19, + "CodeSearchNetRetrieval (ruby)": 74.52, + "CodeSearchNetRetrieval (java)": 75.38, + "CodeSearchNetRetrieval (php)": 72.31, + "CodeTransOceanContest": 63.15, + "CodeTransOceanDL": 31.77, + "CosQA": 32.86, + "CovidRetrieval": 4.01, + "GerDaLIR": 3.65, + "GerDaLIRSmall": 8.26, + "GermanQuAD-Retrieval": 85.05, + "HellaSwag": 26.48, + "LEMBNarrativeQARetrieval": 23.45, + "LEMBQMSumRetrieval": 22.61, + "LEMBSummScreenFDRetrieval": 73.65, + "LEMBWikimQARetrieval": 47.74, + "LeCaRDv2": 19.56, + "LegalBenchConsumerContractsQA": 75.67, + "LegalBenchCorporateLobbying": 91.61, + "LegalQuAD": 27.15, + "LegalSummarization": 57.9, + "MIRACLRetrieval (ar)": 0.15, + "MIRACLRetrieval (bn)": 0.34, + "MIRACLRetrieval (de)": 19.7, + "MIRACLRetrieval (en)": 47.81, + "MIRACLRetrieval (es)": 32.6, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 24.84, + "MIRACLRetrieval (fr)": 31.33, + "MIRACLRetrieval (hi)": 0.34, + "MIRACLRetrieval (id)": 18.93, + "MIRACLRetrieval (ja)": 0.77, + "MIRACLRetrieval (ko)": 4.79, + "MIRACLRetrieval (ru)": 1.49, + "MIRACLRetrieval (sw)": 27.95, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.26, + "MIRACLRetrieval (yo)": 56.96, + "MIRACLRetrieval (zh)": 0.14, + "MintakaRetrieval (ar)": 3.13, + "MintakaRetrieval (de)": 20.81, + "MintakaRetrieval (es)": 20.17, + "MintakaRetrieval (fr)": 22.39, + "MintakaRetrieval (hi)": 3.41, + "MintakaRetrieval (it)": 18.62, + "MintakaRetrieval (ja)": 6.35, + "MintakaRetrieval (pt)": 18.68, + "PIQA": 25.21, + "Quail": 4.96, + "RARbCode": 44.8, + "RARbMath": 72.81, + "RiaNewsRetrieval": 4.39, + "RuBQRetrieval": 4.69, + "SIQA": 2.86, + "SciFact-PL": 44.95, + "SpartQA": 1.75, + "StackOverflowQA": 82.85, + "SyntecRetrieval": 70.86, + "SyntheticText2SQL": 50.85, + "TRECCOVID-PL": 35.81, + "TempReasonL1": 1.74, + "TempReasonL2Fact": 33.63, + "TempReasonL2Pure": 1.25, + "TempReasonL3Fact": 27.44, + "TempReasonL3Pure": 7.58, + "WinoGrande": 51.0, + "XMarket (de)": 16.74, + "XMarket (en)": 32.99, + "XMarket (es)": 19.63, + "XPQARetrieval (ara-ara)": 9.6, + "XPQARetrieval (eng-ara)": 4.13, + "XPQARetrieval (ara-eng)": 8.82, + "XPQARetrieval (deu-deu)": 58.49, + "XPQARetrieval (eng-deu)": 15.55, + "XPQARetrieval (deu-eng)": 31.58, + "XPQARetrieval (spa-spa)": 44.1, + "XPQARetrieval (eng-spa)": 16.48, + "XPQARetrieval (spa-eng)": 26.66, + "XPQARetrieval (fra-fra)": 50.79, + "XPQARetrieval (eng-fra)": 19.38, + "XPQARetrieval (fra-eng)": 32.08, + "XPQARetrieval (hin-hin)": 32.52, + "XPQARetrieval (eng-hin)": 6.95, + "XPQARetrieval (hin-eng)": 6.86, + "XPQARetrieval (ita-ita)": 56.26, + "XPQARetrieval (eng-ita)": 13.54, + "XPQARetrieval (ita-eng)": 29.12, + "XPQARetrieval (jpn-jpn)": 34.82, + "XPQARetrieval (eng-jpn)": 6.61, + "XPQARetrieval (jpn-eng)": 17.54, + "XPQARetrieval (kor-kor)": 10.57, + "XPQARetrieval (eng-kor)": 10.03, + "XPQARetrieval (kor-eng)": 7.86, + "XPQARetrieval (pol-pol)": 29.41, + "XPQARetrieval (eng-pol)": 11.89, + "XPQARetrieval (pol-eng)": 17.28, + "XPQARetrieval (por-por)": 36.71, + "XPQARetrieval (eng-por)": 13.2, + "XPQARetrieval (por-eng)": 24.48, + "XPQARetrieval (tam-tam)": 6.6, + "XPQARetrieval (eng-tam)": 4.91, + "XPQARetrieval (tam-eng)": 3.67, + "XPQARetrieval (cmn-cmn)": 23.41, + "XPQARetrieval (eng-cmn)": 7.33, + "XPQARetrieval (cmn-eng)": 15.24 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-large", + "CDSC-R": 84.14, + "GermanSTSBenchmark": 67.39, + "RUParaPhraserSTS": 46.38, + "RuSTSBenchmarkSTS": 53.27, + "SICK-R-PL": 58.82, + "SICKFr": 70.03, + "STS22 (fr-pl)": 73.25, + "STS22 (de)": 44.63, + "STS22 (it)": 71.03, + "STS22 (es)": 61.75, + "STS22 (pl-en)": 64.87, + "STS22 (es-en)": 68.92, + "STS22 (de-fr)": 54.29, + "STS22 (de-en)": 54.9, + "STS22 (ar)": 37.03, + "STS22 (tr)": 55.9, + "STS22 (ru)": 21.54, + "STS22 (es-it)": 63.53, + "STS22 (pl)": 32.97, + "STS22 (en)": 65.65, + "STS22 (zh)": 44.9, + "STS22 (fr)": 81.45, + "STS22 (de-pl)": 40.53, + "STS22 (zh-en)": 35.44, + "STSB": 37.53, + "STSBenchmarkMultilingualSTS (pl)": 61.52, + "STSBenchmarkMultilingualSTS (pt)": 73.29, + "STSBenchmarkMultilingualSTS (it)": 69.59, + "STSBenchmarkMultilingualSTS (es)": 74.87, + "STSBenchmarkMultilingualSTS (de)": 67.25, + "STSBenchmarkMultilingualSTS (zh)": 39.93, + "STSBenchmarkMultilingualSTS (nl)": 66.31, + "STSBenchmarkMultilingualSTS (en)": 87.15, + "STSBenchmarkMultilingualSTS (fr)": 72.65, + "STSBenchmarkMultilingualSTS (ru)": 53.29 + }, + { + "Model": "e5-large", + "CDSC-R": 84.14, + "GermanSTSBenchmark": 67.39, + "RUParaPhraserSTS": 46.38, + "RuSTSBenchmarkSTS": 53.27, + "SICK-R-PL": 58.82, + "SICKFr": 70.03, + "STS22 (fr-pl)": 73.25, + "STS22 (de)": 44.63, + "STS22 (it)": 71.03, + "STS22 (es)": 61.75, + "STS22 (pl-en)": 64.87, + "STS22 (es-en)": 68.92, + "STS22 (de-fr)": 54.29, + "STS22 (de-en)": 54.9, + "STS22 (ar)": 37.01, + "STS22 (tr)": 55.9, + "STS22 (ru)": 21.54, + "STS22 (es-it)": 63.53, + "STS22 (pl)": 32.96, + "STS22 (en)": 65.65, + "STS22 (zh)": 44.9, + "STS22 (fr)": 81.45, + "STS22 (de-pl)": 40.53, + "STS22 (zh-en)": 35.44, + "STSB": 37.53, + "STSBenchmarkMultilingualSTS (pl)": 61.52, + "STSBenchmarkMultilingualSTS (pt)": 73.29, + "STSBenchmarkMultilingualSTS (it)": 69.59, + "STSBenchmarkMultilingualSTS (es)": 74.87, + "STSBenchmarkMultilingualSTS (de)": 67.25, + "STSBenchmarkMultilingualSTS (zh)": 39.93, + "STSBenchmarkMultilingualSTS (nl)": 66.31, + "STSBenchmarkMultilingualSTS (en)": 87.15, + "STSBenchmarkMultilingualSTS (fr)": 72.65, + "STSBenchmarkMultilingualSTS (ru)": 53.29 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-large", + "SummEvalFr": 31.82 + }, + { + "Model": "e5-large", + "SummEvalFr": 31.82 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-large", + "CEDRClassification": 33.15, + "SensitiveTopicsClassification": 17.69 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "e5-large", + "Core17InstructionRetrieval": 0.21, + "News21InstructionRetrieval": -0.94, + "Robust04InstructionRetrieval": -6.25 + } + ] } }, "intfloat__e5-large-v2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "e5-large-v2", + "BornholmBitextMining": 40.86, + "Tatoeba (nld-eng)": 28.79, + "Tatoeba (cor-eng)": 3.96, + "Tatoeba (ita-eng)": 31.16, + "Tatoeba (orv-eng)": 0.51, + "Tatoeba (swe-eng)": 21.42, + "Tatoeba (isl-eng)": 10.27, + "Tatoeba (ang-eng)": 20.52, + "Tatoeba (awa-eng)": 0.07, + "Tatoeba (swh-eng)": 8.5, + "Tatoeba (fin-eng)": 5.89, + "Tatoeba (kur-eng)": 10.32, + "Tatoeba (afr-eng)": 14.06, + "Tatoeba (sqi-eng)": 10.53, + "Tatoeba (csb-eng)": 12.24, + "Tatoeba (aze-eng)": 5.82, + "Tatoeba (ces-eng)": 7.5, + "Tatoeba (hun-eng)": 6.66, + "Tatoeba (lvs-eng)": 6.81, + "Tatoeba (tam-eng)": 0.04, + "Tatoeba (por-eng)": 49.24, + "Tatoeba (mon-eng)": 2.73, + "Tatoeba (pes-eng)": 0.42, + "Tatoeba (gla-eng)": 3.45, + "Tatoeba (war-eng)": 8.24, + "Tatoeba (mar-eng)": 0.25, + "Tatoeba (gsw-eng)": 20.97, + "Tatoeba (hye-eng)": 0.81, + "Tatoeba (kor-eng)": 1.7, + "Tatoeba (max-eng)": 14.68, + "Tatoeba (est-eng)": 4.5, + "Tatoeba (ceb-eng)": 8.33, + "Tatoeba (lfn-eng)": 25.13, + "Tatoeba (ara-eng)": 0.54, + "Tatoeba (nob-eng)": 27.97, + "Tatoeba (lat-eng)": 14.13, + "Tatoeba (pam-eng)": 6.46, + "Tatoeba (mkd-eng)": 1.64, + "Tatoeba (hrv-eng)": 14.76, + "Tatoeba (vie-eng)": 7.3, + "Tatoeba (ukr-eng)": 2.09, + "Tatoeba (fao-eng)": 15.09, + "Tatoeba (fry-eng)": 25.25, + "Tatoeba (dsb-eng)": 10.42, + "Tatoeba (yid-eng)": 0.49, + "Tatoeba (kab-eng)": 1.64, + "Tatoeba (ind-eng)": 9.85, + "Tatoeba (ell-eng)": 1.44, + "Tatoeba (gle-eng)": 4.29, + "Tatoeba (zsm-eng)": 12.54, + "Tatoeba (ile-eng)": 33.88, + "Tatoeba (tzl-eng)": 21.14, + "Tatoeba (tha-eng)": 1.4, + "Tatoeba (bul-eng)": 4.13, + "Tatoeba (ast-eng)": 34.17, + "Tatoeba (mal-eng)": 0.32, + "Tatoeba (bre-eng)": 5.13, + "Tatoeba (tur-eng)": 5.49, + "Tatoeba (cbk-eng)": 29.76, + "Tatoeba (tuk-eng)": 4.64, + "Tatoeba (hin-eng)": 0.3, + "Tatoeba (tat-eng)": 1.03, + "Tatoeba (hsb-eng)": 9.41, + "Tatoeba (kat-eng)": 1.27, + "Tatoeba (jav-eng)": 7.58, + "Tatoeba (kaz-eng)": 1.44, + "Tatoeba (fra-eng)": 51.44, + "Tatoeba (dtp-eng)": 4.58, + "Tatoeba (yue-eng)": 3.74, + "Tatoeba (rus-eng)": 4.8, + "Tatoeba (spa-eng)": 49.84, + "Tatoeba (dan-eng)": 29.27, + "Tatoeba (uig-eng)": 0.61, + "Tatoeba (bos-eng)": 14.46, + "Tatoeba (swg-eng)": 20.81, + "Tatoeba (ina-eng)": 49.41, + "Tatoeba (xho-eng)": 4.91, + "Tatoeba (nov-eng)": 37.2, + "Tatoeba (ido-eng)": 22.58, + "Tatoeba (cym-eng)": 7.79, + "Tatoeba (lit-eng)": 4.44, + "Tatoeba (jpn-eng)": 2.29, + "Tatoeba (cha-eng)": 16.95, + "Tatoeba (srp-eng)": 7.67, + "Tatoeba (epo-eng)": 18.77, + "Tatoeba (uzb-eng)": 3.43, + "Tatoeba (bel-eng)": 2.84, + "Tatoeba (urd-eng)": 0.1, + "Tatoeba (tgl-eng)": 8.73, + "Tatoeba (tel-eng)": 0.38, + "Tatoeba (nds-eng)": 25.08, + "Tatoeba (kzj-eng)": 6.54, + "Tatoeba (glg-eng)": 36.15, + "Tatoeba (arz-eng)": 0.21, + "Tatoeba (khm-eng)": 0.32, + "Tatoeba (slk-eng)": 9.2, + "Tatoeba (deu-eng)": 57.44, + "Tatoeba (ber-eng)": 5.31, + "Tatoeba (slv-eng)": 11.91, + "Tatoeba (pms-eng)": 16.64, + "Tatoeba (oci-eng)": 17.76, + "Tatoeba (nno-eng)": 18.76, + "Tatoeba (cat-eng)": 29.27, + "Tatoeba (eus-eng)": 9.2, + "Tatoeba (heb-eng)": 0.87, + "Tatoeba (ron-eng)": 17.06, + "Tatoeba (amh-eng)": 0.05, + "Tatoeba (cmn-eng)": 2.63, + "Tatoeba (mhr-eng)": 0.67, + "Tatoeba (arq-eng)": 0.93, + "Tatoeba (wuu-eng)": 2.59, + "Tatoeba (pol-eng)": 10.63, + "Tatoeba (ben-eng)": 0.0 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-large-v2", + "AllegroReviews": 26.1, + "AmazonCounterfactualClassification (en-ext)": 77.78, + "AmazonCounterfactualClassification (en)": 78.34, + "AmazonCounterfactualClassification (de)": 58.14, + "AmazonCounterfactualClassification (ja)": 59.74, + "AmazonReviewsClassification (en)": 49.45, + "AmazonReviewsClassification (de)": 34.5, + "AmazonReviewsClassification (es)": 40.02, + "AmazonReviewsClassification (fr)": 37.46, + "AmazonReviewsClassification (ja)": 23.15, + "AmazonReviewsClassification (zh)": 24.05, + "AngryTweetsClassification": 47.98, + "CBD": 51.48, + "DanishPoliticalCommentsClassification": 29.08, + "GeoreviewClassification": 32.09, + "HeadlineClassification": 44.38, + "InappropriatenessClassification": 53.71, + "KinopoiskClassification": 37.27, + "LccSentimentClassification": 45.87, + "MTOPDomainClassification (en)": 93.08, + "MTOPDomainClassification (de)": 81.84, + "MTOPDomainClassification (es)": 82.67, + "MTOPDomainClassification (fr)": 83.05, + "MTOPDomainClassification (hi)": 41.93, + "MTOPDomainClassification (th)": 15.39, + "MTOPIntentClassification (en)": 64.58, + "MTOPIntentClassification (de)": 53.16, + "MTOPIntentClassification (es)": 49.57, + "MTOPIntentClassification (fr)": 44.56, + "MTOPIntentClassification (hi)": 19.16, + "MTOPIntentClassification (th)": 5.33, + "MasakhaNEWSClassification (amh)": 34.65, + "MasakhaNEWSClassification (eng)": 77.31, + "MasakhaNEWSClassification (fra)": 75.47, + "MasakhaNEWSClassification (hau)": 68.08, + "MasakhaNEWSClassification (ibo)": 61.36, + "MasakhaNEWSClassification (lin)": 73.66, + "MasakhaNEWSClassification (lug)": 62.87, + "MasakhaNEWSClassification (orm)": 65.08, + "MasakhaNEWSClassification (pcm)": 90.46, + "MasakhaNEWSClassification (run)": 65.78, + "MasakhaNEWSClassification (sna)": 77.29, + "MasakhaNEWSClassification (som)": 51.33, + "MasakhaNEWSClassification (swa)": 59.94, + "MasakhaNEWSClassification (tir)": 24.38, + "MasakhaNEWSClassification (xho)": 64.71, + "MasakhaNEWSClassification (yor)": 72.0, + "MassiveIntentClassification (am)": 2.69, + "MassiveIntentClassification (ko)": 24.94, + "MassiveIntentClassification (sl)": 41.54, + "MassiveIntentClassification (th)": 13.15, + "MassiveIntentClassification (hu)": 40.23, + "MassiveIntentClassification (tl)": 42.84, + "MassiveIntentClassification (my)": 3.88, + "MassiveIntentClassification (id)": 43.12, + "MassiveIntentClassification (ur)": 23.97, + "MassiveIntentClassification (sv)": 44.13, + "MassiveIntentClassification (nl)": 44.37, + "MassiveIntentClassification (jv)": 38.7, + "MassiveIntentClassification (ml)": 3.0, + "MassiveIntentClassification (zh-CN)": 22.35, + "MassiveIntentClassification (af)": 41.84, + "MassiveIntentClassification (es)": 49.84, + "MassiveIntentClassification (ja)": 34.0, + "MassiveIntentClassification (ru)": 40.62, + "MassiveIntentClassification (te)": 2.35, + "MassiveIntentClassification (lv)": 42.13, + "MassiveIntentClassification (zh-TW)": 20.97, + "MassiveIntentClassification (en)": 68.14, + "MassiveIntentClassification (vi)": 37.67, + "MassiveIntentClassification (mn)": 28.0, + "MassiveIntentClassification (is)": 35.18, + "MassiveIntentClassification (el)": 37.8, + "MassiveIntentClassification (kn)": 3.3, + "MassiveIntentClassification (pl)": 41.75, + "MassiveIntentClassification (nb)": 43.32, + "MassiveIntentClassification (fa)": 35.75, + "MassiveIntentClassification (bn)": 24.23, + "MassiveIntentClassification (hi)": 22.04, + "MassiveIntentClassification (hy)": 13.79, + "MassiveIntentClassification (pt)": 51.74, + "MassiveIntentClassification (da)": 45.7, + "MassiveIntentClassification (sq)": 43.45, + "MassiveIntentClassification (ar)": 27.84, + "MassiveIntentClassification (he)": 28.2, + "MassiveIntentClassification (ms)": 40.45, + "MassiveIntentClassification (cy)": 38.39, + "MassiveIntentClassification (tr)": 43.02, + "MassiveIntentClassification (fi)": 41.93, + "MassiveIntentClassification (it)": 47.73, + "MassiveIntentClassification (de)": 49.47, + "MassiveIntentClassification (sw)": 38.6, + "MassiveIntentClassification (az)": 41.8, + "MassiveIntentClassification (ta)": 14.95, + "MassiveIntentClassification (ro)": 44.27, + "MassiveIntentClassification (ka)": 14.56, + "MassiveIntentClassification (km)": 4.57, + "MassiveIntentClassification (fr)": 50.66, + "MassiveScenarioClassification (ja)": 42.73, + "MassiveScenarioClassification (pt)": 57.88, + "MassiveScenarioClassification (ar)": 34.93, + "MassiveScenarioClassification (ta)": 20.23, + "MassiveScenarioClassification (fr)": 57.97, + "MassiveScenarioClassification (am)": 7.54, + "MassiveScenarioClassification (sv)": 53.47, + "MassiveScenarioClassification (id)": 49.05, + "MassiveScenarioClassification (vi)": 42.88, + "MassiveScenarioClassification (bn)": 31.56, + "MassiveScenarioClassification (da)": 55.41, + "MassiveScenarioClassification (hi)": 25.87, + "MassiveScenarioClassification (ml)": 7.22, + "MassiveScenarioClassification (sq)": 51.31, + "MassiveScenarioClassification (tr)": 49.95, + "MassiveScenarioClassification (en)": 71.5, + "MassiveScenarioClassification (az)": 48.4, + "MassiveScenarioClassification (ka)": 20.37, + "MassiveScenarioClassification (ko)": 30.68, + "MassiveScenarioClassification (ro)": 53.72, + "MassiveScenarioClassification (el)": 46.24, + "MassiveScenarioClassification (ru)": 45.16, + "MassiveScenarioClassification (sl)": 47.66, + "MassiveScenarioClassification (fa)": 38.06, + "MassiveScenarioClassification (nb)": 53.32, + "MassiveScenarioClassification (te)": 7.09, + "MassiveScenarioClassification (fi)": 48.37, + "MassiveScenarioClassification (he)": 30.38, + "MassiveScenarioClassification (nl)": 55.05, + "MassiveScenarioClassification (is)": 44.26, + "MassiveScenarioClassification (th)": 22.92, + "MassiveScenarioClassification (hu)": 48.6, + "MassiveScenarioClassification (km)": 9.84, + "MassiveScenarioClassification (sw)": 45.69, + "MassiveScenarioClassification (ur)": 32.19, + "MassiveScenarioClassification (ms)": 49.14, + "MassiveScenarioClassification (jv)": 47.09, + "MassiveScenarioClassification (mn)": 32.73, + "MassiveScenarioClassification (cy)": 45.72, + "MassiveScenarioClassification (af)": 51.77, + "MassiveScenarioClassification (hy)": 19.82, + "MassiveScenarioClassification (kn)": 8.32, + "MassiveScenarioClassification (zh-CN)": 31.82, + "MassiveScenarioClassification (it)": 56.57, + "MassiveScenarioClassification (zh-TW)": 30.42, + "MassiveScenarioClassification (my)": 10.49, + "MassiveScenarioClassification (pl)": 49.17, + "MassiveScenarioClassification (es)": 58.39, + "MassiveScenarioClassification (tl)": 51.52, + "MassiveScenarioClassification (lv)": 47.14, + "MassiveScenarioClassification (de)": 60.64, + "NoRecClassification": 43.18, + "NordicLangClassification": 59.82, + "PAC": 69.79, + "PolEmo2.0-IN": 46.99, + "PolEmo2.0-OUT": 23.28, + "RuReviewsClassification": 47.99, + "RuSciBenchGRNTIClassification": 24.33, + "RuSciBenchOECDClassification": 18.45, + "ToxicConversationsClassification": 63.29 + } + ] }, "Clustering": { "v_measure": [ { "Model": "e5-large-v2", + "AlloProfClusteringP2P": 58.75, + "AlloProfClusteringS2S": 34.92, "BiorxivClusteringP2P": 36.72, "BiorxivClusteringS2S": 35.47, + "BlurbsClusteringP2P": 31.19, + "BlurbsClusteringS2S": 12.4, + "GeoreviewClusteringP2P": 27.6, + "HALClusteringS2S": 23.39, + "MLSUMClusteringP2P (de)": 38.35, + "MLSUMClusteringP2P (fr)": 42.34, + "MLSUMClusteringP2P (ru)": 26.59, + "MLSUMClusteringP2P (es)": 43.65, + "MLSUMClusteringS2S (de)": 38.99, + "MLSUMClusteringS2S (fr)": 42.95, + "MLSUMClusteringS2S (ru)": 24.89, + "MLSUMClusteringS2S (es)": 43.69, + "MasakhaNEWSClusteringP2P (amh)": 41.67, + "MasakhaNEWSClusteringP2P (eng)": 53.4, + "MasakhaNEWSClusteringP2P (fra)": 36.7, + "MasakhaNEWSClusteringP2P (hau)": 47.77, + "MasakhaNEWSClusteringP2P (ibo)": 39.78, + "MasakhaNEWSClusteringP2P (lin)": 46.47, + "MasakhaNEWSClusteringP2P (lug)": 60.71, + "MasakhaNEWSClusteringP2P (orm)": 30.18, + "MasakhaNEWSClusteringP2P (pcm)": 72.23, + "MasakhaNEWSClusteringP2P (run)": 53.93, + "MasakhaNEWSClusteringP2P (sna)": 49.14, + "MasakhaNEWSClusteringP2P (som)": 33.73, + "MasakhaNEWSClusteringP2P (swa)": 26.38, + "MasakhaNEWSClusteringP2P (tir)": 45.28, + "MasakhaNEWSClusteringP2P (xho)": 33.98, + "MasakhaNEWSClusteringP2P (yor)": 38.67, + "MasakhaNEWSClusteringS2S (amh)": 41.49, + "MasakhaNEWSClusteringS2S (eng)": 33.38, + "MasakhaNEWSClusteringS2S (fra)": 45.31, + "MasakhaNEWSClusteringS2S (hau)": 18.89, + "MasakhaNEWSClusteringS2S (ibo)": 42.67, + "MasakhaNEWSClusteringS2S (lin)": 47.62, + "MasakhaNEWSClusteringS2S (lug)": 44.12, + "MasakhaNEWSClusteringS2S (orm)": 23.86, + "MasakhaNEWSClusteringS2S (pcm)": 55.76, + "MasakhaNEWSClusteringS2S (run)": 46.23, + "MasakhaNEWSClusteringS2S (sna)": 40.38, + "MasakhaNEWSClusteringS2S (som)": 24.86, + "MasakhaNEWSClusteringS2S (swa)": 17.28, + "MasakhaNEWSClusteringS2S (tir)": 42.63, + "MasakhaNEWSClusteringS2S (xho)": 24.27, + "MasakhaNEWSClusteringS2S (yor)": 35.68, "MedrxivClusteringP2P": 31.45, "MedrxivClusteringS2S": 29.91, "RedditClustering": 55.5, "RedditClusteringP2P": 63.71, + "RuSciBenchGRNTIClusteringP2P": 20.36, + "RuSciBenchOECDClusteringP2P": 18.0, "StackExchangeClustering": 65.23, "StackExchangeClusteringP2P": 33.62, + "TenKGnadClusteringP2P": 44.85, + "TenKGnadClusteringS2S": 24.61, "TwentyNewsgroupsClustering": 48.73 } ] }, - "PairClassification": { - "max_ap": [] - }, + "PairClassification": { + "max_ap": [ + { + "Model": "e5-large-v2", + "CDSC-E": 65.96, + "FalseFriendsGermanEnglish": 47.9, + "OpusparcusPC (de)": 92.28, + "OpusparcusPC (en)": 98.02, + "OpusparcusPC (fi)": 86.16, + "OpusparcusPC (fr)": 88.99, + "OpusparcusPC (ru)": 79.79, + "OpusparcusPC (sv)": 85.77, + "PSC": 96.59, + "PawsXPairClassification (de)": 50.96, + "PawsXPairClassification (en)": 60.33, + "PawsXPairClassification (es)": 52.65, + "PawsXPairClassification (fr)": 56.07, + "PawsXPairClassification (ja)": 49.23, + "PawsXPairClassification (ko)": 50.21, + "PawsXPairClassification (zh)": 52.84, + "SICK-E-PL": 58.41, + "SprintDuplicateQuestions": 94.83, + "TERRa": 47.24, + "TwitterURLCorpus": 86.44 + }, + { + "Model": "e5-large-v2", + "CDSC-E": 65.96, + "FalseFriendsGermanEnglish": 47.9, + "OpusparcusPC (de)": 92.28, + "OpusparcusPC (en)": 98.02, + "OpusparcusPC (fi)": 86.16, + "OpusparcusPC (fr)": 89.0, + "OpusparcusPC (ru)": 79.79, + "OpusparcusPC (sv)": 85.77, + "PSC": 96.64, + "PawsXPairClassification (de)": 51.15, + "PawsXPairClassification (en)": 60.36, + "PawsXPairClassification (es)": 52.65, + "PawsXPairClassification (fr)": 56.08, + "PawsXPairClassification (ja)": 49.25, + "PawsXPairClassification (ko)": 50.21, + "PawsXPairClassification (zh)": 53.0, + "SICK-E-PL": 58.41, + "SprintDuplicateQuestions": 94.84, + "TERRa": 47.27, + "TwitterURLCorpus": 86.44 + } + ] + }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-large-v2", + "AlloprofReranking": 65.12, + "RuBQReranking": 47.84, + "SyntecReranking": 75.92, + "T2Reranking": 59.61 + }, + { + "Model": "e5-large-v2", + "MIRACLReranking (ar)": 12.57, + "MIRACLReranking (bn)": 12.42, + "MIRACLReranking (de)": 34.86, + "MIRACLReranking (en)": 57.3, + "MIRACLReranking (es)": 45.36, + "MIRACLReranking (fa)": 12.44, + "MIRACLReranking (fi)": 44.32, + "MIRACLReranking (fr)": 39.0, + "MIRACLReranking (hi)": 11.72, + "MIRACLReranking (id)": 27.98, + "MIRACLReranking (ja)": 14.96, + "MIRACLReranking (ko)": 13.9, + "MIRACLReranking (ru)": 21.61, + "MIRACLReranking (sw)": 30.43, + "MIRACLReranking (te)": 1.79, + "MIRACLReranking (th)": 4.38, + "MIRACLReranking (yo)": 51.77, + "MIRACLReranking (zh)": 12.59 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "e5-large-v2", + "AILACasedocs": 31.23, + "AILAStatutes": 17.63, + "ARCChallenge": 11.29, + "AlloprofRetrieval": 34.13, + "AlphaNLI": 15.16, + "AppsRetrieval": 14.18, + "ArguAna": 46.43, + "BSARDRetrieval": 11.5, + "CmedqaRetrieval": 3.27, + "CodeFeedbackMT": 47.83, + "CodeFeedbackST": 76.16, + "CodeSearchNetCCRetrieval (python)": 67.59, + "CodeSearchNetCCRetrieval (javascript)": 65.18, + "CodeSearchNetCCRetrieval (go)": 47.64, + "CodeSearchNetCCRetrieval (ruby)": 62.31, + "CodeSearchNetCCRetrieval (java)": 63.92, + "CodeSearchNetCCRetrieval (php)": 53.38, + "CodeSearchNetRetrieval (python)": 88.15, + "CodeSearchNetRetrieval (javascript)": 71.79, + "CodeSearchNetRetrieval (go)": 91.6, + "CodeSearchNetRetrieval (ruby)": 80.25, + "CodeSearchNetRetrieval (java)": 83.2, + "CodeSearchNetRetrieval (php)": 84.16, + "CodeTransOceanContest": 65.14, + "CodeTransOceanDL": 32.4, + "CosQA": 32.09, + "CovidRetrieval": 19.59, + "GerDaLIR": 2.66, + "GerDaLIRSmall": 6.47, + "GermanQuAD-Retrieval": 87.74, + "HellaSwag": 27.86, + "LEMBNarrativeQARetrieval": 26.38, + "LEMBQMSumRetrieval": 25.08, + "LEMBSummScreenFDRetrieval": 77.36, + "LEMBWikimQARetrieval": 58.43, + "LeCaRDv2": 22.94, + "LegalBenchConsumerContractsQA": 77.32, + "LegalBenchCorporateLobbying": 91.5, + "LegalQuAD": 27.18, + "LegalSummarization": 59.53, + "MIRACLRetrieval (ar)": 0.44, + "MIRACLRetrieval (bn)": 0.94, + "MIRACLRetrieval (de)": 21.89, + "MIRACLRetrieval (en)": 50.42, + "MIRACLRetrieval (es)": 29.08, + "MIRACLRetrieval (fa)": 0.16, + "MIRACLRetrieval (fi)": 28.1, + "MIRACLRetrieval (fr)": 26.84, + "MIRACLRetrieval (hi)": 1.45, + "MIRACLRetrieval (id)": 17.92, + "MIRACLRetrieval (ja)": 2.38, + "MIRACLRetrieval (ko)": 4.12, + "MIRACLRetrieval (ru)": 6.14, + "MIRACLRetrieval (sw)": 21.81, + "MIRACLRetrieval (te)": 0.04, + "MIRACLRetrieval (th)": 0.21, + "MIRACLRetrieval (yo)": 47.36, + "MIRACLRetrieval (zh)": 0.58, + "MintakaRetrieval (ar)": 5.94, + "MintakaRetrieval (de)": 26.31, + "MintakaRetrieval (es)": 23.76, + "MintakaRetrieval (fr)": 25.45, + "MintakaRetrieval (hi)": 6.51, + "MintakaRetrieval (it)": 19.44, + "MintakaRetrieval (ja)": 10.25, + "MintakaRetrieval (pt)": 23.69, + "PIQA": 29.55, + "Quail": 5.28, + "RARbCode": 58.67, + "RARbMath": 75.51, + "RiaNewsRetrieval": 17.66, + "RuBQRetrieval": 21.29, + "SCIDOCS": 20.5, + "SIQA": 4.07, + "SciFact-PL": 39.68, + "SpartQA": 10.37, + "StackOverflowQA": 89.93, + "SyntecRetrieval": 71.14, + "SyntheticText2SQL": 49.66, + "TRECCOVID": 66.64, + "TRECCOVID-PL": 26.57, + "TempReasonL1": 1.77, + "TempReasonL2Fact": 49.91, + "TempReasonL2Pure": 3.29, + "TempReasonL3Fact": 41.83, + "TempReasonL3Pure": 9.7, + "WinoGrande": 51.52, + "XMarket (de)": 15.44, + "XMarket (en)": 31.88, + "XMarket (es)": 17.15, + "XPQARetrieval (ara-ara)": 14.05, + "XPQARetrieval (eng-ara)": 4.24, + "XPQARetrieval (ara-eng)": 10.86, + "XPQARetrieval (deu-deu)": 57.02, + "XPQARetrieval (eng-deu)": 16.48, + "XPQARetrieval (deu-eng)": 33.15, + "XPQARetrieval (spa-spa)": 45.31, + "XPQARetrieval (eng-spa)": 15.12, + "XPQARetrieval (spa-eng)": 29.12, + "XPQARetrieval (fra-fra)": 53.52, + "XPQARetrieval (eng-fra)": 18.82, + "XPQARetrieval (fra-eng)": 37.01, + "XPQARetrieval (hin-hin)": 39.8, + "XPQARetrieval (eng-hin)": 8.58, + "XPQARetrieval (hin-eng)": 10.09, + "XPQARetrieval (ita-ita)": 57.54, + "XPQARetrieval (eng-ita)": 11.68, + "XPQARetrieval (ita-eng)": 31.21, + "XPQARetrieval (jpn-jpn)": 45.76, + "XPQARetrieval (eng-jpn)": 6.83, + "XPQARetrieval (jpn-eng)": 20.11, + "XPQARetrieval (kor-kor)": 15.97, + "XPQARetrieval (eng-kor)": 8.65, + "XPQARetrieval (kor-eng)": 9.01, + "XPQARetrieval (pol-pol)": 33.84, + "XPQARetrieval (eng-pol)": 12.23, + "XPQARetrieval (pol-eng)": 19.21, + "XPQARetrieval (por-por)": 36.56, + "XPQARetrieval (eng-por)": 11.36, + "XPQARetrieval (por-eng)": 26.54, + "XPQARetrieval (tam-tam)": 10.17, + "XPQARetrieval (eng-tam)": 4.15, + "XPQARetrieval (tam-eng)": 4.9, + "XPQARetrieval (cmn-cmn)": 24.68, + "XPQARetrieval (eng-cmn)": 8.97, + "XPQARetrieval (cmn-eng)": 14.98 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-large-v2", + "CDSC-R": 83.06, + "GermanSTSBenchmark": 69.37, + "RUParaPhraserSTS": 54.89, + "RuSTSBenchmarkSTS": 63.79, + "SICK-R": 79.16, + "SICK-R-PL": 60.7, + "SICKFr": 71.54, + "STS12": 73.55, + "STS13": 80.98, + "STS14": 79.16, + "STS15": 88.29, + "STS17 (it-en)": 38.07, + "STS17 (en-tr)": 1.19, + "STS17 (en-ar)": -1.96, + "STS17 (nl-en)": 39.39, + "STS17 (ko-ko)": 51.73, + "STS17 (en-de)": 61.58, + "STS17 (ar-ar)": 59.75, + "STS17 (es-en)": 50.83, + "STS17 (en-en)": 89.55, + "STS17 (es-es)": 82.16, + "STS17 (fr-en)": 56.29, + "STS22 (es)": 61.36, + "STS22 (es-en)": 71.31, + "STS22 (zh-en)": 39.49, + "STS22 (tr)": 53.57, + "STS22 (ar)": 43.95, + "STS22 (ru)": 34.37, + "STS22 (en)": 67.2, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 50.33, + "STS22 (pl)": 31.98, + "STS22 (es-it)": 65.36, + "STS22 (de)": 38.45, + "STS22 (it)": 71.04, + "STS22 (pl-en)": 48.81, + "STS22 (de-fr)": 60.02, + "STS22 (de-en)": 49.8, + "STS22 (fr)": 78.75, + "STS22 (de-pl)": 40.19, + "STSB": 36.95, + "STSBenchmark": 84.55, + "STSBenchmarkMultilingualSTS (pl)": 62.31, + "STSBenchmarkMultilingualSTS (fr)": 71.98, + "STSBenchmarkMultilingualSTS (de)": 69.73, + "STSBenchmarkMultilingualSTS (en)": 84.55, + "STSBenchmarkMultilingualSTS (nl)": 66.44, + "STSBenchmarkMultilingualSTS (zh)": 37.55, + "STSBenchmarkMultilingualSTS (pt)": 70.38, + "STSBenchmarkMultilingualSTS (es)": 74.51, + "STSBenchmarkMultilingualSTS (it)": 70.29, + "STSBenchmarkMultilingualSTS (ru)": 64.21 + }, + { + "Model": "e5-large-v2", + "CDSC-R": 83.06, + "GermanSTSBenchmark": 69.37, + "RUParaPhraserSTS": 54.89, + "RuSTSBenchmarkSTS": 63.79, + "SICK-R": 79.16, + "SICK-R-PL": 60.7, + "SICKFr": 71.54, + "STS12": 73.55, + "STS13": 80.98, + "STS14": 79.16, + "STS15": 88.29, + "STS17 (it-en)": 38.07, + "STS17 (en-tr)": 1.19, + "STS17 (en-ar)": -1.96, + "STS17 (nl-en)": 39.39, + "STS17 (ko-ko)": 51.73, + "STS17 (en-de)": 61.58, + "STS17 (ar-ar)": 59.75, + "STS17 (es-en)": 50.83, + "STS17 (en-en)": 89.55, + "STS17 (es-es)": 82.16, + "STS17 (fr-en)": 56.29, + "STS22 (es)": 61.36, + "STS22 (es-en)": 71.31, + "STS22 (zh-en)": 39.49, + "STS22 (tr)": 53.57, + "STS22 (ar)": 43.98, + "STS22 (ru)": 34.37, + "STS22 (en)": 67.2, + "STS22 (fr-pl)": 84.52, + "STS22 (zh)": 50.33, + "STS22 (pl)": 31.98, + "STS22 (es-it)": 65.36, + "STS22 (de)": 38.45, + "STS22 (it)": 71.04, + "STS22 (pl-en)": 48.81, + "STS22 (de-fr)": 60.02, + "STS22 (de-en)": 49.8, + "STS22 (fr)": 78.75, + "STS22 (de-pl)": 40.19, + "STSB": 36.95, + "STSBenchmark": 84.55, + "STSBenchmarkMultilingualSTS (pl)": 62.31, + "STSBenchmarkMultilingualSTS (fr)": 71.97, + "STSBenchmarkMultilingualSTS (de)": 69.73, + "STSBenchmarkMultilingualSTS (en)": 84.55, + "STSBenchmarkMultilingualSTS (nl)": 66.44, + "STSBenchmarkMultilingualSTS (zh)": 37.57, + "STSBenchmarkMultilingualSTS (pt)": 70.38, + "STSBenchmarkMultilingualSTS (es)": 74.51, + "STSBenchmarkMultilingualSTS (it)": 70.29, + "STSBenchmarkMultilingualSTS (ru)": 64.21 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-large-v2", + "SummEvalFr": 30.39 + }, + { + "Model": "e5-large-v2", + "SummEvalFr": 30.39 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-large-v2", + "CEDRClassification": 35.55, + "SensitiveTopicsClassification": 18.85 + } + ] }, "InstructionRetrieval": { "p-MRR": [ @@ -8627,9 +23694,19 @@ "Model": "e5-mistral-7b-instruct", "AlloProfClusteringP2P": 61.06, "AlloProfClusteringS2S": 28.12, + "BlurbsClusteringP2P": 47.07, + "BlurbsClusteringS2S": 21.35, "GeoreviewClusteringP2P": 76.32, "HALClusteringS2S": 19.69, + "MLSUMClusteringP2P (de)": 52.88, + "MLSUMClusteringP2P (fr)": 48.33, + "MLSUMClusteringP2P (ru)": 58.06, + "MLSUMClusteringP2P (es)": 50.23, "MLSUMClusteringP2P": 45.59, + "MLSUMClusteringS2S (de)": 52.23, + "MLSUMClusteringS2S (fr)": 47.79, + "MLSUMClusteringS2S (ru)": 56.58, + "MLSUMClusteringS2S (es)": 49.27, "MLSUMClusteringS2S": 32.0, "MasakhaNEWSClusteringP2P (amh)": 47.57, "MasakhaNEWSClusteringP2P (eng)": 71.97, @@ -8664,7 +23741,9 @@ "MasakhaNEWSClusteringS2S (xho)": 26.61, "MasakhaNEWSClusteringS2S (yor)": 63.18, "RuSciBenchGRNTIClusteringP2P": 62.27, - "RuSciBenchOECDClusteringP2P": 54.13 + "RuSciBenchOECDClusteringP2P": 54.13, + "TenKGnadClusteringP2P": 54.92, + "TenKGnadClusteringS2S": 40.21 } ] }, @@ -8673,6 +23752,7 @@ { "Model": "e5-mistral-7b-instruct", "CDSC-E": 75.86, + "FalseFriendsGermanEnglish": 54.05, "OpusparcusPC (de)": 97.63, "OpusparcusPC (en)": 99.1, "OpusparcusPC (fi)": 92.76, @@ -8696,6 +23776,7 @@ { "Model": "e5-mistral-7b-instruct", "CDSC-E": 75.86, + "FalseFriendsGermanEnglish": 54.05, "OpusparcusPC (de)": 97.63, "OpusparcusPC (en)": 99.1, "OpusparcusPC (fi)": 92.76, @@ -8739,7 +23820,24 @@ }, { "Model": "e5-mistral-7b-instruct", - "MIRACLReranking (ru)": 63.61 + "MIRACLReranking (ru)": 63.61, + "MIRACLReranking (ar)": 73.64, + "MIRACLReranking (bn)": 69.72, + "MIRACLReranking (de)": 52.52, + "MIRACLReranking (en)": 62.21, + "MIRACLReranking (es)": 61.1, + "MIRACLReranking (fa)": 54.85, + "MIRACLReranking (fi)": 76.94, + "MIRACLReranking (fr)": 53.64, + "MIRACLReranking (hi)": 58.2, + "MIRACLReranking (id)": 56.08, + "MIRACLReranking (ja)": 62.67, + "MIRACLReranking (ko)": 56.25, + "MIRACLReranking (sw)": 61.5, + "MIRACLReranking (te)": 74.42, + "MIRACLReranking (th)": 70.8, + "MIRACLReranking (yo)": 66.72, + "MIRACLReranking (zh)": 49.74 } ] }, @@ -8791,7 +23889,10 @@ "EcomRetrieval": 45.94, "FiQA-PL": 35.34, "FiQA2018": 56.81, + "GerDaLIR": 7.22, "GerDaLIRSmall": 37.18, + "GermanDPR": 84.25, + "GermanQuAD-Retrieval": 95.21, "HellaSwag": 35.37, "LEMBNarrativeQARetrieval": 44.62, "LEMBQMSumRetrieval": 43.63, @@ -8841,6 +23942,9 @@ "Touche2020": 26.27, "VideoRetrieval": 45.34, "WinoGrande": 39.51, + "XMarket (de)": 26.33, + "XMarket (en)": 32.82, + "XMarket (es)": 27.14, "XPQARetrieval (ara-ara)": 45.94, "XPQARetrieval (eng-ara)": 30.38, "XPQARetrieval (ara-eng)": 41.53, @@ -8903,6 +24007,7 @@ "BIOSSES": 85.5, "BQ": 50.63, "CDSC-R": 92.19, + "GermanSTSBenchmark": 85.11, "LCQMC": 75.48, "PAWSX": 16.81, "RUParaPhraserSTS": 76.17, @@ -8927,6 +24032,23 @@ "STS17 (fr-en)": 88.08, "STS17 (it-en)": 89.69, "STS22 (ru)": 60.83, + "STS22 (de)": 49.12, + "STS22 (tr)": 68.72, + "STS22 (en)": 67.3, + "STS22 (it)": 75.88, + "STS22 (pl-en)": 73.18, + "STS22 (es)": 66.98, + "STS22 (es-it)": 75.05, + "STS22 (de-fr)": 61.39, + "STS22 (ar)": 54.12, + "STS22 (pl)": 39.19, + "STS22 (zh-en)": 71.9, + "STS22 (fr-pl)": 84.52, + "STS22 (fr)": 79.83, + "STS22 (de-en)": 60.92, + "STS22 (es-en)": 75.85, + "STS22 (de-pl)": 54.47, + "STS22 (zh)": 63.38, "STSB": 81.81, "STSBenchmark": 88.6, "STSBenchmarkMultilingualSTS (pl)": 83.62, @@ -8947,6 +24069,7 @@ "BIOSSES": 85.5, "BQ": 50.63, "CDSC-R": 92.19, + "GermanSTSBenchmark": 85.11, "LCQMC": 75.48, "PAWSX": 16.81, "RUParaPhraserSTS": 76.17, @@ -8971,6 +24094,23 @@ "STS17 (fr-en)": 88.08, "STS17 (it-en)": 89.69, "STS22 (ru)": 60.83, + "STS22 (de)": 49.12, + "STS22 (tr)": 68.72, + "STS22 (en)": 67.3, + "STS22 (it)": 75.88, + "STS22 (pl-en)": 73.18, + "STS22 (es)": 66.98, + "STS22 (es-it)": 75.05, + "STS22 (de-fr)": 61.39, + "STS22 (ar)": 54.12, + "STS22 (pl)": 39.19, + "STS22 (zh-en)": 71.9, + "STS22 (fr-pl)": 84.52, + "STS22 (fr)": 79.83, + "STS22 (de-en)": 60.92, + "STS22 (es-en)": 75.85, + "STS22 (de-pl)": 54.47, + "STS22 (zh)": 63.38, "STSB": 81.81, "STSBenchmark": 88.6, "STSBenchmarkMultilingualSTS (pl)": 83.62, @@ -9086,7 +24226,119 @@ "f1": [ { "Model": "e5-small", - "BornholmBitextMining": 40.27 + "BornholmBitextMining": 40.27, + "Tatoeba (dsb-eng)": 6.51, + "Tatoeba (spa-eng)": 35.33, + "Tatoeba (kat-eng)": 0.42, + "Tatoeba (lvs-eng)": 4.56, + "Tatoeba (afr-eng)": 8.44, + "Tatoeba (vie-eng)": 5.09, + "Tatoeba (pes-eng)": 0.6, + "Tatoeba (epo-eng)": 15.45, + "Tatoeba (kzj-eng)": 5.09, + "Tatoeba (ido-eng)": 17.98, + "Tatoeba (nob-eng)": 13.56, + "Tatoeba (dan-eng)": 13.05, + "Tatoeba (yid-eng)": 0.31, + "Tatoeba (slv-eng)": 6.94, + "Tatoeba (bul-eng)": 0.42, + "Tatoeba (jpn-eng)": 0.3, + "Tatoeba (yue-eng)": 1.1, + "Tatoeba (pms-eng)": 14.19, + "Tatoeba (tuk-eng)": 4.07, + "Tatoeba (pol-eng)": 6.56, + "Tatoeba (srp-eng)": 3.72, + "Tatoeba (glg-eng)": 24.9, + "Tatoeba (tzl-eng)": 17.55, + "Tatoeba (aze-eng)": 4.19, + "Tatoeba (csb-eng)": 6.78, + "Tatoeba (lfn-eng)": 15.24, + "Tatoeba (uzb-eng)": 3.58, + "Tatoeba (gla-eng)": 2.72, + "Tatoeba (tgl-eng)": 6.37, + "Tatoeba (ceb-eng)": 5.59, + "Tatoeba (hye-eng)": 0.4, + "Tatoeba (hin-eng)": 0.01, + "Tatoeba (mon-eng)": 1.82, + "Tatoeba (swh-eng)": 6.44, + "Tatoeba (hrv-eng)": 8.26, + "Tatoeba (kaz-eng)": 0.54, + "Tatoeba (awa-eng)": 0.01, + "Tatoeba (cmn-eng)": 1.62, + "Tatoeba (fao-eng)": 8.66, + "Tatoeba (fin-eng)": 3.82, + "Tatoeba (zsm-eng)": 7.91, + "Tatoeba (mhr-eng)": 0.17, + "Tatoeba (ita-eng)": 20.04, + "Tatoeba (fra-eng)": 30.19, + "Tatoeba (nno-eng)": 10.77, + "Tatoeba (isl-eng)": 5.86, + "Tatoeba (lat-eng)": 10.38, + "Tatoeba (dtp-eng)": 3.59, + "Tatoeba (tha-eng)": 0.85, + "Tatoeba (wuu-eng)": 1.32, + "Tatoeba (orv-eng)": 0.09, + "Tatoeba (gsw-eng)": 14.93, + "Tatoeba (mar-eng)": 0.0, + "Tatoeba (nov-eng)": 30.82, + "Tatoeba (uig-eng)": 0.54, + "Tatoeba (est-eng)": 3.51, + "Tatoeba (ben-eng)": 0.0, + "Tatoeba (urd-eng)": 0.01, + "Tatoeba (kur-eng)": 8.01, + "Tatoeba (swe-eng)": 9.28, + "Tatoeba (ber-eng)": 5.33, + "Tatoeba (hsb-eng)": 6.14, + "Tatoeba (tat-eng)": 0.85, + "Tatoeba (mal-eng)": 0.15, + "Tatoeba (khm-eng)": 0.14, + "Tatoeba (slk-eng)": 6.65, + "Tatoeba (swg-eng)": 10.97, + "Tatoeba (cbk-eng)": 19.13, + "Tatoeba (ina-eng)": 38.44, + "Tatoeba (ara-eng)": 0.38, + "Tatoeba (ron-eng)": 13.08, + "Tatoeba (max-eng)": 10.19, + "Tatoeba (oci-eng)": 13.94, + "Tatoeba (eus-eng)": 7.24, + "Tatoeba (tam-eng)": 0.7, + "Tatoeba (ell-eng)": 0.5, + "Tatoeba (bos-eng)": 7.77, + "Tatoeba (heb-eng)": 0.3, + "Tatoeba (arq-eng)": 0.44, + "Tatoeba (kab-eng)": 1.36, + "Tatoeba (ile-eng)": 25.58, + "Tatoeba (cym-eng)": 6.71, + "Tatoeba (tur-eng)": 3.97, + "Tatoeba (bel-eng)": 1.47, + "Tatoeba (ukr-eng)": 0.86, + "Tatoeba (hun-eng)": 4.62, + "Tatoeba (cat-eng)": 22.05, + "Tatoeba (pam-eng)": 4.7, + "Tatoeba (cha-eng)": 17.69, + "Tatoeba (sqi-eng)": 7.01, + "Tatoeba (mkd-eng)": 0.11, + "Tatoeba (lit-eng)": 3.47, + "Tatoeba (jav-eng)": 4.78, + "Tatoeba (ces-eng)": 4.36, + "Tatoeba (rus-eng)": 0.17, + "Tatoeba (deu-eng)": 20.28, + "Tatoeba (xho-eng)": 3.71, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (ind-eng)": 6.89, + "Tatoeba (kor-eng)": 1.18, + "Tatoeba (arz-eng)": 0.06, + "Tatoeba (por-eng)": 32.48, + "Tatoeba (gle-eng)": 3.99, + "Tatoeba (nld-eng)": 17.28, + "Tatoeba (cor-eng)": 2.93, + "Tatoeba (fry-eng)": 15.8, + "Tatoeba (bre-eng)": 3.76, + "Tatoeba (tel-eng)": 0.43, + "Tatoeba (nds-eng)": 14.38, + "Tatoeba (ast-eng)": 26.43, + "Tatoeba (ang-eng)": 19.24, + "Tatoeba (war-eng)": 6.15 } ] }, @@ -9094,19 +24346,165 @@ "accuracy": [ { "Model": "e5-small", + "AllegroReviews": 25.72, + "AmazonCounterfactualClassification (en-ext)": 76.9, + "AmazonCounterfactualClassification (en)": 73.42, + "AmazonCounterfactualClassification (de)": 56.98, + "AmazonCounterfactualClassification (ja)": 56.66, + "AmazonReviewsClassification (en)": 41.5, + "AmazonReviewsClassification (de)": 29.2, + "AmazonReviewsClassification (es)": 35.79, + "AmazonReviewsClassification (fr)": 32.31, + "AmazonReviewsClassification (ja)": 21.99, + "AmazonReviewsClassification (zh)": 22.37, "AngryTweetsClassification": 43.6, + "CBD": 51.9, "DKHateClassification": 57.57, "DanishPoliticalCommentsClassification": 28.37, + "GeoreviewClassification": 27.15, + "HeadlineClassification": 28.01, + "InappropriatenessClassification": 51.46, + "KinopoiskClassification": 33.59, "LccSentimentClassification": 40.27, + "MTOPDomainClassification (en)": 87.88, + "MTOPDomainClassification (de)": 70.66, + "MTOPDomainClassification (es)": 80.16, + "MTOPDomainClassification (fr)": 74.2, + "MTOPDomainClassification (hi)": 33.51, + "MTOPDomainClassification (th)": 16.14, + "MTOPIntentClassification (en)": 52.97, + "MTOPIntentClassification (de)": 41.29, + "MTOPIntentClassification (es)": 41.87, + "MTOPIntentClassification (fr)": 37.43, + "MTOPIntentClassification (hi)": 12.14, + "MTOPIntentClassification (th)": 3.94, + "MasakhaNEWSClassification (amh)": 30.96, + "MasakhaNEWSClassification (eng)": 74.17, + "MasakhaNEWSClassification (fra)": 73.36, + "MasakhaNEWSClassification (hau)": 58.1, + "MasakhaNEWSClassification (ibo)": 52.49, + "MasakhaNEWSClassification (lin)": 65.71, + "MasakhaNEWSClassification (lug)": 56.23, + "MasakhaNEWSClassification (orm)": 57.88, + "MasakhaNEWSClassification (pcm)": 91.25, + "MasakhaNEWSClassification (run)": 59.1, + "MasakhaNEWSClassification (sna)": 73.41, + "MasakhaNEWSClassification (som)": 50.07, + "MasakhaNEWSClassification (swa)": 49.58, + "MasakhaNEWSClassification (tir)": 26.58, + "MasakhaNEWSClassification (xho)": 58.99, + "MasakhaNEWSClassification (yor)": 63.94, + "MassiveIntentClassification (fr)": 43.86, "MassiveIntentClassification (da)": 41.89, - "MassiveIntentClassification (nb)": 40.25, + "MassiveIntentClassification (zh-CN)": 15.28, + "MassiveIntentClassification (pt)": 43.79, + "MassiveIntentClassification (ml)": 2.43, + "MassiveIntentClassification (tr)": 40.61, + "MassiveIntentClassification (jv)": 37.53, + "MassiveIntentClassification (ka)": 5.84, + "MassiveIntentClassification (hu)": 38.95, + "MassiveIntentClassification (am)": 2.33, + "MassiveIntentClassification (sq)": 40.53, + "MassiveIntentClassification (sl)": 38.93, + "MassiveIntentClassification (ko)": 14.25, + "MassiveIntentClassification (hi)": 12.03, + "MassiveIntentClassification (vi)": 34.35, + "MassiveIntentClassification (af)": 37.7, + "MassiveIntentClassification (he)": 14.19, + "MassiveIntentClassification (pl)": 37.54, + "MassiveIntentClassification (km)": 5.02, + "MassiveIntentClassification (de)": 39.86, + "MassiveIntentClassification (ru)": 13.65, + "MassiveIntentClassification (ta)": 9.05, + "MassiveIntentClassification (my)": 3.72, + "MassiveIntentClassification (hy)": 5.85, + "MassiveIntentClassification (zh-TW)": 15.31, + "MassiveIntentClassification (id)": 41.2, + "MassiveIntentClassification (fa)": 16.92, + "MassiveIntentClassification (az)": 36.22, + "MassiveIntentClassification (it)": 42.07, "MassiveIntentClassification (sv)": 40.07, - "MassiveScenarioClassification (da)": 49.93, - "MassiveScenarioClassification (nb)": 48.58, + "MassiveIntentClassification (es)": 42.88, + "MassiveIntentClassification (lv)": 39.91, + "MassiveIntentClassification (ro)": 41.5, + "MassiveIntentClassification (nl)": 40.9, + "MassiveIntentClassification (en)": 64.47, + "MassiveIntentClassification (ja)": 17.46, + "MassiveIntentClassification (mn)": 13.5, + "MassiveIntentClassification (is)": 35.37, + "MassiveIntentClassification (tl)": 38.77, + "MassiveIntentClassification (fi)": 39.24, + "MassiveIntentClassification (ur)": 7.04, + "MassiveIntentClassification (th)": 8.9, + "MassiveIntentClassification (kn)": 2.98, + "MassiveIntentClassification (ms)": 37.52, + "MassiveIntentClassification (ar)": 10.6, + "MassiveIntentClassification (cy)": 35.74, + "MassiveIntentClassification (nb)": 40.25, + "MassiveIntentClassification (el)": 17.22, + "MassiveIntentClassification (te)": 2.06, + "MassiveIntentClassification (sw)": 37.87, + "MassiveIntentClassification (bn)": 8.06, + "MassiveScenarioClassification (af)": 45.65, + "MassiveScenarioClassification (lv)": 45.66, + "MassiveScenarioClassification (bn)": 13.57, + "MassiveScenarioClassification (hu)": 45.63, + "MassiveScenarioClassification (sq)": 49.52, + "MassiveScenarioClassification (de)": 51.82, + "MassiveScenarioClassification (kn)": 7.95, + "MassiveScenarioClassification (az)": 44.79, "MassiveScenarioClassification (sv)": 47.06, + "MassiveScenarioClassification (ru)": 18.39, + "MassiveScenarioClassification (vi)": 40.19, + "MassiveScenarioClassification (ur)": 14.5, + "MassiveScenarioClassification (ta)": 14.99, + "MassiveScenarioClassification (fi)": 45.24, + "MassiveScenarioClassification (ka)": 10.16, + "MassiveScenarioClassification (sl)": 44.26, + "MassiveScenarioClassification (cy)": 42.34, + "MassiveScenarioClassification (km)": 9.27, + "MassiveScenarioClassification (sw)": 45.98, + "MassiveScenarioClassification (tr)": 45.9, + "MassiveScenarioClassification (zh-TW)": 23.24, + "MassiveScenarioClassification (he)": 16.06, + "MassiveScenarioClassification (ms)": 45.47, + "MassiveScenarioClassification (en)": 68.78, + "MassiveScenarioClassification (is)": 42.96, + "MassiveScenarioClassification (tl)": 48.34, + "MassiveScenarioClassification (mn)": 19.67, + "MassiveScenarioClassification (el)": 26.04, + "MassiveScenarioClassification (my)": 10.53, + "MassiveScenarioClassification (nl)": 49.09, + "MassiveScenarioClassification (th)": 17.88, + "MassiveScenarioClassification (it)": 50.68, + "MassiveScenarioClassification (ko)": 19.59, + "MassiveScenarioClassification (id)": 45.81, + "MassiveScenarioClassification (es)": 52.79, + "MassiveScenarioClassification (nb)": 48.58, + "MassiveScenarioClassification (ro)": 51.69, + "MassiveScenarioClassification (hy)": 13.0, + "MassiveScenarioClassification (fa)": 21.99, + "MassiveScenarioClassification (da)": 49.93, + "MassiveScenarioClassification (pt)": 52.65, + "MassiveScenarioClassification (jv)": 45.73, + "MassiveScenarioClassification (ml)": 6.53, + "MassiveScenarioClassification (te)": 7.26, + "MassiveScenarioClassification (fr)": 55.22, + "MassiveScenarioClassification (hi)": 18.9, + "MassiveScenarioClassification (pl)": 45.06, + "MassiveScenarioClassification (am)": 7.25, + "MassiveScenarioClassification (zh-CN)": 22.16, + "MassiveScenarioClassification (ar)": 17.93, + "MassiveScenarioClassification (ja)": 23.45, "NoRecClassification": 41.84, "NordicLangClassification": 53.47, "NorwegianParliament": 56.57, + "PAC": 64.07, + "PolEmo2.0-IN": 41.3, + "PolEmo2.0-OUT": 26.8, + "RuReviewsClassification": 39.68, + "RuSciBenchGRNTIClassification": 8.39, + "RuSciBenchOECDClassification": 7.24, "ScalaDaClassification": 50.15, "ScalaNbClassification": 50.03 } @@ -9116,38 +24514,386 @@ "v_measure": [ { "Model": "e5-small", + "AlloProfClusteringP2P": 59.37, + "AlloProfClusteringS2S": 31.27, "BiorxivClusteringP2P": 36.1, "BiorxivClusteringS2S": 31.51, + "BlurbsClusteringP2P": 24.39, + "BlurbsClusteringS2S": 9.82, + "GeoreviewClusteringP2P": 17.95, + "HALClusteringS2S": 20.02, + "MLSUMClusteringP2P (de)": 34.5, + "MLSUMClusteringP2P (fr)": 39.55, + "MLSUMClusteringP2P (ru)": 20.5, + "MLSUMClusteringP2P (es)": 41.18, + "MLSUMClusteringS2S (de)": 37.15, + "MLSUMClusteringS2S (fr)": 39.48, + "MLSUMClusteringS2S (ru)": 21.72, + "MLSUMClusteringS2S (es)": 40.83, + "MasakhaNEWSClusteringP2P (amh)": 40.12, + "MasakhaNEWSClusteringP2P (eng)": 48.25, + "MasakhaNEWSClusteringP2P (fra)": 37.37, + "MasakhaNEWSClusteringP2P (hau)": 36.18, + "MasakhaNEWSClusteringP2P (ibo)": 32.4, + "MasakhaNEWSClusteringP2P (lin)": 61.22, + "MasakhaNEWSClusteringP2P (lug)": 49.59, + "MasakhaNEWSClusteringP2P (orm)": 34.6, + "MasakhaNEWSClusteringP2P (pcm)": 81.16, + "MasakhaNEWSClusteringP2P (run)": 51.16, + "MasakhaNEWSClusteringP2P (sna)": 46.57, + "MasakhaNEWSClusteringP2P (som)": 36.83, + "MasakhaNEWSClusteringP2P (swa)": 20.68, + "MasakhaNEWSClusteringP2P (tir)": 42.93, + "MasakhaNEWSClusteringP2P (xho)": 35.15, + "MasakhaNEWSClusteringP2P (yor)": 36.69, + "MasakhaNEWSClusteringS2S (amh)": 45.0, + "MasakhaNEWSClusteringS2S (eng)": 45.62, + "MasakhaNEWSClusteringS2S (fra)": 32.27, + "MasakhaNEWSClusteringS2S (hau)": 16.48, + "MasakhaNEWSClusteringS2S (ibo)": 38.94, + "MasakhaNEWSClusteringS2S (lin)": 54.36, + "MasakhaNEWSClusteringS2S (lug)": 46.99, + "MasakhaNEWSClusteringS2S (orm)": 25.01, + "MasakhaNEWSClusteringS2S (pcm)": 55.79, + "MasakhaNEWSClusteringS2S (run)": 50.72, + "MasakhaNEWSClusteringS2S (sna)": 41.33, + "MasakhaNEWSClusteringS2S (som)": 28.2, + "MasakhaNEWSClusteringS2S (swa)": 16.31, + "MasakhaNEWSClusteringS2S (tir)": 43.0, + "MasakhaNEWSClusteringS2S (xho)": 23.47, + "MasakhaNEWSClusteringS2S (yor)": 34.42, "MedrxivClusteringP2P": 31.31, "MedrxivClusteringS2S": 28.32, "RedditClustering": 43.27, "RedditClusteringP2P": 57.22, + "RuSciBenchGRNTIClusteringP2P": 12.29, + "RuSciBenchOECDClusteringP2P": 11.19, "StackExchangeClustering": 59.6, "StackExchangeClusteringP2P": 30.82, + "TenKGnadClusteringP2P": 37.23, + "TenKGnadClusteringS2S": 16.54, "TwentyNewsgroupsClustering": 37.65 } ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "e5-small", + "CDSC-E": 66.65, + "FalseFriendsGermanEnglish": 47.65, + "OpusparcusPC (de)": 91.65, + "OpusparcusPC (en)": 98.34, + "OpusparcusPC (fi)": 86.59, + "OpusparcusPC (fr)": 87.26, + "OpusparcusPC (ru)": 78.63, + "OpusparcusPC (sv)": 85.48, + "PSC": 90.03, + "PawsXPairClassification (de)": 50.72, + "PawsXPairClassification (en)": 60.62, + "PawsXPairClassification (es)": 53.6, + "PawsXPairClassification (fr)": 55.22, + "PawsXPairClassification (ja)": 48.14, + "PawsXPairClassification (ko)": 52.35, + "PawsXPairClassification (zh)": 53.36, + "SICK-E-PL": 52.6, + "TERRa": 46.91 + }, + { + "Model": "e5-small", + "CDSC-E": 66.65, + "FalseFriendsGermanEnglish": 47.65, + "OpusparcusPC (de)": 91.65, + "OpusparcusPC (en)": 98.34, + "OpusparcusPC (fi)": 86.59, + "OpusparcusPC (fr)": 87.26, + "OpusparcusPC (ru)": 78.63, + "OpusparcusPC (sv)": 85.48, + "PSC": 90.03, + "PawsXPairClassification (de)": 51.15, + "PawsXPairClassification (en)": 60.82, + "PawsXPairClassification (es)": 53.6, + "PawsXPairClassification (fr)": 55.29, + "PawsXPairClassification (ja)": 48.31, + "PawsXPairClassification (ko)": 52.57, + "PawsXPairClassification (zh)": 53.47, + "SICK-E-PL": 52.68, + "TERRa": 46.91 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "e5-small", + "AlloprofReranking": 59.86, + "RuBQReranking": 17.63, + "SyntecReranking": 75.29, + "T2Reranking": 58.81 + }, + { + "Model": "e5-small", + "MIRACLReranking (ar)": 2.85, + "MIRACLReranking (bn)": 2.19, + "MIRACLReranking (de)": 21.97, + "MIRACLReranking (en)": 51.23, + "MIRACLReranking (es)": 39.23, + "MIRACLReranking (fa)": 4.9, + "MIRACLReranking (fi)": 38.89, + "MIRACLReranking (fr)": 30.5, + "MIRACLReranking (hi)": 4.77, + "MIRACLReranking (id)": 23.31, + "MIRACLReranking (ja)": 5.82, + "MIRACLReranking (ko)": 6.92, + "MIRACLReranking (ru)": 3.22, + "MIRACLReranking (sw)": 33.97, + "MIRACLReranking (te)": 5.39, + "MIRACLReranking (th)": 3.67, + "MIRACLReranking (yo)": 56.21, + "MIRACLReranking (zh)": 6.9 + } + ] }, "Retrieval": { - "ndcg_at_10": [] + "ndcg_at_10": [ + { + "Model": "e5-small", + "AILACasedocs": 22.47, + "AILAStatutes": 20.78, + "ARCChallenge": 6.21, + "AlloprofRetrieval": 27.41, + "AlphaNLI": 15.1, + "AppsRetrieval": 4.32, + "BSARDRetrieval": 9.94, + "CmedqaRetrieval": 2.09, + "CodeFeedbackMT": 36.47, + "CodeFeedbackST": 66.82, + "CodeSearchNetCCRetrieval (python)": 55.19, + "CodeSearchNetCCRetrieval (javascript)": 50.9, + "CodeSearchNetCCRetrieval (go)": 35.3, + "CodeSearchNetCCRetrieval (ruby)": 51.14, + "CodeSearchNetCCRetrieval (java)": 47.64, + "CodeSearchNetCCRetrieval (php)": 35.44, + "CodeSearchNetRetrieval (python)": 78.07, + "CodeSearchNetRetrieval (javascript)": 60.72, + "CodeSearchNetRetrieval (go)": 75.26, + "CodeSearchNetRetrieval (ruby)": 70.33, + "CodeSearchNetRetrieval (java)": 57.51, + "CodeSearchNetRetrieval (php)": 68.6, + "CodeTransOceanContest": 45.87, + "CodeTransOceanDL": 27.67, + "CosQA": 29.9, + "CovidRetrieval": 1.51, + "GerDaLIR": 2.15, + "GerDaLIRSmall": 5.14, + "GermanQuAD-Retrieval": 74.27, + "HellaSwag": 22.97, + "LEMBNarrativeQARetrieval": 18.34, + "LEMBQMSumRetrieval": 20.99, + "LEMBSummScreenFDRetrieval": 66.38, + "LEMBWikimQARetrieval": 46.75, + "LeCaRDv2": 15.35, + "LegalBenchConsumerContractsQA": 73.83, + "LegalBenchCorporateLobbying": 90.21, + "LegalQuAD": 20.49, + "LegalSummarization": 54.25, + "MIRACLRetrieval (ar)": 0.02, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 12.21, + "MIRACLRetrieval (en)": 43.04, + "MIRACLRetrieval (es)": 26.61, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 23.91, + "MIRACLRetrieval (fr)": 21.55, + "MIRACLRetrieval (hi)": 0.18, + "MIRACLRetrieval (id)": 13.54, + "MIRACLRetrieval (ja)": 0.6, + "MIRACLRetrieval (ko)": 2.78, + "MIRACLRetrieval (ru)": 0.21, + "MIRACLRetrieval (sw)": 25.66, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.36, + "MIRACLRetrieval (yo)": 52.12, + "MIRACLRetrieval (zh)": 0.0, + "MintakaRetrieval (ar)": 0.64, + "MintakaRetrieval (de)": 17.45, + "MintakaRetrieval (es)": 19.14, + "MintakaRetrieval (fr)": 20.03, + "MintakaRetrieval (hi)": 2.28, + "MintakaRetrieval (it)": 16.7, + "MintakaRetrieval (ja)": 3.39, + "MintakaRetrieval (pt)": 18.38, + "PIQA": 20.91, + "Quail": 4.1, + "RARbCode": 34.44, + "RARbMath": 61.73, + "RiaNewsRetrieval": 1.35, + "RuBQRetrieval": 1.48, + "SIQA": 2.65, + "SciFact-PL": 34.48, + "SpartQA": 4.32, + "StackOverflowQA": 71.5, + "SyntecRetrieval": 66.58, + "SyntheticText2SQL": 50.95, + "TRECCOVID-PL": 23.8, + "TempReasonL1": 1.7, + "TempReasonL2Fact": 28.06, + "TempReasonL2Pure": 0.47, + "TempReasonL3Fact": 24.34, + "TempReasonL3Pure": 3.4, + "WinoGrande": 46.73, + "XMarket (de)": 15.22, + "XMarket (en)": 31.36, + "XMarket (es)": 17.83, + "XPQARetrieval (ara-ara)": 5.91, + "XPQARetrieval (eng-ara)": 3.32, + "XPQARetrieval (ara-eng)": 7.1, + "XPQARetrieval (deu-deu)": 56.84, + "XPQARetrieval (eng-deu)": 13.47, + "XPQARetrieval (deu-eng)": 25.24, + "XPQARetrieval (spa-spa)": 38.23, + "XPQARetrieval (eng-spa)": 13.27, + "XPQARetrieval (spa-eng)": 19.29, + "XPQARetrieval (fra-fra)": 47.45, + "XPQARetrieval (eng-fra)": 15.06, + "XPQARetrieval (fra-eng)": 24.79, + "XPQARetrieval (hin-hin)": 22.81, + "XPQARetrieval (eng-hin)": 5.31, + "XPQARetrieval (hin-eng)": 5.66, + "XPQARetrieval (ita-ita)": 55.62, + "XPQARetrieval (eng-ita)": 10.4, + "XPQARetrieval (ita-eng)": 20.19, + "XPQARetrieval (jpn-jpn)": 26.1, + "XPQARetrieval (eng-jpn)": 4.86, + "XPQARetrieval (jpn-eng)": 15.59, + "XPQARetrieval (kor-kor)": 6.89, + "XPQARetrieval (eng-kor)": 8.11, + "XPQARetrieval (kor-eng)": 7.33, + "XPQARetrieval (pol-pol)": 31.37, + "XPQARetrieval (eng-pol)": 10.7, + "XPQARetrieval (pol-eng)": 15.43, + "XPQARetrieval (por-por)": 31.21, + "XPQARetrieval (eng-por)": 10.95, + "XPQARetrieval (por-eng)": 19.74, + "XPQARetrieval (tam-tam)": 7.42, + "XPQARetrieval (eng-tam)": 4.98, + "XPQARetrieval (tam-eng)": 4.79, + "XPQARetrieval (cmn-cmn)": 19.09, + "XPQARetrieval (eng-cmn)": 5.85, + "XPQARetrieval (cmn-eng)": 9.93 + } + ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-small", + "CDSC-R": 84.87, + "GermanSTSBenchmark": 65.29, + "RUParaPhraserSTS": 31.91, + "RuSTSBenchmarkSTS": 40.23, + "SICK-R-PL": 57.14, + "SICKFr": 68.03, + "STS22 (de-en)": 43.7, + "STS22 (tr)": 54.58, + "STS22 (es-it)": 57.13, + "STS22 (es-en)": 66.1, + "STS22 (de-fr)": 50.67, + "STS22 (es)": 60.03, + "STS22 (ru)": 18.1, + "STS22 (fr)": 77.03, + "STS22 (pl)": 26.18, + "STS22 (de-pl)": 32.33, + "STS22 (en)": 64.77, + "STS22 (de)": 41.84, + "STS22 (ar)": 28.26, + "STS22 (fr-pl)": 84.52, + "STS22 (pl-en)": 49.23, + "STS22 (zh)": 36.76, + "STS22 (zh-en)": 21.97, + "STS22 (it)": 71.58, + "STSB": 30.14, + "STSBenchmarkMultilingualSTS (es)": 73.64, + "STSBenchmarkMultilingualSTS (fr)": 70.47, + "STSBenchmarkMultilingualSTS (zh)": 35.3, + "STSBenchmarkMultilingualSTS (ru)": 40.24, + "STSBenchmarkMultilingualSTS (pt)": 71.46, + "STSBenchmarkMultilingualSTS (nl)": 65.35, + "STSBenchmarkMultilingualSTS (de)": 65.97, + "STSBenchmarkMultilingualSTS (pl)": 61.07, + "STSBenchmarkMultilingualSTS (it)": 68.17, + "STSBenchmarkMultilingualSTS (en)": 86.63 + }, + { + "Model": "e5-small", + "CDSC-R": 84.87, + "GermanSTSBenchmark": 65.29, + "RUParaPhraserSTS": 31.9, + "RuSTSBenchmarkSTS": 40.23, + "SICK-R-PL": 57.14, + "SICKFr": 68.03, + "STS22 (de-en)": 43.7, + "STS22 (tr)": 54.58, + "STS22 (es-it)": 57.13, + "STS22 (es-en)": 66.1, + "STS22 (de-fr)": 50.67, + "STS22 (es)": 60.03, + "STS22 (ru)": 18.1, + "STS22 (fr)": 77.03, + "STS22 (pl)": 26.4, + "STS22 (de-pl)": 32.33, + "STS22 (en)": 64.77, + "STS22 (de)": 41.84, + "STS22 (ar)": 28.24, + "STS22 (fr-pl)": 84.52, + "STS22 (pl-en)": 49.23, + "STS22 (zh)": 36.76, + "STS22 (zh-en)": 21.97, + "STS22 (it)": 71.58, + "STSB": 30.15, + "STSBenchmarkMultilingualSTS (es)": 73.64, + "STSBenchmarkMultilingualSTS (fr)": 70.47, + "STSBenchmarkMultilingualSTS (zh)": 35.3, + "STSBenchmarkMultilingualSTS (ru)": 40.24, + "STSBenchmarkMultilingualSTS (pt)": 71.46, + "STSBenchmarkMultilingualSTS (nl)": 65.35, + "STSBenchmarkMultilingualSTS (de)": 65.97, + "STSBenchmarkMultilingualSTS (pl)": 61.07, + "STSBenchmarkMultilingualSTS (it)": 68.17, + "STSBenchmarkMultilingualSTS (en)": 86.63 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "e5-small", + "SummEvalFr": 30.79 + }, + { + "Model": "e5-small", + "SummEvalFr": 30.79 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "e5-small", + "CEDRClassification": 31.09, + "SensitiveTopicsClassification": 17.42 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "e5-small", + "Core17InstructionRetrieval": -0.54, + "News21InstructionRetrieval": 0.87, + "Robust04InstructionRetrieval": -4.69 + } + ] } }, "intfloat__multilingual-e5-base": { @@ -9462,13 +25208,21 @@ "ArxivClusteringS2S": 36.0, "BiorxivClusteringP2P": 37.55, "BiorxivClusteringS2S": 30.33, + "BlurbsClusteringP2P": 38.35, + "BlurbsClusteringS2S": 16.25, "CLSClusteringP2P": 32.41, "CLSClusteringS2S": 36.99, "GeoreviewClusteringP2P": 54.46, "HALClusteringS2S": 22.48, "MLSUMClusteringP2P (ru)": 43.47, + "MLSUMClusteringP2P (de)": 38.86, + "MLSUMClusteringP2P (fr)": 43.53, + "MLSUMClusteringP2P (es)": 47.58, "MLSUMClusteringP2P": 43.48, "MLSUMClusteringS2S (ru)": 40.87, + "MLSUMClusteringS2S (de)": 37.63, + "MLSUMClusteringS2S (fr)": 44.01, + "MLSUMClusteringS2S (es)": 47.19, "MLSUMClusteringS2S": 38.53, "MasakhaNEWSClusteringP2P (amh)": 58.05, "MasakhaNEWSClusteringP2P (eng)": 43.8, @@ -9510,6 +25264,8 @@ "RuSciBenchOECDClusteringP2P": 44.79, "StackExchangeClustering": 55.31, "StackExchangeClusteringP2P": 33.51, + "TenKGnadClusteringP2P": 41.92, + "TenKGnadClusteringS2S": 33.01, "ThuNewsClusteringP2P": 40.98, "ThuNewsClusteringS2S": 52.36, "TwentyNewsgroupsClustering": 35.55 @@ -9521,6 +25277,7 @@ { "Model": "multilingual-e5-base", "CDSC-E": 72.7, + "FalseFriendsGermanEnglish": 51.62, "OpusparcusPC (de)": 95.83, "OpusparcusPC (en)": 98.71, "OpusparcusPC (fi)": 90.3, @@ -9545,6 +25302,7 @@ "Model": "multilingual-e5-base", "CDSC-E": 72.67, "Cmnli": 74.98, + "FalseFriendsGermanEnglish": 51.64, "Ocnli": 60.47, "OpusparcusPC (de)": 95.83, "OpusparcusPC (en)": 98.71, @@ -9598,7 +25356,24 @@ }, { "Model": "multilingual-e5-base", - "MIRACLReranking (ru)": 60.47 + "MIRACLReranking (ru)": 60.47, + "MIRACLReranking (ar)": 73.8, + "MIRACLReranking (bn)": 70.78, + "MIRACLReranking (de)": 50.26, + "MIRACLReranking (en)": 56.97, + "MIRACLReranking (es)": 60.25, + "MIRACLReranking (fa)": 57.36, + "MIRACLReranking (fi)": 77.01, + "MIRACLReranking (fr)": 50.27, + "MIRACLReranking (hi)": 61.78, + "MIRACLReranking (id)": 56.25, + "MIRACLReranking (ja)": 63.27, + "MIRACLReranking (ko)": 53.53, + "MIRACLReranking (sw)": 63.77, + "MIRACLReranking (te)": 73.38, + "MIRACLReranking (th)": 73.81, + "MIRACLReranking (yo)": 64.37, + "MIRACLReranking (zh)": 49.11 } ] }, @@ -9639,7 +25414,10 @@ "EcomRetrieval": 54.17, "FiQA-PL": 25.52, "FiQA2018": 38.15, + "GerDaLIR": 6.89, "GerDaLIRSmall": 15.3, + "GermanDPR": 79.51, + "GermanQuAD-Retrieval": 93.93, "HellaSwag": 24.79, "HotpotQA-PL": 63.52, "LEMBNarrativeQARetrieval": 23.6, @@ -9652,6 +25430,23 @@ "LegalQuAD": 47.85, "LegalSummarization": 61.69, "MIRACLRetrieval (ru)": 61.6, + "MIRACLRetrieval (ar)": 71.62, + "MIRACLRetrieval (bn)": 70.23, + "MIRACLRetrieval (de)": 52.05, + "MIRACLRetrieval (en)": 51.17, + "MIRACLRetrieval (es)": 51.54, + "MIRACLRetrieval (fa)": 57.48, + "MIRACLRetrieval (fi)": 74.35, + "MIRACLRetrieval (fr)": 49.63, + "MIRACLRetrieval (hi)": 58.33, + "MIRACLRetrieval (id)": 51.04, + "MIRACLRetrieval (ja)": 64.74, + "MIRACLRetrieval (ko)": 62.27, + "MIRACLRetrieval (sw)": 71.06, + "MIRACLRetrieval (te)": 75.07, + "MIRACLRetrieval (th)": 75.27, + "MIRACLRetrieval (yo)": 70.56, + "MIRACLRetrieval (zh)": 51.52, "MMarcoRetrieval": 76.04, "MSMARCO-PL": 29.52, "MedicalRetrieval": 48.35, @@ -9663,7 +25458,7 @@ "MintakaRetrieval (it)": 29.77, "MintakaRetrieval (ja)": 22.98, "MintakaRetrieval (pt)": 30.62, - "NFCorpus": 32.49, + "NFCorpus": 32.45, "NFCorpus-PL": 25.98, "NQ-PL": 44.8, "PIQA": 25.09, @@ -9676,14 +25471,14 @@ "SCIDOCS": 17.17, "SCIDOCS-PL": 12.35, "SIQA": 3.72, - "SciFact": 69.39, + "SciFact": 69.65, "SciFact-PL": 62.11, "SpartQA": 7.91, "StackOverflowQA": 85.11, "SyntecRetrieval": 80.49, "SyntheticText2SQL": 53.61, "T2Retrieval": 70.86, - "TRECCOVID": 69.5, + "TRECCOVID": 69.49, "TRECCOVID-PL": 66.06, "TempReasonL1": 0.72, "TempReasonL2Fact": 38.76, @@ -9693,6 +25488,9 @@ "Touche2020": 21.5, "VideoRetrieval": 61.3, "WinoGrande": 56.18, + "XMarket (de)": 16.27, + "XMarket (en)": 22.04, + "XMarket (es)": 11.89, "XPQARetrieval (ara-ara)": 39.97, "XPQARetrieval (eng-ara)": 17.23, "XPQARetrieval (ara-eng)": 34.35, @@ -9742,6 +25540,7 @@ "BIOSSES": 85.05, "BQ": 45.45, "CDSC-R": 90.09, + "GermanSTSBenchmark": 78.86, "LCQMC": 74.15, "PAWSX": 12.13, "RUParaPhraserSTS": 70.17, @@ -9803,6 +25602,7 @@ "BIOSSES": 85.05, "BQ": 45.45, "CDSC-R": 90.09, + "GermanSTSBenchmark": 78.86, "LCQMC": 74.15, "PAWSX": 12.13, "RUParaPhraserSTS": 70.17, @@ -10223,14 +26023,22 @@ "AlloProfClusteringS2S": 32.26, "BiorxivClusteringP2P": 35.5, "BiorxivClusteringS2S": 33.3, + "BlurbsClusteringP2P": 41.52, + "BlurbsClusteringS2S": 16.8, "CLSClusteringP2P": 40.68, "CLSClusteringS2S": 38.59, "GeoreviewClusteringP2P": 59.59, "HALClusteringS2S": 22.44, - "MLSUMClusteringP2P (ru)": 42.79, + "MLSUMClusteringP2P (ru)": 42.04, "MLSUMClusteringP2P": 44.04, - "MLSUMClusteringS2S (ru)": 44.32, + "MLSUMClusteringP2P (de)": 39.43, + "MLSUMClusteringP2P (fr)": 44.04, + "MLSUMClusteringP2P (es)": 47.42, + "MLSUMClusteringS2S (ru)": 44.84, "MLSUMClusteringS2S": 37.65, + "MLSUMClusteringS2S (de)": 39.14, + "MLSUMClusteringS2S (fr)": 45.24, + "MLSUMClusteringS2S (es)": 48.17, "MasakhaNEWSClusteringP2P (amh)": 67.16, "MasakhaNEWSClusteringP2P (eng)": 61.1, "MasakhaNEWSClusteringP2P (fra)": 40.94, @@ -10271,6 +26079,8 @@ "RuSciBenchOECDClusteringP2P": 45.12, "StackExchangeClustering": 58.37, "StackExchangeClusteringP2P": 32.9, + "TenKGnadClusteringP2P": 44.83, + "TenKGnadClusteringS2S": 30.83, "ThuNewsClusteringP2P": 58.05, "ThuNewsClusteringS2S": 55.59, "TwentyNewsgroupsClustering": 39.4 @@ -10282,6 +26092,7 @@ { "Model": "multilingual-e5-large", "CDSC-E": 74.47, + "FalseFriendsGermanEnglish": 53.45, "OpusparcusPC (de)": 97.27, "OpusparcusPC (en)": 98.74, "OpusparcusPC (fi)": 94.26, @@ -10306,6 +26117,7 @@ "Model": "multilingual-e5-large", "CDSC-E": 74.47, "Cmnli": 78.18, + "FalseFriendsGermanEnglish": 53.53, "Ocnli": 61.6, "OpusparcusPC (de)": 97.27, "OpusparcusPC (en)": 98.74, @@ -10359,7 +26171,24 @@ }, { "Model": "multilingual-e5-large", - "MIRACLReranking (ru)": 63.71 + "MIRACLReranking (ru)": 63.71, + "MIRACLReranking (ar)": 77.24, + "MIRACLReranking (bn)": 75.31, + "MIRACLReranking (de)": 53.61, + "MIRACLReranking (en)": 58.11, + "MIRACLReranking (es)": 61.65, + "MIRACLReranking (fa)": 59.36, + "MIRACLReranking (fi)": 79.22, + "MIRACLReranking (fr)": 53.96, + "MIRACLReranking (hi)": 65.84, + "MIRACLReranking (id)": 58.56, + "MIRACLReranking (ja)": 66.7, + "MIRACLReranking (ko)": 55.31, + "MIRACLReranking (sw)": 65.06, + "MIRACLReranking (te)": 81.64, + "MIRACLReranking (th)": 77.06, + "MIRACLReranking (yo)": 65.32, + "MIRACLReranking (zh)": 51.5 } ] }, @@ -10376,7 +26205,8 @@ "ArguAna": 54.36, "ArguAna-PL": 53.02, "BSARDRetrieval": 0.27, - "CmedqaRetrieval": 28.67, + "ClimateFEVER": 25.73, + "CmedqaRetrieval": 28.66, "CodeFeedbackMT": 42.78, "CodeFeedbackST": 74.26, "CodeSearchNetCCRetrieval (python)": 84.45, @@ -10395,13 +26225,18 @@ "CodeTransOceanDL": 31.28, "CosQA": 34.8, "CovidRetrieval": 75.51, + "DBPedia": 41.3, "DBPedia-PL": 35.82, "DuRetrieval": 85.32, "EcomRetrieval": 54.75, "FiQA-PL": 33.0, "FiQA2018": 43.81, + "GerDaLIR": 6.53, "GerDaLIRSmall": 15.72, + "GermanDPR": 82.89, + "GermanQuAD-Retrieval": 94.66, "HellaSwag": 27.35, + "HotpotQA": 71.22, "HotpotQA-PL": 67.41, "LEMBNarrativeQARetrieval": 24.22, "LEMBQMSumRetrieval": 24.26, @@ -10424,12 +26259,14 @@ "MintakaRetrieval (it)": 33.84, "MintakaRetrieval (ja)": 26.45, "MintakaRetrieval (pt)": 35.9, - "NFCorpus": 33.95, + "NFCorpus": 33.98, "NFCorpus-PL": 30.24, + "NQ": 64.03, "NQ-PL": 52.79, "PIQA": 28.82, "Quail": 4.85, "Quora-PL": 83.65, + "QuoraRetrieval": 89.26, "RARbCode": 58.92, "RARbMath": 67.32, "RiaNewsRetrieval": 80.67, @@ -10437,15 +26274,15 @@ "SCIDOCS": 17.45, "SCIDOCS-PL": 13.81, "SIQA": 5.36, - "SciFact": 70.42, - "SciFact-PL": 65.66, + "SciFact": 70.2, + "SciFact-PL": 65.8, "SpartQA": 5.64, "StackOverflowQA": 88.89, "SyntecRetrieval": 81.07, "SyntheticText2SQL": 53.07, "T2Retrieval": 76.11, - "TRECCOVID": 71.21, - "TRECCOVID-PL": 70.03, + "TRECCOVID": 71.15, + "TRECCOVID-PL": 69.91, "TempReasonL1": 1.14, "TempReasonL2Fact": 42.96, "TempReasonL2Pure": 2.05, @@ -10454,6 +26291,9 @@ "Touche2020": 23.13, "VideoRetrieval": 58.25, "WinoGrande": 54.99, + "XMarket (de)": 17.46, + "XMarket (en)": 20.59, + "XMarket (es)": 13.48, "XPQARetrieval (ara-ara)": 43.69, "XPQARetrieval (eng-ara)": 30.86, "XPQARetrieval (ara-eng)": 39.11, @@ -10503,6 +26343,7 @@ "BIOSSES": 82.49, "BQ": 46.44, "CDSC-R": 91.0, + "GermanSTSBenchmark": 83.64, "LCQMC": 75.95, "PAWSX": 14.63, "RUParaPhraserSTS": 71.82, @@ -10564,6 +26405,7 @@ "BIOSSES": 82.49, "BQ": 46.44, "CDSC-R": 91.0, + "GermanSTSBenchmark": 83.64, "LCQMC": 75.95, "PAWSX": 14.63, "RUParaPhraserSTS": 71.82, @@ -10975,9 +26817,20 @@ "v_measure": [ { "Model": "multilingual-e5-large-instruct", + "AlloProfClusteringP2P": 70.39, + "AlloProfClusteringS2S": 60.95, + "BlurbsClusteringP2P": 44.35, + "BlurbsClusteringS2S": 20.81, "GeoreviewClusteringP2P": 74.34, + "HALClusteringS2S": 28.53, "MLSUMClusteringP2P (ru)": 57.77, + "MLSUMClusteringP2P (de)": 49.88, + "MLSUMClusteringP2P (fr)": 47.09, + "MLSUMClusteringP2P (es)": 49.74, "MLSUMClusteringS2S (ru)": 57.5, + "MLSUMClusteringS2S (de)": 49.23, + "MLSUMClusteringS2S (fr)": 46.51, + "MLSUMClusteringS2S (es)": 48.93, "MasakhaNEWSClusteringP2P (amh)": 74.82, "MasakhaNEWSClusteringP2P (eng)": 70.12, "MasakhaNEWSClusteringP2P (fra)": 70.48, @@ -11011,7 +26864,9 @@ "MasakhaNEWSClusteringS2S (xho)": 43.19, "MasakhaNEWSClusteringS2S (yor)": 54.23, "RuSciBenchGRNTIClusteringP2P": 62.21, - "RuSciBenchOECDClusteringP2P": 53.09, + "RuSciBenchOECDClusteringP2P": 52.83, + "TenKGnadClusteringP2P": 54.02, + "TenKGnadClusteringS2S": 41.71, "TwentyNewsgroupsClustering": 51.03 } ] @@ -11021,6 +26876,7 @@ { "Model": "multilingual-e5-large-instruct", "CDSC-E": 76.17, + "FalseFriendsGermanEnglish": 52.3, "OpusparcusPC (de)": 97.56, "OpusparcusPC (en)": 98.91, "OpusparcusPC (fi)": 94.74, @@ -11044,6 +26900,7 @@ { "Model": "multilingual-e5-large-instruct", "CDSC-E": 76.17, + "FalseFriendsGermanEnglish": 52.3, "OpusparcusPC (de)": 97.56, "OpusparcusPC (en)": 98.92, "OpusparcusPC (fi)": 94.74, @@ -11074,7 +26931,7 @@ "AskUbuntuDupQuestions": 64.41, "MMarcoReranking": 23.6, "MindSmallReranking": 33.07, - "RuBQReranking": 75.84, + "RuBQReranking": 71.66, "SciDocsRR": 85.75, "StackOverflowDupQuestions": 52.45, "SyntecReranking": 89.95, @@ -11082,7 +26939,24 @@ }, { "Model": "multilingual-e5-large-instruct", - "MIRACLReranking (ru)": 62.49 + "MIRACLReranking (ru)": 57.03, + "MIRACLReranking (ar)": 68.84, + "MIRACLReranking (bn)": 68.46, + "MIRACLReranking (de)": 46.65, + "MIRACLReranking (en)": 52.2, + "MIRACLReranking (es)": 53.13, + "MIRACLReranking (fa)": 53.97, + "MIRACLReranking (fi)": 73.31, + "MIRACLReranking (fr)": 45.44, + "MIRACLReranking (hi)": 59.44, + "MIRACLReranking (id)": 53.13, + "MIRACLReranking (ja)": 57.35, + "MIRACLReranking (ko)": 52.34, + "MIRACLReranking (sw)": 59.61, + "MIRACLReranking (te)": 72.85, + "MIRACLReranking (th)": 68.61, + "MIRACLReranking (yo)": 64.19, + "MIRACLReranking (zh)": 46.74 } ] }, @@ -11122,7 +26996,10 @@ "EcomRetrieval": 53.92, "FiQA-PL": 32.01, "FiQA2018": 48.42, + "GerDaLIR": 9.31, "GerDaLIRSmall": 21.34, + "GermanDPR": 80.84, + "GermanQuAD-Retrieval": 94.67, "HellaSwag": 32.02, "LEMBNarrativeQARetrieval": 26.71, "LEMBQMSumRetrieval": 26.08, @@ -11133,7 +27010,24 @@ "LegalBenchCorporateLobbying": 94.25, "LegalQuAD": 51.25, "LegalSummarization": 68.07, - "MIRACLRetrieval (ru)": 66.08, + "MIRACLRetrieval (ru)": 53.08, + "MIRACLRetrieval (ar)": 63.3, + "MIRACLRetrieval (bn)": 65.27, + "MIRACLRetrieval (de)": 43.92, + "MIRACLRetrieval (en)": 43.09, + "MIRACLRetrieval (es)": 39.67, + "MIRACLRetrieval (fa)": 50.97, + "MIRACLRetrieval (fi)": 67.97, + "MIRACLRetrieval (fr)": 38.88, + "MIRACLRetrieval (hi)": 51.57, + "MIRACLRetrieval (id)": 45.73, + "MIRACLRetrieval (ja)": 55.26, + "MIRACLRetrieval (ko)": 59.95, + "MIRACLRetrieval (sw)": 63.4, + "MIRACLRetrieval (te)": 74.51, + "MIRACLRetrieval (th)": 67.95, + "MIRACLRetrieval (yo)": 78.29, + "MIRACLRetrieval (zh)": 46.05, "MMarcoRetrieval": 78.81, "MedicalRetrieval": 56.55, "MintakaRetrieval (ar)": 26.13, @@ -11150,8 +27044,8 @@ "Quail": 8.63, "RARbCode": 71.22, "RARbMath": 71.95, - "RiaNewsRetrieval": 83.26, - "RuBQRetrieval": 73.9, + "RiaNewsRetrieval": 82.44, + "RuBQRetrieval": 69.18, "SCIDOCS": 19.24, "SCIDOCS-PL": 17.15, "SIQA": 7.33, @@ -11172,6 +27066,9 @@ "Touche2020": 27.4, "VideoRetrieval": 52.24, "WinoGrande": 54.27, + "XMarket (de)": 24.4, + "XMarket (en)": 27.51, + "XMarket (es)": 25.01, "XPQARetrieval (ara-ara)": 48.56, "XPQARetrieval (eng-ara)": 34.01, "XPQARetrieval (ara-eng)": 45.13, @@ -11220,6 +27117,7 @@ "BIOSSES": 87.46, "BQ": 48.8, "CDSC-R": 92.35, + "GermanSTSBenchmark": 84.84, "LCQMC": 76.06, "PAWSX": 15.06, "RUParaPhraserSTS": 75.4, @@ -11249,7 +27147,7 @@ "STS22 (es)": 68.45, "STS22 (de-fr)": 65.52, "STS22 (pl)": 40.97, - "STS22 (ru)": 65.17, + "STS22 (ru)": 70.06, "STS22 (en)": 68.67, "STS22 (fr)": 82.25, "STS22 (es-it)": 75.25, @@ -11281,6 +27179,7 @@ "BIOSSES": 87.46, "BQ": 48.8, "CDSC-R": 92.35, + "GermanSTSBenchmark": 84.84, "LCQMC": 76.06, "PAWSX": 15.06, "RUParaPhraserSTS": 75.4, @@ -11310,7 +27209,7 @@ "STS22 (es)": 68.45, "STS22 (de-fr)": 65.52, "STS22 (pl)": 40.97, - "STS22 (ru)": 65.17, + "STS22 (ru)": 70.06, "STS22 (en)": 68.67, "STS22 (fr)": 82.25, "STS22 (es-it)": 75.25, @@ -11497,13 +27396,13 @@ { "Model": "multilingual-e5-small", "AllegroReviews": 37.42, + "AmazonCounterfactualClassification (de)": 71.72, "AmazonCounterfactualClassification (en-ext)": 73.07, "AmazonCounterfactualClassification (en)": 71.87, - "AmazonCounterfactualClassification (de)": 71.72, "AmazonCounterfactualClassification (ja)": 61.46, "AmazonPolarityClassification": 88.61, - "AmazonReviewsClassification (en)": 45.75, "AmazonReviewsClassification (de)": 41.07, + "AmazonReviewsClassification (en)": 45.75, "AmazonReviewsClassification (es)": 41.37, "AmazonReviewsClassification (fr)": 39.68, "AmazonReviewsClassification (ja)": 38.55, @@ -11522,14 +27421,14 @@ "JDReview": 79.34, "KinopoiskClassification": 49.96, "LccSentimentClassification": 57.87, - "MTOPDomainClassification (en)": 88.99, "MTOPDomainClassification (de)": 86.15, + "MTOPDomainClassification (en)": 88.99, "MTOPDomainClassification (es)": 85.53, "MTOPDomainClassification (fr)": 81.2, "MTOPDomainClassification (hi)": 84.07, "MTOPDomainClassification (th)": 83.16, - "MTOPIntentClassification (en)": 56.69, "MTOPIntentClassification (de)": 55.88, + "MTOPIntentClassification (en)": 56.69, "MTOPIntentClassification (es)": 53.15, "MTOPIntentClassification (fr)": 46.01, "MTOPIntentClassification (hi)": 52.26, @@ -11550,108 +27449,108 @@ "MasakhaNEWSClassification (tir)": 68.01, "MasakhaNEWSClassification (xho)": 72.22, "MasakhaNEWSClassification (yor)": 73.84, - "MassiveIntentClassification (is)": 41.53, - "MassiveIntentClassification (tl)": 48.7, - "MassiveIntentClassification (he)": 51.11, - "MassiveIntentClassification (ta)": 47.65, - "MassiveIntentClassification (ar)": 47.78, - "MassiveIntentClassification (my)": 45.64, - "MassiveIntentClassification (sl)": 47.71, - "MassiveIntentClassification (af)": 48.74, + "MassiveIntentClassification (ru)": 58.43, + "MassiveIntentClassification (sq)": 48.68, + "MassiveIntentClassification (ms)": 50.8, + "MassiveIntentClassification (hi)": 55.69, + "MassiveIntentClassification (pt)": 60.12, + "MassiveIntentClassification (zh-TW)": 53.75, + "MassiveIntentClassification (id)": 56.2, + "MassiveIntentClassification (nl)": 59.27, + "MassiveIntentClassification (ur)": 50.51, + "MassiveIntentClassification (te)": 48.85, "MassiveIntentClassification (de)": 55.52, - "MassiveIntentClassification (pl)": 57.4, + "MassiveIntentClassification (ro)": 52.82, + "MassiveIntentClassification (da)": 54.63, + "MassiveIntentClassification (am)": 43.52, "MassiveIntentClassification (en)": 63.87, - "MassiveIntentClassification (fi)": 55.14, - "MassiveIntentClassification (lv)": 44.93, "MassiveIntentClassification (fr)": 57.9, - "MassiveIntentClassification (ur)": 50.51, + "MassiveIntentClassification (sw)": 44.84, + "MassiveIntentClassification (bn)": 50.68, "MassiveIntentClassification (mn)": 47.38, - "MassiveIntentClassification (it)": 58.8, + "MassiveIntentClassification (kn)": 47.85, + "MassiveIntentClassification (ja)": 61.58, + "MassiveIntentClassification (hy)": 47.89, "MassiveIntentClassification (ko)": 57.12, - "MassiveIntentClassification (nb)": 53.96, "MassiveIntentClassification (es)": 59.19, - "MassiveIntentClassification (ja)": 61.58, - "MassiveIntentClassification (da)": 54.63, - "MassiveIntentClassification (zh-TW)": 53.75, - "MassiveIntentClassification (id)": 56.2, - "MassiveIntentClassification (ka)": 39.52, - "MassiveIntentClassification (hi)": 55.69, - "MassiveIntentClassification (cy)": 36.62, - "MassiveIntentClassification (kn)": 47.85, - "MassiveIntentClassification (pt)": 60.12, - "MassiveIntentClassification (th)": 56.26, - "MassiveIntentClassification (fa)": 57.73, - "MassiveIntentClassification (bn)": 50.68, - "MassiveIntentClassification (ml)": 52.81, - "MassiveIntentClassification (ro)": 52.82, - "MassiveIntentClassification (am)": 43.52, - "MassiveIntentClassification (hu)": 53.21, - "MassiveIntentClassification (sw)": 44.84, - "MassiveIntentClassification (ms)": 50.8, - "MassiveIntentClassification (tr)": 56.88, + "MassiveIntentClassification (is)": 41.53, "MassiveIntentClassification (km)": 33.45, - "MassiveIntentClassification (ru)": 58.43, - "MassiveIntentClassification (az)": 49.32, - "MassiveIntentClassification (te)": 48.85, - "MassiveIntentClassification (nl)": 59.27, + "MassiveIntentClassification (af)": 48.74, "MassiveIntentClassification (zh-CN)": 62.04, - "MassiveIntentClassification (sq)": 48.68, + "MassiveIntentClassification (he)": 51.11, + "MassiveIntentClassification (fa)": 57.73, + "MassiveIntentClassification (nb)": 53.96, + "MassiveIntentClassification (sv)": 56.6, + "MassiveIntentClassification (ta)": 47.65, + "MassiveIntentClassification (tr)": 56.88, + "MassiveIntentClassification (sl)": 47.71, + "MassiveIntentClassification (it)": 58.8, + "MassiveIntentClassification (tl)": 48.7, "MassiveIntentClassification (vi)": 56.19, + "MassiveIntentClassification (lv)": 44.93, + "MassiveIntentClassification (fi)": 55.14, + "MassiveIntentClassification (pl)": 57.4, "MassiveIntentClassification (jv)": 42.96, - "MassiveIntentClassification (sv)": 56.6, - "MassiveIntentClassification (hy)": 47.89, + "MassiveIntentClassification (my)": 45.64, + "MassiveIntentClassification (ka)": 39.52, + "MassiveIntentClassification (th)": 56.26, "MassiveIntentClassification (el)": 54.14, + "MassiveIntentClassification (ar)": 47.78, + "MassiveIntentClassification (az)": 49.32, + "MassiveIntentClassification (cy)": 36.62, + "MassiveIntentClassification (hu)": 53.21, + "MassiveIntentClassification (ml)": 52.81, + "MassiveScenarioClassification (ru)": 63.89, + "MassiveScenarioClassification (fr)": 63.9, + "MassiveScenarioClassification (my)": 51.07, + "MassiveScenarioClassification (sq)": 56.15, + "MassiveScenarioClassification (af)": 58.0, "MassiveScenarioClassification (de)": 65.88, - "MassiveScenarioClassification (nb)": 59.9, - "MassiveScenarioClassification (th)": 65.72, "MassiveScenarioClassification (ka)": 44.96, - "MassiveScenarioClassification (jv)": 51.39, - "MassiveScenarioClassification (sv)": 65.54, - "MassiveScenarioClassification (fr)": 63.9, "MassiveScenarioClassification (tl)": 55.3, - "MassiveScenarioClassification (hu)": 61.93, - "MassiveScenarioClassification (ur)": 55.91, + "MassiveScenarioClassification (zh-CN)": 68.96, + "MassiveScenarioClassification (pl)": 64.25, + "MassiveScenarioClassification (mn)": 52.41, "MassiveScenarioClassification (ms)": 59.18, - "MassiveScenarioClassification (az)": 53.27, - "MassiveScenarioClassification (af)": 58.0, - "MassiveScenarioClassification (zh-TW)": 61.15, - "MassiveScenarioClassification (lv)": 51.0, - "MassiveScenarioClassification (km)": 39.01, - "MassiveScenarioClassification (el)": 62.29, - "MassiveScenarioClassification (bn)": 57.38, "MassiveScenarioClassification (da)": 62.34, - "MassiveScenarioClassification (ml)": 60.31, - "MassiveScenarioClassification (ro)": 60.0, - "MassiveScenarioClassification (ru)": 63.89, - "MassiveScenarioClassification (it)": 64.03, "MassiveScenarioClassification (am)": 50.53, + "MassiveScenarioClassification (en)": 69.28, "MassiveScenarioClassification (is)": 49.66, - "MassiveScenarioClassification (ja)": 67.75, - "MassiveScenarioClassification (zh-CN)": 68.96, - "MassiveScenarioClassification (id)": 62.0, - "MassiveScenarioClassification (tr)": 62.14, - "MassiveScenarioClassification (fa)": 63.32, - "MassiveScenarioClassification (ta)": 52.74, - "MassiveScenarioClassification (kn)": 52.73, - "MassiveScenarioClassification (pt)": 62.75, - "MassiveScenarioClassification (cy)": 44.63, - "MassiveScenarioClassification (my)": 51.07, + "MassiveScenarioClassification (it)": 64.03, + "MassiveScenarioClassification (jv)": 51.39, + "MassiveScenarioClassification (nl)": 67.01, + "MassiveScenarioClassification (ro)": 60.0, + "MassiveScenarioClassification (te)": 54.86, + "MassiveScenarioClassification (bn)": 57.38, "MassiveScenarioClassification (es)": 64.43, + "MassiveScenarioClassification (el)": 62.29, + "MassiveScenarioClassification (lv)": 51.0, + "MassiveScenarioClassification (he)": 59.22, "MassiveScenarioClassification (hi)": 62.22, - "MassiveScenarioClassification (te)": 54.86, - "MassiveScenarioClassification (mn)": 52.41, + "MassiveScenarioClassification (ar)": 54.56, + "MassiveScenarioClassification (pt)": 62.75, + "MassiveScenarioClassification (sv)": 65.54, "MassiveScenarioClassification (ko)": 65.7, - "MassiveScenarioClassification (sl)": 54.05, + "MassiveScenarioClassification (ta)": 52.74, + "MassiveScenarioClassification (vi)": 62.67, + "MassiveScenarioClassification (fa)": 63.32, "MassiveScenarioClassification (sw)": 52.42, - "MassiveScenarioClassification (hy)": 52.93, - "MassiveScenarioClassification (nl)": 67.01, - "MassiveScenarioClassification (sq)": 56.15, + "MassiveScenarioClassification (ml)": 60.31, + "MassiveScenarioClassification (sl)": 54.05, + "MassiveScenarioClassification (ja)": 67.75, "MassiveScenarioClassification (fi)": 61.89, - "MassiveScenarioClassification (en)": 69.28, - "MassiveScenarioClassification (vi)": 62.67, - "MassiveScenarioClassification (he)": 59.22, - "MassiveScenarioClassification (ar)": 54.56, - "MassiveScenarioClassification (pl)": 64.25, + "MassiveScenarioClassification (zh-TW)": 61.15, + "MassiveScenarioClassification (id)": 62.0, + "MassiveScenarioClassification (tr)": 62.14, + "MassiveScenarioClassification (km)": 39.01, + "MassiveScenarioClassification (nb)": 59.9, + "MassiveScenarioClassification (az)": 53.27, + "MassiveScenarioClassification (hy)": 52.93, + "MassiveScenarioClassification (cy)": 44.63, + "MassiveScenarioClassification (th)": 65.72, + "MassiveScenarioClassification (hu)": 61.93, + "MassiveScenarioClassification (kn)": 52.73, + "MassiveScenarioClassification (ur)": 55.91, "MultilingualSentiment": 64.74, "NoRecClassification": 53.96, "NordicLangClassification": 75.15, @@ -11681,13 +27580,21 @@ "AlloProfClusteringS2S": 32.52, "BiorxivClusteringP2P": 35.84, "BiorxivClusteringS2S": 27.35, + "BlurbsClusteringP2P": 37.05, + "BlurbsClusteringS2S": 15.24, "CLSClusteringP2P": 39.14, "CLSClusteringS2S": 37.79, "GeoreviewClusteringP2P": 58.57, "HALClusteringS2S": 18.95, + "MLSUMClusteringP2P (de)": 40.19, + "MLSUMClusteringP2P (fr)": 43.01, "MLSUMClusteringP2P (ru)": 39.69, + "MLSUMClusteringP2P (es)": 46.38, "MLSUMClusteringP2P": 43.2, + "MLSUMClusteringS2S (de)": 39.39, + "MLSUMClusteringS2S (fr)": 42.84, "MLSUMClusteringS2S (ru)": 39.9, + "MLSUMClusteringS2S (es)": 45.78, "MLSUMClusteringS2S": 37.61, "MasakhaNEWSClusteringP2P (amh)": 66.2, "MasakhaNEWSClusteringP2P (eng)": 50.08, @@ -11729,6 +27636,8 @@ "RuSciBenchOECDClusteringP2P": 44.33, "StackExchangeClustering": 53.32, "StackExchangeClusteringP2P": 31.87, + "TenKGnadClusteringP2P": 43.77, + "TenKGnadClusteringS2S": 30.52, "ThuNewsClusteringP2P": 55.18, "ThuNewsClusteringS2S": 48.93, "TwentyNewsgroupsClustering": 33.67 @@ -11740,6 +27649,7 @@ { "Model": "multilingual-e5-small", "CDSC-E": 69.69, + "FalseFriendsGermanEnglish": 49.46, "OpusparcusPC (de)": 94.9, "OpusparcusPC (en)": 98.42, "OpusparcusPC (fi)": 88.29, @@ -11764,6 +27674,7 @@ "Model": "multilingual-e5-small", "CDSC-E": 69.84, "Cmnli": 72.12, + "FalseFriendsGermanEnglish": 49.49, "Ocnli": 60.77, "OpusparcusPC (de)": 94.9, "OpusparcusPC (en)": 98.42, @@ -11817,7 +27728,24 @@ }, { "Model": "multilingual-e5-small", - "MIRACLReranking (ru)": 59.12 + "MIRACLReranking (ru)": 59.12, + "MIRACLReranking (ar)": 73.56, + "MIRACLReranking (bn)": 69.43, + "MIRACLReranking (de)": 48.16, + "MIRACLReranking (en)": 55.99, + "MIRACLReranking (es)": 60.46, + "MIRACLReranking (fa)": 54.65, + "MIRACLReranking (fi)": 76.06, + "MIRACLReranking (fr)": 48.53, + "MIRACLReranking (hi)": 59.93, + "MIRACLReranking (id)": 56.22, + "MIRACLReranking (ja)": 62.58, + "MIRACLReranking (ko)": 54.56, + "MIRACLReranking (sw)": 62.33, + "MIRACLReranking (te)": 77.27, + "MIRACLReranking (th)": 73.44, + "MIRACLReranking (yo)": 58.12, + "MIRACLReranking (zh)": 45.45 } ] }, @@ -11858,7 +27786,10 @@ "EcomRetrieval": 53.56, "FiQA-PL": 22.03, "FiQA2018": 33.13, + "GerDaLIR": 6.87, "GerDaLIRSmall": 14.81, + "GermanDPR": 78.94, + "GermanQuAD-Retrieval": 93.14, "HellaSwag": 23.73, "HotpotQA-PL": 60.15, "LEMBNarrativeQARetrieval": 22.6, @@ -11871,6 +27802,23 @@ "LegalQuAD": 47.8, "LegalSummarization": 55.76, "MIRACLRetrieval (ru)": 59.01, + "MIRACLRetrieval (ar)": 71.35, + "MIRACLRetrieval (bn)": 68.27, + "MIRACLRetrieval (de)": 48.75, + "MIRACLRetrieval (en)": 47.98, + "MIRACLRetrieval (es)": 51.23, + "MIRACLRetrieval (fa)": 53.34, + "MIRACLRetrieval (fi)": 73.35, + "MIRACLRetrieval (fr)": 47.61, + "MIRACLRetrieval (hi)": 55.13, + "MIRACLRetrieval (id)": 50.67, + "MIRACLRetrieval (ja)": 63.61, + "MIRACLRetrieval (ko)": 61.24, + "MIRACLRetrieval (sw)": 68.48, + "MIRACLRetrieval (te)": 81.31, + "MIRACLRetrieval (th)": 74.9, + "MIRACLRetrieval (yo)": 45.28, + "MIRACLRetrieval (zh)": 45.95, "MMarcoRetrieval": 73.17, "MSMARCO-PL": 26.94, "MedicalRetrieval": 44.84, @@ -11912,6 +27860,9 @@ "Touche2020": 21.16, "VideoRetrieval": 58.09, "WinoGrande": 37.46, + "XMarket (de)": 15.55, + "XMarket (en)": 18.11, + "XMarket (es)": 11.55, "XPQARetrieval (ara-ara)": 39.93, "XPQARetrieval (eng-ara)": 18.09, "XPQARetrieval (ara-eng)": 31.64, @@ -11961,6 +27912,7 @@ "BIOSSES": 82.46, "BQ": 43.27, "CDSC-R": 90.27, + "GermanSTSBenchmark": 78.42, "LCQMC": 72.7, "PAWSX": 11.0, "RUParaPhraserSTS": 70.46, @@ -11984,36 +27936,36 @@ "STS17 (es-es)": 84.83, "STS17 (en-de)": 76.82, "STS17 (es-en)": 72.43, - "STS22 (pl-en)": 72.69, + "STS22 (ru)": 59.9, + "STS22 (de-fr)": 60.62, + "STS22 (es-en)": 74.2, + "STS22 (it)": 76.53, + "STS22 (zh)": 66.85, + "STS22 (zh-en)": 65.32, "STS22 (de-en)": 56.07, + "STS22 (de-pl)": 28.24, "STS22 (es)": 66.86, + "STS22 (tr)": 63.69, "STS22 (ar)": 56.65, - "STS22 (ru)": 59.9, + "STS22 (fr)": 76.58, + "STS22 (fr-pl)": 84.52, "STS22 (de)": 53.45, - "STS22 (zh-en)": 65.32, + "STS22 (pl)": 35.78, + "STS22 (pl-en)": 72.69, "STS22 (en)": 61.25, - "STS22 (fr)": 76.58, - "STS22 (it)": 76.53, "STS22 (es-it)": 71.74, - "STS22 (es-en)": 74.2, - "STS22 (de-fr)": 60.62, - "STS22 (tr)": 63.69, - "STS22 (pl)": 35.78, - "STS22 (fr-pl)": 84.52, - "STS22 (zh)": 66.85, - "STS22 (de-pl)": 28.24, "STSB": 77.73, "STSBenchmark": 84.11, - "STSBenchmarkMultilingualSTS (zh)": 78.49, - "STSBenchmarkMultilingualSTS (pl)": 72.61, - "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (es)": 80.31, "STSBenchmarkMultilingualSTS (en)": 84.11, - "STSBenchmarkMultilingualSTS (pt)": 77.39, - "STSBenchmarkMultilingualSTS (it)": 78.21, - "STSBenchmarkMultilingualSTS (fr)": 79.2, "STSBenchmarkMultilingualSTS (de)": 79.17, "STSBenchmarkMultilingualSTS (nl)": 76.04, - "STSBenchmarkMultilingualSTS (es)": 80.31 + "STSBenchmarkMultilingualSTS (it)": 78.21, + "STSBenchmarkMultilingualSTS (zh)": 78.49, + "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (fr)": 79.2, + "STSBenchmarkMultilingualSTS (pt)": 77.39, + "STSBenchmarkMultilingualSTS (pl)": 72.61 }, { "Model": "multilingual-e5-small", @@ -12022,6 +27974,7 @@ "BIOSSES": 82.46, "BQ": 43.27, "CDSC-R": 90.27, + "GermanSTSBenchmark": 78.42, "LCQMC": 72.7, "PAWSX": 11.0, "RUParaPhraserSTS": 70.46, @@ -12045,36 +27998,36 @@ "STS17 (es-es)": 84.83, "STS17 (en-de)": 76.82, "STS17 (es-en)": 72.43, - "STS22 (pl-en)": 72.69, + "STS22 (ru)": 59.9, + "STS22 (de-fr)": 60.62, + "STS22 (es-en)": 74.2, + "STS22 (it)": 76.53, + "STS22 (zh)": 66.85, + "STS22 (zh-en)": 65.32, "STS22 (de-en)": 56.07, + "STS22 (de-pl)": 28.24, "STS22 (es)": 66.86, + "STS22 (tr)": 63.69, "STS22 (ar)": 56.65, - "STS22 (ru)": 59.9, + "STS22 (fr)": 76.58, + "STS22 (fr-pl)": 84.52, "STS22 (de)": 53.45, - "STS22 (zh-en)": 65.32, + "STS22 (pl)": 35.78, + "STS22 (pl-en)": 72.69, "STS22 (en)": 61.25, - "STS22 (fr)": 76.58, - "STS22 (it)": 76.53, "STS22 (es-it)": 71.74, - "STS22 (es-en)": 74.2, - "STS22 (de-fr)": 60.62, - "STS22 (tr)": 63.69, - "STS22 (pl)": 35.78, - "STS22 (fr-pl)": 84.52, - "STS22 (zh)": 66.85, - "STS22 (de-pl)": 28.24, "STSB": 77.73, "STSBenchmark": 84.11, - "STSBenchmarkMultilingualSTS (zh)": 78.49, - "STSBenchmarkMultilingualSTS (pl)": 72.61, - "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (es)": 80.31, "STSBenchmarkMultilingualSTS (en)": 84.11, - "STSBenchmarkMultilingualSTS (pt)": 77.39, - "STSBenchmarkMultilingualSTS (it)": 78.21, - "STSBenchmarkMultilingualSTS (fr)": 79.2, "STSBenchmarkMultilingualSTS (de)": 79.17, "STSBenchmarkMultilingualSTS (nl)": 76.04, - "STSBenchmarkMultilingualSTS (es)": 80.31 + "STSBenchmarkMultilingualSTS (it)": 78.21, + "STSBenchmarkMultilingualSTS (zh)": 78.49, + "STSBenchmarkMultilingualSTS (ru)": 78.24, + "STSBenchmarkMultilingualSTS (fr)": 79.2, + "STSBenchmarkMultilingualSTS (pt)": 77.39, + "STSBenchmarkMultilingualSTS (pl)": 72.61 }, { "Model": "multilingual-e5-small", @@ -12303,18 +28256,40 @@ }, "izhx__udever-bloom-1b1": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "udever-bloom-1b1", + "BornholmBitextMining": 21.91 + } + ] }, "Classification": { "accuracy": [ { "Model": "udever-bloom-1b1", + "AllegroReviews": 23.37, "AmazonReviewsClassification (fr)": 35.12, + "AngryTweetsClassification": 44.39, + "CBD": 51.92, + "DanishPoliticalCommentsClassification": 27.83, + "GeoreviewClassification": 30.5, + "HeadlineClassification": 36.09, + "InappropriatenessClassification": 54.64, + "KinopoiskClassification": 41.23, + "LccSentimentClassification": 36.87, "MTOPDomainClassification (fr)": 69.24, "MTOPIntentClassification (fr)": 51.25, "MasakhaNEWSClassification (fra)": 80.83, "MassiveIntentClassification (fr)": 43.21, - "MassiveScenarioClassification (fr)": 49.78 + "MassiveScenarioClassification (fr)": 49.78, + "NoRecClassification": 38.75, + "NordicLangClassification": 59.31, + "PAC": 64.44, + "PolEmo2.0-IN": 41.79, + "PolEmo2.0-OUT": 32.67, + "RuReviewsClassification": 45.26, + "RuSciBenchGRNTIClassification": 30.37, + "RuSciBenchOECDClassification": 22.65 } ] }, @@ -12322,13 +28297,35 @@ "v_measure": [ { "Model": "udever-bloom-1b1", - "AlloProfClusteringP2P": 62.22, - "AlloProfClusteringS2S": 27.06, - "HALClusteringS2S": 13.86, + "AlloProfClusteringP2P": 61.84, + "AlloProfClusteringS2S": 25.56, + "BlurbsClusteringP2P": 24.28, + "BlurbsClusteringS2S": 8.08, + "GeoreviewClusteringP2P": 26.01, + "HALClusteringS2S": 14.15, "MLSUMClusteringP2P": 44.11, "MLSUMClusteringS2S": 30.47, "MasakhaNEWSClusteringP2P (fra)": 40.2, - "MasakhaNEWSClusteringS2S (fra)": 27.35 + "MasakhaNEWSClusteringS2S (fra)": 28.75, + "MasakhaNEWSClusteringS2S (amh)": 44.47, + "MasakhaNEWSClusteringS2S (eng)": 12.52, + "MasakhaNEWSClusteringS2S (hau)": 5.12, + "MasakhaNEWSClusteringS2S (ibo)": 30.74, + "MasakhaNEWSClusteringS2S (lin)": 45.75, + "MasakhaNEWSClusteringS2S (lug)": 44.31, + "MasakhaNEWSClusteringS2S (orm)": 21.26, + "MasakhaNEWSClusteringS2S (pcm)": 50.75, + "MasakhaNEWSClusteringS2S (run)": 47.24, + "MasakhaNEWSClusteringS2S (sna)": 43.2, + "MasakhaNEWSClusteringS2S (som)": 25.55, + "MasakhaNEWSClusteringS2S (swa)": 7.31, + "MasakhaNEWSClusteringS2S (tir)": 42.49, + "MasakhaNEWSClusteringS2S (xho)": 22.07, + "MasakhaNEWSClusteringS2S (yor)": 29.83, + "RuSciBenchGRNTIClusteringP2P": 22.15, + "RuSciBenchOECDClusteringP2P": 19.23, + "TenKGnadClusteringP2P": 36.81, + "TenKGnadClusteringS2S": 7.05 } ] }, @@ -12336,13 +28333,50 @@ "max_ap": [ { "Model": "udever-bloom-1b1", + "CDSC-E": 45.12, + "FalseFriendsGermanEnglish": 48.72, + "OpusparcusPC (de)": 88.82, + "OpusparcusPC (en)": 94.61, + "OpusparcusPC (fi)": 79.58, "OpusparcusPC (fr)": 85.54, - "PawsXPairClassification (fr)": 61.99 + "OpusparcusPC (ru)": 79.54, + "OpusparcusPC (sv)": 78.74, + "PSC": 80.86, + "PawsXPairClassification (de)": 57.54, + "PawsXPairClassification (en)": 64.77, + "PawsXPairClassification (es)": 60.22, + "PawsXPairClassification (fr)": 61.99, + "PawsXPairClassification (ja)": 52.85, + "PawsXPairClassification (ko)": 55.63, + "PawsXPairClassification (zh)": 59.83, + "SICK-E-PL": 43.86, + "TERRa": 48.11 }, { "Model": "udever-bloom-1b1", + "CDSC-E": 46.68, + "FalseFriendsGermanEnglish": 48.89, "OpusparcusPC (fr)": 90.15, - "PawsXPairClassification (fr)": 63.95 + "OpusparcusPC (de)": 90.29, + "OpusparcusPC (en)": 95.92, + "OpusparcusPC (fi)": 81.01, + "OpusparcusPC (ru)": 80.51, + "OpusparcusPC (sv)": 79.73, + "PSC": 85.54, + "PawsXPairClassification (fr)": 64.15, + "PawsXPairClassification (de)": 58.58, + "PawsXPairClassification (en)": 66.19, + "PawsXPairClassification (es)": 62.39, + "PawsXPairClassification (ja)": 53.39, + "PawsXPairClassification (ko)": 55.7, + "PawsXPairClassification (zh)": 60.76, + "SICK-E-PL": 44.32, + "TERRa": 50.86 + }, + { + "Model": "udever-bloom-1b1", + "OpusparcusPC (fr)": 85.54, + "PawsXPairClassification (fr)": 61.99 } ] }, @@ -12350,8 +28384,30 @@ "map": [ { "Model": "udever-bloom-1b1", - "AlloprofReranking": 39.13, - "SyntecReranking": 62.58 + "AlloprofReranking": 38.6, + "RuBQReranking": 23.18, + "SyntecReranking": 47.99 + }, + { + "Model": "udever-bloom-1b1", + "MIRACLReranking (ar)": 17.32, + "MIRACLReranking (bn)": 33.92, + "MIRACLReranking (de)": 7.29, + "MIRACLReranking (en)": 25.91, + "MIRACLReranking (es)": 20.2, + "MIRACLReranking (fa)": 11.09, + "MIRACLReranking (fi)": 11.8, + "MIRACLReranking (fr)": 13.63, + "MIRACLReranking (hi)": 32.18, + "MIRACLReranking (id)": 12.28, + "MIRACLReranking (ja)": 13.57, + "MIRACLReranking (ko)": 21.09, + "MIRACLReranking (ru)": 7.12, + "MIRACLReranking (sw)": 17.07, + "MIRACLReranking (te)": 17.5, + "MIRACLReranking (th)": 8.98, + "MIRACLReranking (yo)": 6.34, + "MIRACLReranking (zh)": 18.61 } ] }, @@ -12359,16 +28415,120 @@ "ndcg_at_10": [ { "Model": "udever-bloom-1b1", - "AlloprofRetrieval": 12.37, - "BSARDRetrieval": 0.0, + "AILAStatutes": 20.96, + "ARCChallenge": 4.32, + "AlloprofRetrieval": 12.25, + "AlphaNLI": 2.95, + "BSARDRetrieval": 6.61, + "GermanQuAD-Retrieval": 18.27, + "HellaSwag": 13.79, + "LegalBenchConsumerContractsQA": 43.75, + "LegalBenchCorporateLobbying": 75.83, + "LegalSummarization": 46.2, "MintakaRetrieval (fr)": 2.78, - "SyntecRetrieval": 40.57, - "XPQARetrieval (fr)": 33.82 + "MintakaRetrieval (ar)": 3.36, + "MintakaRetrieval (de)": 1.01, + "MintakaRetrieval (es)": 2.35, + "MintakaRetrieval (hi)": 6.76, + "MintakaRetrieval (it)": 2.16, + "MintakaRetrieval (ja)": 3.72, + "MintakaRetrieval (pt)": 1.67, + "PIQA": 5.79, + "Quail": 1.65, + "RARbCode": 10.53, + "RARbMath": 26.48, + "RiaNewsRetrieval": 1.3, + "RuBQRetrieval": 0.91, + "SIQA": 0.78, + "SciFact-PL": 8.27, + "SpartQA": 0.11, + "SyntecRetrieval": 43.25, + "TempReasonL1": 0.2, + "TempReasonL2Fact": 4.22, + "TempReasonL2Pure": 0.24, + "TempReasonL3Fact": 5.01, + "TempReasonL3Pure": 2.13, + "WinoGrande": 2.56, + "XPQARetrieval (fr)": 33.82, + "XPQARetrieval (ara-ara)": 16.5, + "XPQARetrieval (eng-ara)": 1.9, + "XPQARetrieval (ara-eng)": 5.89, + "XPQARetrieval (deu-deu)": 18.76, + "XPQARetrieval (eng-deu)": 0.64, + "XPQARetrieval (deu-eng)": 10.08, + "XPQARetrieval (spa-spa)": 28.65, + "XPQARetrieval (eng-spa)": 2.55, + "XPQARetrieval (spa-eng)": 14.99, + "XPQARetrieval (fra-fra)": 33.82, + "XPQARetrieval (eng-fra)": 3.6, + "XPQARetrieval (fra-eng)": 13.52, + "XPQARetrieval (hin-hin)": 49.89, + "XPQARetrieval (eng-hin)": 9.46, + "XPQARetrieval (hin-eng)": 17.68, + "XPQARetrieval (ita-ita)": 22.47, + "XPQARetrieval (eng-ita)": 2.05, + "XPQARetrieval (ita-eng)": 14.23, + "XPQARetrieval (jpn-jpn)": 32.34, + "XPQARetrieval (eng-jpn)": 1.32, + "XPQARetrieval (jpn-eng)": 3.18, + "XPQARetrieval (kor-kor)": 12.19, + "XPQARetrieval (eng-kor)": 3.13, + "XPQARetrieval (kor-eng)": 2.06, + "XPQARetrieval (pol-pol)": 6.88, + "XPQARetrieval (eng-pol)": 0.74, + "XPQARetrieval (pol-eng)": 5.26, + "XPQARetrieval (por-por)": 21.11, + "XPQARetrieval (eng-por)": 1.57, + "XPQARetrieval (por-eng)": 7.35, + "XPQARetrieval (tam-tam)": 12.77, + "XPQARetrieval (eng-tam)": 3.73, + "XPQARetrieval (tam-eng)": 1.54, + "XPQARetrieval (cmn-cmn)": 36.29, + "XPQARetrieval (eng-cmn)": 2.16, + "XPQARetrieval (cmn-eng)": 8.26 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "udever-bloom-1b1", + "CDSC-R": 65.58, + "GermanSTSBenchmark": 35.78, + "RUParaPhraserSTS": 39.08, + "RuSTSBenchmarkSTS": 44.22, + "SICK-R-PL": 33.35, + "SICKFr": 59.94, + "STSBenchmarkMultilingualSTS (zh)": 49.47, + "STSBenchmarkMultilingualSTS (de)": 37.51, + "STSBenchmarkMultilingualSTS (en)": 48.85, + "STSBenchmarkMultilingualSTS (ru)": 45.06, + "STSBenchmarkMultilingualSTS (pl)": 35.54, + "STSBenchmarkMultilingualSTS (it)": 42.62, + "STSBenchmarkMultilingualSTS (fr)": 49.97, + "STSBenchmarkMultilingualSTS (nl)": 32.41, + "STSBenchmarkMultilingualSTS (es)": 49.08, + "STSBenchmarkMultilingualSTS (pt)": 35.28 + }, + { + "Model": "udever-bloom-1b1", + "CDSC-R": 65.58, + "GermanSTSBenchmark": 35.78, + "RUParaPhraserSTS": 39.08, + "RuSTSBenchmarkSTS": 44.22, + "SICK-R-PL": 33.35, + "SICKFr": 59.94, + "STSBenchmarkMultilingualSTS (zh)": 49.47, + "STSBenchmarkMultilingualSTS (de)": 37.51, + "STSBenchmarkMultilingualSTS (en)": 48.85, + "STSBenchmarkMultilingualSTS (ru)": 45.06, + "STSBenchmarkMultilingualSTS (pl)": 35.54, + "STSBenchmarkMultilingualSTS (it)": 42.62, + "STSBenchmarkMultilingualSTS (fr)": 49.97, + "STSBenchmarkMultilingualSTS (nl)": 32.41, + "STSBenchmarkMultilingualSTS (es)": 49.08, + "STSBenchmarkMultilingualSTS (pt)": 35.28 + }, { "Model": "udever-bloom-1b1", "SICKFr": 59.94, @@ -12379,6 +28539,14 @@ }, "Summarization": { "cosine_spearman": [ + { + "Model": "udever-bloom-1b1", + "SummEvalFr": 29.48 + }, + { + "Model": "udever-bloom-1b1", + "SummEvalFr": 29.48 + }, { "Model": "udever-bloom-1b1", "SummEvalFr": 29.48 @@ -12386,26 +28554,61 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "udever-bloom-1b1", + "CEDRClassification": 31.77, + "SensitiveTopicsClassification": 17.36 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "udever-bloom-1b1", + "Core17InstructionRetrieval": 0.29, + "News21InstructionRetrieval": -0.24, + "Robust04InstructionRetrieval": -3.35 + } + ] } }, "izhx__udever-bloom-560m": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "udever-bloom-560m", + "BornholmBitextMining": 11.73 + } + ] }, "Classification": { "accuracy": [ { "Model": "udever-bloom-560m", + "AllegroReviews": 24.59, "AmazonReviewsClassification (fr)": 26.85, + "AngryTweetsClassification": 40.61, + "CBD": 51.69, + "DanishPoliticalCommentsClassification": 24.87, + "GeoreviewClassification": 28.08, + "HeadlineClassification": 28.25, + "InappropriatenessClassification": 52.74, + "KinopoiskClassification": 39.85, + "LccSentimentClassification": 33.27, "MTOPDomainClassification (fr)": 34.99, "MTOPIntentClassification (fr)": 15.76, "MasakhaNEWSClassification (fra)": 67.94, "MassiveIntentClassification (fr)": 15.09, - "MassiveScenarioClassification (fr)": 21.67 + "MassiveScenarioClassification (fr)": 21.67, + "NoRecClassification": 35.14, + "NordicLangClassification": 49.08, + "PAC": 62.49, + "PolEmo2.0-IN": 33.59, + "PolEmo2.0-OUT": 30.18, + "RuReviewsClassification": 41.19, + "RuSciBenchGRNTIClassification": 16.08, + "RuSciBenchOECDClassification": 12.1 } ] }, @@ -12413,13 +28616,35 @@ "v_measure": [ { "Model": "udever-bloom-560m", - "AlloProfClusteringP2P": 53.57, - "AlloProfClusteringS2S": 22.13, - "HALClusteringS2S": 7.68, + "AlloProfClusteringP2P": 35.43, + "AlloProfClusteringS2S": 18.33, + "BlurbsClusteringP2P": 7.37, + "BlurbsClusteringS2S": 6.68, + "GeoreviewClusteringP2P": 18.84, + "HALClusteringS2S": 3.92, "MLSUMClusteringP2P": 36.43, "MLSUMClusteringS2S": 25.26, "MasakhaNEWSClusteringP2P (fra)": 37.57, - "MasakhaNEWSClusteringS2S (fra)": 20.58 + "MasakhaNEWSClusteringS2S (fra)": 21.0, + "MasakhaNEWSClusteringS2S (amh)": 41.35, + "MasakhaNEWSClusteringS2S (eng)": 1.71, + "MasakhaNEWSClusteringS2S (hau)": 1.76, + "MasakhaNEWSClusteringS2S (ibo)": 20.8, + "MasakhaNEWSClusteringS2S (lin)": 46.14, + "MasakhaNEWSClusteringS2S (lug)": 40.37, + "MasakhaNEWSClusteringS2S (orm)": 21.45, + "MasakhaNEWSClusteringS2S (pcm)": 22.96, + "MasakhaNEWSClusteringS2S (run)": 41.53, + "MasakhaNEWSClusteringS2S (sna)": 40.61, + "MasakhaNEWSClusteringS2S (som)": 20.85, + "MasakhaNEWSClusteringS2S (swa)": 7.08, + "MasakhaNEWSClusteringS2S (tir)": 44.0, + "MasakhaNEWSClusteringS2S (xho)": 22.49, + "MasakhaNEWSClusteringS2S (yor)": 20.56, + "RuSciBenchGRNTIClusteringP2P": 9.23, + "RuSciBenchOECDClusteringP2P": 9.31, + "TenKGnadClusteringP2P": 3.47, + "TenKGnadClusteringS2S": 3.19 } ] }, @@ -12427,13 +28652,50 @@ "max_ap": [ { "Model": "udever-bloom-560m", + "CDSC-E": 37.82, + "FalseFriendsGermanEnglish": 48.65, + "OpusparcusPC (de)": 85.58, + "OpusparcusPC (en)": 89.75, + "OpusparcusPC (fi)": 73.74, "OpusparcusPC (fr)": 82.1, - "PawsXPairClassification (fr)": 59.69 + "OpusparcusPC (ru)": 75.19, + "OpusparcusPC (sv)": 74.82, + "PSC": 52.97, + "PawsXPairClassification (de)": 54.05, + "PawsXPairClassification (en)": 59.57, + "PawsXPairClassification (es)": 57.1, + "PawsXPairClassification (fr)": 59.69, + "PawsXPairClassification (ja)": 50.78, + "PawsXPairClassification (ko)": 54.88, + "PawsXPairClassification (zh)": 58.85, + "SICK-E-PL": 40.96, + "TERRa": 49.05 }, { "Model": "udever-bloom-560m", + "CDSC-E": 39.2, + "FalseFriendsGermanEnglish": 49.26, "OpusparcusPC (fr)": 85.87, - "PawsXPairClassification (fr)": 61.99 + "OpusparcusPC (de)": 87.15, + "OpusparcusPC (en)": 93.01, + "OpusparcusPC (fi)": 78.29, + "OpusparcusPC (ru)": 77.35, + "OpusparcusPC (sv)": 77.29, + "PSC": 59.59, + "PawsXPairClassification (fr)": 62.02, + "PawsXPairClassification (de)": 55.9, + "PawsXPairClassification (en)": 63.29, + "PawsXPairClassification (es)": 59.6, + "PawsXPairClassification (ja)": 51.72, + "PawsXPairClassification (ko)": 55.07, + "PawsXPairClassification (zh)": 59.72, + "SICK-E-PL": 42.39, + "TERRa": 49.05 + }, + { + "Model": "udever-bloom-560m", + "OpusparcusPC (fr)": 82.1, + "PawsXPairClassification (fr)": 59.69 } ] }, @@ -12441,8 +28703,30 @@ "map": [ { "Model": "udever-bloom-560m", - "AlloprofReranking": 28.75, - "SyntecReranking": 50.88 + "AlloprofReranking": 30.7, + "RuBQReranking": 15.98, + "SyntecReranking": 47.21 + }, + { + "Model": "udever-bloom-560m", + "MIRACLReranking (ar)": 17.46, + "MIRACLReranking (bn)": 18.26, + "MIRACLReranking (de)": 3.33, + "MIRACLReranking (en)": 9.5, + "MIRACLReranking (es)": 11.12, + "MIRACLReranking (fa)": 8.22, + "MIRACLReranking (fi)": 6.01, + "MIRACLReranking (fr)": 6.85, + "MIRACLReranking (hi)": 20.97, + "MIRACLReranking (id)": 8.65, + "MIRACLReranking (ja)": 10.92, + "MIRACLReranking (ko)": 11.1, + "MIRACLReranking (ru)": 2.65, + "MIRACLReranking (sw)": 6.05, + "MIRACLReranking (te)": 11.42, + "MIRACLReranking (th)": 4.03, + "MIRACLReranking (yo)": 2.41, + "MIRACLReranking (zh)": 12.44 } ] }, @@ -12450,16 +28734,120 @@ "ndcg_at_10": [ { "Model": "udever-bloom-560m", - "AlloprofRetrieval": 1.98, - "BSARDRetrieval": 0.0, + "AILAStatutes": 15.61, + "ARCChallenge": 1.82, + "AlloprofRetrieval": 1.93, + "AlphaNLI": 0.79, + "BSARDRetrieval": 1.54, + "GermanQuAD-Retrieval": 3.72, + "HellaSwag": 4.43, + "LegalBenchConsumerContractsQA": 14.08, + "LegalBenchCorporateLobbying": 45.49, + "LegalSummarization": 19.72, "MintakaRetrieval (fr)": 0.48, - "SyntecRetrieval": 24.45, - "XPQARetrieval (fr)": 12.98 + "MintakaRetrieval (ar)": 1.54, + "MintakaRetrieval (de)": 0.48, + "MintakaRetrieval (es)": 0.75, + "MintakaRetrieval (hi)": 3.03, + "MintakaRetrieval (it)": 0.71, + "MintakaRetrieval (ja)": 2.11, + "MintakaRetrieval (pt)": 0.68, + "PIQA": 2.45, + "Quail": 0.28, + "RARbCode": 0.71, + "RARbMath": 3.05, + "RiaNewsRetrieval": 0.16, + "RuBQRetrieval": 0.19, + "SIQA": 0.08, + "SciFact-PL": 1.19, + "SpartQA": 0.04, + "SyntecRetrieval": 25.01, + "TempReasonL1": 0.02, + "TempReasonL2Fact": 0.81, + "TempReasonL2Pure": 0.13, + "TempReasonL3Fact": 1.14, + "TempReasonL3Pure": 0.55, + "WinoGrande": 0.03, + "XPQARetrieval (fr)": 12.98, + "XPQARetrieval (ara-ara)": 8.23, + "XPQARetrieval (eng-ara)": 0.43, + "XPQARetrieval (ara-eng)": 0.65, + "XPQARetrieval (deu-deu)": 2.14, + "XPQARetrieval (eng-deu)": 0.56, + "XPQARetrieval (deu-eng)": 1.09, + "XPQARetrieval (spa-spa)": 6.89, + "XPQARetrieval (eng-spa)": 2.03, + "XPQARetrieval (spa-eng)": 2.04, + "XPQARetrieval (fra-fra)": 12.98, + "XPQARetrieval (eng-fra)": 2.55, + "XPQARetrieval (fra-eng)": 1.84, + "XPQARetrieval (hin-hin)": 33.22, + "XPQARetrieval (eng-hin)": 7.69, + "XPQARetrieval (hin-eng)": 0.42, + "XPQARetrieval (ita-ita)": 6.4, + "XPQARetrieval (eng-ita)": 0.84, + "XPQARetrieval (ita-eng)": 2.84, + "XPQARetrieval (jpn-jpn)": 12.26, + "XPQARetrieval (eng-jpn)": 0.49, + "XPQARetrieval (jpn-eng)": 0.6, + "XPQARetrieval (kor-kor)": 3.69, + "XPQARetrieval (eng-kor)": 0.61, + "XPQARetrieval (kor-eng)": 0.69, + "XPQARetrieval (pol-pol)": 1.62, + "XPQARetrieval (eng-pol)": 0.72, + "XPQARetrieval (pol-eng)": 0.62, + "XPQARetrieval (por-por)": 6.58, + "XPQARetrieval (eng-por)": 1.99, + "XPQARetrieval (por-eng)": 2.64, + "XPQARetrieval (tam-tam)": 5.82, + "XPQARetrieval (eng-tam)": 0.79, + "XPQARetrieval (tam-eng)": 0.47, + "XPQARetrieval (cmn-cmn)": 19.15, + "XPQARetrieval (eng-cmn)": 0.41, + "XPQARetrieval (cmn-eng)": 1.01 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "udever-bloom-560m", + "CDSC-R": 48.51, + "GermanSTSBenchmark": 23.42, + "RUParaPhraserSTS": 29.73, + "RuSTSBenchmarkSTS": 32.6, + "SICK-R-PL": 28.02, + "SICKFr": 54.54, + "STSBenchmarkMultilingualSTS (fr)": 36.78, + "STSBenchmarkMultilingualSTS (nl)": 22.06, + "STSBenchmarkMultilingualSTS (de)": 25.43, + "STSBenchmarkMultilingualSTS (pt)": 29.27, + "STSBenchmarkMultilingualSTS (it)": 22.74, + "STSBenchmarkMultilingualSTS (zh)": 35.7, + "STSBenchmarkMultilingualSTS (en)": 30.66, + "STSBenchmarkMultilingualSTS (pl)": 29.04, + "STSBenchmarkMultilingualSTS (ru)": 33.44, + "STSBenchmarkMultilingualSTS (es)": 36.23 + }, + { + "Model": "udever-bloom-560m", + "CDSC-R": 48.51, + "GermanSTSBenchmark": 23.42, + "RUParaPhraserSTS": 29.73, + "RuSTSBenchmarkSTS": 32.6, + "SICK-R-PL": 28.02, + "SICKFr": 54.54, + "STSBenchmarkMultilingualSTS (fr)": 36.78, + "STSBenchmarkMultilingualSTS (nl)": 22.06, + "STSBenchmarkMultilingualSTS (de)": 25.43, + "STSBenchmarkMultilingualSTS (pt)": 29.27, + "STSBenchmarkMultilingualSTS (it)": 22.74, + "STSBenchmarkMultilingualSTS (zh)": 35.7, + "STSBenchmarkMultilingualSTS (en)": 30.66, + "STSBenchmarkMultilingualSTS (pl)": 29.04, + "STSBenchmarkMultilingualSTS (ru)": 33.44, + "STSBenchmarkMultilingualSTS (es)": 36.23 + }, { "Model": "udever-bloom-560m", "SICKFr": 54.54, @@ -12470,6 +28858,14 @@ }, "Summarization": { "cosine_spearman": [ + { + "Model": "udever-bloom-560m", + "SummEvalFr": 23.63 + }, + { + "Model": "udever-bloom-560m", + "SummEvalFr": 23.64 + }, { "Model": "udever-bloom-560m", "SummEvalFr": 23.63 @@ -12477,10 +28873,23 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "udever-bloom-560m", + "CEDRClassification": 32.34, + "SensitiveTopicsClassification": 17.7 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "udever-bloom-560m", + "Core17InstructionRetrieval": -0.94, + "News21InstructionRetrieval": -0.31, + "Robust04InstructionRetrieval": -1.17 + } + ] } }, "jhu-clsp__FollowIR-7B": { @@ -12524,42 +28933,552 @@ }, "jinaai__jina-embeddings-v2-base-en": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "jina-embeddings-v2-base-en", + "BornholmBitextMining": 0.62, + "Tatoeba (tel-eng)": 0.0, + "Tatoeba (por-eng)": 0.18, + "Tatoeba (heb-eng)": 0.0, + "Tatoeba (fra-eng)": 0.1, + "Tatoeba (uig-eng)": 0.0, + "Tatoeba (swg-eng)": 0.0, + "Tatoeba (ukr-eng)": 0.04, + "Tatoeba (kat-eng)": 0.02, + "Tatoeba (pam-eng)": 0.1, + "Tatoeba (lit-eng)": 0.05, + "Tatoeba (ind-eng)": 0.1, + "Tatoeba (vie-eng)": 0.13, + "Tatoeba (slk-eng)": 0.06, + "Tatoeba (mal-eng)": 0.03, + "Tatoeba (mon-eng)": 0.0, + "Tatoeba (zsm-eng)": 0.0, + "Tatoeba (arq-eng)": 0.11, + "Tatoeba (xho-eng)": 0.08, + "Tatoeba (bos-eng)": 0.0, + "Tatoeba (max-eng)": 0.0, + "Tatoeba (khm-eng)": 0.0, + "Tatoeba (yue-eng)": 0.0, + "Tatoeba (swh-eng)": 0.13, + "Tatoeba (tgl-eng)": 0.1, + "Tatoeba (eus-eng)": 0.15, + "Tatoeba (gla-eng)": 0.27, + "Tatoeba (cym-eng)": 0.11, + "Tatoeba (hye-eng)": 0.14, + "Tatoeba (bre-eng)": 0.01, + "Tatoeba (amh-eng)": 0.48, + "Tatoeba (hun-eng)": 0.1, + "Tatoeba (nno-eng)": 0.01, + "Tatoeba (ido-eng)": 0.0, + "Tatoeba (oci-eng)": 0.01, + "Tatoeba (wuu-eng)": 0.0, + "Tatoeba (uzb-eng)": 0.25, + "Tatoeba (deu-eng)": 0.12, + "Tatoeba (kor-eng)": 0.07, + "Tatoeba (ceb-eng)": 0.09, + "Tatoeba (mkd-eng)": 0.02, + "Tatoeba (ber-eng)": 0.04, + "Tatoeba (aze-eng)": 0.1, + "Tatoeba (csb-eng)": 0.0, + "Tatoeba (dan-eng)": 0.21, + "Tatoeba (hin-eng)": 0.1, + "Tatoeba (jav-eng)": 0.0, + "Tatoeba (cmn-eng)": 0.0, + "Tatoeba (arz-eng)": 0.09, + "Tatoeba (rus-eng)": 0.0, + "Tatoeba (orv-eng)": 0.09, + "Tatoeba (sqi-eng)": 0.18, + "Tatoeba (bel-eng)": 0.01, + "Tatoeba (dtp-eng)": 0.03, + "Tatoeba (ron-eng)": 0.16, + "Tatoeba (tuk-eng)": 0.0, + "Tatoeba (isl-eng)": 0.04, + "Tatoeba (cha-eng)": 0.66, + "Tatoeba (ara-eng)": 0.02, + "Tatoeba (mhr-eng)": 0.25, + "Tatoeba (cor-eng)": 0.05, + "Tatoeba (pms-eng)": 0.13, + "Tatoeba (lat-eng)": 0.19, + "Tatoeba (spa-eng)": 0.0, + "Tatoeba (ang-eng)": 1.11, + "Tatoeba (tzl-eng)": 0.64, + "Tatoeba (bul-eng)": 0.0, + "Tatoeba (ben-eng)": 0.04, + "Tatoeba (nld-eng)": 0.19, + "Tatoeba (fao-eng)": 0.15, + "Tatoeba (nds-eng)": 0.06, + "Tatoeba (tha-eng)": 0.0, + "Tatoeba (cat-eng)": 0.19, + "Tatoeba (swe-eng)": 0.05, + "Tatoeba (kur-eng)": 0.24, + "Tatoeba (dsb-eng)": 0.06, + "Tatoeba (kab-eng)": 0.05, + "Tatoeba (war-eng)": 0.03, + "Tatoeba (fin-eng)": 0.08, + "Tatoeba (tat-eng)": 0.07, + "Tatoeba (slv-eng)": 0.0, + "Tatoeba (epo-eng)": 0.05, + "Tatoeba (hrv-eng)": 0.0, + "Tatoeba (kaz-eng)": 0.0, + "Tatoeba (gle-eng)": 0.12, + "Tatoeba (pol-eng)": 0.16, + "Tatoeba (mar-eng)": 0.0, + "Tatoeba (jpn-eng)": 0.0, + "Tatoeba (nov-eng)": 1.47, + "Tatoeba (kzj-eng)": 0.1, + "Tatoeba (ell-eng)": 0.0, + "Tatoeba (lfn-eng)": 0.12, + "Tatoeba (hsb-eng)": 0.08, + "Tatoeba (tam-eng)": 0.03, + "Tatoeba (glg-eng)": 0.04, + "Tatoeba (pes-eng)": 0.07, + "Tatoeba (fry-eng)": 0.0, + "Tatoeba (nob-eng)": 0.19, + "Tatoeba (tur-eng)": 0.0, + "Tatoeba (ina-eng)": 0.07, + "Tatoeba (awa-eng)": 0.0, + "Tatoeba (ast-eng)": 0.98, + "Tatoeba (urd-eng)": 0.11, + "Tatoeba (yid-eng)": 0.0, + "Tatoeba (gsw-eng)": 0.43, + "Tatoeba (est-eng)": 0.0, + "Tatoeba (ita-eng)": 0.2, + "Tatoeba (lvs-eng)": 0.04, + "Tatoeba (afr-eng)": 0.01, + "Tatoeba (srp-eng)": 0.2, + "Tatoeba (ile-eng)": 0.14, + "Tatoeba (cbk-eng)": 0.02, + "Tatoeba (ces-eng)": 0.03 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "jina-embeddings-v2-base-en", + "AllegroReviews": 20.51, + "AmazonCounterfactualClassification (en-ext)": 52.46, + "AmazonCounterfactualClassification (en)": 52.79, + "AmazonCounterfactualClassification (de)": 50.26, + "AmazonCounterfactualClassification (ja)": 51.52, + "AmazonReviewsClassification (en)": 21.39, + "AmazonReviewsClassification (de)": 21.09, + "AmazonReviewsClassification (es)": 21.59, + "AmazonReviewsClassification (fr)": 21.01, + "AmazonReviewsClassification (ja)": 20.43, + "AmazonReviewsClassification (zh)": 20.77, + "AngryTweetsClassification": 36.78, + "CBD": 51.45, + "DanishPoliticalCommentsClassification": 22.89, + "GeoreviewClassification": 21.25, + "HeadlineClassification": 19.82, + "InappropriatenessClassification": 51.48, + "LccSentimentClassification": 32.27, + "MTOPDomainClassification (en)": 20.7, + "MTOPDomainClassification (de)": 21.39, + "MTOPDomainClassification (es)": 19.5, + "MTOPDomainClassification (fr)": 17.71, + "MTOPDomainClassification (hi)": 21.09, + "MTOPDomainClassification (th)": 15.02, + "MTOPIntentClassification (en)": 6.2, + "MTOPIntentClassification (de)": 5.88, + "MTOPIntentClassification (es)": 6.74, + "MTOPIntentClassification (fr)": 4.33, + "MTOPIntentClassification (hi)": 5.96, + "MTOPIntentClassification (th)": 4.51, + "MasakhaNEWSClassification (amh)": 28.56, + "MasakhaNEWSClassification (eng)": 21.54, + "MasakhaNEWSClassification (fra)": 23.53, + "MasakhaNEWSClassification (hau)": 17.96, + "MasakhaNEWSClassification (ibo)": 19.85, + "MasakhaNEWSClassification (lin)": 25.43, + "MasakhaNEWSClassification (lug)": 24.57, + "MasakhaNEWSClassification (orm)": 21.72, + "MasakhaNEWSClassification (pcm)": 28.26, + "MasakhaNEWSClassification (run)": 20.31, + "MasakhaNEWSClassification (sna)": 33.63, + "MasakhaNEWSClassification (som)": 16.77, + "MasakhaNEWSClassification (swa)": 19.39, + "MasakhaNEWSClassification (tir)": 19.15, + "MasakhaNEWSClassification (xho)": 21.82, + "MasakhaNEWSClassification (yor)": 24.14, + "MassiveIntentClassification (fr)": 6.07, + "MassiveIntentClassification (kn)": 3.19, + "MassiveIntentClassification (sq)": 7.31, + "MassiveIntentClassification (ja)": 6.83, + "MassiveIntentClassification (am)": 2.38, + "MassiveIntentClassification (es)": 6.03, + "MassiveIntentClassification (ro)": 6.29, + "MassiveIntentClassification (ta)": 7.24, + "MassiveIntentClassification (hu)": 6.78, + "MassiveIntentClassification (az)": 6.46, + "MassiveIntentClassification (af)": 5.23, + "MassiveIntentClassification (ar)": 8.97, + "MassiveIntentClassification (mn)": 7.65, + "MassiveIntentClassification (pl)": 5.77, + "MassiveIntentClassification (tl)": 7.32, + "MassiveIntentClassification (it)": 7.34, + "MassiveIntentClassification (id)": 7.61, + "MassiveIntentClassification (cy)": 4.53, + "MassiveIntentClassification (sv)": 5.81, + "MassiveIntentClassification (el)": 10.05, + "MassiveIntentClassification (tr)": 5.59, + "MassiveIntentClassification (ko)": 6.61, + "MassiveIntentClassification (zh-TW)": 6.45, + "MassiveIntentClassification (zh-CN)": 7.23, + "MassiveIntentClassification (vi)": 6.18, + "MassiveIntentClassification (he)": 6.89, + "MassiveIntentClassification (my)": 3.51, + "MassiveIntentClassification (hy)": 5.72, + "MassiveIntentClassification (th)": 9.23, + "MassiveIntentClassification (sw)": 8.14, + "MassiveIntentClassification (pt)": 6.96, + "MassiveIntentClassification (ka)": 7.24, + "MassiveIntentClassification (ur)": 8.07, + "MassiveIntentClassification (sl)": 6.62, + "MassiveIntentClassification (nb)": 5.47, + "MassiveIntentClassification (hi)": 6.49, + "MassiveIntentClassification (bn)": 6.27, + "MassiveIntentClassification (ml)": 3.17, + "MassiveIntentClassification (km)": 4.02, + "MassiveIntentClassification (nl)": 6.17, + "MassiveIntentClassification (de)": 5.96, + "MassiveIntentClassification (ms)": 6.56, + "MassiveIntentClassification (fa)": 9.5, + "MassiveIntentClassification (da)": 6.46, + "MassiveIntentClassification (jv)": 6.8, + "MassiveIntentClassification (fi)": 6.58, + "MassiveIntentClassification (te)": 2.64, + "MassiveIntentClassification (lv)": 6.12, + "MassiveIntentClassification (is)": 6.69, + "MassiveIntentClassification (en)": 7.27, + "MassiveIntentClassification (ru)": 7.62, + "MassiveScenarioClassification (mn)": 12.47, + "MassiveScenarioClassification (af)": 8.85, + "MassiveScenarioClassification (jv)": 11.21, + "MassiveScenarioClassification (id)": 11.66, + "MassiveScenarioClassification (en)": 12.97, + "MassiveScenarioClassification (my)": 9.66, + "MassiveScenarioClassification (hy)": 10.41, + "MassiveScenarioClassification (sv)": 9.95, + "MassiveScenarioClassification (hi)": 10.14, + "MassiveScenarioClassification (ka)": 11.04, + "MassiveScenarioClassification (cy)": 9.59, + "MassiveScenarioClassification (is)": 12.42, + "MassiveScenarioClassification (az)": 11.18, + "MassiveScenarioClassification (tl)": 12.21, + "MassiveScenarioClassification (fi)": 10.56, + "MassiveScenarioClassification (km)": 8.93, + "MassiveScenarioClassification (ml)": 6.64, + "MassiveScenarioClassification (sw)": 12.76, + "MassiveScenarioClassification (lv)": 9.75, + "MassiveScenarioClassification (he)": 10.68, + "MassiveScenarioClassification (ko)": 11.67, + "MassiveScenarioClassification (hu)": 10.74, + "MassiveScenarioClassification (ar)": 14.67, + "MassiveScenarioClassification (nb)": 10.52, + "MassiveScenarioClassification (th)": 17.38, + "MassiveScenarioClassification (sl)": 11.11, + "MassiveScenarioClassification (da)": 10.65, + "MassiveScenarioClassification (vi)": 11.1, + "MassiveScenarioClassification (kn)": 8.12, + "MassiveScenarioClassification (ms)": 11.84, + "MassiveScenarioClassification (fr)": 10.69, + "MassiveScenarioClassification (ja)": 10.74, + "MassiveScenarioClassification (pt)": 10.5, + "MassiveScenarioClassification (am)": 7.81, + "MassiveScenarioClassification (zh-CN)": 11.7, + "MassiveScenarioClassification (pl)": 9.47, + "MassiveScenarioClassification (nl)": 9.88, + "MassiveScenarioClassification (es)": 10.31, + "MassiveScenarioClassification (te)": 6.59, + "MassiveScenarioClassification (it)": 10.59, + "MassiveScenarioClassification (de)": 10.65, + "MassiveScenarioClassification (ur)": 12.93, + "MassiveScenarioClassification (tr)": 10.63, + "MassiveScenarioClassification (zh-TW)": 10.76, + "MassiveScenarioClassification (ta)": 11.39, + "MassiveScenarioClassification (ro)": 10.48, + "MassiveScenarioClassification (sq)": 11.16, + "MassiveScenarioClassification (el)": 16.72, + "MassiveScenarioClassification (ru)": 11.82, + "MassiveScenarioClassification (bn)": 10.95, + "MassiveScenarioClassification (fa)": 14.71, + "NoRecClassification": 35.31, + "NordicLangClassification": 26.91, + "PAC": 50.97, + "PolEmo2.0-IN": 26.34, + "PolEmo2.0-OUT": 26.68, + "RuReviewsClassification": 33.7 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "jina-embeddings-v2-base-en", + "AlloProfClusteringS2S": 12.62, + "BlurbsClusteringP2P": 4.57, + "BlurbsClusteringS2S": 4.9, + "GeoreviewClusteringP2P": 14.71, + "HALClusteringS2S": 0.94, + "MasakhaNEWSClusteringS2S (amh)": 40.28, + "MasakhaNEWSClusteringS2S (eng)": 0.84, + "MasakhaNEWSClusteringS2S (fra)": 20.99, + "MasakhaNEWSClusteringS2S (hau)": 1.12, + "MasakhaNEWSClusteringS2S (ibo)": 23.32, + "MasakhaNEWSClusteringS2S (lin)": 41.56, + "MasakhaNEWSClusteringS2S (lug)": 41.7, + "MasakhaNEWSClusteringS2S (orm)": 20.93, + "MasakhaNEWSClusteringS2S (pcm)": 22.24, + "MasakhaNEWSClusteringS2S (run)": 42.22, + "MasakhaNEWSClusteringS2S (sna)": 40.57, + "MasakhaNEWSClusteringS2S (som)": 23.45, + "MasakhaNEWSClusteringS2S (swa)": 1.75, + "MasakhaNEWSClusteringS2S (tir)": 42.63, + "MasakhaNEWSClusteringS2S (xho)": 20.26, + "MasakhaNEWSClusteringS2S (yor)": 21.53, + "RuSciBenchGRNTIClusteringP2P": 7.13, + "TenKGnadClusteringS2S": 1.45 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "jina-embeddings-v2-base-en", + "CDSC-E": 23.16, + "FalseFriendsGermanEnglish": 48.17, + "OpusparcusPC (de)": 78.43, + "OpusparcusPC (en)": 81.21, + "OpusparcusPC (fi)": 72.33, + "OpusparcusPC (fr)": 73.14, + "OpusparcusPC (ru)": 72.46, + "OpusparcusPC (sv)": 65.72, + "PSC": 34.13, + "PawsXPairClassification (de)": 46.44, + "PawsXPairClassification (en)": 42.97, + "PawsXPairClassification (es)": 46.48, + "PawsXPairClassification (fr)": 48.6, + "PawsXPairClassification (ja)": 46.7, + "PawsXPairClassification (ko)": 46.28, + "PawsXPairClassification (zh)": 48.42, + "SICK-E-PL": 40.41, + "TERRa": 44.8 + }, + { + "Model": "jina-embeddings-v2-base-en", + "CDSC-E": 23.17, + "FalseFriendsGermanEnglish": 48.17, + "OpusparcusPC (de)": 78.61, + "OpusparcusPC (en)": 81.21, + "OpusparcusPC (fi)": 72.33, + "OpusparcusPC (fr)": 73.24, + "OpusparcusPC (ru)": 72.57, + "OpusparcusPC (sv)": 65.72, + "PSC": 34.13, + "PawsXPairClassification (de)": 46.79, + "PawsXPairClassification (en)": 43.17, + "PawsXPairClassification (es)": 47.31, + "PawsXPairClassification (fr)": 49.4, + "PawsXPairClassification (ja)": 46.98, + "PawsXPairClassification (ko)": 46.79, + "PawsXPairClassification (zh)": 51.23, + "SICK-E-PL": 40.46, + "TERRa": 44.8 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "jina-embeddings-v2-base-en", + "AlloprofReranking": 28.82, + "RuBQReranking": 18.6, + "SyntecReranking": 30.07, + "T2Reranking": 54.47 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "jina-embeddings-v2-base-en", + "AILACasedocs": 3.49, + "AILAStatutes": 10.48, + "ARCChallenge": 0.0, + "AlloprofRetrieval": 0.16, + "AlphaNLI": 0.0, + "AppsRetrieval": 0.05, + "CmedqaRetrieval": 0.09, + "CodeFeedbackMT": 0.03, + "CodeFeedbackST": 0.02, + "CodeSearchNetCCRetrieval (python)": 0.0, + "CodeSearchNetCCRetrieval (javascript)": 0.08, + "CodeSearchNetCCRetrieval (go)": 0.03, + "CodeSearchNetCCRetrieval (ruby)": 0.11, + "CodeSearchNetCCRetrieval (java)": 0.03, + "CodeSearchNetCCRetrieval (php)": 0.0, + "CodeSearchNetRetrieval (python)": 0.83, + "CodeSearchNetRetrieval (javascript)": 0.36, + "CodeSearchNetRetrieval (go)": 0.81, + "CodeSearchNetRetrieval (ruby)": 0.66, + "CodeSearchNetRetrieval (java)": 0.73, + "CodeSearchNetRetrieval (php)": 0.41, + "CodeTransOceanContest": 1.3, + "CodeTransOceanDL": 1.92, + "CosQA": 0.0, + "CovidRetrieval": 0.0, + "GerDaLIRSmall": 0.07, + "HellaSwag": 0.0, "LEMBNarrativeQARetrieval": 37.89, "LEMBQMSumRetrieval": 38.87, "LEMBSummScreenFDRetrieval": 93.48, - "LEMBWikimQARetrieval": 73.99 + "LEMBWikimQARetrieval": 73.99, + "LeCaRDv2": 2.09, + "LegalBenchConsumerContractsQA": 4.23, + "LegalBenchCorporateLobbying": 1.27, + "LegalQuAD": 2.6, + "LegalSummarization": 1.35, + "MintakaRetrieval (ar)": 0.19, + "MintakaRetrieval (de)": 0.53, + "MintakaRetrieval (es)": 0.35, + "MintakaRetrieval (fr)": 0.32, + "MintakaRetrieval (hi)": 0.44, + "MintakaRetrieval (it)": 0.54, + "MintakaRetrieval (ja)": 0.32, + "MintakaRetrieval (pt)": 0.45, + "PIQA": 0.0, + "Quail": 0.0, + "RARbCode": 0.0, + "RARbMath": 0.01, + "RiaNewsRetrieval": 0.01, + "SIQA": 0.03, + "SciFact-PL": 13.78, + "SpartQA": 0.56, + "StackOverflowQA": 0.24, + "SyntecRetrieval": 0.79, + "SyntheticText2SQL": 0.0, + "TRECCOVID-PL": 0.14, + "TempReasonL1": 0.05, + "TempReasonL2Fact": 0.11, + "TempReasonL2Pure": 0.04, + "TempReasonL3Fact": 0.05, + "TempReasonL3Pure": 0.09, + "WinoGrande": 0.12, + "XPQARetrieval (ara-ara)": 0.66, + "XPQARetrieval (eng-ara)": 0.29, + "XPQARetrieval (ara-eng)": 1.02, + "XPQARetrieval (deu-deu)": 1.28, + "XPQARetrieval (eng-deu)": 0.38, + "XPQARetrieval (deu-eng)": 0.64, + "XPQARetrieval (spa-spa)": 0.43, + "XPQARetrieval (eng-spa)": 0.3, + "XPQARetrieval (spa-eng)": 0.62, + "XPQARetrieval (fra-fra)": 0.47, + "XPQARetrieval (eng-fra)": 0.4, + "XPQARetrieval (fra-eng)": 0.46, + "XPQARetrieval (hin-hin)": 4.04, + "XPQARetrieval (eng-hin)": 0.95, + "XPQARetrieval (hin-eng)": 0.35, + "XPQARetrieval (ita-ita)": 0.98, + "XPQARetrieval (eng-ita)": 0.32, + "XPQARetrieval (ita-eng)": 0.68, + "XPQARetrieval (jpn-jpn)": 1.85, + "XPQARetrieval (eng-jpn)": 0.83, + "XPQARetrieval (jpn-eng)": 0.41, + "XPQARetrieval (kor-kor)": 0.36, + "XPQARetrieval (eng-kor)": 0.52, + "XPQARetrieval (kor-eng)": 0.5, + "XPQARetrieval (pol-pol)": 0.6, + "XPQARetrieval (eng-pol)": 0.3, + "XPQARetrieval (pol-eng)": 0.3, + "XPQARetrieval (por-por)": 0.69, + "XPQARetrieval (eng-por)": 0.45, + "XPQARetrieval (por-eng)": 0.42, + "XPQARetrieval (tam-tam)": 0.77, + "XPQARetrieval (eng-tam)": 0.35, + "XPQARetrieval (tam-eng)": 0.46, + "XPQARetrieval (cmn-cmn)": 1.19, + "XPQARetrieval (eng-cmn)": 1.2, + "XPQARetrieval (cmn-eng)": 0.44 + } + ] + }, + "STS": { + "cosine_spearman": [ + { + "Model": "jina-embeddings-v2-base-en", + "CDSC-R": 26.0, + "GermanSTSBenchmark": 20.08, + "RUParaPhraserSTS": 9.13, + "RuSTSBenchmarkSTS": 16.85, + "SICK-R-PL": 20.96, + "SICKFr": 26.02, + "STSB": 1.21, + "STSBenchmarkMultilingualSTS (fr)": 19.61, + "STSBenchmarkMultilingualSTS (ru)": 16.52, + "STSBenchmarkMultilingualSTS (es)": 16.52, + "STSBenchmarkMultilingualSTS (de)": 22.57, + "STSBenchmarkMultilingualSTS (nl)": 15.75, + "STSBenchmarkMultilingualSTS (pl)": 19.86, + "STSBenchmarkMultilingualSTS (it)": 14.09, + "STSBenchmarkMultilingualSTS (pt)": 19.29, + "STSBenchmarkMultilingualSTS (zh)": 2.67, + "STSBenchmarkMultilingualSTS (en)": 12.86 + }, + { + "Model": "jina-embeddings-v2-base-en", + "CDSC-R": 26.0, + "GermanSTSBenchmark": 20.08, + "RUParaPhraserSTS": 9.13, + "RuSTSBenchmarkSTS": 16.85, + "SICK-R-PL": 20.96, + "SICKFr": 26.01, + "STSB": 1.21, + "STSBenchmarkMultilingualSTS (fr)": 19.61, + "STSBenchmarkMultilingualSTS (ru)": 16.52, + "STSBenchmarkMultilingualSTS (es)": 16.51, + "STSBenchmarkMultilingualSTS (de)": 22.57, + "STSBenchmarkMultilingualSTS (nl)": 15.75, + "STSBenchmarkMultilingualSTS (pl)": 19.86, + "STSBenchmarkMultilingualSTS (it)": 14.09, + "STSBenchmarkMultilingualSTS (pt)": 19.29, + "STSBenchmarkMultilingualSTS (zh)": 2.68, + "STSBenchmarkMultilingualSTS (en)": 12.86 + } + ] + }, + "Summarization": { + "cosine_spearman": [ + { + "Model": "jina-embeddings-v2-base-en", + "SummEvalFr": 27.71 + }, + { + "Model": "jina-embeddings-v2-base-en", + "SummEvalFr": 27.71 } ] }, - "STS": { - "cosine_spearman": [] - }, - "Summarization": { - "cosine_spearman": [] - }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "jina-embeddings-v2-base-en", + "CEDRClassification": 29.49, + "SensitiveTopicsClassification": 17.63 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "jina-embeddings-v2-base-en", + "Core17InstructionRetrieval": -1.15, + "News21InstructionRetrieval": 0.38, + "Robust04InstructionRetrieval": -4.16 + } + ] } }, "jonfd__electra-small-nordic": { @@ -13101,42 +30020,664 @@ }, "nomic-ai__nomic-embed-text-v1": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "nomic-embed-text-v1", + "BornholmBitextMining": 48.41, + "Tatoeba (swh-eng)": 7.41, + "Tatoeba (wuu-eng)": 1.25, + "Tatoeba (ind-eng)": 5.42, + "Tatoeba (cbk-eng)": 10.21, + "Tatoeba (mon-eng)": 1.05, + "Tatoeba (ast-eng)": 12.01, + "Tatoeba (dtp-eng)": 2.55, + "Tatoeba (bre-eng)": 3.47, + "Tatoeba (kat-eng)": 0.57, + "Tatoeba (nld-eng)": 13.62, + "Tatoeba (hin-eng)": 0.14, + "Tatoeba (cym-eng)": 5.81, + "Tatoeba (ces-eng)": 3.77, + "Tatoeba (swg-eng)": 12.84, + "Tatoeba (aze-eng)": 3.13, + "Tatoeba (vie-eng)": 4.41, + "Tatoeba (tur-eng)": 3.56, + "Tatoeba (slv-eng)": 4.47, + "Tatoeba (urd-eng)": 0.0, + "Tatoeba (ben-eng)": 0.03, + "Tatoeba (gla-eng)": 1.82, + "Tatoeba (epo-eng)": 8.91, + "Tatoeba (pam-eng)": 4.62, + "Tatoeba (kur-eng)": 6.43, + "Tatoeba (rus-eng)": 0.11, + "Tatoeba (bel-eng)": 0.72, + "Tatoeba (pms-eng)": 8.55, + "Tatoeba (ell-eng)": 0.27, + "Tatoeba (gle-eng)": 3.46, + "Tatoeba (fin-eng)": 2.13, + "Tatoeba (nob-eng)": 11.05, + "Tatoeba (bos-eng)": 10.44, + "Tatoeba (tat-eng)": 0.76, + "Tatoeba (zsm-eng)": 7.62, + "Tatoeba (tam-eng)": 0.33, + "Tatoeba (ron-eng)": 8.6, + "Tatoeba (arz-eng)": 0.0, + "Tatoeba (ara-eng)": 0.07, + "Tatoeba (hun-eng)": 4.09, + "Tatoeba (tel-eng)": 0.5, + "Tatoeba (glg-eng)": 15.6, + "Tatoeba (mhr-eng)": 0.05, + "Tatoeba (dsb-eng)": 4.29, + "Tatoeba (kab-eng)": 0.73, + "Tatoeba (cha-eng)": 15.67, + "Tatoeba (yid-eng)": 0.45, + "Tatoeba (est-eng)": 2.58, + "Tatoeba (fao-eng)": 8.1, + "Tatoeba (afr-eng)": 7.44, + "Tatoeba (ile-eng)": 20.76, + "Tatoeba (yue-eng)": 1.61, + "Tatoeba (spa-eng)": 15.79, + "Tatoeba (eus-eng)": 6.37, + "Tatoeba (heb-eng)": 0.1, + "Tatoeba (slk-eng)": 4.36, + "Tatoeba (isl-eng)": 4.2, + "Tatoeba (kaz-eng)": 0.13, + "Tatoeba (ita-eng)": 16.28, + "Tatoeba (hsb-eng)": 4.34, + "Tatoeba (ina-eng)": 28.51, + "Tatoeba (khm-eng)": 0.14, + "Tatoeba (mal-eng)": 0.0, + "Tatoeba (nno-eng)": 9.33, + "Tatoeba (cat-eng)": 12.95, + "Tatoeba (dan-eng)": 10.9, + "Tatoeba (bul-eng)": 0.25, + "Tatoeba (mar-eng)": 0.12, + "Tatoeba (por-eng)": 13.64, + "Tatoeba (hrv-eng)": 6.28, + "Tatoeba (nds-eng)": 12.62, + "Tatoeba (fra-eng)": 19.4, + "Tatoeba (tgl-eng)": 3.61, + "Tatoeba (nov-eng)": 26.96, + "Tatoeba (swe-eng)": 9.4, + "Tatoeba (cor-eng)": 2.82, + "Tatoeba (ido-eng)": 13.67, + "Tatoeba (cmn-eng)": 2.32, + "Tatoeba (ceb-eng)": 3.68, + "Tatoeba (ang-eng)": 20.19, + "Tatoeba (srp-eng)": 1.71, + "Tatoeba (sqi-eng)": 6.88, + "Tatoeba (max-eng)": 9.71, + "Tatoeba (jav-eng)": 5.45, + "Tatoeba (pes-eng)": 0.01, + "Tatoeba (lvs-eng)": 3.57, + "Tatoeba (awa-eng)": 0.61, + "Tatoeba (gsw-eng)": 13.28, + "Tatoeba (oci-eng)": 9.03, + "Tatoeba (orv-eng)": 0.09, + "Tatoeba (csb-eng)": 5.85, + "Tatoeba (kor-eng)": 0.9, + "Tatoeba (lit-eng)": 2.04, + "Tatoeba (pol-eng)": 4.34, + "Tatoeba (deu-eng)": 14.13, + "Tatoeba (war-eng)": 5.65, + "Tatoeba (hye-eng)": 0.44, + "Tatoeba (xho-eng)": 1.5, + "Tatoeba (ber-eng)": 4.51, + "Tatoeba (uzb-eng)": 2.89, + "Tatoeba (arq-eng)": 0.18, + "Tatoeba (tuk-eng)": 3.91, + "Tatoeba (uig-eng)": 0.2, + "Tatoeba (mkd-eng)": 0.07, + "Tatoeba (kzj-eng)": 4.02, + "Tatoeba (tzl-eng)": 6.54, + "Tatoeba (tha-eng)": 0.73, + "Tatoeba (lfn-eng)": 10.78, + "Tatoeba (ukr-eng)": 0.48, + "Tatoeba (amh-eng)": 0.02, + "Tatoeba (fry-eng)": 21.97, + "Tatoeba (lat-eng)": 8.81, + "Tatoeba (jpn-eng)": 0.82 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "nomic-embed-text-v1", + "AllegroReviews": 24.94, + "AmazonCounterfactualClassification (en-ext)": 76.94, + "AmazonCounterfactualClassification (en)": 74.1, + "AmazonCounterfactualClassification (de)": 55.48, + "AmazonCounterfactualClassification (ja)": 57.85, + "AmazonReviewsClassification (en)": 49.47, + "AmazonReviewsClassification (de)": 27.58, + "AmazonReviewsClassification (es)": 33.09, + "AmazonReviewsClassification (fr)": 29.65, + "AmazonReviewsClassification (ja)": 23.47, + "AmazonReviewsClassification (zh)": 24.22, + "AngryTweetsClassification": 46.23, + "CBD": 51.88, + "DanishPoliticalCommentsClassification": 29.23, + "GeoreviewClassification": 28.69, + "HeadlineClassification": 31.06, + "InappropriatenessClassification": 51.92, + "KinopoiskClassification": 37.39, + "LccSentimentClassification": 38.53, + "MTOPDomainClassification (en)": 92.83, + "MTOPDomainClassification (de)": 71.23, + "MTOPDomainClassification (es)": 74.85, + "MTOPDomainClassification (fr)": 77.32, + "MTOPDomainClassification (hi)": 43.76, + "MTOPDomainClassification (th)": 15.77, + "MTOPIntentClassification (en)": 65.84, + "MTOPIntentClassification (de)": 40.67, + "MTOPIntentClassification (es)": 43.26, + "MTOPIntentClassification (fr)": 37.39, + "MTOPIntentClassification (hi)": 20.74, + "MTOPIntentClassification (th)": 4.6, + "MasakhaNEWSClassification (amh)": 35.45, + "MasakhaNEWSClassification (eng)": 79.83, + "MasakhaNEWSClassification (fra)": 79.91, + "MasakhaNEWSClassification (hau)": 65.78, + "MasakhaNEWSClassification (ibo)": 61.41, + "MasakhaNEWSClassification (lin)": 75.2, + "MasakhaNEWSClassification (lug)": 64.17, + "MasakhaNEWSClassification (orm)": 66.49, + "MasakhaNEWSClassification (pcm)": 92.39, + "MasakhaNEWSClassification (run)": 67.17, + "MasakhaNEWSClassification (sna)": 80.54, + "MasakhaNEWSClassification (som)": 57.69, + "MasakhaNEWSClassification (swa)": 59.98, + "MasakhaNEWSClassification (tir)": 23.93, + "MasakhaNEWSClassification (xho)": 67.21, + "MasakhaNEWSClassification (yor)": 70.22, + "MassiveIntentClassification (fa)": 31.13, + "MassiveIntentClassification (pl)": 38.6, + "MassiveIntentClassification (hu)": 37.7, + "MassiveIntentClassification (ms)": 36.19, + "MassiveIntentClassification (en)": 69.46, + "MassiveIntentClassification (sv)": 39.05, + "MassiveIntentClassification (cy)": 36.05, + "MassiveIntentClassification (th)": 11.62, + "MassiveIntentClassification (hy)": 12.26, + "MassiveIntentClassification (lv)": 38.53, + "MassiveIntentClassification (zh-TW)": 21.9, + "MassiveIntentClassification (fi)": 40.19, + "MassiveIntentClassification (ml)": 2.71, + "MassiveIntentClassification (ja)": 31.36, + "MassiveIntentClassification (sl)": 38.71, + "MassiveIntentClassification (mn)": 25.23, + "MassiveIntentClassification (de)": 42.62, + "MassiveIntentClassification (ru)": 29.22, + "MassiveIntentClassification (it)": 43.77, + "MassiveIntentClassification (hi)": 19.66, + "MassiveIntentClassification (az)": 38.52, + "MassiveIntentClassification (af)": 36.66, + "MassiveIntentClassification (da)": 42.12, + "MassiveIntentClassification (id)": 39.14, + "MassiveIntentClassification (ta)": 11.37, + "MassiveIntentClassification (km)": 5.47, + "MassiveIntentClassification (is)": 34.59, + "MassiveIntentClassification (sq)": 38.33, + "MassiveIntentClassification (ar)": 25.17, + "MassiveIntentClassification (tl)": 40.36, + "MassiveIntentClassification (es)": 43.09, + "MassiveIntentClassification (te)": 2.35, + "MassiveIntentClassification (tr)": 40.1, + "MassiveIntentClassification (my)": 3.75, + "MassiveIntentClassification (el)": 30.08, + "MassiveIntentClassification (nl)": 39.26, + "MassiveIntentClassification (ro)": 39.47, + "MassiveIntentClassification (sw)": 37.86, + "MassiveIntentClassification (ur)": 21.46, + "MassiveIntentClassification (he)": 25.09, + "MassiveIntentClassification (am)": 2.58, + "MassiveIntentClassification (jv)": 34.78, + "MassiveIntentClassification (kn)": 3.05, + "MassiveIntentClassification (pt)": 43.64, + "MassiveIntentClassification (zh-CN)": 23.66, + "MassiveIntentClassification (fr)": 44.42, + "MassiveIntentClassification (nb)": 39.21, + "MassiveIntentClassification (vi)": 35.8, + "MassiveIntentClassification (ka)": 11.31, + "MassiveIntentClassification (bn)": 19.27, + "MassiveIntentClassification (ko)": 20.66, + "MassiveScenarioClassification (am)": 7.4, + "MassiveScenarioClassification (sv)": 46.07, + "MassiveScenarioClassification (sl)": 43.56, + "MassiveScenarioClassification (lv)": 42.58, + "MassiveScenarioClassification (fa)": 34.37, + "MassiveScenarioClassification (kn)": 8.16, + "MassiveScenarioClassification (ur)": 28.77, + "MassiveScenarioClassification (cy)": 40.67, + "MassiveScenarioClassification (es)": 54.3, + "MassiveScenarioClassification (nl)": 49.39, + "MassiveScenarioClassification (ta)": 16.94, + "MassiveScenarioClassification (bn)": 24.28, + "MassiveScenarioClassification (jv)": 43.09, + "MassiveScenarioClassification (sq)": 47.46, + "MassiveScenarioClassification (is)": 43.54, + "MassiveScenarioClassification (el)": 39.9, + "MassiveScenarioClassification (ar)": 31.38, + "MassiveScenarioClassification (hi)": 25.2, + "MassiveScenarioClassification (en)": 74.64, + "MassiveScenarioClassification (ko)": 26.73, + "MassiveScenarioClassification (pl)": 46.63, + "MassiveScenarioClassification (zh-TW)": 31.89, + "MassiveScenarioClassification (az)": 46.1, + "MassiveScenarioClassification (he)": 28.28, + "MassiveScenarioClassification (ro)": 50.87, + "MassiveScenarioClassification (id)": 47.36, + "MassiveScenarioClassification (th)": 21.64, + "MassiveScenarioClassification (nb)": 49.28, + "MassiveScenarioClassification (mn)": 31.36, + "MassiveScenarioClassification (pt)": 54.03, + "MassiveScenarioClassification (vi)": 41.13, + "MassiveScenarioClassification (ms)": 46.51, + "MassiveScenarioClassification (fr)": 55.41, + "MassiveScenarioClassification (ka)": 17.61, + "MassiveScenarioClassification (da)": 49.98, + "MassiveScenarioClassification (hy)": 18.61, + "MassiveScenarioClassification (ml)": 7.32, + "MassiveScenarioClassification (hu)": 45.44, + "MassiveScenarioClassification (te)": 7.16, + "MassiveScenarioClassification (ru)": 33.56, + "MassiveScenarioClassification (de)": 54.59, + "MassiveScenarioClassification (ja)": 38.77, + "MassiveScenarioClassification (sw)": 44.78, + "MassiveScenarioClassification (af)": 45.97, + "MassiveScenarioClassification (it)": 53.73, + "MassiveScenarioClassification (fi)": 45.07, + "MassiveScenarioClassification (my)": 10.72, + "MassiveScenarioClassification (tl)": 50.41, + "MassiveScenarioClassification (zh-CN)": 34.02, + "MassiveScenarioClassification (tr)": 46.56, + "MassiveScenarioClassification (km)": 9.29, + "NoRecClassification": 41.05, + "NordicLangClassification": 52.66, + "PAC": 69.14, + "PolEmo2.0-IN": 41.22, + "PolEmo2.0-OUT": 22.83, + "RuReviewsClassification": 44.25, + "RuSciBenchGRNTIClassification": 19.29, + "RuSciBenchOECDClassification": 14.8 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "nomic-embed-text-v1", + "AlloProfClusteringP2P": 63.95, + "AlloProfClusteringS2S": 35.03, + "BlurbsClusteringP2P": 26.46, + "BlurbsClusteringS2S": 9.73, + "GeoreviewClusteringP2P": 22.34, + "HALClusteringS2S": 20.6, + "MLSUMClusteringP2P (de)": 42.92, + "MLSUMClusteringP2P (fr)": 44.41, + "MLSUMClusteringP2P (ru)": 24.98, + "MLSUMClusteringP2P (es)": 47.0, + "MLSUMClusteringS2S (de)": 42.96, + "MLSUMClusteringS2S (fr)": 44.16, + "MLSUMClusteringS2S (ru)": 26.26, + "MLSUMClusteringS2S (es)": 46.52, + "MasakhaNEWSClusteringP2P (amh)": 40.07, + "MasakhaNEWSClusteringP2P (eng)": 48.62, + "MasakhaNEWSClusteringP2P (fra)": 64.41, + "MasakhaNEWSClusteringP2P (hau)": 34.91, + "MasakhaNEWSClusteringP2P (ibo)": 44.15, + "MasakhaNEWSClusteringP2P (lin)": 65.38, + "MasakhaNEWSClusteringP2P (lug)": 46.46, + "MasakhaNEWSClusteringP2P (orm)": 29.2, + "MasakhaNEWSClusteringP2P (pcm)": 77.04, + "MasakhaNEWSClusteringP2P (run)": 50.6, + "MasakhaNEWSClusteringP2P (sna)": 56.66, + "MasakhaNEWSClusteringP2P (som)": 37.87, + "MasakhaNEWSClusteringP2P (swa)": 22.09, + "MasakhaNEWSClusteringP2P (tir)": 44.62, + "MasakhaNEWSClusteringP2P (xho)": 34.04, + "MasakhaNEWSClusteringP2P (yor)": 42.36, + "MasakhaNEWSClusteringS2S (amh)": 44.61, + "MasakhaNEWSClusteringS2S (eng)": 47.44, + "MasakhaNEWSClusteringS2S (fra)": 28.87, + "MasakhaNEWSClusteringS2S (hau)": 13.5, + "MasakhaNEWSClusteringS2S (ibo)": 39.01, + "MasakhaNEWSClusteringS2S (lin)": 55.75, + "MasakhaNEWSClusteringS2S (lug)": 43.28, + "MasakhaNEWSClusteringS2S (orm)": 27.05, + "MasakhaNEWSClusteringS2S (pcm)": 79.9, + "MasakhaNEWSClusteringS2S (run)": 53.17, + "MasakhaNEWSClusteringS2S (sna)": 50.67, + "MasakhaNEWSClusteringS2S (som)": 25.86, + "MasakhaNEWSClusteringS2S (swa)": 18.28, + "MasakhaNEWSClusteringS2S (tir)": 46.22, + "MasakhaNEWSClusteringS2S (xho)": 21.01, + "MasakhaNEWSClusteringS2S (yor)": 27.62, + "RuSciBenchGRNTIClusteringP2P": 16.46, + "RuSciBenchOECDClusteringP2P": 14.67, + "TenKGnadClusteringP2P": 45.55, + "TenKGnadClusteringS2S": 20.66 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "nomic-embed-text-v1", + "CDSC-E": 46.69, + "FalseFriendsGermanEnglish": 47.77, + "OpusparcusPC (de)": 91.07, + "OpusparcusPC (en)": 97.95, + "OpusparcusPC (fi)": 86.78, + "OpusparcusPC (fr)": 87.81, + "OpusparcusPC (ru)": 80.53, + "OpusparcusPC (sv)": 84.72, + "PSC": 95.9, + "PawsXPairClassification (de)": 51.49, + "PawsXPairClassification (en)": 61.67, + "PawsXPairClassification (es)": 53.23, + "PawsXPairClassification (fr)": 54.96, + "PawsXPairClassification (ja)": 49.26, + "PawsXPairClassification (ko)": 51.36, + "PawsXPairClassification (zh)": 53.29, + "SICK-E-PL": 47.37, + "TERRa": 46.32 + }, + { + "Model": "nomic-embed-text-v1", + "CDSC-E": 46.69, + "FalseFriendsGermanEnglish": 47.77, + "OpusparcusPC (de)": 91.07, + "OpusparcusPC (en)": 97.95, + "OpusparcusPC (fi)": 86.78, + "OpusparcusPC (fr)": 87.89, + "OpusparcusPC (ru)": 80.53, + "OpusparcusPC (sv)": 84.77, + "PSC": 95.96, + "PawsXPairClassification (de)": 51.51, + "PawsXPairClassification (en)": 61.68, + "PawsXPairClassification (es)": 53.23, + "PawsXPairClassification (fr)": 54.96, + "PawsXPairClassification (ja)": 49.41, + "PawsXPairClassification (ko)": 51.36, + "PawsXPairClassification (zh)": 53.32, + "SICK-E-PL": 47.38, + "TERRa": 46.32 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "nomic-embed-text-v1", + "AlloprofReranking": 62.47, + "RuBQReranking": 34.13, + "SyntecReranking": 71.41, + "T2Reranking": 61.43 + }, + { + "Model": "nomic-embed-text-v1", + "MIRACLReranking (ar)": 15.17, + "MIRACLReranking (bn)": 20.85, + "MIRACLReranking (de)": 21.85, + "MIRACLReranking (en)": 51.92, + "MIRACLReranking (es)": 32.35, + "MIRACLReranking (fa)": 18.46, + "MIRACLReranking (fi)": 35.54, + "MIRACLReranking (fr)": 18.42, + "MIRACLReranking (hi)": 8.7, + "MIRACLReranking (id)": 22.39, + "MIRACLReranking (ja)": 11.45, + "MIRACLReranking (ko)": 9.77, + "MIRACLReranking (ru)": 14.74, + "MIRACLReranking (sw)": 30.3, + "MIRACLReranking (te)": 1.51, + "MIRACLReranking (th)": 3.48, + "MIRACLReranking (yo)": 51.97, + "MIRACLReranking (zh)": 11.67 + } + ] }, "Retrieval": { "ndcg_at_10": [ { "Model": "nomic-embed-text-v1", + "AILACasedocs": 28.87, + "AILAStatutes": 16.25, + "ARCChallenge": 10.06, + "AlloprofRetrieval": 33.93, + "AlphaNLI": 29.55, + "AppsRetrieval": 5.3, + "BSARDRetrieval": 5.83, + "CmedqaRetrieval": 2.23, + "CodeFeedbackMT": 45.4, + "CodeFeedbackST": 66.32, + "CodeSearchNetCCRetrieval (python)": 56.7, + "CodeSearchNetCCRetrieval (javascript)": 57.78, + "CodeSearchNetCCRetrieval (go)": 42.45, + "CodeSearchNetCCRetrieval (ruby)": 59.09, + "CodeSearchNetCCRetrieval (java)": 54.08, + "CodeSearchNetCCRetrieval (php)": 45.3, + "CodeSearchNetRetrieval (python)": 86.99, + "CodeSearchNetRetrieval (javascript)": 70.2, + "CodeSearchNetRetrieval (go)": 91.5, + "CodeSearchNetRetrieval (ruby)": 76.44, + "CodeSearchNetRetrieval (java)": 82.0, + "CodeSearchNetRetrieval (php)": 83.86, + "CodeTransOceanContest": 49.87, + "CodeTransOceanDL": 20.66, + "CosQA": 27.23, + "CovidRetrieval": 18.66, + "GerDaLIR": 5.68, + "GerDaLIRSmall": 13.57, + "GermanQuAD-Retrieval": 81.22, + "HellaSwag": 24.75, "LEMBNarrativeQARetrieval": 41.23, "LEMBQMSumRetrieval": 36.65, "LEMBSummScreenFDRetrieval": 92.97, - "LEMBWikimQARetrieval": 73.75 + "LEMBWikimQARetrieval": 73.75, + "LeCaRDv2": 22.76, + "LegalBenchConsumerContractsQA": 71.37, + "LegalBenchCorporateLobbying": 92.73, + "LegalQuAD": 25.97, + "LegalSummarization": 62.72, + "MIRACLRetrieval (ar)": 2.38, + "MIRACLRetrieval (bn)": 6.1, + "MIRACLRetrieval (de)": 15.68, + "MIRACLRetrieval (en)": 47.1, + "MIRACLRetrieval (es)": 24.39, + "MIRACLRetrieval (fa)": 2.51, + "MIRACLRetrieval (fi)": 25.45, + "MIRACLRetrieval (fr)": 15.24, + "MIRACLRetrieval (hi)": 1.35, + "MIRACLRetrieval (id)": 17.19, + "MIRACLRetrieval (ja)": 4.75, + "MIRACLRetrieval (ko)": 4.27, + "MIRACLRetrieval (ru)": 3.73, + "MIRACLRetrieval (sw)": 30.75, + "MIRACLRetrieval (te)": 0.06, + "MIRACLRetrieval (th)": 0.62, + "MIRACLRetrieval (yo)": 57.75, + "MIRACLRetrieval (zh)": 1.08, + "MintakaRetrieval (ar)": 5.85, + "MintakaRetrieval (de)": 16.46, + "MintakaRetrieval (es)": 15.44, + "MintakaRetrieval (fr)": 17.28, + "MintakaRetrieval (hi)": 6.02, + "MintakaRetrieval (it)": 12.58, + "MintakaRetrieval (ja)": 9.73, + "MintakaRetrieval (pt)": 15.43, + "PIQA": 25.33, + "Quail": 4.61, + "RARbCode": 29.77, + "RARbMath": 36.49, + "RiaNewsRetrieval": 12.92, + "RuBQRetrieval": 6.7, + "SIQA": 2.57, + "SciFact-PL": 37.77, + "SpartQA": 6.27, + "StackOverflowQA": 70.96, + "SyntecRetrieval": 51.91, + "SyntheticText2SQL": 51.02, + "TRECCOVID-PL": 30.44, + "TempReasonL1": 1.59, + "TempReasonL2Fact": 11.43, + "TempReasonL2Pure": 1.33, + "TempReasonL3Fact": 11.89, + "TempReasonL3Pure": 8.53, + "WinoGrande": 44.56, + "XMarket (de)": 12.76, + "XMarket (en)": 31.24, + "XMarket (es)": 15.04, + "XPQARetrieval (ara-ara)": 14.14, + "XPQARetrieval (eng-ara)": 4.51, + "XPQARetrieval (ara-eng)": 9.51, + "XPQARetrieval (deu-deu)": 59.09, + "XPQARetrieval (eng-deu)": 11.22, + "XPQARetrieval (deu-eng)": 33.02, + "XPQARetrieval (spa-spa)": 44.32, + "XPQARetrieval (eng-spa)": 7.2, + "XPQARetrieval (spa-eng)": 25.13, + "XPQARetrieval (fra-fra)": 55.47, + "XPQARetrieval (eng-fra)": 12.08, + "XPQARetrieval (fra-eng)": 34.68, + "XPQARetrieval (hin-hin)": 40.12, + "XPQARetrieval (eng-hin)": 8.2, + "XPQARetrieval (hin-eng)": 8.09, + "XPQARetrieval (ita-ita)": 61.93, + "XPQARetrieval (eng-ita)": 7.39, + "XPQARetrieval (ita-eng)": 30.87, + "XPQARetrieval (jpn-jpn)": 39.23, + "XPQARetrieval (eng-jpn)": 8.36, + "XPQARetrieval (jpn-eng)": 17.16, + "XPQARetrieval (kor-kor)": 12.58, + "XPQARetrieval (eng-kor)": 9.23, + "XPQARetrieval (kor-eng)": 7.14, + "XPQARetrieval (pol-pol)": 31.17, + "XPQARetrieval (eng-pol)": 11.38, + "XPQARetrieval (pol-eng)": 16.32, + "XPQARetrieval (por-por)": 37.72, + "XPQARetrieval (eng-por)": 8.47, + "XPQARetrieval (por-eng)": 25.62, + "XPQARetrieval (tam-tam)": 7.28, + "XPQARetrieval (eng-tam)": 4.82, + "XPQARetrieval (tam-eng)": 4.31, + "XPQARetrieval (cmn-cmn)": 26.41, + "XPQARetrieval (eng-cmn)": 8.91, + "XPQARetrieval (cmn-eng)": 15.94 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "nomic-embed-text-v1", + "CDSC-R": 82.61, + "GermanSTSBenchmark": 62.7, + "RUParaPhraserSTS": 51.29, + "RuSTSBenchmarkSTS": 57.12, + "SICK-R-PL": 58.22, + "SICKFr": 65.29, + "STS22 (de-en)": 50.97, + "STS22 (fr-pl)": 73.25, + "STS22 (pl-en)": 64.12, + "STS22 (pl)": 30.72, + "STS22 (de-fr)": 50.64, + "STS22 (es-en)": 65.53, + "STS22 (es)": 65.18, + "STS22 (zh-en)": 46.19, + "STS22 (ru)": 32.23, + "STS22 (ar)": 36.31, + "STS22 (it)": 74.8, + "STS22 (es-it)": 61.2, + "STS22 (tr)": 54.48, + "STS22 (en)": 64.77, + "STS22 (zh)": 58.38, + "STS22 (de-pl)": 30.76, + "STS22 (de)": 38.66, + "STS22 (fr)": 79.58, + "STSB": 36.95, + "STSBenchmarkMultilingualSTS (it)": 64.94, + "STSBenchmarkMultilingualSTS (fr)": 65.88, + "STSBenchmarkMultilingualSTS (pl)": 61.0, + "STSBenchmarkMultilingualSTS (ru)": 56.56, + "STSBenchmarkMultilingualSTS (es)": 67.53, + "STSBenchmarkMultilingualSTS (de)": 63.17, + "STSBenchmarkMultilingualSTS (nl)": 60.97, + "STSBenchmarkMultilingualSTS (zh)": 39.96, + "STSBenchmarkMultilingualSTS (pt)": 64.39, + "STSBenchmarkMultilingualSTS (en)": 79.75 + }, + { + "Model": "nomic-embed-text-v1", + "CDSC-R": 82.61, + "GermanSTSBenchmark": 62.7, + "RUParaPhraserSTS": 51.29, + "RuSTSBenchmarkSTS": 57.12, + "SICK-R-PL": 58.22, + "SICKFr": 65.29, + "STS22 (de-en)": 50.97, + "STS22 (fr-pl)": 73.25, + "STS22 (pl-en)": 64.12, + "STS22 (pl)": 30.72, + "STS22 (de-fr)": 50.64, + "STS22 (es-en)": 65.53, + "STS22 (es)": 65.18, + "STS22 (zh-en)": 46.19, + "STS22 (ru)": 32.23, + "STS22 (ar)": 36.31, + "STS22 (it)": 74.8, + "STS22 (es-it)": 61.2, + "STS22 (tr)": 54.48, + "STS22 (en)": 64.77, + "STS22 (zh)": 58.38, + "STS22 (de-pl)": 30.76, + "STS22 (de)": 38.66, + "STS22 (fr)": 79.58, + "STSB": 36.95, + "STSBenchmarkMultilingualSTS (it)": 64.94, + "STSBenchmarkMultilingualSTS (fr)": 65.88, + "STSBenchmarkMultilingualSTS (pl)": 61.0, + "STSBenchmarkMultilingualSTS (ru)": 56.56, + "STSBenchmarkMultilingualSTS (es)": 67.53, + "STSBenchmarkMultilingualSTS (de)": 63.17, + "STSBenchmarkMultilingualSTS (nl)": 60.97, + "STSBenchmarkMultilingualSTS (zh)": 39.96, + "STSBenchmarkMultilingualSTS (pt)": 64.39, + "STSBenchmarkMultilingualSTS (en)": 79.75 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "nomic-embed-text-v1", + "SummEvalFr": 31.11 + }, + { + "Model": "nomic-embed-text-v1", + "SummEvalFr": 31.11 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "nomic-embed-text-v1", + "CEDRClassification": 34.96, + "SensitiveTopicsClassification": 17.91 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "nomic-embed-text-v1", + "Core17InstructionRetrieval": -2.11, + "News21InstructionRetrieval": 3.56, + "Robust04InstructionRetrieval": -11.19 + } + ] } }, "nomic-ai__nomic-embed-text-v1.5-128": { @@ -13758,23 +31299,291 @@ }, "openai__text-embedding-3-large": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "text-embedding-3-large", + "BornholmBitextMining": 43.85, + "Tatoeba (ron-eng)": 96.98, + "Tatoeba (slk-eng)": 95.33, + "Tatoeba (kab-eng)": 2.79, + "Tatoeba (tam-eng)": 39.2, + "Tatoeba (vie-eng)": 96.22, + "Tatoeba (uzb-eng)": 20.71, + "Tatoeba (war-eng)": 47.28, + "Tatoeba (bel-eng)": 82.25, + "Tatoeba (gla-eng)": 35.82, + "Tatoeba (swh-eng)": 65.95, + "Tatoeba (heb-eng)": 83.69, + "Tatoeba (orv-eng)": 53.49, + "Tatoeba (tgl-eng)": 95.18, + "Tatoeba (cym-eng)": 64.53, + "Tatoeba (kat-eng)": 2.84, + "Tatoeba (hrv-eng)": 96.22, + "Tatoeba (cha-eng)": 41.66, + "Tatoeba (tha-eng)": 89.4, + "Tatoeba (est-eng)": 93.91, + "Tatoeba (lvs-eng)": 74.8, + "Tatoeba (hye-eng)": 4.19, + "Tatoeba (pms-eng)": 60.53, + "Tatoeba (ast-eng)": 82.93, + "Tatoeba (nld-eng)": 97.27, + "Tatoeba (glg-eng)": 92.93, + "Tatoeba (gle-eng)": 46.47, + "Tatoeba (lfn-eng)": 66.2, + "Tatoeba (swe-eng)": 96.0, + "Tatoeba (zsm-eng)": 95.24, + "Tatoeba (ile-eng)": 78.1, + "Tatoeba (max-eng)": 54.53, + "Tatoeba (cat-eng)": 92.16, + "Tatoeba (nno-eng)": 93.82, + "Tatoeba (bos-eng)": 93.27, + "Tatoeba (cor-eng)": 7.54, + "Tatoeba (swg-eng)": 71.74, + "Tatoeba (pol-eng)": 97.73, + "Tatoeba (ara-eng)": 92.4, + "Tatoeba (ina-eng)": 94.98, + "Tatoeba (csb-eng)": 60.18, + "Tatoeba (pes-eng)": 89.77, + "Tatoeba (mar-eng)": 48.29, + "Tatoeba (khm-eng)": 1.87, + "Tatoeba (deu-eng)": 99.37, + "Tatoeba (nds-eng)": 81.35, + "Tatoeba (arz-eng)": 80.65, + "Tatoeba (gsw-eng)": 63.08, + "Tatoeba (arq-eng)": 42.73, + "Tatoeba (dan-eng)": 96.42, + "Tatoeba (epo-eng)": 88.32, + "Tatoeba (nov-eng)": 68.88, + "Tatoeba (nob-eng)": 98.07, + "Tatoeba (amh-eng)": 0.84, + "Tatoeba (xho-eng)": 13.86, + "Tatoeba (hun-eng)": 94.47, + "Tatoeba (wuu-eng)": 86.08, + "Tatoeba (hsb-eng)": 72.4, + "Tatoeba (afr-eng)": 93.93, + "Tatoeba (aze-eng)": 78.63, + "Tatoeba (spa-eng)": 98.03, + "Tatoeba (ell-eng)": 94.86, + "Tatoeba (dtp-eng)": 7.82, + "Tatoeba (ang-eng)": 77.64, + "Tatoeba (ita-eng)": 94.02, + "Tatoeba (ceb-eng)": 41.74, + "Tatoeba (kor-eng)": 88.92, + "Tatoeba (jav-eng)": 36.65, + "Tatoeba (cmn-eng)": 96.02, + "Tatoeba (tzl-eng)": 51.83, + "Tatoeba (yue-eng)": 88.84, + "Tatoeba (bre-eng)": 9.53, + "Tatoeba (rus-eng)": 93.4, + "Tatoeba (slv-eng)": 92.39, + "Tatoeba (uig-eng)": 4.24, + "Tatoeba (ukr-eng)": 93.22, + "Tatoeba (lat-eng)": 72.55, + "Tatoeba (mal-eng)": 50.26, + "Tatoeba (ber-eng)": 6.77, + "Tatoeba (bul-eng)": 93.3, + "Tatoeba (kzj-eng)": 9.08, + "Tatoeba (mhr-eng)": 11.87, + "Tatoeba (fry-eng)": 69.52, + "Tatoeba (por-eng)": 94.75, + "Tatoeba (tuk-eng)": 27.35, + "Tatoeba (fra-eng)": 96.0, + "Tatoeba (urd-eng)": 68.27, + "Tatoeba (pam-eng)": 15.43, + "Tatoeba (srp-eng)": 94.22, + "Tatoeba (lit-eng)": 87.59, + "Tatoeba (mon-eng)": 15.74, + "Tatoeba (tat-eng)": 17.64, + "Tatoeba (ind-eng)": 94.75, + "Tatoeba (ido-eng)": 74.09, + "Tatoeba (ces-eng)": 96.67, + "Tatoeba (sqi-eng)": 76.63, + "Tatoeba (kaz-eng)": 18.5, + "Tatoeba (cbk-eng)": 81.09, + "Tatoeba (isl-eng)": 89.9, + "Tatoeba (eus-eng)": 40.27, + "Tatoeba (oci-eng)": 67.46, + "Tatoeba (ben-eng)": 56.3, + "Tatoeba (hin-eng)": 91.49, + "Tatoeba (awa-eng)": 50.66, + "Tatoeba (dsb-eng)": 54.56, + "Tatoeba (kur-eng)": 24.66, + "Tatoeba (tel-eng)": 19.63, + "Tatoeba (fin-eng)": 97.83, + "Tatoeba (yid-eng)": 6.05, + "Tatoeba (fao-eng)": 74.77, + "Tatoeba (jpn-eng)": 95.78, + "Tatoeba (mkd-eng)": 88.11, + "Tatoeba (tur-eng)": 95.88 + } + ] }, "Classification": { "accuracy": [ { "Model": "text-embedding-3-large", - "AmazonCounterfactualClassification (en)": 78.93, + "AllegroReviews": 48.85, + "AmazonCounterfactualClassification (en-ext)": 80.09, + "AmazonCounterfactualClassification (en)": 78.99, + "AmazonCounterfactualClassification (de)": 71.7, + "AmazonCounterfactualClassification (ja)": 79.45, "AmazonPolarityClassification": 92.85, "AmazonReviewsClassification (en)": 48.7, + "AmazonReviewsClassification (de)": 48.18, + "AmazonReviewsClassification (es)": 45.87, + "AmazonReviewsClassification (fr)": 46.41, + "AmazonReviewsClassification (ja)": 44.0, + "AmazonReviewsClassification (zh)": 41.31, + "AngryTweetsClassification": 57.84, "Banking77Classification": 85.69, + "CBD": 74.55, + "DanishPoliticalCommentsClassification": 43.41, "EmotionClassification": 51.58, + "GeoreviewClassification": 47.06, + "HeadlineClassification": 77.19, "ImdbClassification": 87.67, + "InappropriatenessClassification": 61.5, + "KinopoiskClassification": 60.21, + "LccSentimentClassification": 58.07, "MTOPDomainClassification (en)": 95.36, + "MTOPDomainClassification (de)": 94.14, + "MTOPDomainClassification (es)": 95.07, + "MTOPDomainClassification (fr)": 93.39, + "MTOPDomainClassification (hi)": 88.28, + "MTOPDomainClassification (th)": 82.0, "MTOPIntentClassification (en)": 75.07, - "MassiveIntentClassification (en)": 74.64, + "MTOPIntentClassification (de)": 77.08, + "MTOPIntentClassification (es)": 78.24, + "MTOPIntentClassification (fr)": 73.68, + "MTOPIntentClassification (hi)": 64.64, + "MTOPIntentClassification (th)": 63.92, + "MasakhaNEWSClassification (amh)": 51.86, + "MasakhaNEWSClassification (eng)": 81.14, + "MasakhaNEWSClassification (fra)": 77.25, + "MasakhaNEWSClassification (hau)": 76.22, + "MasakhaNEWSClassification (ibo)": 65.33, + "MasakhaNEWSClassification (lin)": 78.29, + "MasakhaNEWSClassification (lug)": 70.31, + "MasakhaNEWSClassification (orm)": 77.2, + "MasakhaNEWSClassification (pcm)": 93.05, + "MasakhaNEWSClassification (run)": 78.66, + "MasakhaNEWSClassification (sna)": 85.64, + "MasakhaNEWSClassification (som)": 64.86, + "MasakhaNEWSClassification (swa)": 74.08, + "MasakhaNEWSClassification (tir)": 35.44, + "MasakhaNEWSClassification (xho)": 81.41, + "MasakhaNEWSClassification (yor)": 82.92, + "MassiveIntentClassification (nb)": 69.13, + "MassiveIntentClassification (vi)": 65.6, + "MassiveIntentClassification (sq)": 57.38, + "MassiveIntentClassification (fr)": 71.29, + "MassiveIntentClassification (ur)": 49.97, + "MassiveIntentClassification (zh-CN)": 69.44, + "MassiveIntentClassification (da)": 69.19, + "MassiveIntentClassification (pl)": 70.0, + "MassiveIntentClassification (am)": 34.46, + "MassiveIntentClassification (kn)": 43.85, + "MassiveIntentClassification (jv)": 52.81, + "MassiveIntentClassification (az)": 58.97, + "MassiveIntentClassification (ro)": 66.05, + "MassiveIntentClassification (pt)": 70.57, + "MassiveIntentClassification (el)": 67.62, + "MassiveIntentClassification (it)": 71.74, + "MassiveIntentClassification (hi)": 61.22, + "MassiveIntentClassification (te)": 41.49, + "MassiveIntentClassification (ja)": 71.12, + "MassiveIntentClassification (tl)": 61.1, + "MassiveIntentClassification (tr)": 67.96, + "MassiveIntentClassification (ar)": 62.79, + "MassiveIntentClassification (es)": 70.62, + "MassiveIntentClassification (zh-TW)": 65.52, + "MassiveIntentClassification (af)": 62.56, + "MassiveIntentClassification (ka)": 29.48, + "MassiveIntentClassification (is)": 54.4, + "MassiveIntentClassification (ko)": 63.93, + "MassiveIntentClassification (bn)": 50.02, + "MassiveIntentClassification (de)": 71.07, + "MassiveIntentClassification (mn)": 34.05, + "MassiveIntentClassification (en)": 74.63, + "MassiveIntentClassification (hu)": 67.59, + "MassiveIntentClassification (lv)": 59.58, + "MassiveIntentClassification (nl)": 71.56, + "MassiveIntentClassification (sw)": 55.94, + "MassiveIntentClassification (fa)": 64.77, + "MassiveIntentClassification (my)": 22.95, + "MassiveIntentClassification (sv)": 70.27, + "MassiveIntentClassification (sl)": 67.13, + "MassiveIntentClassification (km)": 32.55, + "MassiveIntentClassification (ms)": 65.4, + "MassiveIntentClassification (fi)": 69.9, + "MassiveIntentClassification (hy)": 33.21, + "MassiveIntentClassification (ml)": 43.95, + "MassiveIntentClassification (ru)": 70.11, + "MassiveIntentClassification (he)": 59.97, + "MassiveIntentClassification (cy)": 48.21, + "MassiveIntentClassification (th)": 57.86, + "MassiveIntentClassification (id)": 67.89, + "MassiveIntentClassification (ta)": 37.86, + "MassiveScenarioClassification (mn)": 40.58, + "MassiveScenarioClassification (hi)": 67.03, + "MassiveScenarioClassification (ar)": 69.99, + "MassiveScenarioClassification (cy)": 57.2, + "MassiveScenarioClassification (ms)": 71.7, + "MassiveScenarioClassification (it)": 76.95, + "MassiveScenarioClassification (ru)": 75.69, + "MassiveScenarioClassification (nb)": 75.27, + "MassiveScenarioClassification (zh-CN)": 76.96, + "MassiveScenarioClassification (bn)": 56.68, + "MassiveScenarioClassification (tl)": 69.52, + "MassiveScenarioClassification (kn)": 49.17, + "MassiveScenarioClassification (el)": 73.79, + "MassiveScenarioClassification (ko)": 71.49, + "MassiveScenarioClassification (da)": 75.91, + "MassiveScenarioClassification (nl)": 77.52, + "MassiveScenarioClassification (de)": 77.18, + "MassiveScenarioClassification (th)": 65.15, + "MassiveScenarioClassification (tr)": 72.26, + "MassiveScenarioClassification (jv)": 61.9, + "MassiveScenarioClassification (zh-TW)": 73.43, + "MassiveScenarioClassification (lv)": 66.96, + "MassiveScenarioClassification (ur)": 55.83, + "MassiveScenarioClassification (fr)": 76.46, + "MassiveScenarioClassification (id)": 74.18, + "MassiveScenarioClassification (sl)": 74.63, + "MassiveScenarioClassification (he)": 66.42, + "MassiveScenarioClassification (az)": 62.69, + "MassiveScenarioClassification (af)": 71.67, + "MassiveScenarioClassification (sw)": 65.09, + "MassiveScenarioClassification (te)": 48.1, + "MassiveScenarioClassification (km)": 38.68, + "MassiveScenarioClassification (hy)": 39.05, + "MassiveScenarioClassification (vi)": 70.48, + "MassiveScenarioClassification (ro)": 72.37, + "MassiveScenarioClassification (pl)": 74.98, "MassiveScenarioClassification (en)": 79.79, - "ToxicConversationsClassification": 72.92, + "MassiveScenarioClassification (sq)": 65.73, + "MassiveScenarioClassification (my)": 30.49, + "MassiveScenarioClassification (es)": 76.09, + "MassiveScenarioClassification (am)": 39.63, + "MassiveScenarioClassification (ml)": 48.25, + "MassiveScenarioClassification (is)": 64.11, + "MassiveScenarioClassification (ja)": 76.25, + "MassiveScenarioClassification (ka)": 33.8, + "MassiveScenarioClassification (sv)": 77.29, + "MassiveScenarioClassification (ta)": 43.66, + "MassiveScenarioClassification (fa)": 70.27, + "MassiveScenarioClassification (hu)": 73.14, + "MassiveScenarioClassification (fi)": 74.44, + "MassiveScenarioClassification (pt)": 74.66, + "NoRecClassification": 53.94, + "NordicLangClassification": 79.75, + "PAC": 70.33, + "PolEmo2.0-IN": 74.72, + "PolEmo2.0-OUT": 50.06, + "RuReviewsClassification": 66.11, + "RuSciBenchGRNTIClassification": 61.04, + "RuSciBenchOECDClassification": 47.07, + "ToxicConversationsClassification": 68.82, "TweetSentimentExtractionClassification": 62.22 } ] @@ -13783,22 +31592,94 @@ "v_measure": [ { "Model": "text-embedding-3-large", + "AlloProfClusteringS2S": 52.89, "ArxivClusteringP2P": 49.01, "ArxivClusteringS2S": 44.45, "BiorxivClusteringP2P": 38.03, "BiorxivClusteringS2S": 36.53, + "BlurbsClusteringP2P": 44.1, + "BlurbsClusteringS2S": 21.85, + "GeoreviewClusteringP2P": 72.6, + "HALClusteringS2S": 27.68, + "MasakhaNEWSClusteringS2S (amh)": 44.44, + "MasakhaNEWSClusteringS2S (eng)": 53.54, + "MasakhaNEWSClusteringS2S (fra)": 39.71, + "MasakhaNEWSClusteringS2S (hau)": 28.77, + "MasakhaNEWSClusteringS2S (ibo)": 56.75, + "MasakhaNEWSClusteringS2S (lin)": 55.37, + "MasakhaNEWSClusteringS2S (lug)": 47.22, + "MasakhaNEWSClusteringS2S (orm)": 37.09, + "MasakhaNEWSClusteringS2S (pcm)": 68.2, + "MasakhaNEWSClusteringS2S (run)": 53.4, + "MasakhaNEWSClusteringS2S (sna)": 61.4, + "MasakhaNEWSClusteringS2S (som)": 32.0, + "MasakhaNEWSClusteringS2S (swa)": 29.03, + "MasakhaNEWSClusteringS2S (tir)": 46.06, + "MasakhaNEWSClusteringS2S (xho)": 35.97, + "MasakhaNEWSClusteringS2S (yor)": 41.0, "MedrxivClusteringP2P": 32.7, "MedrxivClusteringS2S": 31.27, "RedditClustering": 67.84, "RedditClusteringP2P": 67.96, + "RuSciBenchGRNTIClusteringP2P": 54.17, + "RuSciBenchOECDClusteringP2P": 46.73, "StackExchangeClustering": 76.26, "StackExchangeClusteringP2P": 36.88, + "TenKGnadClusteringS2S": 37.78, "TwentyNewsgroupsClustering": 58.14 } ] }, "PairClassification": { "max_ap": [ + { + "Model": "text-embedding-3-large", + "CDSC-E": 70.46, + "FalseFriendsGermanEnglish": 56.04, + "OpusparcusPC (de)": 98.55, + "OpusparcusPC (en)": 98.79, + "OpusparcusPC (fi)": 95.88, + "OpusparcusPC (fr)": 95.64, + "OpusparcusPC (ru)": 91.16, + "OpusparcusPC (sv)": 96.74, + "PSC": 99.55, + "PawsXPairClassification (de)": 61.83, + "PawsXPairClassification (en)": 66.41, + "PawsXPairClassification (es)": 63.25, + "PawsXPairClassification (fr)": 64.9, + "PawsXPairClassification (ja)": 53.92, + "PawsXPairClassification (ko)": 53.64, + "PawsXPairClassification (zh)": 59.71, + "SICK-E-PL": 70.11, + "SprintDuplicateQuestions": 92.24, + "TERRa": 57.99, + "TwitterSemEval2015": 77.15, + "TwitterURLCorpus": 87.79 + }, + { + "Model": "text-embedding-3-large", + "CDSC-E": 70.48, + "FalseFriendsGermanEnglish": 56.04, + "OpusparcusPC (de)": 98.55, + "OpusparcusPC (en)": 98.79, + "OpusparcusPC (fi)": 95.88, + "OpusparcusPC (fr)": 95.64, + "OpusparcusPC (ru)": 91.16, + "OpusparcusPC (sv)": 96.74, + "PSC": 99.57, + "PawsXPairClassification (de)": 62.29, + "PawsXPairClassification (en)": 66.41, + "PawsXPairClassification (es)": 63.25, + "PawsXPairClassification (fr)": 64.99, + "PawsXPairClassification (ja)": 54.25, + "PawsXPairClassification (ko)": 53.88, + "PawsXPairClassification (zh)": 59.97, + "SICK-E-PL": 70.11, + "SprintDuplicateQuestions": 92.51, + "TERRa": 58.08, + "TwitterSemEval2015": 77.15, + "TwitterURLCorpus": 87.79 + }, { "Model": "text-embedding-3-large", "SprintDuplicateQuestions": 92.25, @@ -13811,10 +31692,14 @@ "map": [ { "Model": "text-embedding-3-large", + "AlloprofReranking": 80.3, "AskUbuntuDupQuestions": 65.03, "MindSmallReranking": 29.86, + "RuBQReranking": 75.19, "SciDocsRR": 86.66, - "StackOverflowDupQuestions": 55.08 + "StackOverflowDupQuestions": 55.08, + "SyntecReranking": 90.38, + "T2Reranking": 67.05 } ] }, @@ -13823,10 +31708,12 @@ { "Model": "text-embedding-3-large", "AILACasedocs": 39.0, - "AILAStatutes": 41.31, + "AILAStatutes": 41.99, "ARCChallenge": 23.98, + "AlloprofRetrieval": 60.28, "AlphaNLI": 37.27, - "ArguAna": 58.05, + "AppsRetrieval": 28.37, + "ArguAna": 57.99, "BrightRetrieval (theoremqa_questions)": 22.22, "BrightRetrieval (leetcode)": 23.65, "BrightRetrieval (earth_science)": 26.27, @@ -13841,6 +31728,25 @@ "BrightRetrieval (aops)": 8.45, "CQADupstackRetrieval": 47.54, "ClimateFEVER": 30.27, + "CmedqaRetrieval": 27.43, + "CodeFeedbackMT": 68.92, + "CodeFeedbackST": 80.42, + "CodeSearchNetCCRetrieval (python)": 76.01, + "CodeSearchNetCCRetrieval (javascript)": 77.36, + "CodeSearchNetCCRetrieval (go)": 67.19, + "CodeSearchNetCCRetrieval (ruby)": 75.43, + "CodeSearchNetCCRetrieval (java)": 77.23, + "CodeSearchNetCCRetrieval (php)": 65.83, + "CodeSearchNetRetrieval (python)": 92.36, + "CodeSearchNetRetrieval (javascript)": 83.14, + "CodeSearchNetRetrieval (go)": 96.48, + "CodeSearchNetRetrieval (ruby)": 87.96, + "CodeSearchNetRetrieval (java)": 93.18, + "CodeSearchNetRetrieval (php)": 89.89, + "CodeTransOceanContest": 84.25, + "CodeTransOceanDL": 34.23, + "CosQA": 31.0, + "CovidRetrieval": 68.43, "DBPedia": 44.76, "FEVER": 87.94, "FiQA2018": 55.0, @@ -13853,10 +31759,18 @@ "LEMBWikimQARetrieval": 54.16, "LeCaRDv2": 57.2, "LegalBenchConsumerContractsQA": 79.39, - "LegalBenchCorporateLobbying": 95.09, + "LegalBenchCorporateLobbying": 95.22, "LegalQuAD": 57.47, "LegalSummarization": 71.55, "MSMARCO": 40.24, + "MintakaRetrieval (ar)": 28.07, + "MintakaRetrieval (de)": 61.51, + "MintakaRetrieval (es)": 60.26, + "MintakaRetrieval (fr)": 62.88, + "MintakaRetrieval (hi)": 22.55, + "MintakaRetrieval (it)": 59.89, + "MintakaRetrieval (ja)": 39.29, + "MintakaRetrieval (pt)": 59.99, "NFCorpus": 42.07, "NQ": 61.27, "PIQA": 41.96, @@ -13864,18 +31778,60 @@ "QuoraRetrieval": 89.05, "RARbCode": 89.64, "RARbMath": 90.08, - "SCIDOCS": 23.11, + "RuBQRetrieval": 72.32, + "SCIDOCS": 23.07, "SIQA": 3.44, "SciFact": 77.77, - "SpartQA": 7.51, + "SciFact-PL": 71.04, + "SpartQA": 7.44, + "StackOverflowQA": 92.44, + "SyntecRetrieval": 87.36, + "SyntheticText2SQL": 68.45, "TRECCOVID": 79.56, + "TRECCOVID-PL": 62.88, "TempReasonL1": 2.13, "TempReasonL2Fact": 28.65, "TempReasonL2Pure": 10.34, "TempReasonL3Fact": 25.52, "TempReasonL3Pure": 15.28, "Touche2020": 23.35, - "WinoGrande": 29.11 + "WinoGrande": 29.11, + "XPQARetrieval (ara-ara)": 50.83, + "XPQARetrieval (eng-ara)": 43.19, + "XPQARetrieval (ara-eng)": 50.85, + "XPQARetrieval (deu-deu)": 83.97, + "XPQARetrieval (eng-deu)": 62.93, + "XPQARetrieval (deu-eng)": 82.24, + "XPQARetrieval (spa-spa)": 70.56, + "XPQARetrieval (eng-spa)": 55.51, + "XPQARetrieval (spa-eng)": 68.74, + "XPQARetrieval (fra-fra)": 76.53, + "XPQARetrieval (eng-fra)": 60.93, + "XPQARetrieval (fra-eng)": 74.02, + "XPQARetrieval (hin-hin)": 74.81, + "XPQARetrieval (eng-hin)": 29.67, + "XPQARetrieval (hin-eng)": 65.92, + "XPQARetrieval (ita-ita)": 80.95, + "XPQARetrieval (eng-ita)": 54.83, + "XPQARetrieval (ita-eng)": 79.25, + "XPQARetrieval (jpn-jpn)": 76.92, + "XPQARetrieval (eng-jpn)": 54.56, + "XPQARetrieval (jpn-eng)": 75.09, + "XPQARetrieval (kor-kor)": 37.47, + "XPQARetrieval (eng-kor)": 36.63, + "XPQARetrieval (kor-eng)": 36.82, + "XPQARetrieval (pol-pol)": 53.28, + "XPQARetrieval (eng-pol)": 40.5, + "XPQARetrieval (pol-eng)": 51.6, + "XPQARetrieval (por-por)": 54.28, + "XPQARetrieval (eng-por)": 37.79, + "XPQARetrieval (por-eng)": 53.49, + "XPQARetrieval (tam-tam)": 32.85, + "XPQARetrieval (eng-tam)": 11.39, + "XPQARetrieval (tam-eng)": 14.02, + "XPQARetrieval (cmn-cmn)": 66.56, + "XPQARetrieval (eng-cmn)": 41.22, + "XPQARetrieval (cmn-eng)": 63.92 } ], "recall_at_1": [ @@ -13906,11 +31862,59 @@ "STS17 (en-en)": 90.22, "STS22 (en)": 66.14, "STSBenchmark": 83.56 + }, + { + "Model": "text-embedding-3-large", + "CDSC-R": 92.19, + "GermanSTSBenchmark": 82.32, + "RUParaPhraserSTS": 72.97, + "RuSTSBenchmarkSTS": 80.81, + "SICK-R-PL": 72.68, + "SICKFr": 78.74, + "STSB": 78.0, + "STSBenchmarkMultilingualSTS (it)": 80.97, + "STSBenchmarkMultilingualSTS (es)": 82.99, + "STSBenchmarkMultilingualSTS (nl)": 81.41, + "STSBenchmarkMultilingualSTS (en)": 83.6, + "STSBenchmarkMultilingualSTS (fr)": 82.51, + "STSBenchmarkMultilingualSTS (pl)": 79.85, + "STSBenchmarkMultilingualSTS (pt)": 80.34, + "STSBenchmarkMultilingualSTS (de)": 82.73, + "STSBenchmarkMultilingualSTS (zh)": 79.32, + "STSBenchmarkMultilingualSTS (ru)": 80.85 + }, + { + "Model": "text-embedding-3-large", + "CDSC-R": 92.19, + "GermanSTSBenchmark": 82.32, + "RUParaPhraserSTS": 72.97, + "RuSTSBenchmarkSTS": 80.81, + "SICK-R-PL": 72.68, + "SICKFr": 78.74, + "STSB": 78.0, + "STSBenchmarkMultilingualSTS (it)": 80.97, + "STSBenchmarkMultilingualSTS (es)": 82.99, + "STSBenchmarkMultilingualSTS (nl)": 81.41, + "STSBenchmarkMultilingualSTS (en)": 83.6, + "STSBenchmarkMultilingualSTS (fr)": 82.51, + "STSBenchmarkMultilingualSTS (pl)": 79.85, + "STSBenchmarkMultilingualSTS (pt)": 80.34, + "STSBenchmarkMultilingualSTS (de)": 82.73, + "STSBenchmarkMultilingualSTS (zh)": 79.32, + "STSBenchmarkMultilingualSTS (ru)": 80.85 } ] }, "Summarization": { "cosine_spearman": [ + { + "Model": "text-embedding-3-large", + "SummEval": 30.05 + }, + { + "Model": "text-embedding-3-large", + "SummEval": 30.05 + }, { "Model": "text-embedding-3-large", "SummEval": 29.92 @@ -13918,7 +31922,13 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "text-embedding-3-large", + "CEDRClassification": 46.77, + "SensitiveTopicsClassification": 31.97 + } + ] }, "InstructionRetrieval": { "p-MRR": [ @@ -14100,22 +32110,290 @@ }, "openai__text-embedding-3-small": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "text-embedding-3-small", + "BornholmBitextMining": 40.88, + "Tatoeba (afr-eng)": 79.77, + "Tatoeba (dtp-eng)": 6.86, + "Tatoeba (tel-eng)": 0.68, + "Tatoeba (glg-eng)": 88.25, + "Tatoeba (kor-eng)": 61.61, + "Tatoeba (dsb-eng)": 38.66, + "Tatoeba (est-eng)": 67.96, + "Tatoeba (oci-eng)": 48.97, + "Tatoeba (mar-eng)": 9.51, + "Tatoeba (awa-eng)": 15.07, + "Tatoeba (zsm-eng)": 88.16, + "Tatoeba (fao-eng)": 42.44, + "Tatoeba (lat-eng)": 43.22, + "Tatoeba (tuk-eng)": 17.76, + "Tatoeba (ile-eng)": 75.59, + "Tatoeba (bul-eng)": 83.54, + "Tatoeba (heb-eng)": 44.33, + "Tatoeba (rus-eng)": 91.1, + "Tatoeba (urd-eng)": 12.54, + "Tatoeba (pes-eng)": 54.92, + "Tatoeba (pam-eng)": 8.23, + "Tatoeba (ang-eng)": 48.75, + "Tatoeba (tur-eng)": 83.92, + "Tatoeba (ara-eng)": 75.08, + "Tatoeba (kzj-eng)": 8.19, + "Tatoeba (war-eng)": 17.98, + "Tatoeba (sqi-eng)": 35.63, + "Tatoeba (ron-eng)": 90.68, + "Tatoeba (slv-eng)": 83.81, + "Tatoeba (fra-eng)": 93.75, + "Tatoeba (kat-eng)": 0.74, + "Tatoeba (ast-eng)": 78.26, + "Tatoeba (jpn-eng)": 89.22, + "Tatoeba (tam-eng)": 4.31, + "Tatoeba (yue-eng)": 71.79, + "Tatoeba (lfn-eng)": 56.5, + "Tatoeba (fry-eng)": 46.68, + "Tatoeba (nob-eng)": 95.91, + "Tatoeba (xho-eng)": 10.93, + "Tatoeba (mon-eng)": 11.71, + "Tatoeba (hin-eng)": 43.69, + "Tatoeba (orv-eng)": 28.28, + "Tatoeba (cmn-eng)": 93.33, + "Tatoeba (ell-eng)": 76.42, + "Tatoeba (cor-eng)": 4.18, + "Tatoeba (max-eng)": 43.93, + "Tatoeba (lit-eng)": 43.09, + "Tatoeba (fin-eng)": 89.08, + "Tatoeba (dan-eng)": 92.71, + "Tatoeba (uzb-eng)": 12.23, + "Tatoeba (tat-eng)": 11.31, + "Tatoeba (nld-eng)": 96.3, + "Tatoeba (hrv-eng)": 90.64, + "Tatoeba (bos-eng)": 86.53, + "Tatoeba (nds-eng)": 61.73, + "Tatoeba (kaz-eng)": 9.81, + "Tatoeba (spa-eng)": 97.04, + "Tatoeba (swe-eng)": 91.98, + "Tatoeba (ukr-eng)": 81.09, + "Tatoeba (kur-eng)": 17.64, + "Tatoeba (gla-eng)": 4.26, + "Tatoeba (ido-eng)": 62.7, + "Tatoeba (nov-eng)": 63.74, + "Tatoeba (vie-eng)": 85.38, + "Tatoeba (csb-eng)": 38.6, + "Tatoeba (ina-eng)": 90.77, + "Tatoeba (gsw-eng)": 37.93, + "Tatoeba (khm-eng)": 0.7, + "Tatoeba (lvs-eng)": 31.88, + "Tatoeba (cym-eng)": 8.58, + "Tatoeba (isl-eng)": 35.53, + "Tatoeba (ita-eng)": 92.32, + "Tatoeba (uig-eng)": 1.67, + "Tatoeba (wuu-eng)": 76.54, + "Tatoeba (yid-eng)": 1.18, + "Tatoeba (eus-eng)": 17.25, + "Tatoeba (tzl-eng)": 48.13, + "Tatoeba (bel-eng)": 47.19, + "Tatoeba (tha-eng)": 36.24, + "Tatoeba (cat-eng)": 84.23, + "Tatoeba (hye-eng)": 0.86, + "Tatoeba (gle-eng)": 4.65, + "Tatoeba (jav-eng)": 21.76, + "Tatoeba (arz-eng)": 46.12, + "Tatoeba (mkd-eng)": 65.77, + "Tatoeba (epo-eng)": 60.93, + "Tatoeba (ceb-eng)": 15.93, + "Tatoeba (por-eng)": 93.69, + "Tatoeba (pms-eng)": 47.24, + "Tatoeba (deu-eng)": 98.6, + "Tatoeba (ces-eng)": 88.56, + "Tatoeba (nno-eng)": 86.34, + "Tatoeba (ind-eng)": 88.64, + "Tatoeba (hsb-eng)": 53.23, + "Tatoeba (cha-eng)": 36.96, + "Tatoeba (cbk-eng)": 71.54, + "Tatoeba (aze-eng)": 36.72, + "Tatoeba (mal-eng)": 3.09, + "Tatoeba (arq-eng)": 17.45, + "Tatoeba (bre-eng)": 7.35, + "Tatoeba (ben-eng)": 13.3, + "Tatoeba (pol-eng)": 94.93, + "Tatoeba (mhr-eng)": 9.01, + "Tatoeba (tgl-eng)": 58.15, + "Tatoeba (srp-eng)": 85.3, + "Tatoeba (kab-eng)": 2.39, + "Tatoeba (amh-eng)": 0.01, + "Tatoeba (swg-eng)": 50.73, + "Tatoeba (hun-eng)": 81.65, + "Tatoeba (ber-eng)": 6.13, + "Tatoeba (swh-eng)": 27.61, + "Tatoeba (slk-eng)": 82.56 + } + ] }, "Classification": { "accuracy": [ { "Model": "text-embedding-3-small", + "AllegroReviews": 38.71, + "AmazonCounterfactualClassification (en-ext)": 77.29, "AmazonCounterfactualClassification (en)": 76.42, + "AmazonCounterfactualClassification (de)": 72.25, + "AmazonCounterfactualClassification (ja)": 71.86, "AmazonPolarityClassification": 90.84, "AmazonReviewsClassification (en)": 45.73, + "AmazonReviewsClassification (de)": 44.2, + "AmazonReviewsClassification (es)": 43.33, + "AmazonReviewsClassification (fr)": 43.64, + "AmazonReviewsClassification (ja)": 37.48, + "AmazonReviewsClassification (zh)": 37.42, + "AngryTweetsClassification": 55.68, "Banking77Classification": 83.01, + "CBD": 65.23, + "DanishPoliticalCommentsClassification": 39.83, "EmotionClassification": 50.63, + "GeoreviewClassification": 42.96, + "HeadlineClassification": 74.06, "ImdbClassification": 83.66, + "InappropriatenessClassification": 58.86, + "KinopoiskClassification": 55.03, + "LccSentimentClassification": 59.33, "MTOPDomainClassification (en)": 93.91, + "MTOPDomainClassification (de)": 92.77, + "MTOPDomainClassification (es)": 93.11, + "MTOPDomainClassification (fr)": 90.9, + "MTOPDomainClassification (hi)": 71.85, + "MTOPDomainClassification (th)": 70.32, "MTOPIntentClassification (en)": 70.98, + "MTOPIntentClassification (de)": 71.6, + "MTOPIntentClassification (es)": 73.4, + "MTOPIntentClassification (fr)": 67.83, + "MTOPIntentClassification (hi)": 42.3, + "MTOPIntentClassification (th)": 48.04, + "MasakhaNEWSClassification (amh)": 47.85, + "MasakhaNEWSClassification (eng)": 80.57, + "MasakhaNEWSClassification (fra)": 76.49, + "MasakhaNEWSClassification (hau)": 69.91, + "MasakhaNEWSClassification (ibo)": 60.85, + "MasakhaNEWSClassification (lin)": 76.46, + "MasakhaNEWSClassification (lug)": 66.55, + "MasakhaNEWSClassification (orm)": 70.71, + "MasakhaNEWSClassification (pcm)": 92.16, + "MasakhaNEWSClassification (run)": 75.28, + "MasakhaNEWSClassification (sna)": 83.41, + "MasakhaNEWSClassification (som)": 61.39, + "MasakhaNEWSClassification (swa)": 75.06, + "MasakhaNEWSClassification (tir)": 29.89, + "MasakhaNEWSClassification (xho)": 79.53, + "MasakhaNEWSClassification (yor)": 76.42, + "MassiveIntentClassification (cy)": 40.7, + "MassiveIntentClassification (af)": 55.03, + "MassiveIntentClassification (pt)": 67.6, + "MassiveIntentClassification (ar)": 51.72, + "MassiveIntentClassification (zh-CN)": 64.38, + "MassiveIntentClassification (sq)": 46.1, + "MassiveIntentClassification (da)": 63.51, + "MassiveIntentClassification (is)": 41.21, + "MassiveIntentClassification (ka)": 28.86, + "MassiveIntentClassification (ml)": 40.9, + "MassiveIntentClassification (ta)": 37.65, + "MassiveIntentClassification (el)": 54.11, + "MassiveIntentClassification (fa)": 52.0, + "MassiveIntentClassification (hy)": 31.93, + "MassiveIntentClassification (km)": 33.33, + "MassiveIntentClassification (fr)": 67.8, + "MassiveIntentClassification (hu)": 55.75, + "MassiveIntentClassification (ru)": 63.87, + "MassiveIntentClassification (hi)": 44.77, + "MassiveIntentClassification (pl)": 64.59, + "MassiveIntentClassification (sw)": 45.12, + "MassiveIntentClassification (jv)": 48.29, + "MassiveIntentClassification (zh-TW)": 59.91, + "MassiveIntentClassification (it)": 66.6, "MassiveIntentClassification (en)": 72.86, + "MassiveIntentClassification (am)": 31.68, + "MassiveIntentClassification (ja)": 64.14, + "MassiveIntentClassification (kn)": 37.15, + "MassiveIntentClassification (mn)": 40.05, + "MassiveIntentClassification (sv)": 64.6, + "MassiveIntentClassification (tl)": 50.93, + "MassiveIntentClassification (ms)": 57.19, + "MassiveIntentClassification (de)": 66.56, + "MassiveIntentClassification (tr)": 56.52, + "MassiveIntentClassification (nl)": 67.82, + "MassiveIntentClassification (id)": 60.52, + "MassiveIntentClassification (he)": 45.11, + "MassiveIntentClassification (ko)": 52.12, + "MassiveIntentClassification (th)": 45.14, + "MassiveIntentClassification (es)": 66.91, + "MassiveIntentClassification (te)": 37.19, + "MassiveIntentClassification (az)": 45.36, + "MassiveIntentClassification (ro)": 60.05, + "MassiveIntentClassification (nb)": 63.31, + "MassiveIntentClassification (fi)": 60.27, + "MassiveIntentClassification (sl)": 58.77, + "MassiveIntentClassification (lv)": 47.21, + "MassiveIntentClassification (bn)": 42.02, + "MassiveIntentClassification (vi)": 53.28, + "MassiveIntentClassification (ur)": 39.51, + "MassiveIntentClassification (my)": 28.57, + "MassiveScenarioClassification (nl)": 74.25, + "MassiveScenarioClassification (el)": 60.87, + "MassiveScenarioClassification (de)": 74.88, + "MassiveScenarioClassification (ar)": 58.95, + "MassiveScenarioClassification (af)": 66.43, + "MassiveScenarioClassification (km)": 39.58, + "MassiveScenarioClassification (sq)": 55.13, + "MassiveScenarioClassification (cy)": 47.54, + "MassiveScenarioClassification (ro)": 65.97, + "MassiveScenarioClassification (vi)": 61.03, + "MassiveScenarioClassification (pl)": 70.0, + "MassiveScenarioClassification (fi)": 66.29, + "MassiveScenarioClassification (pt)": 71.06, + "MassiveScenarioClassification (ja)": 71.02, + "MassiveScenarioClassification (sl)": 66.65, + "MassiveScenarioClassification (hy)": 38.01, + "MassiveScenarioClassification (sv)": 72.82, + "MassiveScenarioClassification (sw)": 54.09, + "MassiveScenarioClassification (ka)": 32.87, + "MassiveScenarioClassification (fr)": 73.3, + "MassiveScenarioClassification (zh-CN)": 73.14, + "MassiveScenarioClassification (ms)": 65.35, + "MassiveScenarioClassification (ur)": 47.76, + "MassiveScenarioClassification (da)": 71.31, + "MassiveScenarioClassification (am)": 36.06, + "MassiveScenarioClassification (nb)": 71.57, + "MassiveScenarioClassification (is)": 50.91, + "MassiveScenarioClassification (my)": 35.19, + "MassiveScenarioClassification (ru)": 69.58, + "MassiveScenarioClassification (es)": 72.19, + "MassiveScenarioClassification (th)": 54.2, + "MassiveScenarioClassification (lv)": 52.16, + "MassiveScenarioClassification (id)": 69.04, + "MassiveScenarioClassification (te)": 42.89, + "MassiveScenarioClassification (it)": 72.59, + "MassiveScenarioClassification (zh-TW)": 67.61, + "MassiveScenarioClassification (kn)": 41.2, "MassiveScenarioClassification (en)": 76.84, + "MassiveScenarioClassification (ta)": 44.43, + "MassiveScenarioClassification (ml)": 44.68, + "MassiveScenarioClassification (fa)": 57.04, + "MassiveScenarioClassification (hi)": 51.29, + "MassiveScenarioClassification (hu)": 64.06, + "MassiveScenarioClassification (mn)": 41.77, + "MassiveScenarioClassification (jv)": 55.21, + "MassiveScenarioClassification (bn)": 46.02, + "MassiveScenarioClassification (he)": 51.68, + "MassiveScenarioClassification (az)": 52.6, + "MassiveScenarioClassification (tr)": 63.72, + "MassiveScenarioClassification (tl)": 60.12, + "MassiveScenarioClassification (ko)": 57.88, + "NoRecClassification": 49.41, + "NordicLangClassification": 67.85, + "PAC": 66.83, + "PolEmo2.0-IN": 66.73, + "PolEmo2.0-OUT": 31.46, + "RuReviewsClassification": 61.07, + "RuSciBenchGRNTIClassification": 55.56, + "RuSciBenchOECDClassification": 43.35, "ToxicConversationsClassification": 71.91, "TweetSentimentExtractionClassification": 61.72 } @@ -14125,16 +32403,40 @@ "v_measure": [ { "Model": "text-embedding-3-small", + "AlloProfClusteringS2S": 51.23, "ArxivClusteringP2P": 46.57, "ArxivClusteringS2S": 39.35, "BiorxivClusteringP2P": 37.77, "BiorxivClusteringS2S": 34.68, + "BlurbsClusteringP2P": 41.0, + "BlurbsClusteringS2S": 18.51, + "GeoreviewClusteringP2P": 63.89, + "HALClusteringS2S": 27.05, + "MasakhaNEWSClusteringS2S (amh)": 46.58, + "MasakhaNEWSClusteringS2S (eng)": 47.96, + "MasakhaNEWSClusteringS2S (fra)": 54.76, + "MasakhaNEWSClusteringS2S (hau)": 27.33, + "MasakhaNEWSClusteringS2S (ibo)": 49.27, + "MasakhaNEWSClusteringS2S (lin)": 57.97, + "MasakhaNEWSClusteringS2S (lug)": 48.93, + "MasakhaNEWSClusteringS2S (orm)": 28.39, + "MasakhaNEWSClusteringS2S (pcm)": 66.55, + "MasakhaNEWSClusteringS2S (run)": 51.66, + "MasakhaNEWSClusteringS2S (sna)": 49.83, + "MasakhaNEWSClusteringS2S (som)": 30.26, + "MasakhaNEWSClusteringS2S (swa)": 20.16, + "MasakhaNEWSClusteringS2S (tir)": 44.26, + "MasakhaNEWSClusteringS2S (xho)": 32.96, + "MasakhaNEWSClusteringS2S (yor)": 43.33, "MedrxivClusteringP2P": 32.77, "MedrxivClusteringS2S": 31.85, "RedditClustering": 64.09, "RedditClusteringP2P": 65.12, + "RuSciBenchGRNTIClusteringP2P": 50.18, + "RuSciBenchOECDClusteringP2P": 44.16, "StackExchangeClustering": 72.05, "StackExchangeClusteringP2P": 34.04, + "TenKGnadClusteringS2S": 34.51, "TwentyNewsgroupsClustering": 54.81 } ] @@ -14143,14 +32445,58 @@ "max_ap": [ { "Model": "text-embedding-3-small", + "CDSC-E": 66.28, + "FalseFriendsGermanEnglish": 53.14, + "OpusparcusPC (de)": 97.13, + "OpusparcusPC (en)": 98.76, + "OpusparcusPC (fi)": 91.51, "OpusparcusPC (fr)": 94.45, - "SprintDuplicateQuestions": 94.58, - "TwitterSemEval2015": 73.33, - "TwitterURLCorpus": 87.21 + "OpusparcusPC (ru)": 86.04, + "OpusparcusPC (sv)": 93.92, + "PSC": 99.22, + "PawsXPairClassification (de)": 58.5, + "PawsXPairClassification (en)": 64.2, + "PawsXPairClassification (es)": 59.62, + "PawsXPairClassification (fr)": 61.36, + "PawsXPairClassification (ja)": 52.67, + "PawsXPairClassification (ko)": 52.15, + "PawsXPairClassification (zh)": 58.59, + "SICK-E-PL": 60.78, + "SprintDuplicateQuestions": 94.6, + "TERRa": 53.73, + "TwitterSemEval2015": 73.38, + "TwitterURLCorpus": 87.18 + }, + { + "Model": "text-embedding-3-small", + "CDSC-E": 66.28, + "FalseFriendsGermanEnglish": 53.14, + "OpusparcusPC (de)": 97.14, + "OpusparcusPC (en)": 98.76, + "OpusparcusPC (fi)": 91.51, + "OpusparcusPC (fr)": 94.45, + "OpusparcusPC (ru)": 86.04, + "OpusparcusPC (sv)": 93.92, + "PSC": 99.23, + "PawsXPairClassification (de)": 58.72, + "PawsXPairClassification (en)": 64.24, + "PawsXPairClassification (es)": 59.65, + "PawsXPairClassification (fr)": 61.37, + "PawsXPairClassification (ja)": 52.71, + "PawsXPairClassification (ko)": 52.31, + "PawsXPairClassification (zh)": 59.01, + "SICK-E-PL": 60.78, + "SprintDuplicateQuestions": 94.61, + "TERRa": 53.73, + "TwitterSemEval2015": 73.38, + "TwitterURLCorpus": 87.18 }, { "Model": "text-embedding-3-small", - "OpusparcusPC (fr)": 94.45 + "OpusparcusPC (fr)": 94.45, + "SprintDuplicateQuestions": 94.58, + "TwitterSemEval2015": 73.33, + "TwitterURLCorpus": 87.21 } ] }, @@ -14158,10 +32504,14 @@ "map": [ { "Model": "text-embedding-3-small", + "AlloprofReranking": 74.84, "AskUbuntuDupQuestions": 62.18, "MindSmallReranking": 29.93, + "RuBQReranking": 66.0, "SciDocsRR": 83.25, - "StackOverflowDupQuestions": 51.53 + "StackOverflowDupQuestions": 51.53, + "SyntecReranking": 85.45, + "T2Reranking": 65.71 } ] }, @@ -14169,17 +32519,58 @@ "ndcg_at_10": [ { "Model": "text-embedding-3-small", + "AILACasedocs": 34.9, + "AILAStatutes": 28.21, "ARCChallenge": 14.63, + "AlloprofRetrieval": 52.09, "AlphaNLI": 30.61, + "AppsRetrieval": 17.01, "ArguAna": 55.49, "CQADupstackRetrieval": 42.58, "ClimateFEVER": 26.86, + "CmedqaRetrieval": 21.84, + "CodeFeedbackMT": 60.48, + "CodeFeedbackST": 73.89, + "CodeSearchNetCCRetrieval (python)": 59.4, + "CodeSearchNetCCRetrieval (javascript)": 67.6, + "CodeSearchNetCCRetrieval (go)": 49.21, + "CodeSearchNetCCRetrieval (ruby)": 65.89, + "CodeSearchNetCCRetrieval (java)": 68.63, + "CodeSearchNetCCRetrieval (php)": 54.06, + "CodeSearchNetRetrieval (python)": 88.52, + "CodeSearchNetRetrieval (javascript)": 78.92, + "CodeSearchNetRetrieval (go)": 95.24, + "CodeSearchNetRetrieval (ruby)": 83.76, + "CodeSearchNetRetrieval (java)": 90.71, + "CodeSearchNetRetrieval (php)": 87.39, + "CodeTransOceanContest": 72.05, + "CodeTransOceanDL": 27.39, + "CosQA": 28.93, + "CovidRetrieval": 61.07, "DBPedia": 39.97, "FEVER": 79.42, "FiQA2018": 44.91, + "GerDaLIRSmall": 34.74, "HellaSwag": 30.94, "HotpotQA": 63.63, + "LEMBNarrativeQARetrieval": 47.23, + "LEMBQMSumRetrieval": 38.15, + "LEMBSummScreenFDRetrieval": 92.12, + "LEMBWikimQARetrieval": 68.55, + "LeCaRDv2": 55.0, + "LegalBenchConsumerContractsQA": 79.15, + "LegalBenchCorporateLobbying": 93.9, + "LegalQuAD": 54.7, + "LegalSummarization": 69.34, "MSMARCO": 37.02, + "MintakaRetrieval (ar)": 16.78, + "MintakaRetrieval (de)": 42.76, + "MintakaRetrieval (es)": 41.79, + "MintakaRetrieval (fr)": 41.69, + "MintakaRetrieval (hi)": 14.78, + "MintakaRetrieval (it)": 41.84, + "MintakaRetrieval (ja)": 21.16, + "MintakaRetrieval (pt)": 43.9, "NFCorpus": 38.33, "NQ": 52.86, "PIQA": 33.69, @@ -14187,18 +32578,60 @@ "QuoraRetrieval": 88.83, "RARbCode": 72.03, "RARbMath": 71.07, + "RuBQRetrieval": 60.1, "SCIDOCS": 20.8, "SIQA": 3.03, "SciFact": 73.37, + "SciFact-PL": 59.37, "SpartQA": 6.63, + "StackOverflowQA": 88.67, + "SyntecRetrieval": 86.42, + "SyntheticText2SQL": 58.33, "TRECCOVID": 77.9, + "TRECCOVID-PL": 65.36, "TempReasonL1": 2.35, "TempReasonL2Fact": 25.68, "TempReasonL2Pure": 2.76, "TempReasonL3Fact": 22.09, "TempReasonL3Pure": 9.79, "Touche2020": 24.28, - "WinoGrande": 31.53 + "WinoGrande": 31.53, + "XPQARetrieval (ara-ara)": 42.5, + "XPQARetrieval (eng-ara)": 24.35, + "XPQARetrieval (ara-eng)": 35.1, + "XPQARetrieval (deu-deu)": 81.28, + "XPQARetrieval (eng-deu)": 55.73, + "XPQARetrieval (deu-eng)": 77.68, + "XPQARetrieval (spa-spa)": 66.82, + "XPQARetrieval (eng-spa)": 51.75, + "XPQARetrieval (spa-eng)": 65.02, + "XPQARetrieval (fra-fra)": 74.87, + "XPQARetrieval (eng-fra)": 55.75, + "XPQARetrieval (fra-eng)": 70.01, + "XPQARetrieval (hin-hin)": 66.94, + "XPQARetrieval (eng-hin)": 16.95, + "XPQARetrieval (hin-eng)": 26.17, + "XPQARetrieval (ita-ita)": 76.15, + "XPQARetrieval (eng-ita)": 48.92, + "XPQARetrieval (ita-eng)": 71.54, + "XPQARetrieval (jpn-jpn)": 70.79, + "XPQARetrieval (eng-jpn)": 43.54, + "XPQARetrieval (jpn-eng)": 66.14, + "XPQARetrieval (kor-kor)": 32.07, + "XPQARetrieval (eng-kor)": 25.15, + "XPQARetrieval (kor-eng)": 22.43, + "XPQARetrieval (pol-pol)": 47.44, + "XPQARetrieval (eng-pol)": 32.81, + "XPQARetrieval (pol-eng)": 43.7, + "XPQARetrieval (por-por)": 52.24, + "XPQARetrieval (eng-por)": 35.7, + "XPQARetrieval (por-eng)": 51.92, + "XPQARetrieval (tam-tam)": 32.97, + "XPQARetrieval (eng-tam)": 6.47, + "XPQARetrieval (tam-eng)": 4.98, + "XPQARetrieval (cmn-cmn)": 62.98, + "XPQARetrieval (eng-cmn)": 34.62, + "XPQARetrieval (cmn-eng)": 56.64 } ] }, @@ -14216,11 +32649,59 @@ "STS17 (en-en)": 90.94, "STS22 (en)": 64.96, "STSBenchmark": 84.24 + }, + { + "Model": "text-embedding-3-small", + "CDSC-R": 89.48, + "GermanSTSBenchmark": 80.32, + "RUParaPhraserSTS": 65.18, + "RuSTSBenchmarkSTS": 74.52, + "SICK-R-PL": 64.77, + "SICKFr": 75.69, + "STSB": 75.24, + "STSBenchmarkMultilingualSTS (fr)": 81.11, + "STSBenchmarkMultilingualSTS (es)": 81.59, + "STSBenchmarkMultilingualSTS (it)": 79.0, + "STSBenchmarkMultilingualSTS (nl)": 79.4, + "STSBenchmarkMultilingualSTS (pt)": 80.64, + "STSBenchmarkMultilingualSTS (ru)": 74.58, + "STSBenchmarkMultilingualSTS (en)": 84.22, + "STSBenchmarkMultilingualSTS (zh)": 76.55, + "STSBenchmarkMultilingualSTS (de)": 80.57, + "STSBenchmarkMultilingualSTS (pl)": 74.06 + }, + { + "Model": "text-embedding-3-small", + "CDSC-R": 89.48, + "GermanSTSBenchmark": 80.32, + "RUParaPhraserSTS": 65.18, + "RuSTSBenchmarkSTS": 74.52, + "SICK-R-PL": 64.77, + "SICKFr": 75.69, + "STSB": 75.24, + "STSBenchmarkMultilingualSTS (fr)": 81.11, + "STSBenchmarkMultilingualSTS (es)": 81.59, + "STSBenchmarkMultilingualSTS (it)": 79.0, + "STSBenchmarkMultilingualSTS (nl)": 79.4, + "STSBenchmarkMultilingualSTS (pt)": 80.64, + "STSBenchmarkMultilingualSTS (ru)": 74.58, + "STSBenchmarkMultilingualSTS (en)": 84.22, + "STSBenchmarkMultilingualSTS (zh)": 76.55, + "STSBenchmarkMultilingualSTS (de)": 80.57, + "STSBenchmarkMultilingualSTS (pl)": 74.06 } ] }, "Summarization": { "cosine_spearman": [ + { + "Model": "text-embedding-3-small", + "SummEval": 31.18 + }, + { + "Model": "text-embedding-3-small", + "SummEval": 31.18 + }, { "Model": "text-embedding-3-small", "SummEval": 31.12 @@ -14228,10 +32709,23 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "text-embedding-3-small", + "CEDRClassification": 39.38, + "SensitiveTopicsClassification": 27.2 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "text-embedding-3-small", + "Core17InstructionRetrieval": 1.15, + "News21InstructionRetrieval": -1.81, + "Robust04InstructionRetrieval": -6.49 + } + ] } }, "openai__text-embedding-3-small-instruct": { @@ -15847,11 +34341,19 @@ "ArxivClusteringS2S": 22.05, "BiorxivClusteringP2P": 29.84, "BiorxivClusteringS2S": 20.57, + "BlurbsClusteringP2P": 36.52, + "BlurbsClusteringS2S": 14.27, "GeoreviewClusteringP2P": 52.19, "HALClusteringS2S": 20.62, "MLSUMClusteringP2P (ru)": 39.45, + "MLSUMClusteringP2P (de)": 37.06, + "MLSUMClusteringP2P (fr)": 42.09, + "MLSUMClusteringP2P (es)": 43.19, "MLSUMClusteringP2P": 42.09, "MLSUMClusteringS2S (ru)": 35.77, + "MLSUMClusteringS2S (de)": 38.85, + "MLSUMClusteringS2S (fr)": 41.69, + "MLSUMClusteringS2S (es)": 42.85, "MLSUMClusteringS2S": 34.84, "MasakhaNEWSClusteringP2P (amh)": 67.78, "MasakhaNEWSClusteringP2P (eng)": 48.16, @@ -15893,6 +34395,8 @@ "RuSciBenchOECDClusteringP2P": 41.97, "StackExchangeClustering": 35.43, "StackExchangeClusteringP2P": 28.83, + "TenKGnadClusteringP2P": 40.64, + "TenKGnadClusteringS2S": 21.67, "TwentyNewsgroupsClustering": 23.28 } ] @@ -15902,6 +34406,7 @@ { "Model": "LaBSE", "CDSC-E": 68.92, + "FalseFriendsGermanEnglish": 51.15, "OpusparcusPC (de)": 96.58, "OpusparcusPC (en)": 98.12, "OpusparcusPC (fi)": 94.44, @@ -15925,6 +34430,7 @@ { "Model": "LaBSE", "CDSC-E": 68.92, + "FalseFriendsGermanEnglish": 51.28, "OpusparcusPC (de)": 96.58, "OpusparcusPC (en)": 98.12, "OpusparcusPC (fi)": 94.44, @@ -15973,6 +34479,27 @@ "StackOverflowDupQuestions": 42.42, "SyntecReranking": 73.28, "T2Reranking": 63.29 + }, + { + "Model": "LaBSE", + "MIRACLReranking (ar)": 30.84, + "MIRACLReranking (bn)": 39.77, + "MIRACLReranking (de)": 24.42, + "MIRACLReranking (en)": 24.96, + "MIRACLReranking (es)": 28.37, + "MIRACLReranking (fa)": 29.05, + "MIRACLReranking (fi)": 48.16, + "MIRACLReranking (fr)": 21.36, + "MIRACLReranking (hi)": 32.92, + "MIRACLReranking (id)": 22.95, + "MIRACLReranking (ja)": 29.55, + "MIRACLReranking (ko)": 30.38, + "MIRACLReranking (ru)": 28.92, + "MIRACLReranking (sw)": 33.09, + "MIRACLReranking (te)": 33.39, + "MIRACLReranking (th)": 38.0, + "MIRACLReranking (yo)": 47.58, + "MIRACLReranking (zh)": 20.23 } ] }, @@ -16017,7 +34544,10 @@ "FEVER": 12.18, "FiQA-PL": 7.63, "FiQA2018": 7.0, + "GerDaLIR": 2.02, "GerDaLIRSmall": 4.59, + "GermanDPR": 60.82, + "GermanQuAD-Retrieval": 78.87, "HellaSwag": 5.59, "HotpotQA": 18.75, "HotpotQA-PL": 19.72, @@ -16030,6 +34560,24 @@ "LegalBenchCorporateLobbying": 69.39, "LegalQuAD": 16.64, "LegalSummarization": 53.89, + "MIRACLRetrieval (ar)": 8.82, + "MIRACLRetrieval (bn)": 19.85, + "MIRACLRetrieval (de)": 7.84, + "MIRACLRetrieval (en)": 5.0, + "MIRACLRetrieval (es)": 8.19, + "MIRACLRetrieval (fa)": 10.53, + "MIRACLRetrieval (fi)": 28.1, + "MIRACLRetrieval (fr)": 7.93, + "MIRACLRetrieval (hi)": 13.97, + "MIRACLRetrieval (id)": 8.2, + "MIRACLRetrieval (ja)": 7.93, + "MIRACLRetrieval (ko)": 17.25, + "MIRACLRetrieval (ru)": 10.86, + "MIRACLRetrieval (sw)": 23.07, + "MIRACLRetrieval (te)": 12.79, + "MIRACLRetrieval (th)": 17.29, + "MIRACLRetrieval (yo)": 30.69, + "MIRACLRetrieval (zh)": 4.95, "MMarcoRetrieval": 34.78, "MSMARCO": 7.6, "MSMARCO-PL": 7.22, @@ -16074,6 +34622,9 @@ "Touche2020": 4.88, "VideoRetrieval": 22.04, "WinoGrande": 54.3, + "XMarket (de)": 4.27, + "XMarket (en)": 2.95, + "XMarket (es)": 6.44, "XPQARetrieval (ara-ara)": 35.19, "XPQARetrieval (eng-ara)": 20.64, "XPQARetrieval (ara-eng)": 32.47, @@ -16123,6 +34674,7 @@ "BIOSSES": 78.7, "BQ": 42.6, "CDSC-R": 85.53, + "GermanSTSBenchmark": 71.91, "LCQMC": 52.19, "PAWSX": 10.23, "RUParaPhraserSTS": 65.74, @@ -16569,9 +35121,19 @@ "ArxivClusteringS2S": 37.5, "BiorxivClusteringP2P": 36.99, "BiorxivClusteringS2S": 33.21, + "BlurbsClusteringP2P": 17.44, + "BlurbsClusteringS2S": 10.09, "GeoreviewClusteringP2P": 20.76, "HALClusteringS2S": 19.58, + "MLSUMClusteringP2P (de)": 20.86, + "MLSUMClusteringP2P (fr)": 34.35, + "MLSUMClusteringP2P (ru)": 22.69, + "MLSUMClusteringP2P (es)": 33.17, "MLSUMClusteringP2P": 34.35, + "MLSUMClusteringS2S (de)": 20.69, + "MLSUMClusteringS2S (fr)": 32.64, + "MLSUMClusteringS2S (ru)": 20.82, + "MLSUMClusteringS2S (es)": 32.28, "MLSUMClusteringS2S": 29.3, "MasakhaNEWSClusteringP2P (amh)": 40.5, "MasakhaNEWSClusteringP2P (eng)": 55.86, @@ -16613,6 +35175,8 @@ "RuSciBenchOECDClusteringP2P": 10.19, "StackExchangeClustering": 53.05, "StackExchangeClusteringP2P": 33.13, + "TenKGnadClusteringP2P": 30.45, + "TenKGnadClusteringS2S": 15.81, "TwentyNewsgroupsClustering": 47.47 } ] @@ -16622,6 +35186,7 @@ { "Model": "all-MiniLM-L12-v2", "CDSC-E": 49.04, + "FalseFriendsGermanEnglish": 47.96, "OpusparcusPC (de)": 91.2, "OpusparcusPC (en)": 97.41, "OpusparcusPC (fi)": 85.99, @@ -16645,6 +35210,7 @@ { "Model": "all-MiniLM-L12-v2", "CDSC-E": 49.04, + "FalseFriendsGermanEnglish": 48.02, "OpusparcusPC (de)": 91.2, "OpusparcusPC (en)": 97.41, "OpusparcusPC (fi)": 85.99, @@ -16680,6 +35246,27 @@ "StackOverflowDupQuestions": 51.47, "SyntecReranking": 69.17, "T2Reranking": 60.32 + }, + { + "Model": "all-MiniLM-L12-v2", + "MIRACLReranking (ar)": 7.49, + "MIRACLReranking (bn)": 9.23, + "MIRACLReranking (de)": 23.33, + "MIRACLReranking (en)": 42.01, + "MIRACLReranking (es)": 28.07, + "MIRACLReranking (fa)": 7.11, + "MIRACLReranking (fi)": 37.13, + "MIRACLReranking (fr)": 24.1, + "MIRACLReranking (hi)": 7.8, + "MIRACLReranking (id)": 21.86, + "MIRACLReranking (ja)": 10.64, + "MIRACLReranking (ko)": 8.82, + "MIRACLReranking (ru)": 18.42, + "MIRACLReranking (sw)": 28.34, + "MIRACLReranking (te)": 2.28, + "MIRACLReranking (th)": 4.22, + "MIRACLReranking (yo)": 50.47, + "MIRACLReranking (zh)": 6.96 } ] }, @@ -16723,7 +35310,10 @@ "FEVER": 55.9, "FiQA-PL": 5.82, "FiQA2018": 37.27, + "GerDaLIR": 0.55, "GerDaLIRSmall": 1.35, + "GermanDPR": 50.27, + "GermanQuAD-Retrieval": 71.43, "HellaSwag": 24.08, "HotpotQA": 44.59, "LEMBNarrativeQARetrieval": 19.64, @@ -16735,6 +35325,24 @@ "LegalBenchCorporateLobbying": 88.69, "LegalQuAD": 7.44, "LegalSummarization": 57.43, + "MIRACLRetrieval (ar)": 0.01, + "MIRACLRetrieval (bn)": 0.06, + "MIRACLRetrieval (de)": 10.85, + "MIRACLRetrieval (en)": 26.85, + "MIRACLRetrieval (es)": 11.15, + "MIRACLRetrieval (fa)": 0.05, + "MIRACLRetrieval (fi)": 18.24, + "MIRACLRetrieval (fr)": 10.89, + "MIRACLRetrieval (hi)": 0.21, + "MIRACLRetrieval (id)": 10.12, + "MIRACLRetrieval (ja)": 0.73, + "MIRACLRetrieval (ko)": 4.57, + "MIRACLRetrieval (ru)": 3.04, + "MIRACLRetrieval (sw)": 19.38, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.32, + "MIRACLRetrieval (yo)": 47.02, + "MIRACLRetrieval (zh)": 0.05, "MMarcoRetrieval": 7.46, "MSMARCO": 39.03, "MedicalRetrieval": 2.3, @@ -16754,6 +35362,7 @@ "QuoraRetrieval": 87.75, "RARbCode": 42.44, "RARbMath": 66.36, + "RiaNewsRetrieval": 15.65, "RuBQRetrieval": 8.84, "SCIDOCS": 21.82, "SCIDOCS-PL": 5.34, @@ -16775,6 +35384,9 @@ "Touche2020": 17.22, "VideoRetrieval": 9.38, "WinoGrande": 27.2, + "XMarket (de)": 10.4, + "XMarket (en)": 26.67, + "XMarket (es)": 11.29, "XPQARetrieval (ara-ara)": 7.83, "XPQARetrieval (eng-ara)": 2.55, "XPQARetrieval (ara-eng)": 8.88, @@ -16824,6 +35436,7 @@ "BIOSSES": 83.57, "BQ": 23.31, "CDSC-R": 82.5, + "GermanSTSBenchmark": 62.88, "LCQMC": 21.04, "PAWSX": 7.31, "RUParaPhraserSTS": 45.47, @@ -17253,16 +35866,24 @@ "Model": "all-MiniLM-L6-v2", "AlloProfClusteringP2P": 51.83, "AlloProfClusteringS2S": 32.07, - "ArxivClusteringP2P": 46.55, - "ArxivClusteringS2S": 37.86, + "ArxivClusteringP2P": 46.47, + "ArxivClusteringS2S": 37.67, "BiorxivClusteringP2P": 38.37, "BiorxivClusteringS2S": 32.88, + "BlurbsClusteringP2P": 19.94, + "BlurbsClusteringS2S": 9.38, "GeoreviewClusteringP2P": 20.25, "HALClusteringS2S": 18.84, "MLSUMClusteringP2P": 36.74, "MLSUMClusteringP2P (ru)": 23.91, + "MLSUMClusteringP2P (de)": 26.72, + "MLSUMClusteringP2P (fr)": 36.74, + "MLSUMClusteringP2P (es)": 35.55, "MLSUMClusteringS2S": 28.12, "MLSUMClusteringS2S (ru)": 19.07, + "MLSUMClusteringS2S (de)": 26.44, + "MLSUMClusteringS2S (fr)": 35.66, + "MLSUMClusteringS2S (es)": 34.81, "MasakhaNEWSClusteringP2P (fra)": 34.92, "MasakhaNEWSClusteringP2P (amh)": 43.85, "MasakhaNEWSClusteringP2P (eng)": 48.88, @@ -17303,6 +35924,8 @@ "RuSciBenchOECDClusteringP2P": 9.44, "StackExchangeClustering": 53.14, "StackExchangeClusteringP2P": 34.26, + "TenKGnadClusteringP2P": 30.3, + "TenKGnadClusteringS2S": 13.32, "TwentyNewsgroupsClustering": 46.49 } ] @@ -17312,6 +35935,7 @@ { "Model": "all-MiniLM-L6-v2", "CDSC-E": 47.27, + "FalseFriendsGermanEnglish": 47.96, "OpusparcusPC (de)": 89.91, "OpusparcusPC (en)": 97.46, "OpusparcusPC (fi)": 85.44, @@ -17335,6 +35959,7 @@ { "Model": "all-MiniLM-L6-v2", "CDSC-E": 47.27, + "FalseFriendsGermanEnglish": 47.97, "OpusparcusPC (fr)": 86.53, "OpusparcusPC (de)": 89.91, "OpusparcusPC (en)": 97.46, @@ -17378,6 +36003,27 @@ "StackOverflowDupQuestions": 50.77, "SyntecReranking": 67.31, "T2Reranking": 56.26 + }, + { + "Model": "all-MiniLM-L6-v2", + "MIRACLReranking (ar)": 3.26, + "MIRACLReranking (bn)": 2.55, + "MIRACLReranking (de)": 16.26, + "MIRACLReranking (en)": 44.48, + "MIRACLReranking (es)": 21.34, + "MIRACLReranking (fa)": 4.63, + "MIRACLReranking (fi)": 30.04, + "MIRACLReranking (fr)": 19.04, + "MIRACLReranking (hi)": 6.02, + "MIRACLReranking (id)": 18.33, + "MIRACLReranking (ja)": 5.01, + "MIRACLReranking (ko)": 8.18, + "MIRACLReranking (ru)": 4.67, + "MIRACLReranking (sw)": 27.93, + "MIRACLReranking (te)": 2.94, + "MIRACLReranking (th)": 3.62, + "MIRACLReranking (yo)": 46.62, + "MIRACLReranking (zh)": 6.08 } ] }, @@ -17421,7 +36067,10 @@ "FEVER": 51.93, "FiQA-PL": 2.29, "FiQA2018": 36.87, + "GerDaLIR": 1.07, "GerDaLIRSmall": 2.41, + "GermanDPR": 47.83, + "GermanQuAD-Retrieval": 68.82, "HellaSwag": 24.21, "HotpotQA": 46.51, "LEMBNarrativeQARetrieval": 18.27, @@ -17434,6 +36083,23 @@ "LegalQuAD": 11.81, "LegalSummarization": 59.0, "MIRACLRetrieval (ru)": 0.39, + "MIRACLRetrieval (ar)": 0.01, + "MIRACLRetrieval (bn)": 0.05, + "MIRACLRetrieval (de)": 5.91, + "MIRACLRetrieval (en)": 29.26, + "MIRACLRetrieval (es)": 7.0, + "MIRACLRetrieval (fa)": 0.13, + "MIRACLRetrieval (fi)": 12.45, + "MIRACLRetrieval (fr)": 6.94, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 7.16, + "MIRACLRetrieval (ja)": 0.35, + "MIRACLRetrieval (ko)": 1.35, + "MIRACLRetrieval (sw)": 17.11, + "MIRACLRetrieval (te)": 0.04, + "MIRACLRetrieval (th)": 0.33, + "MIRACLRetrieval (yo)": 38.05, + "MIRACLRetrieval (zh)": 0.0, "MMarcoRetrieval": 6.21, "MSMARCO": 36.54, "MedicalRetrieval": 1.76, @@ -17450,7 +36116,7 @@ "NQ": 43.87, "PIQA": 25.28, "Quail": 3.92, - "QuoraRetrieval": 87.56, + "QuoraRetrieval": 87.55, "RARbCode": 44.27, "RARbMath": 68.19, "RiaNewsRetrieval": 0.67, @@ -17475,6 +36141,9 @@ "Touche2020": 16.9, "VideoRetrieval": 9.79, "WinoGrande": 47.35, + "XMarket (de)": 9.3, + "XMarket (en)": 24.92, + "XMarket (es)": 10.11, "XPQARetrieval (fr)": 51.79, "XPQARetrieval (ara-ara)": 8.05, "XPQARetrieval (eng-ara)": 1.9, @@ -17524,6 +36193,7 @@ "BIOSSES": 81.64, "BQ": 23.84, "CDSC-R": 79.45, + "GermanSTSBenchmark": 60.56, "LCQMC": 23.85, "PAWSX": 7.21, "RUParaPhraserSTS": 43.93, @@ -18013,11 +36683,24 @@ "v_measure": [ { "Model": "all-mpnet-base-v2", + "AlloProfClusteringP2P": 56.41, + "AlloProfClusteringS2S": 36.59, "ArxivClusteringP2P": 48.38, "ArxivClusteringS2S": 39.72, "BiorxivClusteringP2P": 39.62, "BiorxivClusteringS2S": 35.02, + "BlurbsClusteringP2P": 25.03, + "BlurbsClusteringS2S": 10.62, "GeoreviewClusteringP2P": 20.33, + "HALClusteringS2S": 21.21, + "MLSUMClusteringP2P (de)": 36.78, + "MLSUMClusteringP2P (fr)": 40.62, + "MLSUMClusteringP2P (ru)": 22.35, + "MLSUMClusteringP2P (es)": 39.83, + "MLSUMClusteringS2S (de)": 36.21, + "MLSUMClusteringS2S (fr)": 39.91, + "MLSUMClusteringS2S (ru)": 22.55, + "MLSUMClusteringS2S (es)": 39.32, "MasakhaNEWSClusteringP2P (amh)": 42.49, "MasakhaNEWSClusteringP2P (eng)": 67.24, "MasakhaNEWSClusteringP2P (fra)": 61.99, @@ -18058,6 +36741,8 @@ "RuSciBenchOECDClusteringP2P": 12.49, "StackExchangeClustering": 53.8, "StackExchangeClusteringP2P": 34.28, + "TenKGnadClusteringP2P": 39.41, + "TenKGnadClusteringS2S": 19.98, "TwentyNewsgroupsClustering": 49.74 } ] @@ -18067,6 +36752,7 @@ { "Model": "all-mpnet-base-v2", "CDSC-E": 45.37, + "FalseFriendsGermanEnglish": 47.44, "OpusparcusPC (de)": 89.78, "OpusparcusPC (en)": 97.75, "OpusparcusPC (fi)": 85.82, @@ -18090,6 +36776,7 @@ { "Model": "all-mpnet-base-v2", "CDSC-E": 45.37, + "FalseFriendsGermanEnglish": 47.48, "OpusparcusPC (de)": 89.78, "OpusparcusPC (en)": 97.75, "OpusparcusPC (fi)": 85.82, @@ -18125,6 +36812,27 @@ "StackOverflowDupQuestions": 51.98, "SyntecReranking": 66.12, "T2Reranking": 58.3 + }, + { + "Model": "all-mpnet-base-v2", + "MIRACLReranking (ar)": 2.97, + "MIRACLReranking (bn)": 5.28, + "MIRACLReranking (de)": 23.43, + "MIRACLReranking (en)": 43.05, + "MIRACLReranking (es)": 26.46, + "MIRACLReranking (fa)": 4.86, + "MIRACLReranking (fi)": 25.32, + "MIRACLReranking (fr)": 22.26, + "MIRACLReranking (hi)": 6.99, + "MIRACLReranking (id)": 16.1, + "MIRACLReranking (ja)": 7.01, + "MIRACLReranking (ko)": 5.94, + "MIRACLReranking (ru)": 9.85, + "MIRACLReranking (sw)": 23.06, + "MIRACLReranking (te)": 2.53, + "MIRACLReranking (th)": 3.55, + "MIRACLReranking (yo)": 45.07, + "MIRACLReranking (zh)": 7.23 } ] }, @@ -18180,7 +36888,10 @@ "FEVER": 50.86, "FiQA-PL": 3.6, "FiQA2018": 49.96, + "GerDaLIR": 1.53, "GerDaLIRSmall": 3.78, + "GermanDPR": 56.96, + "GermanQuAD-Retrieval": 75.63, "HellaSwag": 26.27, "HotpotQA": 39.29, "LEMBNarrativeQARetrieval": 19.34, @@ -18192,6 +36903,24 @@ "LegalBenchCorporateLobbying": 89.04, "LegalQuAD": 10.67, "LegalSummarization": 58.55, + "MIRACLRetrieval (ar)": 0.01, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 9.69, + "MIRACLRetrieval (en)": 25.2, + "MIRACLRetrieval (es)": 7.45, + "MIRACLRetrieval (fa)": 0.04, + "MIRACLRetrieval (fi)": 10.14, + "MIRACLRetrieval (fr)": 7.92, + "MIRACLRetrieval (hi)": 0.14, + "MIRACLRetrieval (id)": 5.78, + "MIRACLRetrieval (ja)": 0.82, + "MIRACLRetrieval (ko)": 3.15, + "MIRACLRetrieval (ru)": 0.77, + "MIRACLRetrieval (sw)": 13.53, + "MIRACLRetrieval (te)": 0.04, + "MIRACLRetrieval (th)": 0.2, + "MIRACLRetrieval (yo)": 35.67, + "MIRACLRetrieval (zh)": 0.0, "MMarcoRetrieval": 7.13, "MSMARCO": 39.75, "MedicalRetrieval": 1.71, @@ -18211,6 +36940,7 @@ "QuoraRetrieval": 87.46, "RARbCode": 53.21, "RARbMath": 71.85, + "RiaNewsRetrieval": 3.32, "RuBQRetrieval": 4.75, "SCIDOCS": 23.76, "SCIDOCS-PL": 4.02, @@ -18232,6 +36962,9 @@ "Touche2020": 19.93, "VideoRetrieval": 8.48, "WinoGrande": 20.77, + "XMarket (de)": 8.23, + "XMarket (en)": 27.37, + "XMarket (es)": 8.05, "XPQARetrieval (ara-ara)": 9.42, "XPQARetrieval (eng-ara)": 2.36, "XPQARetrieval (ara-eng)": 8.98, @@ -18293,6 +37026,7 @@ "BIOSSES": 80.43, "BQ": 21.39, "CDSC-R": 77.04, + "GermanSTSBenchmark": 61.27, "LCQMC": 22.84, "PAWSX": 6.44, "RUParaPhraserSTS": 42.15, @@ -20827,8 +39561,14 @@ "GeoreviewClusteringP2P": 53.37, "HALClusteringS2S": 23.21, "MLSUMClusteringP2P (ru)": 37.0, + "MLSUMClusteringP2P (de)": 33.37, + "MLSUMClusteringP2P (fr)": 37.96, + "MLSUMClusteringP2P (es)": 36.62, "MLSUMClusteringP2P": 39.97, "MLSUMClusteringS2S (ru)": 38.16, + "MLSUMClusteringS2S (de)": 34.26, + "MLSUMClusteringS2S (fr)": 35.87, + "MLSUMClusteringS2S (es)": 35.6, "MLSUMClusteringS2S": 36.55, "MasakhaNEWSClusteringP2P (amh)": 40.36, "MasakhaNEWSClusteringP2P (eng)": 49.96, @@ -20881,6 +39621,7 @@ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", "CDSC-E": 72.22, + "FalseFriendsGermanEnglish": 48.8, "OpusparcusPC (de)": 96.63, "OpusparcusPC (en)": 98.59, "OpusparcusPC (fi)": 93.2, @@ -20904,6 +39645,7 @@ { "Model": "paraphrase-multilingual-MiniLM-L12-v2", "CDSC-E": 72.33, + "FalseFriendsGermanEnglish": 49.5, "OpusparcusPC (de)": 96.63, "OpusparcusPC (en)": 98.59, "OpusparcusPC (fi)": 93.2, @@ -20952,6 +39694,27 @@ "StackOverflowDupQuestions": 45.85, "SyntecReranking": 75.03, "T2Reranking": 65.28 + }, + { + "Model": "paraphrase-multilingual-MiniLM-L12-v2", + "MIRACLReranking (ar)": 41.35, + "MIRACLReranking (bn)": 7.91, + "MIRACLReranking (de)": 36.46, + "MIRACLReranking (en)": 47.91, + "MIRACLReranking (es)": 46.0, + "MIRACLReranking (fa)": 30.83, + "MIRACLReranking (fi)": 54.16, + "MIRACLReranking (fr)": 35.78, + "MIRACLReranking (hi)": 36.16, + "MIRACLReranking (id)": 41.57, + "MIRACLReranking (ja)": 33.84, + "MIRACLReranking (ko)": 35.56, + "MIRACLReranking (ru)": 39.88, + "MIRACLReranking (sw)": 24.09, + "MIRACLReranking (te)": 14.45, + "MIRACLReranking (th)": 44.64, + "MIRACLReranking (yo)": 40.82, + "MIRACLReranking (zh)": 35.72 } ] }, @@ -20970,7 +39733,7 @@ "BSARDRetrieval": 0.0, "CQADupstackRetrieval": 30.7, "ClimateFEVER": 18.49, - "CmedqaRetrieval": 10.78, + "CmedqaRetrieval": 10.79, "CodeFeedbackMT": 12.53, "CodeFeedbackST": 32.53, "CodeSearchNetCCRetrieval (python)": 44.49, @@ -20996,7 +39759,10 @@ "FEVER": 52.66, "FiQA-PL": 12.49, "FiQA2018": 20.33, + "GerDaLIR": 0.95, "GerDaLIRSmall": 2.62, + "GermanDPR": 64.63, + "GermanQuAD-Retrieval": 82.14, "HellaSwag": 16.98, "HotpotQA": 30.01, "HotpotQA-PL": 22.76, @@ -21009,6 +39775,24 @@ "LegalBenchCorporateLobbying": 88.51, "LegalQuAD": 13.31, "LegalSummarization": 54.97, + "MIRACLRetrieval (ar)": 19.57, + "MIRACLRetrieval (bn)": 1.12, + "MIRACLRetrieval (de)": 22.43, + "MIRACLRetrieval (en)": 32.75, + "MIRACLRetrieval (es)": 25.87, + "MIRACLRetrieval (fa)": 13.33, + "MIRACLRetrieval (fi)": 34.14, + "MIRACLRetrieval (fr)": 21.75, + "MIRACLRetrieval (hi)": 18.91, + "MIRACLRetrieval (id)": 25.76, + "MIRACLRetrieval (ja)": 14.06, + "MIRACLRetrieval (ko)": 25.68, + "MIRACLRetrieval (ru)": 18.96, + "MIRACLRetrieval (sw)": 11.04, + "MIRACLRetrieval (te)": 0.95, + "MIRACLRetrieval (th)": 26.91, + "MIRACLRetrieval (yo)": 24.47, + "MIRACLRetrieval (zh)": 21.35, "MMarcoRetrieval": 46.62, "MSMARCO": 23.72, "MSMARCO-PL": 10.39, @@ -21053,6 +39837,9 @@ "Touche2020": 16.06, "VideoRetrieval": 14.71, "WinoGrande": 46.52, + "XMarket (de)": 5.95, + "XMarket (en)": 14.43, + "XMarket (es)": 7.11, "XPQARetrieval (ara-ara)": 22.97, "XPQARetrieval (eng-ara)": 17.17, "XPQARetrieval (ara-eng)": 25.5, @@ -21102,6 +39889,7 @@ "BIOSSES": 74.18, "BQ": 38.53, "CDSC-R": 88.98, + "GermanSTSBenchmark": 79.1, "LCQMC": 63.96, "PAWSX": 10.13, "RUParaPhraserSTS": 61.87, @@ -21531,20 +40319,26 @@ { "Model": "paraphrase-multilingual-mpnet-base-v2", "8TagsClustering": 25.62, - "AlloProfClusteringP2P": 54.49, - "AlloProfClusteringS2S": 44.79, - "ArxivClusteringP2P": 37.78, - "ArxivClusteringS2S": 31.68, + "AlloProfClusteringP2P": 53.84, + "AlloProfClusteringS2S": 40.83, + "ArxivClusteringP2P": 37.83, + "ArxivClusteringS2S": 31.69, "BiorxivClusteringP2P": 33.02, "BiorxivClusteringS2S": 29.45, - "BlurbsClusteringP2P": 34.38, - "BlurbsClusteringS2S": 15.81, + "BlurbsClusteringP2P": 34.0, + "BlurbsClusteringS2S": 15.31, "GeoreviewClusteringP2P": 56.18, - "HALClusteringS2S": 23.97, + "HALClusteringS2S": 23.56, "MLSUMClusteringP2P": 40.55, "MLSUMClusteringP2P (ru)": 35.95, + "MLSUMClusteringP2P (de)": 34.46, + "MLSUMClusteringP2P (fr)": 39.08, + "MLSUMClusteringP2P (es)": 37.58, "MLSUMClusteringS2S": 37.53, "MLSUMClusteringS2S (ru)": 38.88, + "MLSUMClusteringS2S (de)": 34.25, + "MLSUMClusteringS2S (fr)": 36.54, + "MLSUMClusteringS2S (es)": 36.99, "MasakhaNEWSClusteringP2P (fra)": 53.3, "MasakhaNEWSClusteringP2P (amh)": 46.85, "MasakhaNEWSClusteringP2P (eng)": 47.3, @@ -21585,8 +40379,8 @@ "RuSciBenchOECDClusteringP2P": 42.9, "StackExchangeClustering": 52.99, "StackExchangeClusteringP2P": 33.06, - "TenKGnadClusteringP2P": 35.96, - "TenKGnadClusteringS2S": 22.0, + "TenKGnadClusteringP2P": 33.62, + "TenKGnadClusteringS2S": 20.93, "TwentyNewsgroupsClustering": 44.36 } ] @@ -21608,6 +40402,7 @@ { "Model": "paraphrase-multilingual-mpnet-base-v2", "CDSC-E": 75.77, + "FalseFriendsGermanEnglish": 51.63, "OpusparcusPC (fr)": 93.45, "OpusparcusPC (de)": 97.34, "OpusparcusPC (en)": 98.59, @@ -21632,6 +40427,7 @@ { "Model": "paraphrase-multilingual-mpnet-base-v2", "CDSC-E": 75.77, + "FalseFriendsGermanEnglish": 51.35, "OpusparcusPC (de)": 97.34, "OpusparcusPC (en)": 98.59, "OpusparcusPC (fi)": 95.33, @@ -21667,6 +40463,27 @@ "StackOverflowDupQuestions": 46.78, "SyntecReranking": 80.97, "T2Reranking": 64.49 + }, + { + "Model": "paraphrase-multilingual-mpnet-base-v2", + "MIRACLReranking (ar)": 44.47, + "MIRACLReranking (bn)": 30.21, + "MIRACLReranking (de)": 40.7, + "MIRACLReranking (en)": 50.23, + "MIRACLReranking (es)": 50.96, + "MIRACLReranking (fa)": 37.18, + "MIRACLReranking (fi)": 59.56, + "MIRACLReranking (fr)": 40.93, + "MIRACLReranking (hi)": 40.36, + "MIRACLReranking (id)": 45.04, + "MIRACLReranking (ja)": 36.2, + "MIRACLReranking (ko)": 40.13, + "MIRACLReranking (ru)": 43.66, + "MIRACLReranking (sw)": 26.94, + "MIRACLReranking (te)": 23.65, + "MIRACLReranking (th)": 46.63, + "MIRACLReranking (yo)": 42.37, + "MIRACLReranking (zh)": 37.56 } ] }, @@ -21711,7 +40528,10 @@ "FEVER": 56.76, "FiQA-PL": 14.71, "FiQA2018": 22.96, + "GerDaLIR": 1.14, "GerDaLIRSmall": 3.0, + "GermanDPR": 67.88, + "GermanQuAD-Retrieval": 85.24, "HellaSwag": 17.53, "HotpotQA": 37.03, "HotpotQA-PL": 29.36, @@ -21724,8 +40544,26 @@ "LegalBenchCorporateLobbying": 87.62, "LegalQuAD": 17.8, "LegalSummarization": 56.8, + "MIRACLRetrieval (ar)": 20.69, + "MIRACLRetrieval (bn)": 11.04, + "MIRACLRetrieval (de)": 28.28, + "MIRACLRetrieval (en)": 32.78, + "MIRACLRetrieval (es)": 28.4, + "MIRACLRetrieval (fa)": 17.34, + "MIRACLRetrieval (fi)": 40.78, + "MIRACLRetrieval (fr)": 25.87, + "MIRACLRetrieval (hi)": 20.27, + "MIRACLRetrieval (id)": 29.58, + "MIRACLRetrieval (ja)": 13.91, + "MIRACLRetrieval (ko)": 30.01, + "MIRACLRetrieval (ru)": 22.87, + "MIRACLRetrieval (sw)": 17.24, + "MIRACLRetrieval (te)": 5.33, + "MIRACLRetrieval (th)": 25.49, + "MIRACLRetrieval (yo)": 28.34, + "MIRACLRetrieval (zh)": 20.19, "MMarcoRetrieval": 44.62, - "MSMARCO": 26.6, + "MSMARCO": 26.59, "MSMARCO-PL": 12.45, "MedicalRetrieval": 14.1, "MintakaRetrieval (fr)": 24.45, @@ -21743,7 +40581,7 @@ "PIQA": 18.65, "Quail": 2.98, "Quora-PL": 79.18, - "QuoraRetrieval": 86.4, + "QuoraRetrieval": 86.41, "RARbCode": 11.02, "RARbMath": 30.93, "RiaNewsRetrieval": 51.75, @@ -21768,6 +40606,9 @@ "Touche2020": 17.4, "VideoRetrieval": 14.18, "WinoGrande": 49.01, + "XMarket (de)": 7.72, + "XMarket (en)": 16.28, + "XMarket (es)": 9.18, "XPQARetrieval (fr)": 46.22, "XPQARetrieval (ara-ara)": 24.86, "XPQARetrieval (eng-ara)": 19.6, @@ -21817,6 +40658,7 @@ "BIOSSES": 76.27, "BQ": 36.33, "CDSC-R": 88.8, + "GermanSTSBenchmark": 83.41, "LCQMC": 63.3, "PAWSX": 12.16, "RUParaPhraserSTS": 65.74, @@ -23254,6 +42096,7 @@ "f1": [ { "Model": "LaBSE-ru-turbo", + "BornholmBitextMining": 37.59, "Tatoeba (rus-eng)": 93.22 } ] @@ -23262,15 +42105,169 @@ "accuracy": [ { "Model": "LaBSE-ru-turbo", + "AllegroReviews": 24.9, + "AmazonCounterfactualClassification (en-ext)": 74.51, + "AmazonCounterfactualClassification (en)": 74.01, + "AmazonCounterfactualClassification (de)": 51.82, + "AmazonCounterfactualClassification (ja)": 56.28, + "AmazonPolarityClassification": 79.24, + "AmazonReviewsClassification (en)": 42.38, + "AmazonReviewsClassification (de)": 29.28, + "AmazonReviewsClassification (es)": 33.88, + "AmazonReviewsClassification (fr)": 32.2, + "AmazonReviewsClassification (ja)": 20.53, + "AmazonReviewsClassification (zh)": 20.93, + "AngryTweetsClassification": 46.87, + "Banking77Classification": 73.54, + "CBD": 53.58, + "DanishPoliticalCommentsClassification": 30.36, + "EmotionClassification": 45.58, "GeoreviewClassification": 46.04, "HeadlineClassification": 69.98, + "ImdbClassification": 71.58, "InappropriatenessClassification": 61.39, "KinopoiskClassification": 53.59, + "LccSentimentClassification": 48.0, + "MTOPDomainClassification (en)": 89.99, + "MTOPDomainClassification (de)": 64.42, + "MTOPDomainClassification (es)": 63.98, + "MTOPDomainClassification (fr)": 68.37, + "MTOPDomainClassification (hi)": 21.64, + "MTOPDomainClassification (th)": 15.28, + "MTOPIntentClassification (en)": 58.5, + "MTOPIntentClassification (de)": 36.38, + "MTOPIntentClassification (es)": 37.06, + "MTOPIntentClassification (fr)": 35.98, + "MTOPIntentClassification (hi)": 3.76, + "MTOPIntentClassification (th)": 4.99, + "MasakhaNEWSClassification (amh)": 35.64, + "MasakhaNEWSClassification (eng)": 79.59, + "MasakhaNEWSClassification (fra)": 75.43, + "MasakhaNEWSClassification (hau)": 57.16, + "MasakhaNEWSClassification (ibo)": 52.74, + "MasakhaNEWSClassification (lin)": 67.2, + "MasakhaNEWSClassification (lug)": 53.23, + "MasakhaNEWSClassification (orm)": 52.22, + "MasakhaNEWSClassification (pcm)": 93.05, + "MasakhaNEWSClassification (run)": 57.64, + "MasakhaNEWSClassification (sna)": 71.17, + "MasakhaNEWSClassification (som)": 44.97, + "MasakhaNEWSClassification (swa)": 52.67, + "MasakhaNEWSClassification (tir)": 26.95, + "MasakhaNEWSClassification (xho)": 62.36, + "MasakhaNEWSClassification (yor)": 63.11, "MassiveIntentClassification (ru)": 66.08, + "MassiveIntentClassification (pt)": 39.34, + "MassiveIntentClassification (fi)": 35.23, + "MassiveIntentClassification (hu)": 30.01, + "MassiveIntentClassification (da)": 39.46, + "MassiveIntentClassification (ja)": 4.66, + "MassiveIntentClassification (ur)": 2.45, + "MassiveIntentClassification (fr)": 39.59, + "MassiveIntentClassification (km)": 5.34, + "MassiveIntentClassification (te)": 2.39, + "MassiveIntentClassification (ka)": 2.69, + "MassiveIntentClassification (mn)": 32.23, + "MassiveIntentClassification (hi)": 3.21, + "MassiveIntentClassification (is)": 27.36, + "MassiveIntentClassification (ro)": 36.04, + "MassiveIntentClassification (az)": 20.84, + "MassiveIntentClassification (sw)": 33.68, + "MassiveIntentClassification (sq)": 35.67, + "MassiveIntentClassification (bn)": 3.74, + "MassiveIntentClassification (vi)": 23.65, + "MassiveIntentClassification (my)": 3.37, + "MassiveIntentClassification (th)": 3.87, + "MassiveIntentClassification (en)": 64.92, + "MassiveIntentClassification (ar)": 4.44, + "MassiveIntentClassification (kn)": 2.58, + "MassiveIntentClassification (pl)": 30.78, + "MassiveIntentClassification (tr)": 29.76, + "MassiveIntentClassification (hy)": 2.28, + "MassiveIntentClassification (fa)": 3.46, + "MassiveIntentClassification (de)": 37.67, + "MassiveIntentClassification (id)": 36.68, + "MassiveIntentClassification (ta)": 1.65, + "MassiveIntentClassification (nl)": 36.79, + "MassiveIntentClassification (tl)": 35.52, + "MassiveIntentClassification (am)": 2.29, + "MassiveIntentClassification (ko)": 2.25, + "MassiveIntentClassification (el)": 10.36, + "MassiveIntentClassification (he)": 2.31, + "MassiveIntentClassification (sl)": 36.95, + "MassiveIntentClassification (ml)": 2.56, + "MassiveIntentClassification (cy)": 35.08, + "MassiveIntentClassification (ms)": 32.99, + "MassiveIntentClassification (jv)": 33.21, + "MassiveIntentClassification (es)": 37.45, + "MassiveIntentClassification (lv)": 24.75, + "MassiveIntentClassification (sv)": 35.51, + "MassiveIntentClassification (zh-TW)": 4.56, + "MassiveIntentClassification (zh-CN)": 3.76, + "MassiveIntentClassification (it)": 42.88, + "MassiveIntentClassification (af)": 33.65, + "MassiveIntentClassification (nb)": 37.7, "MassiveScenarioClassification (ru)": 71.13, + "MassiveScenarioClassification (kn)": 7.55, + "MassiveScenarioClassification (pt)": 47.33, + "MassiveScenarioClassification (tl)": 42.97, + "MassiveScenarioClassification (hy)": 8.46, + "MassiveScenarioClassification (hu)": 37.92, + "MassiveScenarioClassification (lv)": 29.04, + "MassiveScenarioClassification (sq)": 44.31, + "MassiveScenarioClassification (it)": 50.78, + "MassiveScenarioClassification (sv)": 41.57, + "MassiveScenarioClassification (zh-CN)": 10.17, + "MassiveScenarioClassification (ro)": 46.49, + "MassiveScenarioClassification (sl)": 42.08, + "MassiveScenarioClassification (bn)": 8.95, + "MassiveScenarioClassification (es)": 45.47, + "MassiveScenarioClassification (fa)": 7.15, + "MassiveScenarioClassification (hi)": 7.71, + "MassiveScenarioClassification (is)": 35.45, + "MassiveScenarioClassification (nl)": 44.99, + "MassiveScenarioClassification (ms)": 40.41, + "MassiveScenarioClassification (de)": 46.58, + "MassiveScenarioClassification (nb)": 43.87, + "MassiveScenarioClassification (id)": 42.1, + "MassiveScenarioClassification (ta)": 7.02, + "MassiveScenarioClassification (vi)": 31.76, + "MassiveScenarioClassification (th)": 8.74, + "MassiveScenarioClassification (te)": 7.29, + "MassiveScenarioClassification (he)": 7.65, + "MassiveScenarioClassification (en)": 71.06, + "MassiveScenarioClassification (zh-TW)": 10.21, + "MassiveScenarioClassification (fr)": 47.94, + "MassiveScenarioClassification (cy)": 43.03, + "MassiveScenarioClassification (km)": 8.92, + "MassiveScenarioClassification (ml)": 7.16, + "MassiveScenarioClassification (da)": 48.37, + "MassiveScenarioClassification (jv)": 41.0, + "MassiveScenarioClassification (sw)": 42.72, + "MassiveScenarioClassification (tr)": 35.96, + "MassiveScenarioClassification (am)": 7.12, + "MassiveScenarioClassification (az)": 27.25, + "MassiveScenarioClassification (el)": 17.96, + "MassiveScenarioClassification (af)": 42.41, + "MassiveScenarioClassification (ka)": 6.95, + "MassiveScenarioClassification (ko)": 5.61, + "MassiveScenarioClassification (fi)": 40.85, + "MassiveScenarioClassification (mn)": 36.93, + "MassiveScenarioClassification (my)": 10.47, + "MassiveScenarioClassification (ja)": 10.34, + "MassiveScenarioClassification (ur)": 8.63, + "MassiveScenarioClassification (ar)": 11.75, + "MassiveScenarioClassification (pl)": 36.2, + "NoRecClassification": 40.1, + "NordicLangClassification": 46.52, + "PAC": 69.41, + "PolEmo2.0-IN": 44.06, + "PolEmo2.0-OUT": 27.02, "RuReviewsClassification": 64.58, "RuSciBenchGRNTIClassification": 56.67, - "RuSciBenchOECDClassification": 43.58 + "RuSciBenchOECDClassification": 43.58, + "ToxicConversationsClassification": 64.28, + "TweetSentimentExtractionClassification": 63.49 } ] }, @@ -23278,11 +42275,67 @@ "v_measure": [ { "Model": "LaBSE-ru-turbo", + "AlloProfClusteringP2P": 58.25, + "AlloProfClusteringS2S": 23.39, + "ArxivClusteringP2P": 35.03, + "ArxivClusteringS2S": 23.08, + "BiorxivClusteringP2P": 31.48, + "BiorxivClusteringS2S": 20.71, + "BlurbsClusteringP2P": 26.37, + "BlurbsClusteringS2S": 10.26, "GeoreviewClusteringP2P": 64.55, + "HALClusteringS2S": 14.97, "MLSUMClusteringP2P (ru)": 45.7, + "MLSUMClusteringP2P (de)": 32.73, + "MLSUMClusteringP2P (fr)": 40.75, + "MLSUMClusteringP2P (es)": 41.11, "MLSUMClusteringS2S (ru)": 42.93, - "RuSciBenchGRNTIClusteringP2P": 50.64, - "RuSciBenchOECDClusteringP2P": 44.48 + "MLSUMClusteringS2S (de)": 34.6, + "MLSUMClusteringS2S (fr)": 40.44, + "MLSUMClusteringS2S (es)": 40.4, + "MasakhaNEWSClusteringP2P (amh)": 42.64, + "MasakhaNEWSClusteringP2P (eng)": 53.41, + "MasakhaNEWSClusteringP2P (fra)": 62.23, + "MasakhaNEWSClusteringP2P (hau)": 35.53, + "MasakhaNEWSClusteringP2P (ibo)": 34.19, + "MasakhaNEWSClusteringP2P (lin)": 50.83, + "MasakhaNEWSClusteringP2P (lug)": 42.65, + "MasakhaNEWSClusteringP2P (orm)": 29.3, + "MasakhaNEWSClusteringP2P (pcm)": 92.96, + "MasakhaNEWSClusteringP2P (run)": 51.6, + "MasakhaNEWSClusteringP2P (sna)": 47.86, + "MasakhaNEWSClusteringP2P (som)": 31.78, + "MasakhaNEWSClusteringP2P (swa)": 21.21, + "MasakhaNEWSClusteringP2P (tir)": 44.22, + "MasakhaNEWSClusteringP2P (xho)": 29.11, + "MasakhaNEWSClusteringP2P (yor)": 27.34, + "MasakhaNEWSClusteringS2S (amh)": 43.99, + "MasakhaNEWSClusteringS2S (eng)": 13.78, + "MasakhaNEWSClusteringS2S (fra)": 34.26, + "MasakhaNEWSClusteringS2S (hau)": 14.67, + "MasakhaNEWSClusteringS2S (ibo)": 32.77, + "MasakhaNEWSClusteringS2S (lin)": 48.41, + "MasakhaNEWSClusteringS2S (lug)": 42.65, + "MasakhaNEWSClusteringS2S (orm)": 25.07, + "MasakhaNEWSClusteringS2S (pcm)": 64.67, + "MasakhaNEWSClusteringS2S (run)": 44.37, + "MasakhaNEWSClusteringS2S (sna)": 46.43, + "MasakhaNEWSClusteringS2S (som)": 25.16, + "MasakhaNEWSClusteringS2S (swa)": 16.84, + "MasakhaNEWSClusteringS2S (tir)": 43.8, + "MasakhaNEWSClusteringS2S (xho)": 29.51, + "MasakhaNEWSClusteringS2S (yor)": 24.19, + "MedrxivClusteringP2P": 29.44, + "MedrxivClusteringS2S": 23.93, + "RedditClustering": 35.73, + "RedditClusteringP2P": 51.74, + "RuSciBenchGRNTIClusteringP2P": 51.69, + "RuSciBenchOECDClusteringP2P": 45.56, + "StackExchangeClustering": 45.45, + "StackExchangeClusteringP2P": 31.13, + "TenKGnadClusteringP2P": 37.43, + "TenKGnadClusteringS2S": 13.19, + "TwentyNewsgroupsClustering": 25.67 } ] }, @@ -23290,13 +42343,51 @@ "max_ap": [ { "Model": "LaBSE-ru-turbo", + "CDSC-E": 52.92, + "FalseFriendsGermanEnglish": 47.15, "OpusparcusPC (ru)": 89.32, - "TERRa": 57.81 + "OpusparcusPC (de)": 91.59, + "OpusparcusPC (en)": 98.04, + "OpusparcusPC (fi)": 85.53, + "OpusparcusPC (fr)": 88.54, + "OpusparcusPC (sv)": 86.76, + "PSC": 85.45, + "PawsXPairClassification (de)": 51.09, + "PawsXPairClassification (en)": 60.74, + "PawsXPairClassification (es)": 52.25, + "PawsXPairClassification (fr)": 54.84, + "PawsXPairClassification (ja)": 47.44, + "PawsXPairClassification (ko)": 47.04, + "PawsXPairClassification (zh)": 50.49, + "SICK-E-PL": 47.79, + "SprintDuplicateQuestions": 94.26, + "TERRa": 57.81, + "TwitterSemEval2015": 64.76, + "TwitterURLCorpus": 84.18 }, { "Model": "LaBSE-ru-turbo", + "CDSC-E": 53.23, + "FalseFriendsGermanEnglish": 47.15, "OpusparcusPC (ru)": 89.32, - "TERRa": 57.81 + "OpusparcusPC (de)": 91.62, + "OpusparcusPC (en)": 98.07, + "OpusparcusPC (fi)": 85.58, + "OpusparcusPC (fr)": 88.54, + "OpusparcusPC (sv)": 86.76, + "PSC": 85.45, + "PawsXPairClassification (de)": 51.25, + "PawsXPairClassification (en)": 60.87, + "PawsXPairClassification (es)": 52.31, + "PawsXPairClassification (fr)": 54.98, + "PawsXPairClassification (ja)": 47.44, + "PawsXPairClassification (ko)": 47.25, + "PawsXPairClassification (zh)": 50.79, + "SICK-E-PL": 47.9, + "SprintDuplicateQuestions": 94.39, + "TERRa": 57.81, + "TwitterSemEval2015": 64.76, + "TwitterURLCorpus": 84.18 } ] }, @@ -23304,11 +42395,35 @@ "map": [ { "Model": "LaBSE-ru-turbo", - "MIRACLReranking (ru)": 57.44 + "AlloprofReranking": 41.6, + "AskUbuntuDupQuestions": 55.91, + "MindSmallReranking": 29.19, + "RuBQReranking": 68.65, + "SciDocsRR": 69.74, + "StackOverflowDupQuestions": 45.98, + "SyntecReranking": 53.82, + "T2Reranking": 52.42 }, { "Model": "LaBSE-ru-turbo", - "RuBQReranking": 68.65 + "MIRACLReranking (ru)": 57.44, + "MIRACLReranking (ar)": 2.29, + "MIRACLReranking (bn)": 2.07, + "MIRACLReranking (de)": 22.14, + "MIRACLReranking (en)": 51.47, + "MIRACLReranking (es)": 27.98, + "MIRACLReranking (fa)": 3.18, + "MIRACLReranking (fi)": 31.28, + "MIRACLReranking (fr)": 26.29, + "MIRACLReranking (hi)": 2.99, + "MIRACLReranking (id)": 19.64, + "MIRACLReranking (ja)": 2.58, + "MIRACLReranking (ko)": 5.09, + "MIRACLReranking (sw)": 29.43, + "MIRACLReranking (te)": 3.13, + "MIRACLReranking (th)": 2.47, + "MIRACLReranking (yo)": 48.48, + "MIRACLReranking (zh)": 4.2 } ] }, @@ -23316,9 +42431,143 @@ "ndcg_at_10": [ { "Model": "LaBSE-ru-turbo", + "AILACasedocs": 19.7, + "AILAStatutes": 15.9, + "ARCChallenge": 7.14, + "AlloprofRetrieval": 10.08, + "AlphaNLI": 23.76, + "AppsRetrieval": 3.21, + "ArguAna": 46.57, + "BSARDRetrieval": 1.71, + "ClimateFEVER": 13.19, + "CmedqaRetrieval": 1.27, + "CodeFeedbackMT": 28.06, + "CodeFeedbackST": 52.83, + "CodeSearchNetCCRetrieval (python)": 46.49, + "CodeSearchNetCCRetrieval (javascript)": 48.94, + "CodeSearchNetCCRetrieval (go)": 32.67, + "CodeSearchNetCCRetrieval (ruby)": 47.35, + "CodeSearchNetCCRetrieval (java)": 35.98, + "CodeSearchNetCCRetrieval (php)": 28.61, + "CodeSearchNetRetrieval (python)": 71.17, + "CodeSearchNetRetrieval (javascript)": 52.91, + "CodeSearchNetRetrieval (go)": 70.52, + "CodeSearchNetRetrieval (ruby)": 60.79, + "CodeSearchNetRetrieval (java)": 42.15, + "CodeSearchNetRetrieval (php)": 55.98, + "CodeTransOceanContest": 41.2, + "CodeTransOceanDL": 26.26, + "CosQA": 17.94, + "CovidRetrieval": 0.06, + "DBPedia": 29.74, + "FEVER": 48.04, + "FiQA2018": 21.01, + "GerDaLIR": 1.06, + "GerDaLIRSmall": 2.92, + "GermanQuAD-Retrieval": 66.35, + "HellaSwag": 19.23, + "HotpotQA": 54.51, + "LEMBNarrativeQARetrieval": 13.6, + "LEMBQMSumRetrieval": 21.47, + "LEMBSummScreenFDRetrieval": 57.41, + "LEMBWikimQARetrieval": 39.41, + "LeCaRDv2": 9.86, + "LegalBenchConsumerContractsQA": 62.58, + "LegalBenchCorporateLobbying": 86.41, + "LegalQuAD": 11.82, + "LegalSummarization": 57.64, "MIRACLRetrieval (ru)": 55.97, + "MIRACLRetrieval (ar)": 0.01, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 10.18, + "MIRACLRetrieval (en)": 41.09, + "MIRACLRetrieval (es)": 10.08, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 13.8, + "MIRACLRetrieval (fr)": 13.74, + "MIRACLRetrieval (hi)": 0.18, + "MIRACLRetrieval (id)": 7.11, + "MIRACLRetrieval (ja)": 0.0, + "MIRACLRetrieval (ko)": 0.92, + "MIRACLRetrieval (sw)": 19.66, + "MIRACLRetrieval (te)": 0.0, + "MIRACLRetrieval (th)": 0.07, + "MIRACLRetrieval (yo)": 38.76, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 27.51, + "MintakaRetrieval (ar)": 0.5, + "MintakaRetrieval (de)": 9.55, + "MintakaRetrieval (es)": 11.36, + "MintakaRetrieval (fr)": 12.44, + "MintakaRetrieval (hi)": 0.82, + "MintakaRetrieval (it)": 11.4, + "MintakaRetrieval (ja)": 1.14, + "MintakaRetrieval (pt)": 12.64, + "NFCorpus": 22.08, + "NQ": 35.87, + "PIQA": 16.31, + "Quail": 2.52, + "QuoraRetrieval": 85.71, + "RARbCode": 9.48, + "RARbMath": 46.36, "RiaNewsRetrieval": 69.36, - "RuBQRetrieval": 65.71 + "RuBQRetrieval": 65.71, + "SCIDOCS": 11.7, + "SIQA": 1.68, + "SciFact": 50.49, + "SciFact-PL": 23.95, + "SpartQA": 3.89, + "StackOverflowQA": 53.06, + "SyntecRetrieval": 43.26, + "SyntheticText2SQL": 44.08, + "TRECCOVID": 30.52, + "TRECCOVID-PL": 6.45, + "TempReasonL1": 1.06, + "TempReasonL2Fact": 8.84, + "TempReasonL2Pure": 0.1, + "TempReasonL3Fact": 9.51, + "TempReasonL3Pure": 4.66, + "Touche2020": 14.08, + "WinoGrande": 17.92, + "XMarket (de)": 5.95, + "XMarket (en)": 9.81, + "XMarket (es)": 7.06, + "XPQARetrieval (ara-ara)": 4.71, + "XPQARetrieval (eng-ara)": 4.3, + "XPQARetrieval (ara-eng)": 8.38, + "XPQARetrieval (deu-deu)": 41.62, + "XPQARetrieval (eng-deu)": 12.76, + "XPQARetrieval (deu-eng)": 35.52, + "XPQARetrieval (spa-spa)": 31.42, + "XPQARetrieval (eng-spa)": 13.48, + "XPQARetrieval (spa-eng)": 27.67, + "XPQARetrieval (fra-fra)": 39.24, + "XPQARetrieval (eng-fra)": 15.96, + "XPQARetrieval (fra-eng)": 33.89, + "XPQARetrieval (hin-hin)": 6.89, + "XPQARetrieval (eng-hin)": 6.29, + "XPQARetrieval (hin-eng)": 7.6, + "XPQARetrieval (ita-ita)": 47.91, + "XPQARetrieval (eng-ita)": 13.39, + "XPQARetrieval (ita-eng)": 34.7, + "XPQARetrieval (jpn-jpn)": 6.29, + "XPQARetrieval (eng-jpn)": 3.14, + "XPQARetrieval (jpn-eng)": 8.38, + "XPQARetrieval (kor-kor)": 2.51, + "XPQARetrieval (eng-kor)": 4.89, + "XPQARetrieval (kor-eng)": 3.54, + "XPQARetrieval (pol-pol)": 23.18, + "XPQARetrieval (eng-pol)": 9.8, + "XPQARetrieval (pol-eng)": 20.13, + "XPQARetrieval (por-por)": 27.19, + "XPQARetrieval (eng-por)": 10.36, + "XPQARetrieval (por-eng)": 22.76, + "XPQARetrieval (tam-tam)": 3.02, + "XPQARetrieval (eng-tam)": 5.27, + "XPQARetrieval (tam-eng)": 3.51, + "XPQARetrieval (cmn-cmn)": 14.0, + "XPQARetrieval (eng-cmn)": 7.68, + "XPQARetrieval (cmn-eng)": 13.48 } ] }, @@ -23326,27 +42575,149 @@ "cosine_spearman": [ { "Model": "LaBSE-ru-turbo", + "BIOSSES": 80.3, + "CDSC-R": 75.79, + "GermanSTSBenchmark": 61.54, + "SICK-R": 75.57, + "SICK-R-PL": 50.86, + "SICKFr": 64.97, + "STS12": 72.45, + "STS13": 78.98, + "STS14": 76.7, + "STS15": 85.62, + "STS16": 79.93, + "STS17 (fr-en)": 39.61, + "STS17 (en-en)": 86.49, + "STS17 (es-en)": 36.65, + "STS17 (it-en)": 23.36, + "STS17 (en-de)": 33.58, + "STS17 (nl-en)": 32.13, + "STS17 (ar-ar)": 14.47, + "STS17 (ko-ko)": 8.95, + "STS17 (en-ar)": 1.3, + "STS17 (es-es)": 71.77, + "STS17 (en-tr)": -9.64, + "STS22 (ru)": 67.57, + "STS22 (fr-pl)": 73.25, + "STS22 (de-pl)": 36.4, + "STS22 (de-en)": 44.31, + "STS22 (fr)": 74.66, + "STS22 (en)": 63.1, + "STS22 (es-en)": 68.55, + "STS22 (es-it)": 56.99, + "STS22 (zh-en)": 33.76, + "STS22 (tr)": 47.83, + "STS22 (es)": 57.06, + "STS22 (pl)": 29.24, + "STS22 (zh)": 33.15, + "STS22 (de-fr)": 48.38, + "STS22 (ar)": 29.74, + "STS22 (pl-en)": 65.09, + "STS22 (de)": 32.62, + "STS22 (it)": 66.02, + "STSB": 8.82, + "STSBenchmark": 81.82, + "STSBenchmarkMultilingualSTS (de)": 63.22, + "STSBenchmarkMultilingualSTS (es)": 57.44, + "STSBenchmarkMultilingualSTS (en)": 81.82, + "STSBenchmarkMultilingualSTS (pl)": 53.2, + "STSBenchmarkMultilingualSTS (nl)": 53.16, + "STSBenchmarkMultilingualSTS (it)": 63.22, + "STSBenchmarkMultilingualSTS (pt)": 54.98, + "STSBenchmarkMultilingualSTS (fr)": 64.24, + "STSBenchmarkMultilingualSTS (zh)": 10.91 + }, + { + "Model": "LaBSE-ru-turbo", + "BIOSSES": 80.3, + "CDSC-R": 75.79, + "GermanSTSBenchmark": 61.54, "RUParaPhraserSTS": 72.97, "RuSTSBenchmarkSTS": 81.77, - "STS22 (ru)": 62.89, - "STSBenchmarkMultilingualSTS (ru)": 81.81 + "SICK-R": 75.57, + "SICK-R-PL": 50.85, + "SICKFr": 64.97, + "STS12": 72.45, + "STS13": 78.98, + "STS14": 76.7, + "STS15": 85.62, + "STS16": 79.93, + "STS17 (fr-en)": 39.61, + "STS17 (en-en)": 86.49, + "STS17 (es-en)": 36.65, + "STS17 (it-en)": 23.36, + "STS17 (en-de)": 33.58, + "STS17 (nl-en)": 32.13, + "STS17 (ar-ar)": 13.11, + "STS17 (ko-ko)": 8.96, + "STS17 (en-ar)": 1.3, + "STS17 (es-es)": 71.77, + "STS17 (en-tr)": -9.64, + "STS22 (ru)": 67.57, + "STS22 (fr-pl)": 73.25, + "STS22 (de-pl)": 36.4, + "STS22 (de-en)": 44.31, + "STS22 (fr)": 74.66, + "STS22 (en)": 63.1, + "STS22 (es-en)": 68.55, + "STS22 (es-it)": 56.99, + "STS22 (zh-en)": 33.76, + "STS22 (tr)": 47.83, + "STS22 (es)": 57.06, + "STS22 (pl)": 29.44, + "STS22 (zh)": 33.15, + "STS22 (de-fr)": 48.38, + "STS22 (ar)": 29.72, + "STS22 (pl-en)": 65.09, + "STS22 (de)": 32.62, + "STS22 (it)": 66.02, + "STSB": 8.79, + "STSBenchmark": 81.82, + "STSBenchmarkMultilingualSTS (ru)": 81.81, + "STSBenchmarkMultilingualSTS (de)": 63.22, + "STSBenchmarkMultilingualSTS (es)": 57.44, + "STSBenchmarkMultilingualSTS (en)": 81.82, + "STSBenchmarkMultilingualSTS (pl)": 53.2, + "STSBenchmarkMultilingualSTS (nl)": 53.16, + "STSBenchmarkMultilingualSTS (it)": 63.22, + "STSBenchmarkMultilingualSTS (pt)": 54.98, + "STSBenchmarkMultilingualSTS (fr)": 64.24, + "STSBenchmarkMultilingualSTS (zh)": 10.83 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "LaBSE-ru-turbo", + "SummEval": 30.12, + "SummEvalFr": 28.7 + }, + { + "Model": "LaBSE-ru-turbo", + "SummEval": 30.12, + "SummEvalFr": 28.7 + } + ] }, "MultilabelClassification": { "accuracy": [ { "Model": "LaBSE-ru-turbo", "CEDRClassification": 45.11, - "SensitiveTopicsClassification": 27.52 + "SensitiveTopicsClassification": 28.75 } ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "LaBSE-ru-turbo", + "Core17InstructionRetrieval": 0.29, + "News21InstructionRetrieval": -0.47, + "Robust04InstructionRetrieval": -10.4 + } + ] } }, "sergeyzh__rubert-tiny-turbo": { @@ -23354,6 +42725,7 @@ "f1": [ { "Model": "rubert-tiny-turbo", + "BornholmBitextMining": 42.96, "Tatoeba (rus-eng)": 83.14 } ] @@ -23362,14 +42734,57 @@ "accuracy": [ { "Model": "rubert-tiny-turbo", + "AllegroReviews": 25.4, + "AmazonCounterfactualClassification (en-ext)": 69.94, + "AmazonCounterfactualClassification (en)": 71.04, + "AmazonCounterfactualClassification (de)": 61.07, + "AmazonCounterfactualClassification (ja)": 52.16, "AmazonPolarityClassification": 68.36, + "AmazonReviewsClassification (en)": 36.73, + "AmazonReviewsClassification (de)": 27.69, + "AmazonReviewsClassification (es)": 30.91, + "AmazonReviewsClassification (fr)": 27.89, + "AmazonReviewsClassification (ja)": 20.84, + "AmazonReviewsClassification (zh)": 20.9, + "AngryTweetsClassification": 44.74, "Banking77Classification": 59.86, + "CBD": 54.03, + "DanishPoliticalCommentsClassification": 27.44, "EmotionClassification": 29.5, "GeoreviewClassification": 41.36, "HeadlineClassification": 68.9, "ImdbClassification": 58.36, "InappropriatenessClassification": 59.11, "KinopoiskClassification": 50.47, + "LccSentimentClassification": 36.2, + "MTOPDomainClassification (en)": 78.49, + "MTOPDomainClassification (de)": 55.38, + "MTOPDomainClassification (es)": 54.62, + "MTOPDomainClassification (fr)": 62.52, + "MTOPDomainClassification (hi)": 21.52, + "MTOPDomainClassification (th)": 16.44, + "MTOPIntentClassification (en)": 42.46, + "MTOPIntentClassification (de)": 31.78, + "MTOPIntentClassification (es)": 28.4, + "MTOPIntentClassification (fr)": 31.28, + "MTOPIntentClassification (hi)": 2.9, + "MTOPIntentClassification (th)": 4.78, + "MasakhaNEWSClassification (amh)": 31.09, + "MasakhaNEWSClassification (eng)": 64.06, + "MasakhaNEWSClassification (fra)": 38.6, + "MasakhaNEWSClassification (hau)": 37.6, + "MasakhaNEWSClassification (ibo)": 30.33, + "MasakhaNEWSClassification (lin)": 57.89, + "MasakhaNEWSClassification (lug)": 38.79, + "MasakhaNEWSClassification (orm)": 34.65, + "MasakhaNEWSClassification (pcm)": 85.7, + "MasakhaNEWSClassification (run)": 37.95, + "MasakhaNEWSClassification (sna)": 54.58, + "MasakhaNEWSClassification (som)": 29.05, + "MasakhaNEWSClassification (swa)": 30.15, + "MasakhaNEWSClassification (tir)": 24.67, + "MasakhaNEWSClassification (xho)": 43.37, + "MasakhaNEWSClassification (yor)": 36.25, "MassiveIntentClassification (zh-CN)": 5.21, "MassiveIntentClassification (ko)": 2.53, "MassiveIntentClassification (hi)": 2.56, @@ -23472,6 +42887,11 @@ "MassiveScenarioClassification (he)": 7.61, "MassiveScenarioClassification (nl)": 40.94, "MassiveScenarioClassification (km)": 8.51, + "NoRecClassification": 40.31, + "NordicLangClassification": 49.9, + "PAC": 59.88, + "PolEmo2.0-IN": 38.1, + "PolEmo2.0-OUT": 23.74, "RuReviewsClassification": 60.66, "RuSciBenchGRNTIClassification": 52.93, "RuSciBenchOECDClassification": 40.79, @@ -23484,21 +42904,66 @@ "v_measure": [ { "Model": "rubert-tiny-turbo", + "AlloProfClusteringP2P": 34.87, + "AlloProfClusteringS2S": 22.73, "ArxivClusteringP2P": 24.83, "ArxivClusteringS2S": 16.68, "BiorxivClusteringP2P": 20.0, "BiorxivClusteringS2S": 12.67, + "BlurbsClusteringP2P": 12.49, + "BlurbsClusteringS2S": 8.97, "GeoreviewClusteringP2P": 59.71, + "HALClusteringS2S": 5.84, "MLSUMClusteringP2P (ru)": 40.02, + "MLSUMClusteringP2P (de)": 11.49, + "MLSUMClusteringP2P (fr)": 30.26, + "MLSUMClusteringP2P (es)": 34.16, "MLSUMClusteringS2S (ru)": 41.36, + "MLSUMClusteringS2S (de)": 11.17, + "MLSUMClusteringS2S (fr)": 29.34, + "MLSUMClusteringS2S (es)": 33.25, + "MasakhaNEWSClusteringP2P (amh)": 40.23, + "MasakhaNEWSClusteringP2P (eng)": 5.19, + "MasakhaNEWSClusteringP2P (fra)": 26.13, + "MasakhaNEWSClusteringP2P (hau)": 10.18, + "MasakhaNEWSClusteringP2P (ibo)": 21.87, + "MasakhaNEWSClusteringP2P (lin)": 42.77, + "MasakhaNEWSClusteringP2P (lug)": 42.51, + "MasakhaNEWSClusteringP2P (orm)": 21.39, + "MasakhaNEWSClusteringP2P (pcm)": 51.79, + "MasakhaNEWSClusteringP2P (run)": 42.72, + "MasakhaNEWSClusteringP2P (sna)": 43.34, + "MasakhaNEWSClusteringP2P (som)": 23.5, + "MasakhaNEWSClusteringP2P (swa)": 4.66, + "MasakhaNEWSClusteringP2P (tir)": 45.67, + "MasakhaNEWSClusteringP2P (xho)": 22.17, + "MasakhaNEWSClusteringP2P (yor)": 23.26, + "MasakhaNEWSClusteringS2S (amh)": 44.56, + "MasakhaNEWSClusteringS2S (eng)": 24.45, + "MasakhaNEWSClusteringS2S (fra)": 25.43, + "MasakhaNEWSClusteringS2S (hau)": 6.46, + "MasakhaNEWSClusteringS2S (ibo)": 34.22, + "MasakhaNEWSClusteringS2S (lin)": 44.27, + "MasakhaNEWSClusteringS2S (lug)": 43.53, + "MasakhaNEWSClusteringS2S (orm)": 26.63, + "MasakhaNEWSClusteringS2S (pcm)": 58.95, + "MasakhaNEWSClusteringS2S (run)": 47.69, + "MasakhaNEWSClusteringS2S (sna)": 42.32, + "MasakhaNEWSClusteringS2S (som)": 23.51, + "MasakhaNEWSClusteringS2S (swa)": 15.74, + "MasakhaNEWSClusteringS2S (tir)": 48.37, + "MasakhaNEWSClusteringS2S (xho)": 24.53, + "MasakhaNEWSClusteringS2S (yor)": 24.7, "MedrxivClusteringP2P": 20.79, "MedrxivClusteringS2S": 18.18, "RedditClustering": 26.28, "RedditClusteringP2P": 40.48, - "RuSciBenchGRNTIClusteringP2P": 47.55, - "RuSciBenchOECDClusteringP2P": 41.44, + "RuSciBenchGRNTIClusteringP2P": 48.1, + "RuSciBenchOECDClusteringP2P": 41.06, "StackExchangeClustering": 33.51, "StackExchangeClusteringP2P": 27.98, + "TenKGnadClusteringP2P": 8.65, + "TenKGnadClusteringS2S": 9.77, "TwentyNewsgroupsClustering": 19.9 } ] @@ -23507,13 +42972,51 @@ "max_ap": [ { "Model": "rubert-tiny-turbo", + "CDSC-E": 48.4, + "FalseFriendsGermanEnglish": 47.21, "OpusparcusPC (ru)": 87.58, - "TERRa": 56.09 + "OpusparcusPC (de)": 91.25, + "OpusparcusPC (en)": 97.27, + "OpusparcusPC (fi)": 84.56, + "OpusparcusPC (fr)": 85.13, + "OpusparcusPC (sv)": 84.27, + "PSC": 72.63, + "PawsXPairClassification (de)": 50.85, + "PawsXPairClassification (en)": 49.09, + "PawsXPairClassification (es)": 50.07, + "PawsXPairClassification (fr)": 51.42, + "PawsXPairClassification (ja)": 48.03, + "PawsXPairClassification (ko)": 47.09, + "PawsXPairClassification (zh)": 51.98, + "SICK-E-PL": 48.51, + "SprintDuplicateQuestions": 91.94, + "TERRa": 56.09, + "TwitterSemEval2015": 56.87, + "TwitterURLCorpus": 79.67 }, { "Model": "rubert-tiny-turbo", + "CDSC-E": 48.65, + "FalseFriendsGermanEnglish": 47.33, "OpusparcusPC (ru)": 87.58, - "TERRa": 56.27 + "OpusparcusPC (de)": 91.25, + "OpusparcusPC (en)": 97.28, + "OpusparcusPC (fi)": 84.56, + "OpusparcusPC (fr)": 85.13, + "OpusparcusPC (sv)": 84.27, + "PSC": 72.63, + "PawsXPairClassification (de)": 51.04, + "PawsXPairClassification (en)": 49.11, + "PawsXPairClassification (es)": 50.07, + "PawsXPairClassification (fr)": 51.43, + "PawsXPairClassification (ja)": 48.38, + "PawsXPairClassification (ko)": 47.29, + "PawsXPairClassification (zh)": 52.38, + "SICK-E-PL": 48.72, + "SprintDuplicateQuestions": 91.94, + "TERRa": 56.27, + "TwitterSemEval2015": 56.87, + "TwitterURLCorpus": 79.67 } ] }, @@ -23521,11 +43024,35 @@ "map": [ { "Model": "rubert-tiny-turbo", - "MIRACLReranking (ru)": 47.73 + "AlloprofReranking": 30.29, + "AskUbuntuDupQuestions": 49.48, + "MindSmallReranking": 29.14, + "RuBQReranking": 62.15, + "SciDocsRR": 59.49, + "StackOverflowDupQuestions": 37.97, + "SyntecReranking": 40.01, + "T2Reranking": 51.61 }, { "Model": "rubert-tiny-turbo", - "RuBQReranking": 62.15 + "MIRACLReranking (ru)": 47.73, + "MIRACLReranking (ar)": 2.02, + "MIRACLReranking (bn)": 1.54, + "MIRACLReranking (de)": 12.08, + "MIRACLReranking (en)": 28.98, + "MIRACLReranking (es)": 14.02, + "MIRACLReranking (fa)": 3.73, + "MIRACLReranking (fi)": 23.27, + "MIRACLReranking (fr)": 10.09, + "MIRACLReranking (hi)": 3.65, + "MIRACLReranking (id)": 15.61, + "MIRACLReranking (ja)": 2.32, + "MIRACLReranking (ko)": 6.19, + "MIRACLReranking (sw)": 21.3, + "MIRACLReranking (te)": 1.5, + "MIRACLReranking (th)": 2.19, + "MIRACLReranking (yo)": 28.48, + "MIRACLReranking (zh)": 2.83 } ] }, @@ -23536,13 +43063,140 @@ "AILACasedocs": 7.43, "AILAStatutes": 13.62, "ARCChallenge": 3.85, + "AlloprofRetrieval": 0.94, "AlphaNLI": 14.15, + "AppsRetrieval": 1.25, "ArguAna": 32.03, + "BSARDRetrieval": 1.06, "ClimateFEVER": 5.56, + "CmedqaRetrieval": 1.14, + "CodeFeedbackMT": 18.67, + "CodeFeedbackST": 25.37, + "CodeSearchNetCCRetrieval (python)": 32.45, + "CodeSearchNetCCRetrieval (javascript)": 27.6, + "CodeSearchNetCCRetrieval (go)": 17.14, + "CodeSearchNetCCRetrieval (ruby)": 37.08, + "CodeSearchNetCCRetrieval (java)": 23.17, + "CodeSearchNetCCRetrieval (php)": 17.53, + "CodeSearchNetRetrieval (python)": 47.77, + "CodeSearchNetRetrieval (javascript)": 24.46, + "CodeSearchNetRetrieval (go)": 34.78, + "CodeSearchNetRetrieval (ruby)": 39.47, + "CodeSearchNetRetrieval (java)": 20.77, + "CodeSearchNetRetrieval (php)": 30.9, + "CodeTransOceanContest": 23.44, + "CodeTransOceanDL": 32.03, + "CosQA": 7.22, + "CovidRetrieval": 0.04, "DBPedia": 9.61, + "FEVER": 16.42, + "FiQA2018": 7.18, + "GerDaLIR": 0.13, + "GerDaLIRSmall": 0.34, + "GermanQuAD-Retrieval": 19.17, + "HellaSwag": 10.6, + "HotpotQA": 29.51, + "LEMBNarrativeQARetrieval": 7.26, + "LEMBQMSumRetrieval": 19.0, + "LEMBSummScreenFDRetrieval": 59.64, + "LEMBWikimQARetrieval": 33.38, + "LeCaRDv2": 8.54, + "LegalBenchConsumerContractsQA": 35.72, + "LegalBenchCorporateLobbying": 70.01, + "LegalQuAD": 3.11, + "LegalSummarization": 45.44, "MIRACLRetrieval (ru)": 37.07, + "MIRACLRetrieval (ar)": 0.0, + "MIRACLRetrieval (bn)": 0.0, + "MIRACLRetrieval (de)": 1.52, + "MIRACLRetrieval (en)": 12.01, + "MIRACLRetrieval (es)": 1.63, + "MIRACLRetrieval (fa)": 0.0, + "MIRACLRetrieval (fi)": 4.14, + "MIRACLRetrieval (fr)": 1.76, + "MIRACLRetrieval (hi)": 0.0, + "MIRACLRetrieval (id)": 3.51, + "MIRACLRetrieval (ja)": 0.02, + "MIRACLRetrieval (ko)": 1.41, + "MIRACLRetrieval (sw)": 9.46, + "MIRACLRetrieval (te)": 0.04, + "MIRACLRetrieval (th)": 0.04, + "MIRACLRetrieval (yo)": 13.83, + "MIRACLRetrieval (zh)": 0.0, + "MSMARCO": 5.79, + "MintakaRetrieval (ar)": 0.52, + "MintakaRetrieval (de)": 10.58, + "MintakaRetrieval (es)": 9.24, + "MintakaRetrieval (fr)": 9.63, + "MintakaRetrieval (hi)": 1.03, + "MintakaRetrieval (it)": 9.67, + "MintakaRetrieval (ja)": 1.04, + "MintakaRetrieval (pt)": 11.61, + "NFCorpus": 9.19, + "NQ": 4.96, + "PIQA": 6.27, + "Quail": 1.11, + "QuoraRetrieval": 73.99, + "RARbCode": 1.47, + "RARbMath": 26.36, "RiaNewsRetrieval": 51.27, - "RuBQRetrieval": 51.73 + "RuBQRetrieval": 51.73, + "SCIDOCS": 4.07, + "SIQA": 0.84, + "SciFact": 27.32, + "SciFact-PL": 7.26, + "SpartQA": 6.31, + "StackOverflowQA": 31.97, + "SyntecRetrieval": 14.16, + "SyntheticText2SQL": 31.4, + "TRECCOVID": 17.53, + "TRECCOVID-PL": 5.64, + "TempReasonL1": 0.77, + "TempReasonL2Fact": 6.72, + "TempReasonL2Pure": 0.1, + "TempReasonL3Fact": 7.0, + "TempReasonL3Pure": 4.88, + "Touche2020": 4.43, + "WinoGrande": 27.9, + "XMarket (de)": 2.51, + "XMarket (en)": 3.46, + "XMarket (es)": 2.32, + "XPQARetrieval (ara-ara)": 3.95, + "XPQARetrieval (eng-ara)": 3.16, + "XPQARetrieval (ara-eng)": 5.85, + "XPQARetrieval (deu-deu)": 27.11, + "XPQARetrieval (eng-deu)": 5.17, + "XPQARetrieval (deu-eng)": 15.11, + "XPQARetrieval (spa-spa)": 22.36, + "XPQARetrieval (eng-spa)": 5.92, + "XPQARetrieval (spa-eng)": 13.22, + "XPQARetrieval (fra-fra)": 25.41, + "XPQARetrieval (eng-fra)": 6.09, + "XPQARetrieval (fra-eng)": 16.73, + "XPQARetrieval (hin-hin)": 6.3, + "XPQARetrieval (eng-hin)": 4.45, + "XPQARetrieval (hin-eng)": 5.13, + "XPQARetrieval (ita-ita)": 38.08, + "XPQARetrieval (eng-ita)": 6.83, + "XPQARetrieval (ita-eng)": 17.7, + "XPQARetrieval (jpn-jpn)": 5.73, + "XPQARetrieval (eng-jpn)": 2.03, + "XPQARetrieval (jpn-eng)": 5.5, + "XPQARetrieval (kor-kor)": 2.18, + "XPQARetrieval (eng-kor)": 3.99, + "XPQARetrieval (kor-eng)": 2.51, + "XPQARetrieval (pol-pol)": 14.8, + "XPQARetrieval (eng-pol)": 5.32, + "XPQARetrieval (pol-eng)": 12.01, + "XPQARetrieval (por-por)": 18.48, + "XPQARetrieval (eng-por)": 5.05, + "XPQARetrieval (por-eng)": 11.7, + "XPQARetrieval (tam-tam)": 2.8, + "XPQARetrieval (eng-tam)": 3.71, + "XPQARetrieval (tam-eng)": 2.73, + "XPQARetrieval (cmn-cmn)": 13.08, + "XPQARetrieval (eng-cmn)": 3.63, + "XPQARetrieval (cmn-eng)": 8.65 } ] }, @@ -23550,12 +43204,71 @@ "cosine_spearman": [ { "Model": "rubert-tiny-turbo", + "BIOSSES": 72.48, + "CDSC-R": 73.78, + "GermanSTSBenchmark": 57.5, + "SICK-R": 68.43, + "SICK-R-PL": 48.95, + "SICKFr": 57.71, + "STS12": 66.17, + "STS13": 60.34, + "STS14": 66.12, + "STS15": 77.41, + "STS16": 73.3, + "STS17 (ko-ko)": 10.27, + "STS17 (ar-ar)": 14.32, + "STS17 (en-ar)": 9.83, + "STS17 (en-de)": 25.27, + "STS17 (it-en)": 23.96, + "STS17 (es-es)": 69.98, + "STS17 (en-tr)": 2.36, + "STS17 (en-en)": 76.02, + "STS17 (es-en)": 22.68, + "STS17 (nl-en)": 20.77, + "STS17 (fr-en)": 21.97, + "STS22 (ru)": 64.56, + "STSB": 15.61, + "STSBenchmark": 68.97, + "STSBenchmarkMultilingualSTS (pl)": 53.05, + "STSBenchmarkMultilingualSTS (es)": 53.79, + "STSBenchmarkMultilingualSTS (zh)": 15.18, + "STSBenchmarkMultilingualSTS (fr)": 58.44, + "STSBenchmarkMultilingualSTS (it)": 56.09, + "STSBenchmarkMultilingualSTS (pt)": 48.9, + "STSBenchmarkMultilingualSTS (de)": 59.77, + "STSBenchmarkMultilingualSTS (en)": 68.97, + "STSBenchmarkMultilingualSTS (nl)": 55.13 + }, + { + "Model": "rubert-tiny-turbo", + "BIOSSES": 72.48, + "CDSC-R": 73.78, + "GermanSTSBenchmark": 57.5, "RUParaPhraserSTS": 72.15, "RuSTSBenchmarkSTS": 78.48, + "SICK-R": 68.43, + "SICK-R-PL": 48.95, + "SICKFr": 57.71, + "STS12": 66.17, + "STS13": 60.34, + "STS14": 66.12, + "STS15": 77.41, + "STS16": 73.3, + "STS17 (ko-ko)": 10.11, + "STS17 (ar-ar)": 16.06, + "STS17 (en-ar)": 9.82, + "STS17 (en-de)": 25.27, + "STS17 (it-en)": 23.96, + "STS17 (es-es)": 69.98, + "STS17 (en-tr)": 2.36, + "STS17 (en-en)": 76.02, + "STS17 (es-en)": 22.68, + "STS17 (nl-en)": 20.77, + "STS17 (fr-en)": 21.97, "STS22 (zh)": 32.83, "STS22 (de-fr)": 17.5, "STS22 (pl-en)": 42.08, - "STS22 (ru)": 60.06, + "STS22 (ru)": 64.56, "STS22 (fr)": 42.0, "STS22 (de)": 8.16, "STS22 (tr)": 15.46, @@ -23570,24 +43283,53 @@ "STS22 (es)": 45.31, "STS22 (zh-en)": 31.25, "STS22 (en)": 47.06, - "STSBenchmarkMultilingualSTS (ru)": 78.12 + "STSB": 15.57, + "STSBenchmark": 68.97, + "STSBenchmarkMultilingualSTS (ru)": 78.12, + "STSBenchmarkMultilingualSTS (pl)": 53.04, + "STSBenchmarkMultilingualSTS (es)": 53.79, + "STSBenchmarkMultilingualSTS (zh)": 15.17, + "STSBenchmarkMultilingualSTS (fr)": 58.44, + "STSBenchmarkMultilingualSTS (it)": 56.09, + "STSBenchmarkMultilingualSTS (pt)": 48.9, + "STSBenchmarkMultilingualSTS (de)": 59.76, + "STSBenchmarkMultilingualSTS (en)": 68.97, + "STSBenchmarkMultilingualSTS (nl)": 55.13 } ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "rubert-tiny-turbo", + "SummEval": 31.12, + "SummEvalFr": 30.87 + }, + { + "Model": "rubert-tiny-turbo", + "SummEval": 31.12, + "SummEvalFr": 30.87 + } + ] }, "MultilabelClassification": { "accuracy": [ { "Model": "rubert-tiny-turbo", "CEDRClassification": 38.95, - "SensitiveTopicsClassification": 24.44 + "SensitiveTopicsClassification": 25.16 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "rubert-tiny-turbo", + "Core17InstructionRetrieval": -4.06, + "News21InstructionRetrieval": 1.39, + "Robust04InstructionRetrieval": -4.16 } ] - }, - "InstructionRetrieval": { - "p-MRR": [] } }, "shibing624__text2vec-base-chinese": { @@ -23688,18 +43430,171 @@ }, "shibing624__text2vec-base-multilingual": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "text2vec-base-multilingual", + "BornholmBitextMining": 17.43, + "Tatoeba (zsm-eng)": 94.48, + "Tatoeba (ang-eng)": 9.91, + "Tatoeba (swh-eng)": 14.69, + "Tatoeba (afr-eng)": 53.85, + "Tatoeba (lfn-eng)": 42.06, + "Tatoeba (hsb-eng)": 36.15, + "Tatoeba (fra-eng)": 91.29, + "Tatoeba (tzl-eng)": 25.48, + "Tatoeba (rus-eng)": 91.73, + "Tatoeba (khm-eng)": 27.12, + "Tatoeba (mar-eng)": 91.27, + "Tatoeba (cmn-eng)": 93.67, + "Tatoeba (mon-eng)": 91.7, + "Tatoeba (nld-eng)": 94.35, + "Tatoeba (cat-eng)": 93.48, + "Tatoeba (ido-eng)": 32.99, + "Tatoeba (ell-eng)": 95.23, + "Tatoeba (bre-eng)": 4.44, + "Tatoeba (hye-eng)": 92.68, + "Tatoeba (dtp-eng)": 3.32, + "Tatoeba (hrv-eng)": 96.07, + "Tatoeba (ita-eng)": 92.18, + "Tatoeba (kur-eng)": 43.05, + "Tatoeba (tgl-eng)": 9.49, + "Tatoeba (kzj-eng)": 3.99, + "Tatoeba (arq-eng)": 15.17, + "Tatoeba (kab-eng)": 0.86, + "Tatoeba (ind-eng)": 91.57, + "Tatoeba (slv-eng)": 97.25, + "Tatoeba (isl-eng)": 20.93, + "Tatoeba (vie-eng)": 94.48, + "Tatoeba (arz-eng)": 46.38, + "Tatoeba (war-eng)": 5.47, + "Tatoeba (srp-eng)": 91.77, + "Tatoeba (hin-eng)": 97.17, + "Tatoeba (pam-eng)": 3.32, + "Tatoeba (tat-eng)": 6.16, + "Tatoeba (nov-eng)": 46.38, + "Tatoeba (ron-eng)": 94.22, + "Tatoeba (jpn-eng)": 88.66, + "Tatoeba (por-eng)": 92.58, + "Tatoeba (ina-eng)": 73.14, + "Tatoeba (deu-eng)": 96.03, + "Tatoeba (yue-eng)": 54.38, + "Tatoeba (swe-eng)": 93.92, + "Tatoeba (bos-eng)": 93.6, + "Tatoeba (awa-eng)": 26.57, + "Tatoeba (cym-eng)": 11.56, + "Tatoeba (nob-eng)": 98.67, + "Tatoeba (bul-eng)": 92.79, + "Tatoeba (aze-eng)": 50.48, + "Tatoeba (glg-eng)": 93.12, + "Tatoeba (nno-eng)": 70.14, + "Tatoeba (yid-eng)": 8.31, + "Tatoeba (epo-eng)": 32.67, + "Tatoeba (oci-eng)": 35.7, + "Tatoeba (spa-eng)": 95.57, + "Tatoeba (mhr-eng)": 3.97, + "Tatoeba (est-eng)": 97.75, + "Tatoeba (lat-eng)": 18.11, + "Tatoeba (dsb-eng)": 29.84, + "Tatoeba (fry-eng)": 26.27, + "Tatoeba (pol-eng)": 94.7, + "Tatoeba (fin-eng)": 93.86, + "Tatoeba (orv-eng)": 12.32, + "Tatoeba (lvs-eng)": 98.0, + "Tatoeba (ben-eng)": 19.9, + "Tatoeba (tuk-eng)": 12.3, + "Tatoeba (ast-eng)": 59.27, + "Tatoeba (ile-eng)": 56.1, + "Tatoeba (pms-eng)": 22.86, + "Tatoeba (jav-eng)": 11.43, + "Tatoeba (slk-eng)": 94.83, + "Tatoeba (ber-eng)": 2.53, + "Tatoeba (heb-eng)": 86.49, + "Tatoeba (gle-eng)": 11.26, + "Tatoeba (kor-eng)": 86.37, + "Tatoeba (uzb-eng)": 14.03, + "Tatoeba (cbk-eng)": 54.82, + "Tatoeba (sqi-eng)": 98.17, + "Tatoeba (uig-eng)": 9.65, + "Tatoeba (cha-eng)": 11.03, + "Tatoeba (max-eng)": 36.23, + "Tatoeba (swg-eng)": 22.4, + "Tatoeba (lit-eng)": 93.15, + "Tatoeba (tha-eng)": 95.99, + "Tatoeba (pes-eng)": 88.84, + "Tatoeba (tur-eng)": 94.73, + "Tatoeba (ceb-eng)": 5.72, + "Tatoeba (urd-eng)": 94.21, + "Tatoeba (ara-eng)": 88.48, + "Tatoeba (ukr-eng)": 92.51, + "Tatoeba (wuu-eng)": 52.99, + "Tatoeba (amh-eng)": 28.72, + "Tatoeba (csb-eng)": 16.97, + "Tatoeba (gla-eng)": 1.87, + "Tatoeba (tel-eng)": 41.89, + "Tatoeba (ces-eng)": 94.3, + "Tatoeba (kat-eng)": 93.76, + "Tatoeba (nds-eng)": 29.11, + "Tatoeba (hun-eng)": 90.12, + "Tatoeba (dan-eng)": 94.65, + "Tatoeba (bel-eng)": 57.88, + "Tatoeba (tam-eng)": 35.72, + "Tatoeba (cor-eng)": 3.09, + "Tatoeba (xho-eng)": 2.01, + "Tatoeba (mkd-eng)": 90.86, + "Tatoeba (mal-eng)": 7.61, + "Tatoeba (fao-eng)": 19.62, + "Tatoeba (kaz-eng)": 23.64, + "Tatoeba (eus-eng)": 16.35, + "Tatoeba (gsw-eng)": 27.33 + } + ] }, "Classification": { "accuracy": [ { "Model": "text2vec-base-multilingual", + "AllegroReviews": 27.57, + "AmazonCounterfactualClassification (de)": 68.7, + "AmazonReviewsClassification (de)": 33.39, "AmazonReviewsClassification (fr)": 34.25, + "AngryTweetsClassification": 48.59, + "CBD": 57.94, + "DanishPoliticalCommentsClassification": 36.55, + "GeoreviewClassification": 34.63, + "HeadlineClassification": 62.29, + "InappropriatenessClassification": 57.37, + "KinopoiskClassification": 42.09, + "LccSentimentClassification": 51.93, + "MTOPDomainClassification (de)": 78.61, "MTOPDomainClassification (fr)": 71.83, + "MTOPIntentClassification (de)": 55.26, "MTOPIntentClassification (fr)": 44.53, + "MasakhaNEWSClassification (amh)": 77.29, + "MasakhaNEWSClassification (eng)": 68.54, "MasakhaNEWSClassification (fra)": 73.84, + "MasakhaNEWSClassification (hau)": 46.81, + "MasakhaNEWSClassification (ibo)": 41.72, + "MasakhaNEWSClassification (lin)": 55.6, + "MasakhaNEWSClassification (lug)": 34.71, + "MasakhaNEWSClassification (orm)": 36.65, + "MasakhaNEWSClassification (pcm)": 82.75, + "MasakhaNEWSClassification (run)": 42.11, + "MasakhaNEWSClassification (sna)": 54.12, + "MasakhaNEWSClassification (som)": 32.07, + "MasakhaNEWSClassification (swa)": 57.54, + "MasakhaNEWSClassification (tir)": 40.81, + "MasakhaNEWSClassification (xho)": 35.12, + "MasakhaNEWSClassification (yor)": 49.29, "MassiveIntentClassification (fr)": 51.93, - "MassiveScenarioClassification (fr)": 58.31 + "MassiveScenarioClassification (fr)": 58.31, + "NoRecClassification": 43.98, + "NordicLangClassification": 38.13, + "PAC": 66.11, + "PolEmo2.0-IN": 54.7, + "PolEmo2.0-OUT": 20.51, + "RuReviewsClassification": 56.71, + "RuSciBenchGRNTIClassification": 49.18, + "RuSciBenchOECDClassification": 39.55 } ] }, @@ -23709,11 +43604,56 @@ "Model": "text2vec-base-multilingual", "AlloProfClusteringP2P": 49.11, "AlloProfClusteringS2S": 32.72, + "BlurbsClusteringP2P": 27.75, + "BlurbsClusteringS2S": 13.22, + "GeoreviewClusteringP2P": 51.65, "HALClusteringS2S": 16.19, + "MLSUMClusteringP2P (de)": 33.54, + "MLSUMClusteringP2P (fr)": 36.41, + "MLSUMClusteringP2P (ru)": 35.95, + "MLSUMClusteringP2P (es)": 37.46, "MLSUMClusteringP2P": 36.19, + "MLSUMClusteringS2S (de)": 31.94, + "MLSUMClusteringS2S (fr)": 35.75, + "MLSUMClusteringS2S (ru)": 35.41, + "MLSUMClusteringS2S (es)": 37.23, "MLSUMClusteringS2S": 30.39, + "MasakhaNEWSClusteringP2P (amh)": 52.2, + "MasakhaNEWSClusteringP2P (eng)": 42.5, "MasakhaNEWSClusteringP2P (fra)": 38.51, - "MasakhaNEWSClusteringS2S (fra)": 32.51 + "MasakhaNEWSClusteringP2P (hau)": 27.39, + "MasakhaNEWSClusteringP2P (ibo)": 29.8, + "MasakhaNEWSClusteringP2P (lin)": 43.86, + "MasakhaNEWSClusteringP2P (lug)": 44.81, + "MasakhaNEWSClusteringP2P (orm)": 23.29, + "MasakhaNEWSClusteringP2P (pcm)": 53.31, + "MasakhaNEWSClusteringP2P (run)": 43.27, + "MasakhaNEWSClusteringP2P (sna)": 45.25, + "MasakhaNEWSClusteringP2P (som)": 27.97, + "MasakhaNEWSClusteringP2P (swa)": 26.53, + "MasakhaNEWSClusteringP2P (tir)": 46.64, + "MasakhaNEWSClusteringP2P (xho)": 21.6, + "MasakhaNEWSClusteringP2P (yor)": 31.63, + "MasakhaNEWSClusteringS2S (amh)": 52.13, + "MasakhaNEWSClusteringS2S (eng)": 9.86, + "MasakhaNEWSClusteringS2S (fra)": 32.51, + "MasakhaNEWSClusteringS2S (hau)": 15.48, + "MasakhaNEWSClusteringS2S (ibo)": 32.76, + "MasakhaNEWSClusteringS2S (lin)": 46.64, + "MasakhaNEWSClusteringS2S (lug)": 41.71, + "MasakhaNEWSClusteringS2S (orm)": 22.31, + "MasakhaNEWSClusteringS2S (pcm)": 44.62, + "MasakhaNEWSClusteringS2S (run)": 45.44, + "MasakhaNEWSClusteringS2S (sna)": 43.21, + "MasakhaNEWSClusteringS2S (som)": 26.3, + "MasakhaNEWSClusteringS2S (swa)": 17.91, + "MasakhaNEWSClusteringS2S (tir)": 45.61, + "MasakhaNEWSClusteringS2S (xho)": 21.36, + "MasakhaNEWSClusteringS2S (yor)": 24.11, + "RuSciBenchGRNTIClusteringP2P": 42.83, + "RuSciBenchOECDClusteringP2P": 38.58, + "TenKGnadClusteringP2P": 30.87, + "TenKGnadClusteringS2S": 14.84 } ] }, @@ -23721,13 +43661,50 @@ "max_ap": [ { "Model": "text2vec-base-multilingual", + "CDSC-E": 74.42, + "FalseFriendsGermanEnglish": 48.21, + "OpusparcusPC (de)": 96.39, + "OpusparcusPC (en)": 98.39, + "OpusparcusPC (fi)": 92.88, "OpusparcusPC (fr)": 92.04, - "PawsXPairClassification (fr)": 65.57 + "OpusparcusPC (ru)": 87.84, + "OpusparcusPC (sv)": 93.65, + "PSC": 94.96, + "PawsXPairClassification (de)": 64.44, + "PawsXPairClassification (en)": 68.78, + "PawsXPairClassification (es)": 63.87, + "PawsXPairClassification (fr)": 65.57, + "PawsXPairClassification (ja)": 57.71, + "PawsXPairClassification (ko)": 49.51, + "PawsXPairClassification (zh)": 65.33, + "SICK-E-PL": 74.15, + "TERRa": 54.56 + }, + { + "Model": "text2vec-base-multilingual", + "CDSC-E": 74.42, + "FalseFriendsGermanEnglish": 48.49, + "OpusparcusPC (de)": 96.39, + "OpusparcusPC (en)": 98.45, + "OpusparcusPC (fi)": 92.88, + "OpusparcusPC (fr)": 92.04, + "OpusparcusPC (ru)": 87.84, + "OpusparcusPC (sv)": 93.65, + "PSC": 94.96, + "PawsXPairClassification (de)": 64.79, + "PawsXPairClassification (en)": 68.78, + "PawsXPairClassification (es)": 63.87, + "PawsXPairClassification (fr)": 65.6, + "PawsXPairClassification (ja)": 57.86, + "PawsXPairClassification (ko)": 49.51, + "PawsXPairClassification (zh)": 65.51, + "SICK-E-PL": 74.15, + "TERRa": 54.56 }, { "Model": "text2vec-base-multilingual", "OpusparcusPC (fr)": 92.04, - "PawsXPairClassification (fr)": 65.6 + "PawsXPairClassification (fr)": 65.57 } ] }, @@ -23736,7 +43713,31 @@ { "Model": "text2vec-base-multilingual", "AlloprofReranking": 51.48, - "SyntecReranking": 70.28 + "MindSmallReranking": 29.64, + "RuBQReranking": 47.69, + "SyntecReranking": 70.28, + "T2Reranking": 65.07 + }, + { + "Model": "text2vec-base-multilingual", + "MIRACLReranking (ar)": 37.44, + "MIRACLReranking (bn)": 19.37, + "MIRACLReranking (de)": 25.45, + "MIRACLReranking (en)": 37.85, + "MIRACLReranking (es)": 36.76, + "MIRACLReranking (fa)": 22.75, + "MIRACLReranking (fi)": 42.38, + "MIRACLReranking (fr)": 26.52, + "MIRACLReranking (hi)": 23.05, + "MIRACLReranking (id)": 32.89, + "MIRACLReranking (ja)": 29.42, + "MIRACLReranking (ko)": 23.99, + "MIRACLReranking (ru)": 29.44, + "MIRACLReranking (sw)": 21.21, + "MIRACLReranking (te)": 18.51, + "MIRACLReranking (th)": 26.99, + "MIRACLReranking (yo)": 35.18, + "MIRACLReranking (zh)": 20.66 } ] }, @@ -23744,16 +43745,188 @@ "ndcg_at_10": [ { "Model": "text2vec-base-multilingual", + "AILACasedocs": 9.96, + "AILAStatutes": 15.39, + "ARCChallenge": 5.45, "AlloprofRetrieval": 18.9, + "AlphaNLI": 23.29, + "AppsRetrieval": 1.31, + "ArguAna": 31.35, "BSARDRetrieval": 0.0, + "ClimateFEVER": 11.19, + "CmedqaRetrieval": 12.1, + "CodeFeedbackMT": 12.67, + "CodeFeedbackST": 22.08, + "CodeSearchNetCCRetrieval (python)": 17.26, + "CodeSearchNetCCRetrieval (javascript)": 16.51, + "CodeSearchNetCCRetrieval (go)": 8.65, + "CodeSearchNetCCRetrieval (ruby)": 21.38, + "CodeSearchNetCCRetrieval (java)": 9.18, + "CodeSearchNetCCRetrieval (php)": 8.02, + "CodeSearchNetRetrieval (python)": 39.52, + "CodeSearchNetRetrieval (javascript)": 22.2, + "CodeSearchNetRetrieval (go)": 32.32, + "CodeSearchNetRetrieval (ruby)": 34.56, + "CodeSearchNetRetrieval (java)": 18.91, + "CodeSearchNetRetrieval (php)": 26.69, + "CodeTransOceanContest": 17.87, + "CodeTransOceanDL": 26.42, + "CosQA": 11.36, + "CovidRetrieval": 15.0, + "DBPedia": 15.87, + "FEVER": 30.69, + "FiQA2018": 12.34, + "GerDaLIR": 1.18, + "GerDaLIRSmall": 3.55, + "GermanQuAD-Retrieval": 68.44, + "HellaSwag": 15.59, + "HotpotQA": 16.63, + "LEMBNarrativeQARetrieval": 6.26, + "LEMBQMSumRetrieval": 12.2, + "LEMBSummScreenFDRetrieval": 40.45, + "LEMBWikimQARetrieval": 23.88, + "LeCaRDv2": 25.28, + "LegalBenchConsumerContractsQA": 48.83, + "LegalBenchCorporateLobbying": 81.56, + "LegalQuAD": 11.71, + "LegalSummarization": 48.68, + "MIRACLRetrieval (ar)": 17.44, + "MIRACLRetrieval (bn)": 3.55, + "MIRACLRetrieval (de)": 12.85, + "MIRACLRetrieval (en)": 18.93, + "MIRACLRetrieval (es)": 16.1, + "MIRACLRetrieval (fa)": 10.15, + "MIRACLRetrieval (fi)": 21.62, + "MIRACLRetrieval (fr)": 13.13, + "MIRACLRetrieval (hi)": 9.42, + "MIRACLRetrieval (id)": 17.33, + "MIRACLRetrieval (ja)": 10.28, + "MIRACLRetrieval (ko)": 16.31, + "MIRACLRetrieval (ru)": 11.99, + "MIRACLRetrieval (sw)": 8.6, + "MIRACLRetrieval (te)": 2.54, + "MIRACLRetrieval (th)": 10.66, + "MIRACLRetrieval (yo)": 17.03, + "MIRACLRetrieval (zh)": 9.32, + "MSMARCO": 14.57, + "MintakaRetrieval (ar)": 8.28, + "MintakaRetrieval (de)": 13.55, + "MintakaRetrieval (es)": 14.61, "MintakaRetrieval (fr)": 14.81, + "MintakaRetrieval (hi)": 7.09, + "MintakaRetrieval (it)": 14.28, + "MintakaRetrieval (ja)": 9.2, + "MintakaRetrieval (pt)": 14.95, + "NFCorpus": 18.64, + "NQ": 20.7, + "PIQA": 11.75, + "Quail": 1.75, + "RARbCode": 3.59, + "RARbMath": 31.12, + "RiaNewsRetrieval": 23.85, + "RuBQRetrieval": 21.04, + "SCIDOCS": 9.8, + "SIQA": 0.38, + "SciFact": 38.99, + "SciFact-PL": 28.92, + "SpartQA": 8.55, + "StackOverflowQA": 32.47, "SyntecRetrieval": 49.69, + "SyntheticText2SQL": 24.54, + "TRECCOVID": 32.1, + "TRECCOVID-PL": 30.14, + "TempReasonL1": 1.48, + "TempReasonL2Fact": 2.89, + "TempReasonL2Pure": 0.09, + "TempReasonL3Fact": 3.82, + "TempReasonL3Pure": 0.42, + "Touche2020": 14.08, + "WinoGrande": 31.61, + "XMarket (de)": 4.04, + "XMarket (en)": 8.18, + "XMarket (es)": 5.39, + "XPQARetrieval (ara-ara)": 22.16, + "XPQARetrieval (eng-ara)": 13.78, + "XPQARetrieval (ara-eng)": 20.65, + "XPQARetrieval (deu-deu)": 41.79, + "XPQARetrieval (eng-deu)": 19.11, + "XPQARetrieval (deu-eng)": 44.08, + "XPQARetrieval (spa-spa)": 35.97, + "XPQARetrieval (eng-spa)": 19.3, + "XPQARetrieval (spa-eng)": 35.72, + "XPQARetrieval (fra-fra)": 40.4, + "XPQARetrieval (eng-fra)": 17.9, + "XPQARetrieval (fra-eng)": 38.18, + "XPQARetrieval (hin-hin)": 53.69, + "XPQARetrieval (eng-hin)": 19.47, + "XPQARetrieval (hin-eng)": 43.22, + "XPQARetrieval (ita-ita)": 50.45, + "XPQARetrieval (eng-ita)": 19.99, + "XPQARetrieval (ita-eng)": 41.49, + "XPQARetrieval (jpn-jpn)": 51.88, + "XPQARetrieval (eng-jpn)": 16.12, + "XPQARetrieval (jpn-eng)": 40.69, + "XPQARetrieval (kor-kor)": 17.62, + "XPQARetrieval (eng-kor)": 16.7, + "XPQARetrieval (kor-eng)": 16.93, + "XPQARetrieval (pol-pol)": 28.06, + "XPQARetrieval (eng-pol)": 12.59, + "XPQARetrieval (pol-eng)": 25.14, + "XPQARetrieval (por-por)": 30.62, + "XPQARetrieval (eng-por)": 14.86, + "XPQARetrieval (por-eng)": 28.56, + "XPQARetrieval (tam-tam)": 15.47, + "XPQARetrieval (eng-tam)": 5.43, + "XPQARetrieval (tam-eng)": 8.94, + "XPQARetrieval (cmn-cmn)": 39.62, + "XPQARetrieval (eng-cmn)": 14.62, + "XPQARetrieval (cmn-eng)": 31.26, "XPQARetrieval (fr)": 40.4 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "text2vec-base-multilingual", + "CDSC-R": 90.74, + "GermanSTSBenchmark": 83.32, + "RUParaPhraserSTS": 67.02, + "RuSTSBenchmarkSTS": 82.6, + "SICK-R-PL": 72.43, + "SICKFr": 77.25, + "STSB": 80.45, + "STSBenchmarkMultilingualSTS (es)": 84.9, + "STSBenchmarkMultilingualSTS (pt)": 84.06, + "STSBenchmarkMultilingualSTS (nl)": 82.82, + "STSBenchmarkMultilingualSTS (ru)": 82.7, + "STSBenchmarkMultilingualSTS (fr)": 83.47, + "STSBenchmarkMultilingualSTS (de)": 83.15, + "STSBenchmarkMultilingualSTS (zh)": 81.06, + "STSBenchmarkMultilingualSTS (en)": 86.45, + "STSBenchmarkMultilingualSTS (it)": 83.72, + "STSBenchmarkMultilingualSTS (pl)": 82.42 + }, + { + "Model": "text2vec-base-multilingual", + "CDSC-R": 90.74, + "GermanSTSBenchmark": 83.32, + "RUParaPhraserSTS": 67.02, + "RuSTSBenchmarkSTS": 82.6, + "SICK-R-PL": 72.43, + "SICKFr": 77.25, + "STSB": 80.45, + "STSBenchmarkMultilingualSTS (es)": 84.9, + "STSBenchmarkMultilingualSTS (pt)": 84.06, + "STSBenchmarkMultilingualSTS (nl)": 82.82, + "STSBenchmarkMultilingualSTS (ru)": 82.7, + "STSBenchmarkMultilingualSTS (fr)": 83.47, + "STSBenchmarkMultilingualSTS (de)": 83.15, + "STSBenchmarkMultilingualSTS (zh)": 81.06, + "STSBenchmarkMultilingualSTS (en)": 86.45, + "STSBenchmarkMultilingualSTS (it)": 83.72, + "STSBenchmarkMultilingualSTS (pl)": 82.42 + }, { "Model": "text2vec-base-multilingual", "SICKFr": 77.25, @@ -23764,6 +43937,14 @@ }, "Summarization": { "cosine_spearman": [ + { + "Model": "text2vec-base-multilingual", + "SummEvalFr": 29.33 + }, + { + "Model": "text2vec-base-multilingual", + "SummEvalFr": 29.33 + }, { "Model": "text2vec-base-multilingual", "SummEvalFr": 29.33 @@ -23771,10 +43952,23 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "text2vec-base-multilingual", + "CEDRClassification": 36.37, + "SensitiveTopicsClassification": 22.47 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "text2vec-base-multilingual", + "Core17InstructionRetrieval": -2.75, + "News21InstructionRetrieval": -2.1, + "Robust04InstructionRetrieval": -7.06 + } + ] } }, "shibing624__text2vec-large-chinese": { @@ -24158,19 +44352,426 @@ }, "voyageai__voyage-3": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "voyage-3", + "BornholmBitextMining": 34.84, + "Tatoeba (lat-eng)": 50.17, + "Tatoeba (max-eng)": 41.21, + "Tatoeba (cbk-eng)": 54.97, + "Tatoeba (nds-eng)": 69.17, + "Tatoeba (swe-eng)": 86.61, + "Tatoeba (tzl-eng)": 23.68, + "Tatoeba (nob-eng)": 92.86, + "Tatoeba (swh-eng)": 24.15, + "Tatoeba (afr-eng)": 68.23, + "Tatoeba (ara-eng)": 76.19, + "Tatoeba (ido-eng)": 69.36, + "Tatoeba (isl-eng)": 83.52, + "Tatoeba (nov-eng)": 50.2, + "Tatoeba (dan-eng)": 91.25, + "Tatoeba (war-eng)": 21.4, + "Tatoeba (pes-eng)": 80.49, + "Tatoeba (pol-eng)": 89.37, + "Tatoeba (ell-eng)": 81.25, + "Tatoeba (kor-eng)": 80.99, + "Tatoeba (khm-eng)": 2.22, + "Tatoeba (spa-eng)": 90.48, + "Tatoeba (heb-eng)": 78.8, + "Tatoeba (pam-eng)": 8.62, + "Tatoeba (cha-eng)": 28.41, + "Tatoeba (ind-eng)": 84.33, + "Tatoeba (vie-eng)": 90.94, + "Tatoeba (hye-eng)": 75.17, + "Tatoeba (tgl-eng)": 68.92, + "Tatoeba (hin-eng)": 85.93, + "Tatoeba (yid-eng)": 48.54, + "Tatoeba (ukr-eng)": 86.36, + "Tatoeba (urd-eng)": 77.63, + "Tatoeba (bul-eng)": 84.4, + "Tatoeba (rus-eng)": 88.01, + "Tatoeba (jav-eng)": 24.94, + "Tatoeba (deu-eng)": 94.67, + "Tatoeba (por-eng)": 89.06, + "Tatoeba (tur-eng)": 83.06, + "Tatoeba (gle-eng)": 28.8, + "Tatoeba (slv-eng)": 76.75, + "Tatoeba (kaz-eng)": 70.94, + "Tatoeba (fra-eng)": 86.35, + "Tatoeba (hrv-eng)": 81.03, + "Tatoeba (gsw-eng)": 44.12, + "Tatoeba (mal-eng)": 76.57, + "Tatoeba (tha-eng)": 73.05, + "Tatoeba (awa-eng)": 49.75, + "Tatoeba (uzb-eng)": 43.48, + "Tatoeba (kab-eng)": 2.95, + "Tatoeba (ben-eng)": 68.0, + "Tatoeba (csb-eng)": 47.73, + "Tatoeba (swg-eng)": 56.37, + "Tatoeba (ceb-eng)": 21.09, + "Tatoeba (ita-eng)": 85.3, + "Tatoeba (oci-eng)": 47.55, + "Tatoeba (fin-eng)": 84.79, + "Tatoeba (est-eng)": 73.93, + "Tatoeba (glg-eng)": 83.89, + "Tatoeba (arz-eng)": 52.87, + "Tatoeba (amh-eng)": 30.35, + "Tatoeba (aze-eng)": 82.64, + "Tatoeba (hun-eng)": 77.69, + "Tatoeba (srp-eng)": 75.45, + "Tatoeba (ces-eng)": 85.54, + "Tatoeba (cmn-eng)": 90.34, + "Tatoeba (sqi-eng)": 84.32, + "Tatoeba (ast-eng)": 75.12, + "Tatoeba (epo-eng)": 83.2, + "Tatoeba (uig-eng)": 50.74, + "Tatoeba (cym-eng)": 63.42, + "Tatoeba (dtp-eng)": 6.47, + "Tatoeba (ang-eng)": 58.48, + "Tatoeba (eus-eng)": 62.28, + "Tatoeba (dsb-eng)": 45.88, + "Tatoeba (slk-eng)": 83.46, + "Tatoeba (bel-eng)": 86.29, + "Tatoeba (wuu-eng)": 74.44, + "Tatoeba (lfn-eng)": 58.61, + "Tatoeba (kur-eng)": 50.02, + "Tatoeba (bos-eng)": 80.62, + "Tatoeba (fry-eng)": 63.1, + "Tatoeba (xho-eng)": 9.51, + "Tatoeba (bre-eng)": 18.83, + "Tatoeba (kzj-eng)": 7.63, + "Tatoeba (nno-eng)": 81.17, + "Tatoeba (lvs-eng)": 76.45, + "Tatoeba (hsb-eng)": 60.76, + "Tatoeba (tel-eng)": 68.28, + "Tatoeba (zsm-eng)": 81.68, + "Tatoeba (arq-eng)": 18.83, + "Tatoeba (mon-eng)": 68.66, + "Tatoeba (mkd-eng)": 77.4, + "Tatoeba (jpn-eng)": 80.76, + "Tatoeba (gla-eng)": 12.44, + "Tatoeba (kat-eng)": 74.33, + "Tatoeba (lit-eng)": 79.76, + "Tatoeba (tat-eng)": 63.72, + "Tatoeba (fao-eng)": 61.51, + "Tatoeba (tuk-eng)": 34.36, + "Tatoeba (ber-eng)": 6.36, + "Tatoeba (ina-eng)": 83.88, + "Tatoeba (nld-eng)": 91.59, + "Tatoeba (pms-eng)": 57.89, + "Tatoeba (mhr-eng)": 13.48, + "Tatoeba (orv-eng)": 34.56, + "Tatoeba (ile-eng)": 69.65, + "Tatoeba (tam-eng)": 68.61, + "Tatoeba (cor-eng)": 6.41, + "Tatoeba (cat-eng)": 84.34, + "Tatoeba (yue-eng)": 71.38, + "Tatoeba (mar-eng)": 72.56, + "Tatoeba (ron-eng)": 83.85 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-3", + "AllegroReviews": 39.35, + "AmazonCounterfactualClassification (en-ext)": 62.11, + "AmazonCounterfactualClassification (en)": 60.37, + "AmazonCounterfactualClassification (de)": 57.76, + "AmazonCounterfactualClassification (ja)": 65.06, + "AmazonPolarityClassification": 86.02, + "AmazonReviewsClassification (en)": 36.97, + "AmazonReviewsClassification (de)": 38.1, + "AmazonReviewsClassification (es)": 36.32, + "AmazonReviewsClassification (fr)": 35.43, + "AmazonReviewsClassification (ja)": 37.39, + "AmazonReviewsClassification (zh)": 32.94, + "AngryTweetsClassification": 54.26, + "Banking77Classification": 73.8, + "CBD": 66.4, + "DanishPoliticalCommentsClassification": 36.51, + "EmotionClassification": 40.62, + "GeoreviewClassification": 43.02, + "HeadlineClassification": 76.74, + "ImdbClassification": 89.68, + "InappropriatenessClassification": 59.62, + "KinopoiskClassification": 62.14, + "LccSentimentClassification": 55.93, + "MTOPDomainClassification (en)": 87.15, + "MTOPDomainClassification (de)": 84.03, + "MTOPDomainClassification (es)": 84.36, + "MTOPDomainClassification (fr)": 80.39, + "MTOPDomainClassification (hi)": 84.78, + "MTOPDomainClassification (th)": 80.49, + "MTOPIntentClassification (en)": 56.34, + "MTOPIntentClassification (de)": 57.94, + "MTOPIntentClassification (es)": 58.01, + "MTOPIntentClassification (fr)": 52.87, + "MTOPIntentClassification (hi)": 54.37, + "MTOPIntentClassification (th)": 56.06, + "MasakhaNEWSClassification (amh)": 79.81, + "MasakhaNEWSClassification (eng)": 71.82, + "MasakhaNEWSClassification (fra)": 70.54, + "MasakhaNEWSClassification (hau)": 74.33, + "MasakhaNEWSClassification (ibo)": 63.15, + "MasakhaNEWSClassification (lin)": 70.46, + "MasakhaNEWSClassification (lug)": 67.71, + "MasakhaNEWSClassification (orm)": 73.88, + "MasakhaNEWSClassification (pcm)": 85.02, + "MasakhaNEWSClassification (run)": 69.29, + "MasakhaNEWSClassification (sna)": 83.33, + "MasakhaNEWSClassification (som)": 61.12, + "MasakhaNEWSClassification (swa)": 68.42, + "MasakhaNEWSClassification (tir)": 65.26, + "MasakhaNEWSClassification (xho)": 77.88, + "MasakhaNEWSClassification (yor)": 76.11, + "MassiveIntentClassification (ru)": 58.32, + "MassiveIntentClassification (ur)": 52.72, + "MassiveIntentClassification (sq)": 56.6, + "MassiveIntentClassification (sv)": 59.09, + "MassiveIntentClassification (th)": 55.36, + "MassiveIntentClassification (fa)": 59.38, + "MassiveIntentClassification (el)": 56.02, + "MassiveIntentClassification (hi)": 55.99, + "MassiveIntentClassification (vi)": 59.24, + "MassiveIntentClassification (tr)": 59.45, + "MassiveIntentClassification (it)": 59.0, + "MassiveIntentClassification (ta)": 49.98, + "MassiveIntentClassification (sw)": 44.49, + "MassiveIntentClassification (tl)": 53.96, + "MassiveIntentClassification (fi)": 60.96, + "MassiveIntentClassification (he)": 56.41, + "MassiveIntentClassification (id)": 58.52, + "MassiveIntentClassification (az)": 56.82, + "MassiveIntentClassification (pt)": 59.34, + "MassiveIntentClassification (my)": 38.76, + "MassiveIntentClassification (ro)": 53.33, + "MassiveIntentClassification (hu)": 55.93, + "MassiveIntentClassification (ko)": 59.3, + "MassiveIntentClassification (nl)": 57.24, + "MassiveIntentClassification (ar)": 49.51, + "MassiveIntentClassification (km)": 34.82, + "MassiveIntentClassification (de)": 57.95, + "MassiveIntentClassification (am)": 36.33, + "MassiveIntentClassification (nb)": 58.53, + "MassiveIntentClassification (ml)": 52.87, + "MassiveIntentClassification (bn)": 53.85, + "MassiveIntentClassification (sl)": 56.0, + "MassiveIntentClassification (te)": 52.91, + "MassiveIntentClassification (da)": 59.34, + "MassiveIntentClassification (cy)": 48.16, + "MassiveIntentClassification (es)": 58.5, + "MassiveIntentClassification (lv)": 55.67, + "MassiveIntentClassification (pl)": 57.36, + "MassiveIntentClassification (zh-TW)": 54.88, + "MassiveIntentClassification (ms)": 53.42, + "MassiveIntentClassification (af)": 49.97, + "MassiveIntentClassification (jv)": 43.79, + "MassiveIntentClassification (hy)": 54.2, + "MassiveIntentClassification (ja)": 60.23, + "MassiveIntentClassification (ka)": 49.24, + "MassiveIntentClassification (fr)": 57.54, + "MassiveIntentClassification (is)": 53.36, + "MassiveIntentClassification (mn)": 52.26, + "MassiveIntentClassification (kn)": 50.87, + "MassiveIntentClassification (zh-CN)": 60.13, + "MassiveIntentClassification (en)": 63.75, + "MassiveScenarioClassification (sv)": 68.38, + "MassiveScenarioClassification (fr)": 66.07, + "MassiveScenarioClassification (km)": 38.68, + "MassiveScenarioClassification (te)": 62.13, + "MassiveScenarioClassification (bn)": 59.88, + "MassiveScenarioClassification (my)": 45.17, + "MassiveScenarioClassification (tl)": 62.36, + "MassiveScenarioClassification (fi)": 66.28, + "MassiveScenarioClassification (hi)": 61.29, + "MassiveScenarioClassification (tr)": 66.8, + "MassiveScenarioClassification (pl)": 64.96, + "MassiveScenarioClassification (az)": 64.37, + "MassiveScenarioClassification (ms)": 63.1, + "MassiveScenarioClassification (fa)": 66.24, + "MassiveScenarioClassification (he)": 64.02, + "MassiveScenarioClassification (ka)": 55.48, + "MassiveScenarioClassification (nl)": 66.71, + "MassiveScenarioClassification (ru)": 66.18, + "MassiveScenarioClassification (vi)": 67.19, + "MassiveScenarioClassification (jv)": 50.63, + "MassiveScenarioClassification (af)": 61.28, + "MassiveScenarioClassification (zh-TW)": 63.5, + "MassiveScenarioClassification (it)": 66.3, + "MassiveScenarioClassification (lv)": 60.81, + "MassiveScenarioClassification (zh-CN)": 68.37, + "MassiveScenarioClassification (id)": 67.18, + "MassiveScenarioClassification (ja)": 67.03, + "MassiveScenarioClassification (ro)": 61.52, + "MassiveScenarioClassification (nb)": 67.81, + "MassiveScenarioClassification (en)": 71.0, + "MassiveScenarioClassification (ko)": 67.6, + "MassiveScenarioClassification (ur)": 59.09, + "MassiveScenarioClassification (am)": 42.89, + "MassiveScenarioClassification (cy)": 56.86, + "MassiveScenarioClassification (da)": 67.53, + "MassiveScenarioClassification (sq)": 65.72, + "MassiveScenarioClassification (is)": 61.24, + "MassiveScenarioClassification (mn)": 57.43, + "MassiveScenarioClassification (sw)": 51.89, + "MassiveScenarioClassification (es)": 66.28, + "MassiveScenarioClassification (hu)": 66.07, + "MassiveScenarioClassification (ta)": 57.44, + "MassiveScenarioClassification (th)": 65.01, + "MassiveScenarioClassification (hy)": 60.1, + "MassiveScenarioClassification (de)": 67.76, + "MassiveScenarioClassification (ar)": 57.93, + "MassiveScenarioClassification (el)": 63.1, + "MassiveScenarioClassification (ml)": 59.28, + "MassiveScenarioClassification (kn)": 58.94, + "MassiveScenarioClassification (sl)": 62.96, + "MassiveScenarioClassification (pt)": 65.62, + "NoRecClassification": 50.16, + "NordicLangClassification": 47.13, + "PAC": 68.47, + "PolEmo2.0-IN": 71.05, + "PolEmo2.0-OUT": 42.61, + "RuReviewsClassification": 58.37, + "RuSciBenchGRNTIClassification": 59.01, + "RuSciBenchOECDClassification": 45.37, + "ToxicConversationsClassification": 60.73, + "TweetSentimentExtractionClassification": 51.28 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "voyage-3", + "AlloProfClusteringP2P": 59.71, + "AlloProfClusteringS2S": 47.41, + "ArxivClusteringP2P": 46.38, + "ArxivClusteringS2S": 39.09, + "BiorxivClusteringP2P": 38.0, + "BiorxivClusteringS2S": 35.33, + "BlurbsClusteringP2P": 41.96, + "BlurbsClusteringS2S": 18.13, + "GeoreviewClusteringP2P": 72.23, + "HALClusteringS2S": 26.14, + "MLSUMClusteringP2P (de)": 35.65, + "MLSUMClusteringP2P (fr)": 42.12, + "MLSUMClusteringP2P (ru)": 37.26, + "MLSUMClusteringP2P (es)": 45.55, + "MLSUMClusteringS2S (de)": 37.51, + "MLSUMClusteringS2S (fr)": 42.26, + "MLSUMClusteringS2S (ru)": 40.34, + "MLSUMClusteringS2S (es)": 44.87, + "MasakhaNEWSClusteringP2P (amh)": 54.69, + "MasakhaNEWSClusteringP2P (eng)": 48.43, + "MasakhaNEWSClusteringP2P (fra)": 36.68, + "MasakhaNEWSClusteringP2P (hau)": 43.55, + "MasakhaNEWSClusteringP2P (ibo)": 48.5, + "MasakhaNEWSClusteringP2P (lin)": 46.08, + "MasakhaNEWSClusteringP2P (lug)": 47.42, + "MasakhaNEWSClusteringP2P (orm)": 51.58, + "MasakhaNEWSClusteringP2P (pcm)": 53.92, + "MasakhaNEWSClusteringP2P (run)": 57.08, + "MasakhaNEWSClusteringP2P (sna)": 56.38, + "MasakhaNEWSClusteringP2P (som)": 33.65, + "MasakhaNEWSClusteringP2P (swa)": 25.28, + "MasakhaNEWSClusteringP2P (tir)": 58.23, + "MasakhaNEWSClusteringP2P (xho)": 40.47, + "MasakhaNEWSClusteringP2P (yor)": 41.09, + "MasakhaNEWSClusteringS2S (amh)": 56.35, + "MasakhaNEWSClusteringS2S (eng)": 55.07, + "MasakhaNEWSClusteringS2S (fra)": 41.39, + "MasakhaNEWSClusteringS2S (hau)": 26.63, + "MasakhaNEWSClusteringS2S (ibo)": 35.71, + "MasakhaNEWSClusteringS2S (lin)": 48.81, + "MasakhaNEWSClusteringS2S (lug)": 42.37, + "MasakhaNEWSClusteringS2S (orm)": 27.6, + "MasakhaNEWSClusteringS2S (pcm)": 55.02, + "MasakhaNEWSClusteringS2S (run)": 52.14, + "MasakhaNEWSClusteringS2S (sna)": 48.84, + "MasakhaNEWSClusteringS2S (som)": 28.6, + "MasakhaNEWSClusteringS2S (swa)": 20.29, + "MasakhaNEWSClusteringS2S (tir)": 48.05, + "MasakhaNEWSClusteringS2S (xho)": 23.41, + "MasakhaNEWSClusteringS2S (yor)": 30.96, + "MedrxivClusteringP2P": 32.39, + "MedrxivClusteringS2S": 32.14, + "RedditClustering": 50.49, + "RedditClusteringP2P": 58.55, + "RuSciBenchGRNTIClusteringP2P": 52.22, + "RuSciBenchOECDClusteringP2P": 44.42, + "StackExchangeClustering": 60.36, + "StackExchangeClusteringP2P": 32.78, + "TenKGnadClusteringP2P": 38.06, + "TenKGnadClusteringS2S": 35.49, + "TwentyNewsgroupsClustering": 46.4 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "voyage-3", + "CDSC-E": 66.92, + "FalseFriendsGermanEnglish": 52.21, + "OpusparcusPC (de)": 95.28, + "OpusparcusPC (en)": 98.09, + "OpusparcusPC (fi)": 92.53, + "OpusparcusPC (fr)": 91.63, + "OpusparcusPC (ru)": 90.1, + "OpusparcusPC (sv)": 93.41, + "PSC": 99.63, + "PawsXPairClassification (de)": 60.28, + "PawsXPairClassification (en)": 63.74, + "PawsXPairClassification (es)": 60.92, + "PawsXPairClassification (fr)": 64.0, + "PawsXPairClassification (ja)": 52.59, + "PawsXPairClassification (ko)": 52.85, + "PawsXPairClassification (zh)": 58.53, + "SICK-E-PL": 65.89, + "SprintDuplicateQuestions": 90.2, + "TERRa": 51.98, + "TwitterSemEval2015": 61.84, + "TwitterURLCorpus": 83.61 + }, + { + "Model": "voyage-3", + "CDSC-E": 66.93, + "FalseFriendsGermanEnglish": 52.21, + "OpusparcusPC (de)": 95.28, + "OpusparcusPC (en)": 98.09, + "OpusparcusPC (fi)": 92.57, + "OpusparcusPC (fr)": 91.63, + "OpusparcusPC (ru)": 90.12, + "OpusparcusPC (sv)": 93.41, + "PSC": 99.64, + "PawsXPairClassification (de)": 60.28, + "PawsXPairClassification (en)": 63.74, + "PawsXPairClassification (es)": 60.94, + "PawsXPairClassification (fr)": 64.01, + "PawsXPairClassification (ja)": 52.71, + "PawsXPairClassification (ko)": 52.88, + "PawsXPairClassification (zh)": 58.84, + "SICK-E-PL": 65.92, + "SprintDuplicateQuestions": 90.2, + "TERRa": 52.04, + "TwitterSemEval2015": 61.84, + "TwitterURLCorpus": 83.61 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "voyage-3", + "AlloprofReranking": 84.65, + "AskUbuntuDupQuestions": 61.52, + "MindSmallReranking": 31.93, + "RuBQReranking": 72.6, + "T2Reranking": 65.27 + } + ] }, "Retrieval": { "ndcg_at_10": [ @@ -24180,7 +44781,9 @@ "AILAStatutes": 42.49, "AlloprofRetrieval": 73.76, "AppsRetrieval": 73.03, + "ArguAna": 60.96, "BSARDRetrieval": 44.87, + "CmedqaRetrieval": 34.83, "CodeFeedbackMT": 66.69, "CodeFeedbackST": 83.02, "CodeSearchNetCCRetrieval (python)": 79.8, @@ -24198,6 +44801,8 @@ "CodeTransOceanContest": 89.92, "CodeTransOceanDL": 33.92, "CosQA": 28.7, + "CovidRetrieval": 88.47, + "FiQA2018": 52.28, "GerDaLIRSmall": 44.72, "LEMBNarrativeQARetrieval": 54.12, "LEMBQMSumRetrieval": 51.05, @@ -24210,43 +44815,560 @@ "LegalSummarization": 69.23, "MIRACLRetrieval (ru)": 68.43, "MintakaRetrieval (fr)": 44.56, + "NFCorpus": 38.22, "RiaNewsRetrieval": 88.02, "RuBQRetrieval": 71.54, + "SCIDOCS": 21.43, + "SciFact": 74.94, + "SciFact-PL": 68.72, + "SpartQA": 10.73, "StackOverflowQA": 94.33, "SyntecRetrieval": 87.54, "SyntheticText2SQL": 57.56, + "TRECCOVID": 80.46, + "TRECCOVID-PL": 76.11, + "TempReasonL1": 1.23, + "WinoGrande": 67.74, "XPQARetrieval (fra-fra)": 76.08 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-3", + "BIOSSES": 87.92, + "CDSC-R": 86.77, + "GermanSTSBenchmark": 71.59, + "RUParaPhraserSTS": 65.57, + "RuSTSBenchmarkSTS": 69.9, + "SICK-R": 79.63, + "SICK-R-PL": 65.71, + "SICKFr": 73.58, + "STS12": 69.52, + "STS13": 80.56, + "STS14": 73.33, + "STS15": 80.39, + "STS16": 79.83, + "STS17 (en-de)": 82.12, + "STS17 (en-tr)": 67.44, + "STS17 (fr-en)": 77.55, + "STS17 (es-en)": 78.34, + "STS17 (es-es)": 84.23, + "STS17 (nl-en)": 80.56, + "STS17 (en-ar)": 62.98, + "STS17 (ar-ar)": 68.04, + "STS17 (it-en)": 81.22, + "STS17 (ko-ko)": 69.39, + "STS17 (en-en)": 86.81, + "STSB": 64.81, + "STSBenchmark": 79.53, + "STSBenchmarkMultilingualSTS (pt)": 72.26, + "STSBenchmarkMultilingualSTS (pl)": 71.28, + "STSBenchmarkMultilingualSTS (ru)": 70.05, + "STSBenchmarkMultilingualSTS (nl)": 73.64, + "STSBenchmarkMultilingualSTS (es)": 73.96, + "STSBenchmarkMultilingualSTS (zh)": 66.97, + "STSBenchmarkMultilingualSTS (it)": 72.42, + "STSBenchmarkMultilingualSTS (en)": 79.57, + "STSBenchmarkMultilingualSTS (de)": 73.01, + "STSBenchmarkMultilingualSTS (fr)": 74.22 + }, + { + "Model": "voyage-3", + "BIOSSES": 87.92, + "CDSC-R": 86.77, + "GermanSTSBenchmark": 71.59, + "RUParaPhraserSTS": 65.57, + "RuSTSBenchmarkSTS": 69.9, + "SICK-R": 79.63, + "SICK-R-PL": 65.71, + "SICKFr": 73.58, + "STS12": 69.52, + "STS13": 80.56, + "STS14": 73.33, + "STS15": 80.39, + "STS16": 79.83, + "STS17 (en-de)": 82.12, + "STS17 (en-tr)": 67.44, + "STS17 (fr-en)": 77.55, + "STS17 (es-en)": 78.34, + "STS17 (es-es)": 84.23, + "STS17 (nl-en)": 80.56, + "STS17 (en-ar)": 62.98, + "STS17 (ar-ar)": 68.04, + "STS17 (it-en)": 81.22, + "STS17 (ko-ko)": 69.39, + "STS17 (en-en)": 86.81, + "STSB": 64.81, + "STSBenchmark": 79.53, + "STSBenchmarkMultilingualSTS (pt)": 72.26, + "STSBenchmarkMultilingualSTS (pl)": 71.28, + "STSBenchmarkMultilingualSTS (ru)": 70.05, + "STSBenchmarkMultilingualSTS (nl)": 73.64, + "STSBenchmarkMultilingualSTS (es)": 73.96, + "STSBenchmarkMultilingualSTS (zh)": 66.97, + "STSBenchmarkMultilingualSTS (it)": 72.42, + "STSBenchmarkMultilingualSTS (en)": 79.57, + "STSBenchmarkMultilingualSTS (de)": 73.01, + "STSBenchmarkMultilingualSTS (fr)": 74.22 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-3", + "SummEval": 28.92 + }, + { + "Model": "voyage-3", + "SummEval": 28.92 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-3", + "CEDRClassification": 36.17, + "SensitiveTopicsClassification": 26.23 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "voyage-3", + "Core17InstructionRetrieval": 1.45, + "News21InstructionRetrieval": 4.59, + "Robust04InstructionRetrieval": -2.0 + } + ] } }, "voyageai__voyage-3-lite": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "voyage-3-lite", + "BornholmBitextMining": 40.01, + "Tatoeba (glg-eng)": 83.71, + "Tatoeba (cor-eng)": 5.07, + "Tatoeba (war-eng)": 18.68, + "Tatoeba (kzj-eng)": 5.9, + "Tatoeba (heb-eng)": 76.32, + "Tatoeba (kat-eng)": 75.06, + "Tatoeba (csb-eng)": 32.04, + "Tatoeba (ber-eng)": 5.5, + "Tatoeba (hin-eng)": 83.54, + "Tatoeba (ceb-eng)": 17.46, + "Tatoeba (tel-eng)": 71.92, + "Tatoeba (urd-eng)": 82.72, + "Tatoeba (zsm-eng)": 83.35, + "Tatoeba (pms-eng)": 48.41, + "Tatoeba (bel-eng)": 86.51, + "Tatoeba (tam-eng)": 74.08, + "Tatoeba (ile-eng)": 70.53, + "Tatoeba (yue-eng)": 67.59, + "Tatoeba (tha-eng)": 85.63, + "Tatoeba (nds-eng)": 68.74, + "Tatoeba (ang-eng)": 44.62, + "Tatoeba (dsb-eng)": 46.75, + "Tatoeba (ind-eng)": 85.37, + "Tatoeba (sqi-eng)": 84.45, + "Tatoeba (afr-eng)": 75.83, + "Tatoeba (ina-eng)": 84.59, + "Tatoeba (gla-eng)": 9.4, + "Tatoeba (rus-eng)": 88.3, + "Tatoeba (hye-eng)": 78.89, + "Tatoeba (gsw-eng)": 42.12, + "Tatoeba (ben-eng)": 68.37, + "Tatoeba (tzl-eng)": 30.3, + "Tatoeba (pam-eng)": 6.05, + "Tatoeba (uig-eng)": 48.13, + "Tatoeba (lit-eng)": 80.2, + "Tatoeba (cha-eng)": 25.42, + "Tatoeba (khm-eng)": 38.22, + "Tatoeba (fry-eng)": 67.03, + "Tatoeba (ron-eng)": 85.52, + "Tatoeba (kab-eng)": 1.78, + "Tatoeba (hun-eng)": 75.75, + "Tatoeba (isl-eng)": 85.18, + "Tatoeba (slv-eng)": 77.15, + "Tatoeba (mon-eng)": 73.52, + "Tatoeba (uzb-eng)": 32.4, + "Tatoeba (dan-eng)": 91.92, + "Tatoeba (tgl-eng)": 57.23, + "Tatoeba (nld-eng)": 91.73, + "Tatoeba (nno-eng)": 85.64, + "Tatoeba (dtp-eng)": 5.26, + "Tatoeba (orv-eng)": 30.48, + "Tatoeba (fra-eng)": 85.98, + "Tatoeba (wuu-eng)": 77.34, + "Tatoeba (cbk-eng)": 55.37, + "Tatoeba (kor-eng)": 78.64, + "Tatoeba (arz-eng)": 49.44, + "Tatoeba (lfn-eng)": 50.19, + "Tatoeba (srp-eng)": 77.34, + "Tatoeba (mhr-eng)": 10.16, + "Tatoeba (max-eng)": 36.38, + "Tatoeba (oci-eng)": 48.04, + "Tatoeba (yid-eng)": 37.45, + "Tatoeba (ces-eng)": 87.47, + "Tatoeba (swe-eng)": 88.0, + "Tatoeba (nov-eng)": 57.51, + "Tatoeba (swg-eng)": 47.89, + "Tatoeba (tur-eng)": 86.98, + "Tatoeba (deu-eng)": 96.13, + "Tatoeba (ell-eng)": 80.58, + "Tatoeba (cym-eng)": 56.79, + "Tatoeba (jpn-eng)": 84.44, + "Tatoeba (awa-eng)": 45.66, + "Tatoeba (fao-eng)": 65.29, + "Tatoeba (est-eng)": 77.82, + "Tatoeba (ido-eng)": 64.81, + "Tatoeba (hsb-eng)": 58.22, + "Tatoeba (tat-eng)": 65.16, + "Tatoeba (hrv-eng)": 79.93, + "Tatoeba (swh-eng)": 16.1, + "Tatoeba (slk-eng)": 86.74, + "Tatoeba (ukr-eng)": 86.53, + "Tatoeba (ast-eng)": 75.33, + "Tatoeba (vie-eng)": 92.25, + "Tatoeba (bre-eng)": 17.51, + "Tatoeba (ita-eng)": 86.04, + "Tatoeba (ara-eng)": 74.73, + "Tatoeba (gle-eng)": 21.42, + "Tatoeba (pes-eng)": 82.83, + "Tatoeba (epo-eng)": 83.01, + "Tatoeba (cmn-eng)": 90.06, + "Tatoeba (eus-eng)": 70.02, + "Tatoeba (lat-eng)": 30.46, + "Tatoeba (mal-eng)": 83.27, + "Tatoeba (bos-eng)": 78.14, + "Tatoeba (nob-eng)": 93.75, + "Tatoeba (spa-eng)": 93.35, + "Tatoeba (arq-eng)": 18.26, + "Tatoeba (por-eng)": 90.37, + "Tatoeba (cat-eng)": 85.5, + "Tatoeba (tuk-eng)": 28.51, + "Tatoeba (mkd-eng)": 76.99, + "Tatoeba (kaz-eng)": 73.05, + "Tatoeba (jav-eng)": 23.65, + "Tatoeba (kur-eng)": 39.6, + "Tatoeba (mar-eng)": 77.58, + "Tatoeba (lvs-eng)": 78.75, + "Tatoeba (xho-eng)": 7.91, + "Tatoeba (fin-eng)": 87.41, + "Tatoeba (amh-eng)": 38.1, + "Tatoeba (bul-eng)": 84.43, + "Tatoeba (pol-eng)": 91.58, + "Tatoeba (aze-eng)": 82.44 + } + ] }, "Classification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-3-lite", + "AllegroReviews": 41.6, + "AmazonCounterfactualClassification (en-ext)": 65.74, + "AmazonCounterfactualClassification (en)": 63.58, + "AmazonCounterfactualClassification (de)": 64.15, + "AmazonCounterfactualClassification (ja)": 66.08, + "AmazonPolarityClassification": 75.7, + "AmazonReviewsClassification (en)": 37.63, + "AmazonReviewsClassification (de)": 41.0, + "AmazonReviewsClassification (es)": 39.26, + "AmazonReviewsClassification (fr)": 38.92, + "AmazonReviewsClassification (ja)": 38.58, + "AmazonReviewsClassification (zh)": 36.01, + "AngryTweetsClassification": 54.86, + "Banking77Classification": 76.85, + "CBD": 64.89, + "DanishPoliticalCommentsClassification": 37.46, + "EmotionClassification": 43.81, + "GeoreviewClassification": 45.4, + "HeadlineClassification": 78.34, + "ImdbClassification": 73.04, + "InappropriatenessClassification": 59.04, + "KinopoiskClassification": 57.39, + "LccSentimentClassification": 59.93, + "MTOPDomainClassification (en)": 90.52, + "MTOPDomainClassification (de)": 86.85, + "MTOPDomainClassification (es)": 84.68, + "MTOPDomainClassification (fr)": 82.89, + "MTOPDomainClassification (hi)": 83.19, + "MTOPDomainClassification (th)": 82.81, + "MTOPIntentClassification (en)": 62.23, + "MTOPIntentClassification (de)": 61.23, + "MTOPIntentClassification (es)": 61.31, + "MTOPIntentClassification (fr)": 55.38, + "MTOPIntentClassification (hi)": 56.72, + "MTOPIntentClassification (th)": 62.57, + "MasakhaNEWSClassification (amh)": 84.36, + "MasakhaNEWSClassification (eng)": 78.64, + "MasakhaNEWSClassification (fra)": 77.49, + "MasakhaNEWSClassification (hau)": 73.49, + "MasakhaNEWSClassification (ibo)": 65.95, + "MasakhaNEWSClassification (lin)": 73.49, + "MasakhaNEWSClassification (lug)": 66.32, + "MasakhaNEWSClassification (orm)": 72.92, + "MasakhaNEWSClassification (pcm)": 91.31, + "MasakhaNEWSClassification (run)": 72.48, + "MasakhaNEWSClassification (sna)": 85.07, + "MasakhaNEWSClassification (som)": 56.84, + "MasakhaNEWSClassification (swa)": 68.72, + "MasakhaNEWSClassification (tir)": 60.15, + "MasakhaNEWSClassification (xho)": 77.21, + "MasakhaNEWSClassification (yor)": 73.58, + "MassiveIntentClassification (sw)": 44.18, + "MassiveIntentClassification (vi)": 62.64, + "MassiveIntentClassification (sv)": 62.52, + "MassiveIntentClassification (km)": 38.5, + "MassiveIntentClassification (ja)": 64.02, + "MassiveIntentClassification (nb)": 62.3, + "MassiveIntentClassification (am)": 40.31, + "MassiveIntentClassification (is)": 57.27, + "MassiveIntentClassification (ka)": 50.07, + "MassiveIntentClassification (ar)": 51.41, + "MassiveIntentClassification (zh-TW)": 57.62, + "MassiveIntentClassification (bn)": 55.38, + "MassiveIntentClassification (sq)": 58.33, + "MassiveIntentClassification (te)": 53.72, + "MassiveIntentClassification (fi)": 61.16, + "MassiveIntentClassification (ta)": 51.63, + "MassiveIntentClassification (en)": 66.46, + "MassiveIntentClassification (zh-CN)": 64.98, + "MassiveIntentClassification (he)": 57.14, + "MassiveIntentClassification (cy)": 46.62, + "MassiveIntentClassification (jv)": 44.31, + "MassiveIntentClassification (el)": 56.16, + "MassiveIntentClassification (fa)": 63.87, + "MassiveIntentClassification (ro)": 54.23, + "MassiveIntentClassification (sl)": 58.45, + "MassiveIntentClassification (mn)": 55.9, + "MassiveIntentClassification (ko)": 60.7, + "MassiveIntentClassification (my)": 46.85, + "MassiveIntentClassification (az)": 60.51, + "MassiveIntentClassification (nl)": 60.61, + "MassiveIntentClassification (tr)": 62.05, + "MassiveIntentClassification (hy)": 59.17, + "MassiveIntentClassification (kn)": 51.19, + "MassiveIntentClassification (th)": 57.76, + "MassiveIntentClassification (lv)": 59.34, + "MassiveIntentClassification (id)": 60.44, + "MassiveIntentClassification (da)": 61.75, + "MassiveIntentClassification (de)": 59.82, + "MassiveIntentClassification (ur)": 55.39, + "MassiveIntentClassification (es)": 57.78, + "MassiveIntentClassification (hi)": 56.95, + "MassiveIntentClassification (ru)": 63.02, + "MassiveIntentClassification (ml)": 54.23, + "MassiveIntentClassification (pt)": 60.52, + "MassiveIntentClassification (pl)": 62.34, + "MassiveIntentClassification (af)": 51.75, + "MassiveIntentClassification (tl)": 52.13, + "MassiveIntentClassification (hu)": 57.9, + "MassiveIntentClassification (it)": 58.53, + "MassiveIntentClassification (fr)": 58.02, + "MassiveIntentClassification (ms)": 56.17, + "MassiveScenarioClassification (cy)": 56.54, + "MassiveScenarioClassification (ms)": 64.37, + "MassiveScenarioClassification (ur)": 61.78, + "MassiveScenarioClassification (th)": 65.75, + "MassiveScenarioClassification (da)": 69.41, + "MassiveScenarioClassification (it)": 64.76, + "MassiveScenarioClassification (ml)": 60.42, + "MassiveScenarioClassification (fr)": 65.51, + "MassiveScenarioClassification (pt)": 64.66, + "MassiveScenarioClassification (es)": 64.58, + "MassiveScenarioClassification (nl)": 67.55, + "MassiveScenarioClassification (bn)": 61.15, + "MassiveScenarioClassification (is)": 63.8, + "MassiveScenarioClassification (zh-TW)": 66.14, + "MassiveScenarioClassification (he)": 62.98, + "MassiveScenarioClassification (km)": 46.98, + "MassiveScenarioClassification (pl)": 65.13, + "MassiveScenarioClassification (tr)": 66.58, + "MassiveScenarioClassification (hu)": 66.29, + "MassiveScenarioClassification (zh-CN)": 70.44, + "MassiveScenarioClassification (sw)": 50.48, + "MassiveScenarioClassification (vi)": 67.33, + "MassiveScenarioClassification (am)": 47.4, + "MassiveScenarioClassification (az)": 65.45, + "MassiveScenarioClassification (de)": 68.76, + "MassiveScenarioClassification (el)": 64.34, + "MassiveScenarioClassification (ro)": 61.31, + "MassiveScenarioClassification (en)": 71.94, + "MassiveScenarioClassification (fi)": 65.14, + "MassiveScenarioClassification (ko)": 67.4, + "MassiveScenarioClassification (ta)": 59.58, + "MassiveScenarioClassification (jv)": 52.04, + "MassiveScenarioClassification (my)": 54.48, + "MassiveScenarioClassification (ka)": 57.01, + "MassiveScenarioClassification (sq)": 64.65, + "MassiveScenarioClassification (kn)": 58.69, + "MassiveScenarioClassification (tl)": 58.96, + "MassiveScenarioClassification (sv)": 69.08, + "MassiveScenarioClassification (ru)": 68.44, + "MassiveScenarioClassification (hy)": 64.17, + "MassiveScenarioClassification (nb)": 68.35, + "MassiveScenarioClassification (fa)": 67.14, + "MassiveScenarioClassification (ar)": 59.13, + "MassiveScenarioClassification (id)": 66.71, + "MassiveScenarioClassification (mn)": 60.82, + "MassiveScenarioClassification (sl)": 64.22, + "MassiveScenarioClassification (lv)": 64.23, + "MassiveScenarioClassification (af)": 59.74, + "MassiveScenarioClassification (hi)": 62.27, + "MassiveScenarioClassification (ja)": 69.19, + "MassiveScenarioClassification (te)": 59.89, + "NoRecClassification": 51.87, + "NordicLangClassification": 48.92, + "PAC": 70.35, + "PolEmo2.0-IN": 67.76, + "PolEmo2.0-OUT": 41.98, + "RuReviewsClassification": 59.14, + "RuSciBenchGRNTIClassification": 58.03, + "RuSciBenchOECDClassification": 44.54, + "ToxicConversationsClassification": 61.86, + "TweetSentimentExtractionClassification": 56.84 + } + ] }, "Clustering": { - "v_measure": [] + "v_measure": [ + { + "Model": "voyage-3-lite", + "AlloProfClusteringP2P": 60.61, + "AlloProfClusteringS2S": 47.75, + "ArxivClusteringP2P": 43.4, + "ArxivClusteringS2S": 32.03, + "BiorxivClusteringP2P": 37.01, + "BiorxivClusteringS2S": 29.02, + "BlurbsClusteringP2P": 40.26, + "BlurbsClusteringS2S": 15.97, + "GeoreviewClusteringP2P": 63.53, + "HALClusteringS2S": 24.74, + "MLSUMClusteringP2P (de)": 44.05, + "MLSUMClusteringP2P (fr)": 45.69, + "MLSUMClusteringP2P (ru)": 48.37, + "MLSUMClusteringP2P (es)": 49.09, + "MLSUMClusteringS2S (de)": 44.99, + "MLSUMClusteringS2S (fr)": 45.62, + "MLSUMClusteringS2S (ru)": 49.14, + "MLSUMClusteringS2S (es)": 48.59, + "MasakhaNEWSClusteringP2P (amh)": 63.83, + "MasakhaNEWSClusteringP2P (eng)": 70.71, + "MasakhaNEWSClusteringP2P (fra)": 58.78, + "MasakhaNEWSClusteringP2P (hau)": 44.79, + "MasakhaNEWSClusteringP2P (ibo)": 47.36, + "MasakhaNEWSClusteringP2P (lin)": 60.43, + "MasakhaNEWSClusteringP2P (lug)": 43.85, + "MasakhaNEWSClusteringP2P (orm)": 47.96, + "MasakhaNEWSClusteringP2P (pcm)": 69.6, + "MasakhaNEWSClusteringP2P (run)": 57.35, + "MasakhaNEWSClusteringP2P (sna)": 61.77, + "MasakhaNEWSClusteringP2P (som)": 36.3, + "MasakhaNEWSClusteringP2P (swa)": 26.36, + "MasakhaNEWSClusteringP2P (tir)": 50.45, + "MasakhaNEWSClusteringP2P (xho)": 41.99, + "MasakhaNEWSClusteringP2P (yor)": 27.22, + "MasakhaNEWSClusteringS2S (amh)": 50.65, + "MasakhaNEWSClusteringS2S (eng)": 57.59, + "MasakhaNEWSClusteringS2S (fra)": 43.8, + "MasakhaNEWSClusteringS2S (hau)": 28.59, + "MasakhaNEWSClusteringS2S (ibo)": 42.81, + "MasakhaNEWSClusteringS2S (lin)": 53.67, + "MasakhaNEWSClusteringS2S (lug)": 47.26, + "MasakhaNEWSClusteringS2S (orm)": 33.55, + "MasakhaNEWSClusteringS2S (pcm)": 81.83, + "MasakhaNEWSClusteringS2S (run)": 48.94, + "MasakhaNEWSClusteringS2S (sna)": 46.16, + "MasakhaNEWSClusteringS2S (som)": 30.62, + "MasakhaNEWSClusteringS2S (swa)": 16.81, + "MasakhaNEWSClusteringS2S (tir)": 46.6, + "MasakhaNEWSClusteringS2S (xho)": 29.94, + "MasakhaNEWSClusteringS2S (yor)": 28.83, + "MedrxivClusteringP2P": 32.65, + "MedrxivClusteringS2S": 28.77, + "RedditClustering": 47.35, + "RedditClusteringP2P": 58.57, + "RuSciBenchGRNTIClusteringP2P": 53.76, + "RuSciBenchOECDClusteringP2P": 45.13, + "StackExchangeClustering": 59.88, + "StackExchangeClusteringP2P": 33.31, + "TenKGnadClusteringP2P": 46.79, + "TenKGnadClusteringS2S": 33.11, + "TwentyNewsgroupsClustering": 43.21 + } + ] }, "PairClassification": { - "max_ap": [] + "max_ap": [ + { + "Model": "voyage-3-lite", + "CDSC-E": 66.64, + "FalseFriendsGermanEnglish": 51.59, + "OpusparcusPC (de)": 96.07, + "OpusparcusPC (en)": 98.12, + "OpusparcusPC (fi)": 93.18, + "OpusparcusPC (fr)": 91.54, + "OpusparcusPC (ru)": 87.84, + "OpusparcusPC (sv)": 93.37, + "PSC": 99.57, + "PawsXPairClassification (de)": 55.96, + "PawsXPairClassification (en)": 59.14, + "PawsXPairClassification (es)": 56.63, + "PawsXPairClassification (fr)": 58.75, + "PawsXPairClassification (ja)": 51.1, + "PawsXPairClassification (ko)": 51.15, + "PawsXPairClassification (zh)": 56.26, + "SICK-E-PL": 61.81, + "SprintDuplicateQuestions": 89.47, + "TERRa": 52.4, + "TwitterSemEval2015": 62.64, + "TwitterURLCorpus": 84.09 + }, + { + "Model": "voyage-3-lite", + "CDSC-E": 66.64, + "FalseFriendsGermanEnglish": 51.59, + "OpusparcusPC (de)": 96.07, + "OpusparcusPC (en)": 98.12, + "OpusparcusPC (fi)": 93.18, + "OpusparcusPC (fr)": 91.54, + "OpusparcusPC (ru)": 87.84, + "OpusparcusPC (sv)": 93.37, + "PSC": 99.57, + "PawsXPairClassification (de)": 56.07, + "PawsXPairClassification (en)": 59.14, + "PawsXPairClassification (es)": 56.63, + "PawsXPairClassification (fr)": 58.75, + "PawsXPairClassification (ja)": 51.23, + "PawsXPairClassification (ko)": 51.15, + "PawsXPairClassification (zh)": 56.55, + "SICK-E-PL": 61.84, + "SprintDuplicateQuestions": 89.47, + "TERRa": 52.68, + "TwitterSemEval2015": 62.64, + "TwitterURLCorpus": 84.09 + } + ] }, "Reranking": { - "map": [] + "map": [ + { + "Model": "voyage-3-lite", + "AlloprofReranking": 75.02, + "AskUbuntuDupQuestions": 60.58, + "MindSmallReranking": 31.83, + "RuBQReranking": 67.85, + "T2Reranking": 65.4 + } + ] }, "Retrieval": { "ndcg_at_10": [ @@ -24254,6 +45376,11 @@ "Model": "voyage-3-lite", "AILACasedocs": 38.15, "AILAStatutes": 35.03, + "AlloprofRetrieval": 47.86, + "ArguAna": 60.14, + "CmedqaRetrieval": 28.85, + "CovidRetrieval": 74.91, + "FiQA2018": 41.69, "GerDaLIRSmall": 43.73, "LEMBNarrativeQARetrieval": 51.67, "LEMBQMSumRetrieval": 53.01, @@ -24263,21 +45390,132 @@ "LegalBenchConsumerContractsQA": 83.22, "LegalBenchCorporateLobbying": 94.53, "LegalQuAD": 61.96, - "LegalSummarization": 61.42 + "LegalSummarization": 61.42, + "NFCorpus": 30.33, + "SCIDOCS": 19.12, + "SciFact": 70.75, + "SciFact-PL": 58.76, + "SpartQA": 0.45, + "StackOverflowQA": 89.41, + "TRECCOVID": 76.5, + "TRECCOVID-PL": 68.99, + "TempReasonL1": 1.85, + "WinoGrande": 41.17 } ] }, "STS": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-3-lite", + "BIOSSES": 85.19, + "CDSC-R": 89.51, + "GermanSTSBenchmark": 69.78, + "RUParaPhraserSTS": 64.5, + "RuSTSBenchmarkSTS": 69.33, + "SICK-R": 76.65, + "SICK-R-PL": 67.25, + "SICKFr": 72.19, + "STS12": 70.76, + "STS13": 76.51, + "STS14": 71.92, + "STS15": 81.1, + "STS16": 80.8, + "STS17 (en-tr)": 63.71, + "STS17 (fr-en)": 73.69, + "STS17 (nl-en)": 77.19, + "STS17 (en-en)": 87.18, + "STS17 (es-es)": 82.43, + "STS17 (en-ar)": 61.43, + "STS17 (ar-ar)": 69.8, + "STS17 (it-en)": 74.99, + "STS17 (en-de)": 77.44, + "STS17 (es-en)": 69.69, + "STS17 (ko-ko)": 65.95, + "STSB": 66.07, + "STSBenchmark": 78.56, + "STSBenchmarkMultilingualSTS (it)": 71.4, + "STSBenchmarkMultilingualSTS (en)": 78.56, + "STSBenchmarkMultilingualSTS (pl)": 69.57, + "STSBenchmarkMultilingualSTS (de)": 71.71, + "STSBenchmarkMultilingualSTS (nl)": 70.65, + "STSBenchmarkMultilingualSTS (pt)": 69.01, + "STSBenchmarkMultilingualSTS (es)": 70.6, + "STSBenchmarkMultilingualSTS (ru)": 68.62, + "STSBenchmarkMultilingualSTS (fr)": 71.24, + "STSBenchmarkMultilingualSTS (zh)": 68.75 + }, + { + "Model": "voyage-3-lite", + "BIOSSES": 85.19, + "CDSC-R": 89.51, + "GermanSTSBenchmark": 69.78, + "RUParaPhraserSTS": 64.5, + "RuSTSBenchmarkSTS": 69.33, + "SICK-R": 76.65, + "SICK-R-PL": 67.25, + "SICKFr": 72.19, + "STS12": 70.76, + "STS13": 76.51, + "STS14": 71.92, + "STS15": 81.1, + "STS16": 80.8, + "STS17 (en-tr)": 63.71, + "STS17 (fr-en)": 73.69, + "STS17 (nl-en)": 77.19, + "STS17 (en-en)": 87.18, + "STS17 (es-es)": 82.43, + "STS17 (en-ar)": 61.43, + "STS17 (ar-ar)": 69.8, + "STS17 (it-en)": 74.99, + "STS17 (en-de)": 77.44, + "STS17 (es-en)": 69.69, + "STS17 (ko-ko)": 65.95, + "STSB": 66.07, + "STSBenchmark": 78.56, + "STSBenchmarkMultilingualSTS (it)": 71.4, + "STSBenchmarkMultilingualSTS (en)": 78.56, + "STSBenchmarkMultilingualSTS (pl)": 69.57, + "STSBenchmarkMultilingualSTS (de)": 71.71, + "STSBenchmarkMultilingualSTS (nl)": 70.65, + "STSBenchmarkMultilingualSTS (pt)": 69.01, + "STSBenchmarkMultilingualSTS (es)": 70.6, + "STSBenchmarkMultilingualSTS (ru)": 68.62, + "STSBenchmarkMultilingualSTS (fr)": 71.24, + "STSBenchmarkMultilingualSTS (zh)": 68.75 + } + ] }, "Summarization": { - "cosine_spearman": [] + "cosine_spearman": [ + { + "Model": "voyage-3-lite", + "SummEval": 31.4 + }, + { + "Model": "voyage-3-lite", + "SummEval": 31.4 + } + ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-3-lite", + "CEDRClassification": 37.56, + "SensitiveTopicsClassification": 24.41 + } + ] }, "InstructionRetrieval": { - "p-MRR": [] + "p-MRR": [ + { + "Model": "voyage-3-lite", + "Core17InstructionRetrieval": 0.41, + "News21InstructionRetrieval": 0.13, + "Robust04InstructionRetrieval": -4.06 + } + ] } }, "voyageai__voyage-code-2": { @@ -24373,24 +45611,276 @@ }, "voyageai__voyage-large-2-instruct": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "voyage-large-2-instruct", + "BornholmBitextMining": 34.66, + "Tatoeba (swg-eng)": 40.7, + "Tatoeba (aze-eng)": 35.41, + "Tatoeba (jpn-eng)": 84.32, + "Tatoeba (nov-eng)": 53.41, + "Tatoeba (ido-eng)": 54.53, + "Tatoeba (ina-eng)": 83.38, + "Tatoeba (jav-eng)": 11.06, + "Tatoeba (ara-eng)": 42.48, + "Tatoeba (bre-eng)": 5.07, + "Tatoeba (vie-eng)": 69.38, + "Tatoeba (fry-eng)": 39.02, + "Tatoeba (uig-eng)": 10.48, + "Tatoeba (amh-eng)": 0.18, + "Tatoeba (gla-eng)": 6.5, + "Tatoeba (awa-eng)": 22.77, + "Tatoeba (ind-eng)": 69.76, + "Tatoeba (tur-eng)": 58.51, + "Tatoeba (ces-eng)": 82.28, + "Tatoeba (deu-eng)": 96.21, + "Tatoeba (rus-eng)": 87.31, + "Tatoeba (lvs-eng)": 29.6, + "Tatoeba (slk-eng)": 77.44, + "Tatoeba (khm-eng)": 0.29, + "Tatoeba (ita-eng)": 87.11, + "Tatoeba (gsw-eng)": 43.62, + "Tatoeba (cha-eng)": 21.54, + "Tatoeba (orv-eng)": 30.21, + "Tatoeba (cmn-eng)": 91.87, + "Tatoeba (glg-eng)": 77.07, + "Tatoeba (pam-eng)": 5.97, + "Tatoeba (wuu-eng)": 74.56, + "Tatoeba (yid-eng)": 1.54, + "Tatoeba (kzj-eng)": 5.79, + "Tatoeba (ceb-eng)": 10.49, + "Tatoeba (pol-eng)": 88.57, + "Tatoeba (tha-eng)": 40.24, + "Tatoeba (kat-eng)": 13.22, + "Tatoeba (mal-eng)": 1.82, + "Tatoeba (nno-eng)": 69.29, + "Tatoeba (yue-eng)": 75.17, + "Tatoeba (epo-eng)": 58.0, + "Tatoeba (spa-eng)": 92.7, + "Tatoeba (ukr-eng)": 84.69, + "Tatoeba (tat-eng)": 10.24, + "Tatoeba (isl-eng)": 39.4, + "Tatoeba (heb-eng)": 24.59, + "Tatoeba (hye-eng)": 2.8, + "Tatoeba (swe-eng)": 84.16, + "Tatoeba (ell-eng)": 50.68, + "Tatoeba (sqi-eng)": 24.05, + "Tatoeba (dsb-eng)": 42.21, + "Tatoeba (mon-eng)": 12.51, + "Tatoeba (arz-eng)": 24.28, + "Tatoeba (lfn-eng)": 50.81, + "Tatoeba (fra-eng)": 89.27, + "Tatoeba (kaz-eng)": 14.08, + "Tatoeba (nds-eng)": 50.54, + "Tatoeba (fao-eng)": 44.19, + "Tatoeba (ber-eng)": 5.31, + "Tatoeba (ron-eng)": 80.44, + "Tatoeba (bos-eng)": 79.94, + "Tatoeba (xho-eng)": 7.3, + "Tatoeba (fin-eng)": 55.38, + "Tatoeba (pes-eng)": 43.87, + "Tatoeba (uzb-eng)": 12.82, + "Tatoeba (slv-eng)": 72.11, + "Tatoeba (hun-eng)": 69.19, + "Tatoeba (swh-eng)": 10.94, + "Tatoeba (nld-eng)": 89.88, + "Tatoeba (dan-eng)": 85.71, + "Tatoeba (kor-eng)": 67.28, + "Tatoeba (bul-eng)": 79.3, + "Tatoeba (mar-eng)": 12.22, + "Tatoeba (lat-eng)": 47.22, + "Tatoeba (war-eng)": 11.37, + "Tatoeba (kab-eng)": 1.36, + "Tatoeba (srp-eng)": 75.61, + "Tatoeba (hin-eng)": 39.36, + "Tatoeba (tuk-eng)": 7.38, + "Tatoeba (lit-eng)": 26.73, + "Tatoeba (hsb-eng)": 50.29, + "Tatoeba (zsm-eng)": 71.1, + "Tatoeba (mhr-eng)": 7.86, + "Tatoeba (tgl-eng)": 25.72, + "Tatoeba (hrv-eng)": 83.09, + "Tatoeba (tzl-eng)": 32.1, + "Tatoeba (por-eng)": 89.17, + "Tatoeba (urd-eng)": 21.4, + "Tatoeba (gle-eng)": 8.47, + "Tatoeba (cbk-eng)": 56.61, + "Tatoeba (mkd-eng)": 53.83, + "Tatoeba (ast-eng)": 69.08, + "Tatoeba (max-eng)": 35.91, + "Tatoeba (oci-eng)": 44.59, + "Tatoeba (est-eng)": 24.87, + "Tatoeba (tel-eng)": 1.26, + "Tatoeba (kur-eng)": 10.77, + "Tatoeba (bel-eng)": 59.58, + "Tatoeba (ile-eng)": 63.57, + "Tatoeba (cym-eng)": 10.27, + "Tatoeba (ben-eng)": 11.23, + "Tatoeba (pms-eng)": 41.25, + "Tatoeba (dtp-eng)": 4.32, + "Tatoeba (csb-eng)": 34.29, + "Tatoeba (afr-eng)": 63.53, + "Tatoeba (cor-eng)": 3.72, + "Tatoeba (nob-eng)": 85.34, + "Tatoeba (tam-eng)": 2.28, + "Tatoeba (cat-eng)": 84.06, + "Tatoeba (arq-eng)": 6.63, + "Tatoeba (eus-eng)": 13.67, + "Tatoeba (ang-eng)": 44.02 + } + ] }, "Classification": { "accuracy": [ { "Model": "voyage-large-2-instruct", - "AmazonCounterfactualClassification (en)": 77.6, - "AmazonPolarityClassification": 96.58, - "AmazonReviewsClassification (en)": 50.77, - "Banking77Classification": 86.96, - "EmotionClassification": 59.81, - "ImdbClassification": 96.13, - "MTOPDomainClassification (en)": 98.86, - "MTOPIntentClassification (en)": 86.97, - "MassiveIntentClassification (en)": 81.08, - "MassiveScenarioClassification (en)": 87.95, - "ToxicConversationsClassification": 83.58, - "TweetSentimentExtractionClassification": 71.55 + "AllegroReviews": 46.66, + "AmazonCounterfactualClassification (en)": 83.06, + "AmazonCounterfactualClassification (en-ext)": 85.13, + "AmazonCounterfactualClassification (de)": 64.61, + "AmazonCounterfactualClassification (ja)": 77.46, + "AmazonPolarityClassification": 96.11, + "AmazonReviewsClassification (en)": 51.4, + "AmazonReviewsClassification (de)": 47.52, + "AmazonReviewsClassification (es)": 46.78, + "AmazonReviewsClassification (fr)": 46.43, + "AmazonReviewsClassification (ja)": 45.09, + "AmazonReviewsClassification (zh)": 44.16, + "AngryTweetsClassification": 56.06, + "Banking77Classification": 87.62, + "CBD": 73.6, + "DanishPoliticalCommentsClassification": 38.61, + "EmotionClassification": 64.46, + "GeoreviewClassification": 47.18, + "HeadlineClassification": 75.1, + "ImdbClassification": 93.78, + "InappropriatenessClassification": 62.99, + "KinopoiskClassification": 64.43, + "LccSentimentClassification": 59.2, + "MTOPDomainClassification (en)": 97.46, + "MTOPDomainClassification (de)": 95.27, + "MTOPDomainClassification (es)": 95.28, + "MTOPDomainClassification (fr)": 93.57, + "MTOPDomainClassification (hi)": 79.0, + "MTOPDomainClassification (th)": 76.64, + "MTOPIntentClassification (en)": 75.01, + "MTOPIntentClassification (de)": 70.28, + "MTOPIntentClassification (es)": 74.9, + "MTOPIntentClassification (fr)": 68.15, + "MTOPIntentClassification (hi)": 40.64, + "MTOPIntentClassification (th)": 52.46, + "MassiveIntentClassification (en)": 76.62, + "MassiveIntentClassification (jv)": 42.59, + "MassiveIntentClassification (te)": 22.0, + "MassiveIntentClassification (km)": 21.07, + "MassiveIntentClassification (ur)": 38.9, + "MassiveIntentClassification (hy)": 32.94, + "MassiveIntentClassification (my)": 18.82, + "MassiveIntentClassification (pl)": 67.0, + "MassiveIntentClassification (tr)": 59.39, + "MassiveIntentClassification (hu)": 60.74, + "MassiveIntentClassification (kn)": 17.95, + "MassiveIntentClassification (tl)": 48.84, + "MassiveIntentClassification (ta)": 28.71, + "MassiveIntentClassification (da)": 65.75, + "MassiveIntentClassification (es)": 69.32, + "MassiveIntentClassification (fi)": 56.04, + "MassiveIntentClassification (fr)": 69.76, + "MassiveIntentClassification (ml)": 31.31, + "MassiveIntentClassification (ro)": 61.22, + "MassiveIntentClassification (cy)": 36.38, + "MassiveIntentClassification (ko)": 59.58, + "MassiveIntentClassification (el)": 51.05, + "MassiveIntentClassification (af)": 56.14, + "MassiveIntentClassification (he)": 46.56, + "MassiveIntentClassification (lv)": 45.89, + "MassiveIntentClassification (id)": 64.75, + "MassiveIntentClassification (it)": 68.79, + "MassiveIntentClassification (de)": 67.84, + "MassiveIntentClassification (pt)": 70.28, + "MassiveIntentClassification (th)": 45.59, + "MassiveIntentClassification (is)": 46.74, + "MassiveIntentClassification (ru)": 70.87, + "MassiveIntentClassification (vi)": 55.12, + "MassiveIntentClassification (sl)": 62.57, + "MassiveIntentClassification (sw)": 38.08, + "MassiveIntentClassification (sq)": 41.24, + "MassiveIntentClassification (az)": 50.48, + "MassiveIntentClassification (am)": 24.82, + "MassiveIntentClassification (hi)": 48.1, + "MassiveIntentClassification (ms)": 57.61, + "MassiveIntentClassification (bn)": 39.06, + "MassiveIntentClassification (nb)": 64.93, + "MassiveIntentClassification (nl)": 68.84, + "MassiveIntentClassification (ka)": 32.48, + "MassiveIntentClassification (sv)": 68.53, + "MassiveIntentClassification (mn)": 33.06, + "MassiveIntentClassification (zh-CN)": 69.49, + "MassiveIntentClassification (fa)": 55.04, + "MassiveIntentClassification (ja)": 67.62, + "MassiveIntentClassification (zh-TW)": 63.36, + "MassiveIntentClassification (ar)": 47.61, + "MassiveScenarioClassification (en)": 83.75, + "MassiveScenarioClassification (km)": 25.63, + "MassiveScenarioClassification (am)": 34.34, + "MassiveScenarioClassification (lv)": 56.15, + "MassiveScenarioClassification (th)": 56.57, + "MassiveScenarioClassification (hu)": 71.32, + "MassiveScenarioClassification (hy)": 40.24, + "MassiveScenarioClassification (ja)": 76.24, + "MassiveScenarioClassification (it)": 77.33, + "MassiveScenarioClassification (es)": 77.27, + "MassiveScenarioClassification (hi)": 56.05, + "MassiveScenarioClassification (da)": 76.76, + "MassiveScenarioClassification (kn)": 26.11, + "MassiveScenarioClassification (is)": 58.65, + "MassiveScenarioClassification (ms)": 67.79, + "MassiveScenarioClassification (de)": 78.79, + "MassiveScenarioClassification (vi)": 62.79, + "MassiveScenarioClassification (te)": 28.33, + "MassiveScenarioClassification (fi)": 64.86, + "MassiveScenarioClassification (cy)": 45.38, + "MassiveScenarioClassification (id)": 72.48, + "MassiveScenarioClassification (sv)": 78.4, + "MassiveScenarioClassification (sq)": 52.83, + "MassiveScenarioClassification (he)": 53.04, + "MassiveScenarioClassification (ru)": 78.34, + "MassiveScenarioClassification (sw)": 46.95, + "MassiveScenarioClassification (bn)": 47.38, + "MassiveScenarioClassification (af)": 67.7, + "MassiveScenarioClassification (fr)": 77.99, + "MassiveScenarioClassification (tr)": 67.67, + "MassiveScenarioClassification (jv)": 52.42, + "MassiveScenarioClassification (ka)": 41.12, + "MassiveScenarioClassification (nb)": 74.44, + "MassiveScenarioClassification (zh-TW)": 72.34, + "MassiveScenarioClassification (mn)": 41.83, + "MassiveScenarioClassification (el)": 60.8, + "MassiveScenarioClassification (ar)": 55.46, + "MassiveScenarioClassification (az)": 58.36, + "MassiveScenarioClassification (ko)": 68.71, + "MassiveScenarioClassification (tl)": 59.21, + "MassiveScenarioClassification (zh-CN)": 78.77, + "MassiveScenarioClassification (ro)": 70.68, + "MassiveScenarioClassification (ur)": 48.18, + "MassiveScenarioClassification (sl)": 71.12, + "MassiveScenarioClassification (nl)": 78.78, + "MassiveScenarioClassification (ml)": 38.14, + "MassiveScenarioClassification (pt)": 76.66, + "MassiveScenarioClassification (fa)": 60.92, + "MassiveScenarioClassification (ta)": 36.67, + "MassiveScenarioClassification (my)": 25.4, + "MassiveScenarioClassification (pl)": 76.06, + "NoRecClassification": 51.44, + "NordicLangClassification": 57.17, + "PAC": 65.35, + "PolEmo2.0-IN": 71.3, + "PolEmo2.0-OUT": 33.89, + "RuReviewsClassification": 65.99, + "RuSciBenchGRNTIClassification": 61.46, + "RuSciBenchOECDClassification": 47.86, + "ToxicConversationsClassification": 69.39, + "TweetSentimentExtractionClassification": 61.9 } ] }, @@ -24398,17 +45888,42 @@ "v_measure": [ { "Model": "voyage-large-2-instruct", - "ArxivClusteringP2P": 51.81, - "ArxivClusteringS2S": 44.73, - "BiorxivClusteringP2P": 46.07, - "BiorxivClusteringS2S": 40.64, - "MedrxivClusteringP2P": 42.94, - "MedrxivClusteringS2S": 41.44, - "RedditClustering": 68.5, - "RedditClusteringP2P": 64.86, - "StackExchangeClustering": 74.16, - "StackExchangeClusteringP2P": 45.1, - "TwentyNewsgroupsClustering": 66.62 + "AlloProfClusteringP2P": 62.06, + "AlloProfClusteringS2S": 48.05, + "ArxivClusteringP2P": 50.0, + "ArxivClusteringS2S": 43.99, + "BiorxivClusteringP2P": 41.6, + "BiorxivClusteringS2S": 37.75, + "BlurbsClusteringP2P": 40.78, + "BlurbsClusteringS2S": 20.59, + "GeoreviewClusteringP2P": 69.84, + "HALClusteringS2S": 27.07, + "MasakhaNEWSClusteringS2S (amh)": 42.05, + "MasakhaNEWSClusteringS2S (eng)": 55.35, + "MasakhaNEWSClusteringS2S (fra)": 41.79, + "MasakhaNEWSClusteringS2S (hau)": 22.56, + "MasakhaNEWSClusteringS2S (ibo)": 35.92, + "MasakhaNEWSClusteringS2S (lin)": 47.48, + "MasakhaNEWSClusteringS2S (lug)": 49.46, + "MasakhaNEWSClusteringS2S (orm)": 27.61, + "MasakhaNEWSClusteringS2S (pcm)": 48.95, + "MasakhaNEWSClusteringS2S (run)": 51.87, + "MasakhaNEWSClusteringS2S (sna)": 46.67, + "MasakhaNEWSClusteringS2S (som)": 38.65, + "MasakhaNEWSClusteringS2S (swa)": 17.78, + "MasakhaNEWSClusteringS2S (tir)": 42.67, + "MasakhaNEWSClusteringS2S (xho)": 22.1, + "MasakhaNEWSClusteringS2S (yor)": 29.25, + "MedrxivClusteringP2P": 36.13, + "MedrxivClusteringS2S": 35.82, + "RedditClustering": 63.37, + "RedditClusteringP2P": 62.66, + "RuSciBenchGRNTIClusteringP2P": 55.65, + "RuSciBenchOECDClusteringP2P": 47.23, + "StackExchangeClustering": 71.08, + "StackExchangeClusteringP2P": 36.02, + "TenKGnadClusteringS2S": 32.74, + "TwentyNewsgroupsClustering": 61.06 } ] }, @@ -24416,13 +45931,55 @@ "max_ap": [ { "Model": "voyage-large-2-instruct", - "SprintDuplicateQuestions": 94.5, - "TwitterSemEval2015": 86.32, - "TwitterURLCorpus": 86.9 + "CDSC-E": 68.05, + "FalseFriendsGermanEnglish": 52.72, + "OpusparcusPC (de)": 96.14, + "OpusparcusPC (en)": 98.3, + "OpusparcusPC (fi)": 87.37, + "OpusparcusPC (fr)": 92.98, + "OpusparcusPC (ru)": 89.62, + "OpusparcusPC (sv)": 92.63, + "PSC": 99.41, + "PawsXPairClassification (de)": 61.62, + "PawsXPairClassification (en)": 66.12, + "PawsXPairClassification (es)": 61.98, + "PawsXPairClassification (fr)": 64.41, + "PawsXPairClassification (ja)": 53.33, + "PawsXPairClassification (ko)": 53.16, + "PawsXPairClassification (zh)": 60.09, + "SICK-E-PL": 68.53, + "SprintDuplicateQuestions": 95.83, + "TERRa": 56.43, + "TwitterSemEval2015": 89.46, + "TwitterURLCorpus": 88.66 }, { "Model": "voyage-large-2-instruct", - "SprintDuplicateQuestions": 94.53, + "CDSC-E": 68.16, + "FalseFriendsGermanEnglish": 52.72, + "OpusparcusPC (de)": 96.15, + "OpusparcusPC (en)": 98.3, + "OpusparcusPC (fi)": 87.37, + "OpusparcusPC (fr)": 92.98, + "OpusparcusPC (ru)": 89.62, + "OpusparcusPC (sv)": 92.63, + "PSC": 99.41, + "PawsXPairClassification (de)": 61.69, + "PawsXPairClassification (en)": 66.12, + "PawsXPairClassification (es)": 61.99, + "PawsXPairClassification (fr)": 64.41, + "PawsXPairClassification (ja)": 53.42, + "PawsXPairClassification (ko)": 53.2, + "PawsXPairClassification (zh)": 60.09, + "SICK-E-PL": 68.53, + "SprintDuplicateQuestions": 95.83, + "TERRa": 56.43, + "TwitterSemEval2015": 89.46, + "TwitterURLCorpus": 88.66 + }, + { + "Model": "voyage-large-2-instruct", + "SprintDuplicateQuestions": 94.5, "TwitterSemEval2015": 86.32, "TwitterURLCorpus": 86.9 } @@ -24432,8 +45989,9 @@ "map": [ { "Model": "voyage-large-2-instruct", - "AskUbuntuDupQuestions": 64.92, - "MindSmallReranking": 30.97, + "AskUbuntuDupQuestions": 68.79, + "MindSmallReranking": 31.38, + "RuBQReranking": 70.59, "SciDocsRR": 89.34, "StackOverflowDupQuestions": 55.11 } @@ -24443,7 +46001,8 @@ "ndcg_at_10": [ { "Model": "voyage-large-2-instruct", - "ArguAna": 64.06, + "AILAStatutes": 40.75, + "ArguAna": 58.61, "BrightRetrieval (theoremqa_questions)": 26.06, "BrightRetrieval (earth_science)": 25.09, "BrightRetrieval (leetcode)": 30.6, @@ -24460,16 +46019,21 @@ "ClimateFEVER": 32.65, "DBPedia": 46.03, "FEVER": 91.47, - "FiQA2018": 59.76, + "FiQA2018": 54.84, "HotpotQA": 70.86, + "LegalBenchCorporateLobbying": 95.32, "MSMARCO": 40.6, - "NFCorpus": 40.32, + "NFCorpus": 38.16, "NQ": 65.92, "QuoraRetrieval": 87.4, "SCIDOCS": 24.32, - "SciFact": 79.99, + "SciFact": 77.8, + "SciFact-PL": 68.06, + "SpartQA": 0.3, "TRECCOVID": 85.07, - "Touche2020": 39.16 + "TempReasonL1": 1.1, + "Touche2020": 39.16, + "WinoGrande": 28.75 } ], "recall_at_1": [ @@ -24500,6 +46064,80 @@ "STS17 (en-en)": 90.06, "STS22 (en)": 66.32, "STSBenchmark": 89.22 + }, + { + "Model": "voyage-large-2-instruct", + "CDSC-R": 90.05, + "GermanSTSBenchmark": 82.68, + "RUParaPhraserSTS": 69.05, + "RuSTSBenchmarkSTS": 81.77, + "SICK-R": 81.92, + "SICK-R-PL": 71.52, + "SICKFr": 77.05, + "STS13": 84.96, + "STS14": 82.67, + "STS15": 87.64, + "STS16": 84.5, + "STS17 (en-en)": 88.08, + "STS17 (fr-en)": 82.59, + "STS17 (en-tr)": 53.1, + "STS17 (ar-ar)": 68.95, + "STS17 (en-de)": 85.77, + "STS17 (it-en)": 82.75, + "STS17 (es-es)": 85.83, + "STS17 (nl-en)": 83.82, + "STS17 (ko-ko)": 77.63, + "STS17 (en-ar)": 30.33, + "STS17 (es-en)": 84.88, + "STSB": 78.75, + "STSBenchmark": 87.59, + "STSBenchmarkMultilingualSTS (it)": 82.77, + "STSBenchmarkMultilingualSTS (zh)": 80.35, + "STSBenchmarkMultilingualSTS (pl)": 79.15, + "STSBenchmarkMultilingualSTS (nl)": 82.06, + "STSBenchmarkMultilingualSTS (fr)": 83.68, + "STSBenchmarkMultilingualSTS (en)": 87.6, + "STSBenchmarkMultilingualSTS (pt)": 83.1, + "STSBenchmarkMultilingualSTS (ru)": 81.75, + "STSBenchmarkMultilingualSTS (es)": 84.57, + "STSBenchmarkMultilingualSTS (de)": 84.07 + }, + { + "Model": "voyage-large-2-instruct", + "CDSC-R": 90.05, + "GermanSTSBenchmark": 82.68, + "RUParaPhraserSTS": 69.05, + "RuSTSBenchmarkSTS": 81.77, + "SICK-R": 81.92, + "SICK-R-PL": 71.52, + "SICKFr": 77.05, + "STS13": 84.96, + "STS14": 82.67, + "STS15": 87.64, + "STS16": 84.5, + "STS17 (en-en)": 88.08, + "STS17 (fr-en)": 82.59, + "STS17 (en-tr)": 53.1, + "STS17 (ar-ar)": 68.95, + "STS17 (en-de)": 85.77, + "STS17 (it-en)": 82.75, + "STS17 (es-es)": 85.83, + "STS17 (nl-en)": 83.82, + "STS17 (ko-ko)": 77.63, + "STS17 (en-ar)": 30.33, + "STS17 (es-en)": 84.88, + "STSB": 78.75, + "STSBenchmark": 87.59, + "STSBenchmarkMultilingualSTS (it)": 82.77, + "STSBenchmarkMultilingualSTS (zh)": 80.35, + "STSBenchmarkMultilingualSTS (pl)": 79.15, + "STSBenchmarkMultilingualSTS (nl)": 82.06, + "STSBenchmarkMultilingualSTS (fr)": 83.68, + "STSBenchmarkMultilingualSTS (en)": 87.6, + "STSBenchmarkMultilingualSTS (pt)": 83.1, + "STSBenchmarkMultilingualSTS (ru)": 81.75, + "STSBenchmarkMultilingualSTS (es)": 84.57, + "STSBenchmarkMultilingualSTS (de)": 84.07 } ] }, @@ -24508,11 +46146,25 @@ { "Model": "voyage-large-2-instruct", "SummEval": 30.84 + }, + { + "Model": "voyage-large-2-instruct", + "SummEval": 30.15 + }, + { + "Model": "voyage-large-2-instruct", + "SummEval": 30.15 } ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-large-2-instruct", + "CEDRClassification": 45.03, + "SensitiveTopicsClassification": 32.23 + } + ] }, "InstructionRetrieval": { "p-MRR": [] @@ -24867,18 +46519,277 @@ }, "voyageai__voyage-multilingual-2": { "BitextMining": { - "f1": [] + "f1": [ + { + "Model": "voyage-multilingual-2", + "BornholmBitextMining": 42.68, + "Tatoeba (bul-eng)": 88.43, + "Tatoeba (kaz-eng)": 64.49, + "Tatoeba (gle-eng)": 34.14, + "Tatoeba (uzb-eng)": 51.71, + "Tatoeba (por-eng)": 93.5, + "Tatoeba (gla-eng)": 23.63, + "Tatoeba (cha-eng)": 31.4, + "Tatoeba (kzj-eng)": 11.52, + "Tatoeba (ben-eng)": 80.05, + "Tatoeba (cat-eng)": 92.5, + "Tatoeba (heb-eng)": 83.4, + "Tatoeba (mkd-eng)": 79.84, + "Tatoeba (ara-eng)": 83.89, + "Tatoeba (slk-eng)": 91.15, + "Tatoeba (ces-eng)": 90.96, + "Tatoeba (swg-eng)": 70.94, + "Tatoeba (urd-eng)": 89.23, + "Tatoeba (mar-eng)": 62.12, + "Tatoeba (tha-eng)": 91.47, + "Tatoeba (hin-eng)": 93.82, + "Tatoeba (kur-eng)": 58.11, + "Tatoeba (lvs-eng)": 61.75, + "Tatoeba (jpn-eng)": 90.7, + "Tatoeba (tzl-eng)": 48.64, + "Tatoeba (war-eng)": 33.84, + "Tatoeba (epo-eng)": 90.17, + "Tatoeba (swh-eng)": 35.1, + "Tatoeba (hsb-eng)": 77.99, + "Tatoeba (cbk-eng)": 69.28, + "Tatoeba (kor-eng)": 87.75, + "Tatoeba (bel-eng)": 90.39, + "Tatoeba (kab-eng)": 4.22, + "Tatoeba (lit-eng)": 68.44, + "Tatoeba (nld-eng)": 93.56, + "Tatoeba (pol-eng)": 96.18, + "Tatoeba (ind-eng)": 91.69, + "Tatoeba (fin-eng)": 95.09, + "Tatoeba (cmn-eng)": 93.9, + "Tatoeba (est-eng)": 69.45, + "Tatoeba (fry-eng)": 74.56, + "Tatoeba (srp-eng)": 86.7, + "Tatoeba (khm-eng)": 8.52, + "Tatoeba (hye-eng)": 67.2, + "Tatoeba (nov-eng)": 72.82, + "Tatoeba (max-eng)": 56.28, + "Tatoeba (hun-eng)": 82.11, + "Tatoeba (tgl-eng)": 87.01, + "Tatoeba (tam-eng)": 53.72, + "Tatoeba (gsw-eng)": 57.86, + "Tatoeba (afr-eng)": 79.99, + "Tatoeba (pam-eng)": 14.03, + "Tatoeba (arz-eng)": 64.42, + "Tatoeba (kat-eng)": 56.25, + "Tatoeba (ina-eng)": 93.04, + "Tatoeba (amh-eng)": 4.07, + "Tatoeba (pes-eng)": 87.79, + "Tatoeba (eus-eng)": 67.7, + "Tatoeba (lfn-eng)": 70.49, + "Tatoeba (spa-eng)": 95.97, + "Tatoeba (mon-eng)": 88.26, + "Tatoeba (orv-eng)": 46.59, + "Tatoeba (wuu-eng)": 82.69, + "Tatoeba (swe-eng)": 93.29, + "Tatoeba (csb-eng)": 62.45, + "Tatoeba (fao-eng)": 76.43, + "Tatoeba (fra-eng)": 91.89, + "Tatoeba (xho-eng)": 13.94, + "Tatoeba (lat-eng)": 64.13, + "Tatoeba (cym-eng)": 46.28, + "Tatoeba (glg-eng)": 92.13, + "Tatoeba (ron-eng)": 92.57, + "Tatoeba (ido-eng)": 85.36, + "Tatoeba (bre-eng)": 20.24, + "Tatoeba (tel-eng)": 80.98, + "Tatoeba (isl-eng)": 92.75, + "Tatoeba (sqi-eng)": 71.18, + "Tatoeba (hrv-eng)": 91.01, + "Tatoeba (yid-eng)": 39.23, + "Tatoeba (tuk-eng)": 35.8, + "Tatoeba (cor-eng)": 8.09, + "Tatoeba (aze-eng)": 75.22, + "Tatoeba (vie-eng)": 94.83, + "Tatoeba (yue-eng)": 75.31, + "Tatoeba (oci-eng)": 65.69, + "Tatoeba (mhr-eng)": 14.52, + "Tatoeba (ceb-eng)": 29.27, + "Tatoeba (ast-eng)": 83.43, + "Tatoeba (jav-eng)": 44.22, + "Tatoeba (nds-eng)": 83.38, + "Tatoeba (tat-eng)": 56.92, + "Tatoeba (ang-eng)": 72.81, + "Tatoeba (uig-eng)": 39.35, + "Tatoeba (ukr-eng)": 92.07, + "Tatoeba (bos-eng)": 88.32, + "Tatoeba (deu-eng)": 97.2, + "Tatoeba (ber-eng)": 6.76, + "Tatoeba (ita-eng)": 92.12, + "Tatoeba (dan-eng)": 94.61, + "Tatoeba (awa-eng)": 62.93, + "Tatoeba (pms-eng)": 73.49, + "Tatoeba (dtp-eng)": 7.91, + "Tatoeba (nob-eng)": 97.2, + "Tatoeba (slv-eng)": 82.31, + "Tatoeba (ile-eng)": 82.73, + "Tatoeba (mal-eng)": 40.18, + "Tatoeba (dsb-eng)": 64.52, + "Tatoeba (ell-eng)": 73.34, + "Tatoeba (rus-eng)": 92.13, + "Tatoeba (zsm-eng)": 89.51, + "Tatoeba (tur-eng)": 81.52, + "Tatoeba (arq-eng)": 32.02, + "Tatoeba (nno-eng)": 92.31 + } + ] }, "Classification": { "accuracy": [ { "Model": "voyage-multilingual-2", - "AmazonReviewsClassification (fr)": 43.36, - "MTOPDomainClassification (fr)": 90.33, - "MTOPIntentClassification (fr)": 60.52, + "AllegroReviews": 45.14, + "AmazonCounterfactualClassification (en-ext)": 72.42, + "AmazonCounterfactualClassification (en)": 72.73, + "AmazonCounterfactualClassification (de)": 63.02, + "AmazonCounterfactualClassification (ja)": 78.14, + "AmazonPolarityClassification": 87.78, + "AmazonReviewsClassification (fr)": 42.16, + "AmazonReviewsClassification (en)": 42.88, + "AmazonReviewsClassification (de)": 44.39, + "AmazonReviewsClassification (es)": 42.17, + "AmazonReviewsClassification (ja)": 42.63, + "AmazonReviewsClassification (zh)": 37.67, + "AngryTweetsClassification": 55.9, + "Banking77Classification": 78.61, + "CBD": 70.77, + "DanishPoliticalCommentsClassification": 41.0, + "EmotionClassification": 45.83, + "GeoreviewClassification": 45.45, + "HeadlineClassification": 79.3, + "ImdbClassification": 82.8, + "InappropriatenessClassification": 60.33, + "KinopoiskClassification": 57.99, + "LccSentimentClassification": 59.93, + "MTOPDomainClassification (fr)": 88.63, + "MTOPDomainClassification (en)": 93.04, + "MTOPDomainClassification (de)": 90.41, + "MTOPDomainClassification (es)": 90.95, + "MTOPDomainClassification (hi)": 88.56, + "MTOPDomainClassification (th)": 86.67, + "MTOPIntentClassification (fr)": 65.15, + "MTOPIntentClassification (en)": 68.46, + "MTOPIntentClassification (de)": 69.92, + "MTOPIntentClassification (es)": 69.54, + "MTOPIntentClassification (hi)": 62.33, + "MTOPIntentClassification (th)": 65.79, "MasakhaNEWSClassification (fra)": 74.81, - "MassiveIntentClassification (fr)": 68.06, - "MassiveScenarioClassification (fr)": 74.29 + "MassiveIntentClassification (fr)": 66.07, + "MassiveIntentClassification (lv)": 52.06, + "MassiveIntentClassification (hy)": 51.68, + "MassiveIntentClassification (he)": 63.97, + "MassiveIntentClassification (my)": 36.56, + "MassiveIntentClassification (ta)": 46.96, + "MassiveIntentClassification (ms)": 61.06, + "MassiveIntentClassification (mn)": 61.2, + "MassiveIntentClassification (pl)": 68.03, + "MassiveIntentClassification (ml)": 45.01, + "MassiveIntentClassification (ar)": 54.82, + "MassiveIntentClassification (da)": 67.76, + "MassiveIntentClassification (hu)": 61.11, + "MassiveIntentClassification (nl)": 66.54, + "MassiveIntentClassification (fa)": 65.38, + "MassiveIntentClassification (bn)": 61.35, + "MassiveIntentClassification (cy)": 46.17, + "MassiveIntentClassification (el)": 55.96, + "MassiveIntentClassification (ja)": 69.47, + "MassiveIntentClassification (sl)": 60.62, + "MassiveIntentClassification (az)": 57.64, + "MassiveIntentClassification (th)": 60.68, + "MassiveIntentClassification (it)": 67.51, + "MassiveIntentClassification (sw)": 48.45, + "MassiveIntentClassification (es)": 65.48, + "MassiveIntentClassification (tl)": 63.0, + "MassiveIntentClassification (fi)": 67.71, + "MassiveIntentClassification (zh-CN)": 67.46, + "MassiveIntentClassification (kn)": 46.24, + "MassiveIntentClassification (nb)": 67.58, + "MassiveIntentClassification (vi)": 63.81, + "MassiveIntentClassification (af)": 59.88, + "MassiveIntentClassification (jv)": 50.18, + "MassiveIntentClassification (ko)": 63.68, + "MassiveIntentClassification (ro)": 61.28, + "MassiveIntentClassification (ur)": 59.66, + "MassiveIntentClassification (pt)": 66.59, + "MassiveIntentClassification (is)": 63.95, + "MassiveIntentClassification (sv)": 68.56, + "MassiveIntentClassification (km)": 37.94, + "MassiveIntentClassification (ka)": 44.8, + "MassiveIntentClassification (de)": 66.71, + "MassiveIntentClassification (id)": 65.2, + "MassiveIntentClassification (zh-TW)": 62.52, + "MassiveIntentClassification (te)": 56.09, + "MassiveIntentClassification (am)": 26.5, + "MassiveIntentClassification (ru)": 67.89, + "MassiveIntentClassification (en)": 72.45, + "MassiveIntentClassification (tr)": 60.12, + "MassiveIntentClassification (sq)": 51.73, + "MassiveIntentClassification (hi)": 62.01, + "MassiveScenarioClassification (fr)": 72.06, + "MassiveScenarioClassification (hi)": 68.28, + "MassiveScenarioClassification (de)": 74.13, + "MassiveScenarioClassification (en)": 76.56, + "MassiveScenarioClassification (is)": 71.1, + "MassiveScenarioClassification (sw)": 57.39, + "MassiveScenarioClassification (cy)": 52.27, + "MassiveScenarioClassification (vi)": 71.04, + "MassiveScenarioClassification (my)": 41.78, + "MassiveScenarioClassification (sq)": 60.35, + "MassiveScenarioClassification (af)": 68.22, + "MassiveScenarioClassification (kn)": 55.63, + "MassiveScenarioClassification (fi)": 72.59, + "MassiveScenarioClassification (am)": 33.05, + "MassiveScenarioClassification (ko)": 70.92, + "MassiveScenarioClassification (sl)": 66.35, + "MassiveScenarioClassification (az)": 63.07, + "MassiveScenarioClassification (hy)": 58.24, + "MassiveScenarioClassification (es)": 70.93, + "MassiveScenarioClassification (bn)": 66.94, + "MassiveScenarioClassification (ka)": 50.52, + "MassiveScenarioClassification (jv)": 58.54, + "MassiveScenarioClassification (mn)": 65.72, + "MassiveScenarioClassification (nl)": 72.98, + "MassiveScenarioClassification (ur)": 67.57, + "MassiveScenarioClassification (ar)": 62.96, + "MassiveScenarioClassification (id)": 71.94, + "MassiveScenarioClassification (th)": 68.72, + "MassiveScenarioClassification (ro)": 67.79, + "MassiveScenarioClassification (ml)": 52.6, + "MassiveScenarioClassification (ru)": 73.86, + "MassiveScenarioClassification (el)": 62.86, + "MassiveScenarioClassification (sv)": 75.08, + "MassiveScenarioClassification (zh-CN)": 75.08, + "MassiveScenarioClassification (te)": 64.09, + "MassiveScenarioClassification (ms)": 69.26, + "MassiveScenarioClassification (it)": 72.94, + "MassiveScenarioClassification (nb)": 73.8, + "MassiveScenarioClassification (lv)": 59.79, + "MassiveScenarioClassification (ta)": 54.6, + "MassiveScenarioClassification (he)": 69.6, + "MassiveScenarioClassification (km)": 41.74, + "MassiveScenarioClassification (hu)": 68.02, + "MassiveScenarioClassification (fa)": 71.88, + "MassiveScenarioClassification (pl)": 72.57, + "MassiveScenarioClassification (ja)": 76.15, + "MassiveScenarioClassification (pt)": 71.42, + "MassiveScenarioClassification (da)": 73.23, + "MassiveScenarioClassification (tr)": 65.08, + "MassiveScenarioClassification (tl)": 68.64, + "MassiveScenarioClassification (zh-TW)": 70.91, + "NoRecClassification": 53.35, + "NordicLangClassification": 58.41, + "PAC": 70.59, + "PolEmo2.0-IN": 72.76, + "PolEmo2.0-OUT": 52.51, + "RuReviewsClassification": 63.12, + "RuSciBenchGRNTIClassification": 60.63, + "RuSciBenchOECDClassification": 46.05, + "ToxicConversationsClassification": 61.88, + "TweetSentimentExtractionClassification": 57.51 } ] }, @@ -24886,13 +46797,44 @@ "v_measure": [ { "Model": "voyage-multilingual-2", - "AlloProfClusteringP2P": 65.37, - "AlloProfClusteringS2S": 47.03, - "HALClusteringS2S": 27.67, + "AlloProfClusteringP2P": 63.81, + "AlloProfClusteringS2S": 52.38, + "ArxivClusteringP2P": 47.84, + "ArxivClusteringS2S": 40.56, + "BiorxivClusteringS2S": 32.54, + "BlurbsClusteringP2P": 43.27, + "BlurbsClusteringS2S": 17.78, + "GeoreviewClusteringP2P": 71.94, + "HALClusteringS2S": 26.01, "MLSUMClusteringP2P (fr)": 45.99, "MLSUMClusteringS2S (fr)": 45.57, "MasakhaNEWSClusteringP2P (fra)": 44.53, - "MasakhaNEWSClusteringS2S (fra)": 49.8 + "MasakhaNEWSClusteringS2S (fra)": 63.87, + "MasakhaNEWSClusteringS2S (amh)": 45.35, + "MasakhaNEWSClusteringS2S (eng)": 49.35, + "MasakhaNEWSClusteringS2S (hau)": 28.42, + "MasakhaNEWSClusteringS2S (ibo)": 35.48, + "MasakhaNEWSClusteringS2S (lin)": 56.02, + "MasakhaNEWSClusteringS2S (lug)": 43.66, + "MasakhaNEWSClusteringS2S (orm)": 34.32, + "MasakhaNEWSClusteringS2S (pcm)": 75.25, + "MasakhaNEWSClusteringS2S (run)": 46.39, + "MasakhaNEWSClusteringS2S (sna)": 49.4, + "MasakhaNEWSClusteringS2S (som)": 22.73, + "MasakhaNEWSClusteringS2S (swa)": 16.77, + "MasakhaNEWSClusteringS2S (tir)": 43.68, + "MasakhaNEWSClusteringS2S (xho)": 32.98, + "MasakhaNEWSClusteringS2S (yor)": 30.5, + "MedrxivClusteringS2S": 30.83, + "RedditClustering": 49.55, + "RedditClusteringP2P": 62.69, + "RuSciBenchGRNTIClusteringP2P": 50.93, + "RuSciBenchOECDClusteringP2P": 44.34, + "StackExchangeClustering": 64.13, + "StackExchangeClusteringP2P": 34.38, + "TenKGnadClusteringP2P": 43.87, + "TenKGnadClusteringS2S": 37.37, + "TwentyNewsgroupsClustering": 47.12 } ] }, @@ -24900,13 +46842,56 @@ "max_ap": [ { "Model": "voyage-multilingual-2", - "OpusparcusPC (fr)": 93.68, - "PawsXPairClassification (fr)": 63.64 + "CDSC-E": 66.76, + "FalseFriendsGermanEnglish": 53.6, + "OpusparcusPC (de)": 96.95, + "OpusparcusPC (en)": 98.72, + "OpusparcusPC (fi)": 94.77, + "OpusparcusPC (fr)": 94.18, + "OpusparcusPC (ru)": 90.47, + "OpusparcusPC (sv)": 95.13, + "PSC": 99.63, + "PawsXPairClassification (de)": 59.28, + "PawsXPairClassification (en)": 61.13, + "PawsXPairClassification (es)": 59.53, + "PawsXPairClassification (fr)": 62.26, + "PawsXPairClassification (ja)": 52.18, + "PawsXPairClassification (ko)": 52.61, + "PawsXPairClassification (zh)": 57.97, + "SICK-E-PL": 70.8, + "SprintDuplicateQuestions": 81.08, + "TERRa": 51.48, + "TwitterSemEval2015": 68.86, + "TwitterURLCorpus": 84.37 + }, + { + "Model": "voyage-multilingual-2", + "CDSC-E": 66.76, + "FalseFriendsGermanEnglish": 53.62, + "OpusparcusPC (fr)": 94.18, + "OpusparcusPC (de)": 96.97, + "OpusparcusPC (en)": 98.72, + "OpusparcusPC (fi)": 94.78, + "OpusparcusPC (ru)": 90.47, + "OpusparcusPC (sv)": 95.14, + "PSC": 99.63, + "PawsXPairClassification (fr)": 62.28, + "PawsXPairClassification (de)": 59.28, + "PawsXPairClassification (en)": 61.13, + "PawsXPairClassification (es)": 59.57, + "PawsXPairClassification (ja)": 52.22, + "PawsXPairClassification (ko)": 52.61, + "PawsXPairClassification (zh)": 58.19, + "SICK-E-PL": 70.81, + "SprintDuplicateQuestions": 81.08, + "TERRa": 51.48, + "TwitterSemEval2015": 68.86, + "TwitterURLCorpus": 84.37 }, { "Model": "voyage-multilingual-2", "OpusparcusPC (fr)": 93.68, - "PawsXPairClassification (fr)": 63.71 + "PawsXPairClassification (fr)": 63.64 } ] }, @@ -24915,6 +46900,8 @@ { "Model": "voyage-multilingual-2", "AlloprofReranking": 74.78, + "AskUbuntuDupQuestions": 61.76, + "MindSmallReranking": 33.05, "SyntecReranking": 90.4 } ] @@ -24923,20 +46910,111 @@ "ndcg_at_10": [ { "Model": "voyage-multilingual-2", + "AILAStatutes": 45.0, "AlloprofRetrieval": 58.27, + "ArguAna": 61.82, "BSARDRetrieval": 5.14, + "CmedqaRetrieval": 34.4, "LEMBNarrativeQARetrieval": 64.69, "LEMBQMSumRetrieval": 51.49, "LEMBSummScreenFDRetrieval": 99.11, "LEMBWikimQARetrieval": 87.49, + "LegalBenchCorporateLobbying": 95.92, "MintakaRetrieval (fr)": 49.19, + "NFCorpus": 39.28, + "SCIDOCS": 22.45, + "SciFact": 75.98, + "SciFact-PL": 69.27, + "SpartQA": 10.19, "SyntecRetrieval": 87.28, + "TRECCOVID": 80.11, + "TRECCOVID-PL": 69.86, + "TempReasonL1": 1.37, + "WinoGrande": 39.09, "XPQARetrieval (fr)": 72.92 } ] }, "STS": { "cosine_spearman": [ + { + "Model": "voyage-multilingual-2", + "BIOSSES": 87.11, + "CDSC-R": 87.48, + "GermanSTSBenchmark": 74.08, + "RUParaPhraserSTS": 67.61, + "RuSTSBenchmarkSTS": 71.51, + "SICK-R": 78.97, + "SICK-R-PL": 70.42, + "SICKFr": 72.87, + "STS12": 67.3, + "STS13": 80.09, + "STS14": 71.98, + "STS15": 78.07, + "STS16": 77.36, + "STS17 (es-en)": 76.92, + "STS17 (it-en)": 81.58, + "STS17 (fr-en)": 78.76, + "STS17 (es-es)": 84.05, + "STS17 (ar-ar)": 72.42, + "STS17 (en-tr)": 62.87, + "STS17 (en-en)": 86.52, + "STS17 (en-de)": 78.95, + "STS17 (en-ar)": 73.01, + "STS17 (nl-en)": 81.95, + "STS17 (ko-ko)": 70.66, + "STSB": 68.25, + "STSBenchmark": 75.79, + "STSBenchmarkMultilingualSTS (pl)": 71.96, + "STSBenchmarkMultilingualSTS (fr)": 74.54, + "STSBenchmarkMultilingualSTS (it)": 72.94, + "STSBenchmarkMultilingualSTS (de)": 74.96, + "STSBenchmarkMultilingualSTS (nl)": 73.78, + "STSBenchmarkMultilingualSTS (ru)": 71.56, + "STSBenchmarkMultilingualSTS (pt)": 73.57, + "STSBenchmarkMultilingualSTS (zh)": 69.94, + "STSBenchmarkMultilingualSTS (en)": 75.83, + "STSBenchmarkMultilingualSTS (es)": 74.35 + }, + { + "Model": "voyage-multilingual-2", + "BIOSSES": 87.11, + "CDSC-R": 87.48, + "GermanSTSBenchmark": 74.08, + "RUParaPhraserSTS": 67.61, + "RuSTSBenchmarkSTS": 71.51, + "SICK-R": 78.97, + "SICK-R-PL": 70.42, + "SICKFr": 72.87, + "STS12": 67.3, + "STS13": 80.09, + "STS14": 71.98, + "STS15": 78.07, + "STS16": 77.36, + "STS17 (es-en)": 76.92, + "STS17 (it-en)": 81.58, + "STS17 (fr-en)": 78.76, + "STS17 (es-es)": 84.05, + "STS17 (ar-ar)": 72.42, + "STS17 (en-tr)": 62.87, + "STS17 (en-en)": 86.52, + "STS17 (en-de)": 78.95, + "STS17 (en-ar)": 73.01, + "STS17 (nl-en)": 81.95, + "STS17 (ko-ko)": 70.66, + "STSB": 68.25, + "STSBenchmark": 75.79, + "STSBenchmarkMultilingualSTS (pl)": 71.96, + "STSBenchmarkMultilingualSTS (fr)": 74.54, + "STSBenchmarkMultilingualSTS (it)": 72.94, + "STSBenchmarkMultilingualSTS (de)": 74.96, + "STSBenchmarkMultilingualSTS (nl)": 73.78, + "STSBenchmarkMultilingualSTS (ru)": 71.56, + "STSBenchmarkMultilingualSTS (pt)": 73.57, + "STSBenchmarkMultilingualSTS (zh)": 69.94, + "STSBenchmarkMultilingualSTS (en)": 75.83, + "STSBenchmarkMultilingualSTS (es)": 74.35 + }, { "Model": "voyage-multilingual-2", "SICKFr": 74.9, @@ -24947,6 +47025,14 @@ }, "Summarization": { "cosine_spearman": [ + { + "Model": "voyage-multilingual-2", + "SummEval": 28.44 + }, + { + "Model": "voyage-multilingual-2", + "SummEval": 28.44 + }, { "Model": "voyage-multilingual-2", "SummEvalFr": 29.96 @@ -24954,7 +47040,13 @@ ] }, "MultilabelClassification": { - "accuracy": [] + "accuracy": [ + { + "Model": "voyage-multilingual-2", + "CEDRClassification": 39.15, + "SensitiveTopicsClassification": 26.74 + } + ] }, "InstructionRetrieval": { "p-MRR": []