diff --git a/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
new file mode 100644
index 0000000..1b99a44
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7809527709426081
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5155899232320224
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7879373479249787
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8662625888023707
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9095168116460639
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.42314124780036416
+ },
+ "jaqket": {
+ "ndcg@10": 0.36199154051747723
+ },
+ "mrtydi": {
+ "ndcg@10": 0.07810683176415421
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.6077212544951452
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.6433890489201118
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.39317174536190913
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.754165277432144
+ },
+ "jsts": {
+ "spearman": 0.7558202366183716
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.4966545453348478
+ },
+ "mewsc16": {
+ "v_measure_score": 0.3877356318022785
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6237623762376237
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
new file mode 100644
index 0000000..ea227c2
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.776174162517931
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5085781180553806
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7718541530739129
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8592571786794985
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9100551950168166
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.42368135774043536
+ },
+ "jaqket": {
+ "ndcg@10": 0.37721850397542034
+ },
+ "mrtydi": {
+ "ndcg@10": 0.07878085186566607
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.636999375405723
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.6413498649875696
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.397250919496823
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7756925231422259
+ },
+ "jsts": {
+ "spearman": 0.7652968548841591
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5262387436934941
+ },
+ "mewsc16": {
+ "v_measure_score": 0.37277574537292835
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.623321554770318
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
new file mode 100644
index 0000000..dbed068
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7619809437515043
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5205592432502059
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7789367871593064
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8490320705866646
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9065584234991577
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.4411487123884245
+ },
+ "jaqket": {
+ "ndcg@10": 0.39613283459361814
+ },
+ "mrtydi": {
+ "ndcg@10": 0.08154879873415645
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.6276035246534508
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.5838785018803183
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.3489329387182086
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7463567093877269
+ },
+ "jsts": {
+ "spearman": 0.7468283806971927
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.41041888940251137
+ },
+ "mewsc16": {
+ "v_measure_score": 0.45175891401665724
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6236711552090717
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json
new file mode 100644
index 0000000..9528312
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-unsup/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7619809437515043
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5152108946679324
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7895128475562229
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.865430249169577
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9115815294581953
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.47387768939865055
+ },
+ "jaqket": {
+ "ndcg@10": 0.3956683977353904
+ },
+ "mrtydi": {
+ "ndcg@10": 0.1144234568266308
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.6416096544574569
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.7023477497744102
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.4536720868647063
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.781770693640686
+ },
+ "jsts": {
+ "spearman": 0.7680617109850311
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5301620892693397
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4034776723308173
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6238078417520311
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
new file mode 100644
index 0000000..b36686c
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7725250131648236
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5341627023771393
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7682863192709365
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8639396658321546
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9094717381883379
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.47038430326303626
+ },
+ "jaqket": {
+ "ndcg@10": 0.44101304795602897
+ },
+ "mrtydi": {
+ "ndcg@10": 0.11429128335865787
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.43434267808785576
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.6240651697600803
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.3651687833824759
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.787528927058734
+ },
+ "jsts": {
+ "spearman": 0.7781413957931619
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.48448646364489634
+ },
+ "mewsc16": {
+ "v_measure_score": 0.43168522818790694
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6235418875927891
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json
new file mode 100644
index 0000000..f620d50
--- /dev/null
+++ b/docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-unsup/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7635642561809131
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5275222511867922
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7688060073049678
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8651446837233107
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9129851570116734
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.5014367709991477
+ },
+ "jaqket": {
+ "ndcg@10": 0.4583812630740073
+ },
+ "mrtydi": {
+ "ndcg@10": 0.13003320802922363
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.5508587506679636
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.7497069192695408
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.4524300499843447
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7984403024596518
+ },
+ "jsts": {
+ "spearman": 0.7813685476201204
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5319881995988209
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4330807170988368
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6226614895870103
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-3-large/summary.json b/docs/results/OpenAI/text-embedding-3-large/summary.json
new file mode 100644
index 0000000..46af0c5
--- /dev/null
+++ b/docs/results/OpenAI/text-embedding-3-large/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7789727938896414
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.6043632319384946
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8090871295952566
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.9108443051510002
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9358042266852659
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7240937077183436
+ },
+ "jaqket": {
+ "ndcg@10": 0.48208863565793814
+ },
+ "mrtydi": {
+ "ndcg@10": 0.3488438390945784
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9932811349540317
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9655113335080678
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9547126796600445
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8126909906411093
+ },
+ "jsts": {
+ "spearman": 0.8376863979620452
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.05018478985401151
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4955424351458981
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6234502302515055
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-3-small/summary.json b/docs/results/OpenAI/text-embedding-3-small/summary.json
new file mode 100644
index 0000000..74cee2e
--- /dev/null
+++ b/docs/results/OpenAI/text-embedding-3-small/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7000818608185178
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5592259673654241
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7766119663088307
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8866536867311439
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9291728102678644
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.640150048193537
+ },
+ "jaqket": {
+ "ndcg@10": 0.3394304922804131
+ },
+ "mrtydi": {
+ "ndcg@10": 0.2002984123046011
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9846617848570168
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9170440283351765
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.9017272741306225
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8083062989093882
+ },
+ "jsts": {
+ "spearman": 0.7808357024283473
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.051323988942160705
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4755374215259236
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6227417640807651
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/OpenAI/text-embedding-ada-002/summary.json b/docs/results/OpenAI/text-embedding-ada-002/summary.json
new file mode 100644
index 0000000..8c7a548
--- /dev/null
+++ b/docs/results/OpenAI/text-embedding-ada-002/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.6441904761904762
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5312953134953877
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7457150118928685
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8689044829586676
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9303611831749345
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6102270226904314
+ },
+ "jaqket": {
+ "ndcg@10": 0.4256467956806472
+ },
+ "mrtydi": {
+ "ndcg@10": 0.1450739420851161
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.9499224324391132
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9123300358752942
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.8197798210453923
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7909435250482901
+ },
+ "jsts": {
+ "spearman": 0.7894052744557472
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.060252212362740365
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4691938182964486
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6239830208701805
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json
new file mode 100644
index 0000000..42cc5ff
--- /dev/null
+++ b/docs/results/cl-nagoya/sup-simcse-ja-base/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7234436301724776
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5441445333270086
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7951973953020242
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8760200177186923
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9183455876236017
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.5161990612242935
+ },
+ "jaqket": {
+ "ndcg@10": 0.5024513438428565
+ },
+ "mrtydi": {
+ "ndcg@10": 0.13976323269046823
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.6807886421530585
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.6570889175649209
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.48219159577174137
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8282816229512862
+ },
+ "jsts": {
+ "spearman": 0.8127259236647225
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5266774168531417
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5091016872016825
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6256665481692143
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json
new file mode 100644
index 0000000..a2d8924
--- /dev/null
+++ b/docs/results/cl-nagoya/sup-simcse-ja-large/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7321444865928852
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5475800661400465
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7922802742146243
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8772172454209797
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9148471751378899
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.4683673504170269
+ },
+ "jaqket": {
+ "ndcg@10": 0.39878189118804513
+ },
+ "mrtydi": {
+ "ndcg@10": 0.11834919561027905
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.634254459552888
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.37927566884615427
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.25787534957423713
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.837959537101532
+ },
+ "jsts": {
+ "spearman": 0.825691902117111
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5074967876488787
+ },
+ "mewsc16": {
+ "v_measure_score": 0.503782014677764
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6250885896527285
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json
new file mode 100644
index 0000000..3863c9e
--- /dev/null
+++ b/docs/results/cl-nagoya/unsup-simcse-ja-base/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7330185800774036
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5392887528271114
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7907120296283751
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8597097942715117
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9115668272308735
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.46003459081522513
+ },
+ "jaqket": {
+ "ndcg@10": 0.3945725593125862
+ },
+ "mrtydi": {
+ "ndcg@10": 0.055507775092798486
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.6025847751308843
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.5562839869857912
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.3449181162324482
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7849379492955117
+ },
+ "jsts": {
+ "spearman": 0.7894946592483818
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5223347838445698
+ },
+ "mewsc16": {
+ "v_measure_score": 0.37310458219601117
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.624424778761062
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json
new file mode 100644
index 0000000..d37618a
--- /dev/null
+++ b/docs/results/cl-nagoya/unsup-simcse-ja-large/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.767905114979583
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5537089641846143
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7912698845073401
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8736185210672394
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9095494729022622
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.4509073581555124
+ },
+ "jaqket": {
+ "ndcg@10": 0.34595043675331943
+ },
+ "mrtydi": {
+ "ndcg@10": 0.05750859876901772
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.550742021417855
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.6307172007359215
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.39612451822677164
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8014979086154339
+ },
+ "jsts": {
+ "spearman": 0.8097685749017456
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5090447587797094
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4591920015613856
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6248671625929861
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/colorfulscoop/sbert-base-ja/summary.json b/docs/results/colorfulscoop/sbert-base-ja/summary.json
new file mode 100644
index 0000000..2a08044
--- /dev/null
+++ b/docs/results/colorfulscoop/sbert-base-ja/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7221023294352484
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.47952384496155054
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.725195343788811
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.836177960542408
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.8997301146575819
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.21501915127957166
+ },
+ "jaqket": {
+ "ndcg@10": 0.13161989528541293
+ },
+ "mrtydi": {
+ "ndcg@10": 0.00436010196904899
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.2878020264605714
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.22397059858982324
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.12815871897103842
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.6659298300713198
+ },
+ "jsts": {
+ "spearman": 0.7423952309826243
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.4298579019834722
+ },
+ "mewsc16": {
+ "v_measure_score": 0.46641671645082333
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6231013776050865
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-base/summary.json b/docs/results/intfloat/multilingual-e5-base/summary.json
new file mode 100644
index 0000000..96f9640
--- /dev/null
+++ b/docs/results/intfloat/multilingual-e5-base/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.6367079139150691
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5424265794470897
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7277503514873049
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8652828949015864
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9285060467194839
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6534478396845428
+ },
+ "jaqket": {
+ "ndcg@10": 0.5067444792013236
+ },
+ "mrtydi": {
+ "ndcg@10": 0.3837652120001251
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.8709767034225332
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9473129303429082
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.7304538728893641
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8128058660848744
+ },
+ "jsts": {
+ "spearman": 0.7839196475937381
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5502694126615243
+ },
+ "mewsc16": {
+ "v_measure_score": 0.41494514000218946
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6226482073127441
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-large/summary.json b/docs/results/intfloat/multilingual-e5-large/summary.json
new file mode 100644
index 0000000..a28c470
--- /dev/null
+++ b/docs/results/intfloat/multilingual-e5-large/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.706580687830688
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5653992303516462
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7577710251429624
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8859090262583831
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9296254722183955
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.7030214336558751
+ },
+ "jaqket": {
+ "ndcg@10": 0.5878065301444064
+ },
+ "mrtydi": {
+ "ndcg@10": 0.4363167873386172
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.8600225120389309
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9469712765040588
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.7248023877969718
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7840335060728089
+ },
+ "jsts": {
+ "spearman": 0.8098724997856234
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5713023706914878
+ },
+ "mewsc16": {
+ "v_measure_score": 0.4534484706354193
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.621496984746364
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/intfloat/multilingual-e5-small/summary.json b/docs/results/intfloat/multilingual-e5-small/summary.json
new file mode 100644
index 0000000..99a4423
--- /dev/null
+++ b/docs/results/intfloat/multilingual-e5-small/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.6214130966524566
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5127428912860463
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7085230519111091
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8622036829599259
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9303349187158247
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6411252958220891
+ },
+ "jaqket": {
+ "ndcg@10": 0.49966509556428645
+ },
+ "mrtydi": {
+ "ndcg@10": 0.36054822913647616
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.8520749151982298
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.9526123412781002
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.729906931983999
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.8150271836013705
+ },
+ "jsts": {
+ "spearman": 0.786450077409501
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.5470075389200084
+ },
+ "mewsc16": {
+ "v_measure_score": 0.391226933590049
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6219382321618744
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json
new file mode 100644
index 0000000..6b7309a
--- /dev/null
+++ b/docs/results/oshizo/sbert-jsnli-luke-japanese-base-lite/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7994675369288904
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5748206591211895
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.8025949222725076
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8875250742566655
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9156331205981866
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.519938655947725
+ },
+ "jaqket": {
+ "ndcg@10": 0.4206746951743811
+ },
+ "mrtydi": {
+ "ndcg@10": 0.10116108109776817
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.4930421996747514
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.719369187830078
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.3258568875005778
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7211422898060521
+ },
+ "jsts": {
+ "spearman": 0.8109305772255819
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.4677177349822789
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5389209739242912
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6237623762376237
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/pkshatech/GLuCoSE-base-ja/summary.json b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json
new file mode 100644
index 0000000..9048691
--- /dev/null
+++ b/docs/results/pkshatech/GLuCoSE-base-ja/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.8243606275521169
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.580654308041878
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7885427536904928
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8794225134482166
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9190289767663239
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.6387979415478197
+ },
+ "jaqket": {
+ "ndcg@10": 0.3981609655991592
+ },
+ "mrtydi": {
+ "ndcg@10": 0.30281316435910444
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.7825765249971093
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.8206371528870603
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.5982476164344701
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7496711324072552
+ },
+ "jsts": {
+ "spearman": 0.824592262812859
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.49890886040948096
+ },
+ "mewsc16": {
+ "v_measure_score": 0.49676862904881375
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.663883089770355
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json
new file mode 100644
index 0000000..cc9f179
--- /dev/null
+++ b/docs/results/pkshatech/simcse-ja-bert-base-clcmlp/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.6748573563374541
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5084883283463678
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7967050091211104
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.871999260591497
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.914930352019688
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.41496851385134836
+ },
+ "jaqket": {
+ "ndcg@10": 0.46003031782136106
+ },
+ "mrtydi": {
+ "ndcg@10": 0.1019130492122431
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.4014036990267884
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.5962532652358485
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.2452584471710635
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7307715649457595
+ },
+ "jsts": {
+ "spearman": 0.8052279921326252
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.4476707933600858
+ },
+ "mewsc16": {
+ "v_measure_score": 0.5029508725037098
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6239830208701805
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/sentence-transformers/LaBSE/summary.json b/docs/results/sentence-transformers/LaBSE/summary.json
new file mode 100644
index 0000000..de8fd21
--- /dev/null
+++ b/docs/results/sentence-transformers/LaBSE/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7361214773958769
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.516957890685124
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7698802987251081
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8835366493433755
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.9162507647227857
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.4310160105414995
+ },
+ "jaqket": {
+ "ndcg@10": 0.34245849139132745
+ },
+ "mrtydi": {
+ "ndcg@10": 0.04238747941951049
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.48918127058907085
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.7513086500303519
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.35089108319096984
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7698905918950973
+ },
+ "jsts": {
+ "spearman": 0.7612337568248777
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.4829337123233023
+ },
+ "mewsc16": {
+ "v_measure_score": 0.41471299546625956
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.623321554770318
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json
new file mode 100644
index 0000000..12f71a2
--- /dev/null
+++ b/docs/results/sentence-transformers/stsb-xlm-r-multilingual/summary.json
@@ -0,0 +1,62 @@
+{
+ "Classification": {
+ "amazon_counterfactual_classification": {
+ "macro_f1": 0.7565022696601644
+ },
+ "amazon_review_classification": {
+ "macro_f1": 0.5131771609073525
+ },
+ "massive_intent_classification": {
+ "macro_f1": 0.7427818411370812
+ },
+ "massive_scenario_classification": {
+ "macro_f1": 0.8609512679368835
+ }
+ },
+ "Reranking": {
+ "esci": {
+ "ndcg@10": 0.901984958764163
+ }
+ },
+ "Retrieval": {
+ "jagovfaqs_22k": {
+ "ndcg@10": 0.2511106863952595
+ },
+ "jaqket": {
+ "ndcg@10": 0.21606007987072834
+ },
+ "mrtydi": {
+ "ndcg@10": 0.027590779174942116
+ },
+ "nlp_journal_abs_intro": {
+ "ndcg@10": 0.2848558252647936
+ },
+ "nlp_journal_title_abs": {
+ "ndcg@10": 0.3646520309406354
+ },
+ "nlp_journal_title_intro": {
+ "ndcg@10": 0.11545016260271045
+ }
+ },
+ "STS": {
+ "jsick": {
+ "spearman": 0.7236409557069434
+ },
+ "jsts": {
+ "spearman": 0.7843597058304203
+ }
+ },
+ "Clustering": {
+ "livedoor_news": {
+ "v_measure_score": 0.24487129939212224
+ },
+ "mewsc16": {
+ "v_measure_score": 0.304278393205056
+ }
+ },
+ "PairClassification": {
+ "paws_x_ja": {
+ "binary_f1": 0.6219686162624821
+ }
+ }
+}
\ No newline at end of file
diff --git a/leaderboard.md b/leaderboard.md
new file mode 100644
index 0000000..107f2e9
--- /dev/null
+++ b/leaderboard.md
@@ -0,0 +1,188 @@
+# Leaderboard
+This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100.
+
+## Summary
+
+The summary shows the average scores within each task.
+
+| Model | Avg. | Retrieval | STS | Classification | Reranking | Clustering | PairClassification |
+|:----------------------------------------------|:----------|:------------|:----------|:-----------------|:------------|:-------------|:---------------------|
+| intfloat/multilingual-e5-large | **71.65** | 70.98 | 79.70 | 72.89 | 92.96 | 51.24 | 62.15 |
+| pkshatech/GLuCoSE-base-ja | 70.44 | 59.02 | 78.71 | 76.82 | 91.90 | 49.78 | **66.39** |
+| intfloat/multilingual-e5-base | 70.12 | 68.21 | 79.84 | 69.30 | 92.85 | 48.26 | 62.26 |
+| OpenAI/text-embedding-3-large | 69.63 | **74.48** | 82.52 | **77.58** | **93.58** | 27.29 | 62.35 |
+| intfloat/multilingual-e5-small | 69.52 | 67.27 | 80.07 | 67.62 | 93.03 | 46.91 | 62.19 |
+| cl-nagoya/sup-simcse-ja-base | 68.56 | 49.64 | 82.05 | 73.47 | 91.83 | **51.79** | 62.57 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 66.89 | 47.38 | 78.99 | 73.13 | 91.30 | 48.25 | 62.27 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 66.75 | 43.00 | 76.60 | 76.61 | 91.56 | 50.33 | 62.38 |
+| OpenAI/text-embedding-3-small | 66.74 | 66.39 | 79.46 | 73.06 | 92.92 | 26.34 | 62.27 |
+| cl-nagoya/sup-simcse-ja-large | 66.51 | 37.62 | **83.18** | 73.73 | 91.48 | 50.56 | 62.51 |
+| cl-nagoya/unsup-simcse-ja-large | 66.27 | 40.53 | 80.56 | 74.66 | 90.95 | 48.41 | 62.49 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 66.23 | 46.36 | 77.49 | 73.30 | 91.16 | 46.68 | 62.38 |
+| OpenAI/text-embedding-ada-002 | 65.84 | 64.38 | 79.02 | 69.75 | 93.04 | 26.47 | 62.40 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 65.28 | 40.82 | 78.28 | 73.47 | 90.95 | 45.81 | 62.35 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 65.14 | 42.59 | 77.05 | 72.90 | 91.01 | 44.95 | 62.33 |
+| cl-nagoya/unsup-simcse-ja-base | 65.07 | 40.23 | 78.72 | 73.07 | 91.16 | 44.77 | 62.44 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 64.77 | 41.79 | 75.50 | 73.77 | 90.95 | 44.22 | 62.38 |
+| sentence-transformers/LaBSE | 64.70 | 40.12 | 76.56 | 72.66 | 91.63 | 44.88 | 62.33 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 64.42 | 37.00 | 76.80 | 71.30 | 91.49 | 47.53 | 62.40 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 64.15 | 41.32 | 74.66 | 72.76 | 90.66 | 43.11 | 62.37 |
+| colorfulscoop/sbert-base-ja | 58.85 | 16.52 | 70.42 | 69.07 | 89.97 | 44.81 | 62.31 |
+| sentence-transformers/stsb-xlm-r-multilingual | 58.01 | 21.00 | 75.40 | 71.84 | 90.20 | 27.46 | 62.20 |
+
+## Retrieval
+| Model | Avg. | jagovfaqs_22k<br>(ndcg@10) | jaqket<br>(ndcg@10) | mrtydi<br>(ndcg@10) | nlp_journal_abs_intro<br>(ndcg@10) | nlp_journal_title_abs<br>(ndcg@10) | nlp_journal_title_intro<br>(ndcg@10) |
+|:----------------------------------------------|:----------|:-----------------------------|:----------------------|:----------------------|:-------------------------------------|:-------------------------------------|:---------------------------------------|
+| OpenAI/text-embedding-3-large | **74.48** | **72.41** | 48.21 | 34.88 | **99.33** | **96.55** | **95.47** |
+| intfloat/multilingual-e5-large | 70.98 | 70.30 | **58.78** | **43.63** | 86.00 | 94.70 | 72.48 |
+| intfloat/multilingual-e5-base | 68.21 | 65.34 | 50.67 | 38.38 | 87.10 | 94.73 | 73.05 |
+| intfloat/multilingual-e5-small | 67.27 | 64.11 | 49.97 | 36.05 | 85.21 | 95.26 | 72.99 |
+| OpenAI/text-embedding-3-small | 66.39 | 64.02 | 33.94 | 20.03 | 98.47 | 91.70 | 90.17 |
+| OpenAI/text-embedding-ada-002 | 64.38 | 61.02 | 42.56 | 14.51 | 94.99 | 91.23 | 81.98 |
+| pkshatech/GLuCoSE-base-ja | 59.02 | 63.88 | 39.82 | 30.28 | 78.26 | 82.06 | 59.82 |
+| cl-nagoya/sup-simcse-ja-base | 49.64 | 51.62 | 50.25 | 13.98 | 68.08 | 65.71 | 48.22 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 47.38 | 50.14 | 45.84 | 13.00 | 55.09 | 74.97 | 45.24 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.36 | 47.39 | 39.57 | 11.44 | 64.16 | 70.23 | 45.37 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 43.00 | 51.99 | 42.07 | 10.12 | 49.30 | 71.94 | 32.59 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 42.59 | 42.37 | 37.72 | 7.88 | 63.70 | 64.13 | 39.73 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 41.79 | 42.31 | 36.20 | 7.81 | 60.77 | 64.34 | 39.32 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 41.32 | 44.11 | 39.61 | 8.15 | 62.76 | 58.39 | 34.89 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 40.82 | 47.04 | 44.10 | 11.43 | 43.43 | 62.41 | 36.52 |
+| cl-nagoya/unsup-simcse-ja-large | 40.53 | 45.09 | 34.60 | 5.75 | 55.07 | 63.07 | 39.61 |
+| cl-nagoya/unsup-simcse-ja-base | 40.23 | 46.00 | 39.46 | 5.55 | 60.26 | 55.63 | 34.49 |
+| sentence-transformers/LaBSE | 40.12 | 43.10 | 34.25 | 4.24 | 48.92 | 75.13 | 35.09 |
+| cl-nagoya/sup-simcse-ja-large | 37.62 | 46.84 | 39.88 | 11.83 | 63.43 | 37.93 | 25.79 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 37.00 | 41.50 | 46.00 | 10.19 | 40.14 | 59.63 | 24.53 |
+| sentence-transformers/stsb-xlm-r-multilingual | 21.00 | 25.11 | 21.61 | 2.76 | 28.49 | 36.47 | 11.55 |
+| colorfulscoop/sbert-base-ja | 16.52 | 21.50 | 13.16 | 0.44 | 28.78 | 22.40 | 12.82 |
+
+## STS
+| Model | Avg. | jsick<br>(spearman) | jsts<br>(spearman) |
+|:----------------------------------------------|:----------|:----------------------|:---------------------|
+| cl-nagoya/sup-simcse-ja-large | **83.18** | **83.80** | 82.57 |
+| OpenAI/text-embedding-3-large | 82.52 | 81.27 | **83.77** |
+| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.83 | 81.27 |
+| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.15 | 80.98 |
+| intfloat/multilingual-e5-small | 80.07 | 81.50 | 78.65 |
+| intfloat/multilingual-e5-base | 79.84 | 81.28 | 78.39 |
+| intfloat/multilingual-e5-large | 79.70 | 78.40 | 80.99 |
+| OpenAI/text-embedding-3-small | 79.46 | 80.83 | 78.08 |
+| OpenAI/text-embedding-ada-002 | 79.02 | 79.09 | 78.94 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 78.99 | 79.84 | 78.14 |
+| cl-nagoya/unsup-simcse-ja-base | 78.72 | 78.49 | 78.95 |
+| pkshatech/GLuCoSE-base-ja | 78.71 | 74.97 | 82.46 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 78.28 | 78.75 | 77.81 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 77.49 | 78.18 | 76.81 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 77.05 | 77.57 | 76.53 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 76.80 | 73.08 | 80.52 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.60 | 72.11 | 81.09 |
+| sentence-transformers/LaBSE | 76.56 | 76.99 | 76.12 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 75.50 | 75.42 | 75.58 |
+| sentence-transformers/stsb-xlm-r-multilingual | 75.40 | 72.36 | 78.44 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 74.66 | 74.64 | 74.68 |
+| colorfulscoop/sbert-base-ja | 70.42 | 66.59 | 74.24 |
+
+## Classification
+| Model | Avg. | amazon_counterfactual<br>(macro_f1) | amazon_review<br>(macro_f1) | massive_intent<br>(macro_f1) | massive_scenario<br>(macro_f1) |
+|:----------------------------------------------|:----------|:--------------------------------------|:------------------------------|:-------------------------------|:---------------------------------|
+| OpenAI/text-embedding-3-large | **77.58** | 77.90 | **60.44** | **80.91** | **91.08** |
+| pkshatech/GLuCoSE-base-ja | 76.82 | **82.44** | 58.07 | 78.85 | 87.94 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.61 | 79.95 | 57.48 | 80.26 | 88.75 |
+| cl-nagoya/unsup-simcse-ja-large | 74.66 | 76.79 | 55.37 | 79.13 | 87.36 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 73.77 | 78.10 | 51.56 | 78.79 | 86.63 |
+| cl-nagoya/sup-simcse-ja-large | 73.73 | 73.21 | 54.76 | 79.23 | 87.72 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 73.47 | 77.25 | 53.42 | 76.83 | 86.39 |
+| cl-nagoya/sup-simcse-ja-base | 73.47 | 72.34 | 54.41 | 79.52 | 87.60 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 73.30 | 76.20 | 51.52 | 78.95 | 86.54 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 73.13 | 76.36 | 52.75 | 76.88 | 86.51 |
+| cl-nagoya/unsup-simcse-ja-base | 73.07 | 73.30 | 53.93 | 79.07 | 85.97 |
+| OpenAI/text-embedding-3-small | 73.06 | 70.01 | 55.92 | 77.66 | 88.67 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 72.90 | 77.62 | 50.86 | 77.19 | 85.93 |
+| intfloat/multilingual-e5-large | 72.89 | 70.66 | 56.54 | 75.78 | 88.59 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 72.76 | 76.20 | 52.06 | 77.89 | 84.90 |
+| sentence-transformers/LaBSE | 72.66 | 73.61 | 51.70 | 76.99 | 88.35 |
+| sentence-transformers/stsb-xlm-r-multilingual | 71.84 | 75.65 | 51.32 | 74.28 | 86.10 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 71.30 | 67.49 | 50.85 | 79.67 | 87.20 |
+| OpenAI/text-embedding-ada-002 | 69.75 | 64.42 | 53.13 | 74.57 | 86.89 |
+| intfloat/multilingual-e5-base | 69.30 | 63.67 | 54.24 | 72.78 | 86.53 |
+| colorfulscoop/sbert-base-ja | 69.07 | 72.21 | 47.95 | 72.52 | 83.62 |
+| intfloat/multilingual-e5-small | 67.62 | 62.14 | 51.27 | 70.85 | 86.22 |
+
+## Reranking
+| Model | Avg. | esci<br>(ndcg@10) |
+|:----------------------------------------------|:----------|:--------------------|
+| OpenAI/text-embedding-3-large | **93.58** | **93.58** |
+| OpenAI/text-embedding-ada-002 | 93.04 | 93.04 |
+| intfloat/multilingual-e5-small | 93.03 | 93.03 |
+| intfloat/multilingual-e5-large | 92.96 | 92.96 |
+| OpenAI/text-embedding-3-small | 92.92 | 92.92 |
+| intfloat/multilingual-e5-base | 92.85 | 92.85 |
+| pkshatech/GLuCoSE-base-ja | 91.90 | 91.90 |
+| cl-nagoya/sup-simcse-ja-base | 91.83 | 91.83 |
+| sentence-transformers/LaBSE | 91.63 | 91.63 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 91.56 | 91.56 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 91.49 | 91.49 |
+| cl-nagoya/sup-simcse-ja-large | 91.48 | 91.48 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 91.30 | 91.30 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 91.16 | 91.16 |
+| cl-nagoya/unsup-simcse-ja-base | 91.16 | 91.16 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 91.01 | 91.01 |
+| cl-nagoya/unsup-simcse-ja-large | 90.95 | 90.95 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 90.95 | 90.95 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 90.95 | 90.95 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 90.66 | 90.66 |
+| sentence-transformers/stsb-xlm-r-multilingual | 90.20 | 90.20 |
+| colorfulscoop/sbert-base-ja | 89.97 | 89.97 |
+
+## Clustering
+| Model | Avg. | livedoor_news<br>(v_measure_score) | mewsc16<br>(v_measure_score) |
+|:----------------------------------------------|:----------|:-------------------------------------|:-------------------------------|
+| cl-nagoya/sup-simcse-ja-base | **51.79** | 52.67 | 50.91 |
+| intfloat/multilingual-e5-large | 51.24 | **57.13** | 45.34 |
+| cl-nagoya/sup-simcse-ja-large | 50.56 | 50.75 | 50.38 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 50.33 | 46.77 | **53.89** |
+| pkshatech/GLuCoSE-base-ja | 49.78 | 49.89 | 49.68 |
+| cl-nagoya/unsup-simcse-ja-large | 48.41 | 50.90 | 45.92 |
+| intfloat/multilingual-e5-base | 48.26 | 55.03 | 41.49 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 48.25 | 53.20 | 43.31 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 47.53 | 44.77 | 50.30 |
+| intfloat/multilingual-e5-small | 46.91 | 54.70 | 39.12 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.68 | 53.02 | 40.35 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 45.81 | 48.45 | 43.17 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 44.95 | 52.62 | 37.28 |
+| sentence-transformers/LaBSE | 44.88 | 48.29 | 41.47 |
+| colorfulscoop/sbert-base-ja | 44.81 | 42.99 | 46.64 |
+| cl-nagoya/unsup-simcse-ja-base | 44.77 | 52.23 | 37.31 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 44.22 | 49.67 | 38.77 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 43.11 | 41.04 | 45.18 |
+| sentence-transformers/stsb-xlm-r-multilingual | 27.46 | 24.49 | 30.43 |
+| OpenAI/text-embedding-3-large | 27.29 | 5.02 | 49.55 |
+| OpenAI/text-embedding-ada-002 | 26.47 | 6.03 | 46.92 |
+| OpenAI/text-embedding-3-small | 26.34 | 5.13 | 47.55 |
+
+## PairClassification
+| Model | Avg. | paws_x_ja<br>(binary_f1) |
+|:----------------------------------------------|:----------|:---------------------------|
+| pkshatech/GLuCoSE-base-ja | **66.39** | **66.39** |
+| cl-nagoya/sup-simcse-ja-base | 62.57 | 62.57 |
+| cl-nagoya/sup-simcse-ja-large | 62.51 | 62.51 |
+| cl-nagoya/unsup-simcse-ja-large | 62.49 | 62.49 |
+| cl-nagoya/unsup-simcse-ja-base | 62.44 | 62.44 |
+| pkshatech/simcse-ja-bert-base-clcmlp | 62.40 | 62.40 |
+| OpenAI/text-embedding-ada-002 | 62.40 | 62.40 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 62.38 | 62.38 |
+| oshizo/sbert-jsnli-luke-japanese-base-lite | 62.38 | 62.38 |
+| MU-Kindai/Japanese-DiffCSE-BERT-base | 62.38 | 62.38 |
+| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 62.37 | 62.37 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 62.35 | 62.35 |
+| OpenAI/text-embedding-3-large | 62.35 | 62.35 |
+| MU-Kindai/Japanese-MixCSE-BERT-base | 62.33 | 62.33 |
+| sentence-transformers/LaBSE | 62.33 | 62.33 |
+| colorfulscoop/sbert-base-ja | 62.31 | 62.31 |
+| OpenAI/text-embedding-3-small | 62.27 | 62.27 |
+| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 62.27 | 62.27 |
+| intfloat/multilingual-e5-base | 62.26 | 62.26 |
+| sentence-transformers/stsb-xlm-r-multilingual | 62.20 | 62.20 |
+| intfloat/multilingual-e5-small | 62.19 | 62.19 |
+| intfloat/multilingual-e5-large | 62.15 | 62.15 |
+
diff --git a/make_leaderboard.py b/make_leaderboard.py
new file mode 100644
index 0000000..ff3a330
--- /dev/null
+++ b/make_leaderboard.py
@@ -0,0 +1,109 @@
+"""Build ``leaderboard.md`` from the ``summary.json`` files stored under ``docs/results``."""
+
+import json
+from collections import defaultdict
+from pathlib import Path
+
+from tabulate import tabulate
+
+# Shorter column labels for datasets whose names repeat the task name.
+dataset_name_aliases = {
+    "amazon_counterfactual_classification": "amazon_counterfactual",
+    "amazon_review_classification": "amazon_review",
+    "massive_intent_classification": "massive_intent",
+    "massive_scenario_classification": "massive_scenario",
+}
+
+# Order of the per-task sections in the output file and of the task columns in the summary table.
+TASK_ORDER = ["Retrieval", "STS", "Classification", "Reranking", "Clustering", "PairClassification"]
+SUMMARY_KEY = "Summary"
+
+# Step 1: collect the results from the results folder.
+# Layout: {task_name: {model_signature: {column_label: score}}}. In the per-task tables the
+# column label is "<dataset><br>(<metric>)"; in the SUMMARY_KEY table it is the task name.
+all_results: dict[str, dict[str, dict[str, float]]] = defaultdict(lambda: defaultdict(dict))
+for summary_file in Path("docs/results").rglob("summary.json"):
+    if not summary_file.exists():
+        continue
+
+    with open(summary_file, encoding="utf-8") as f:
+        summary = json.load(f)
+
+    # The two directory names above the file identify the model: <org>/<model>/summary.json.
+    org_name = summary_file.parent.parent.name
+    model_name = summary_file.parent.name
+    model_signature = f"{org_name}/{model_name}"
+
+    for task_name, task_results in summary.items():
+        task_results_formatted: dict[str, float] = {}
+        task_scores: list[float] = []
+        for dataset_name, metric_dict in task_results.items():
+            # Only the first metric listed for a dataset is reported.
+            metric_name, score = next(iter(metric_dict.items()))
+            dataset_name = dataset_name_aliases.get(dataset_name, dataset_name)
+            # "<br>" puts the metric on a second line inside the header cell; a raw line break
+            # in this label would split the markdown table's header row.
+            task_results_formatted[f"{dataset_name}<br>({metric_name})"] = score
+            task_scores.append(score)
+        all_results[task_name][model_signature] = task_results_formatted
+        # The summary table holds the unweighted mean of the task's dataset scores.
+        all_results[SUMMARY_KEY][model_signature][task_name] = sum(task_scores) / len(task_scores)
+
+# Step 2: create one markdown table per task, plus the summary table.
+
+
+def format_score(score: float) -> str:
+    """Return ``score`` multiplied by 100 and formatted with two decimal places."""
+    return f"{score * 100:.2f}"
+
+
+AVG_COLUMN_NAME = "Avg."
+markdown_tables: dict[str, str] = {}
+for task_name, task_results in all_results.items():
+    # Columns come from the first model seen; every other model is looked up with the same keys.
+    dataset_keys = list(task_results[next(iter(task_results))].keys())
+    if task_name == SUMMARY_KEY:
+        dataset_keys = TASK_ORDER
+
+    header = ["Model", AVG_COLUMN_NAME, *dataset_keys]
+    table_list: list[list[str | float]] = []
+    for model_signature, dataset_scores in task_results.items():
+        model_scores = [dataset_scores[k] for k in dataset_keys]
+        average_score = sum(model_scores) / len(model_scores)
+        table_list.append([model_signature, average_score, *model_scores])
+
+    # sort by the average score, best first
+    avg_idx = header.index(AVG_COLUMN_NAME)
+    table_list.sort(key=lambda x: x[avg_idx], reverse=True)
+
+    # Format every score column and make the highest score in each column bold. The maximum is
+    # computed while the column still holds floats, before its cells are replaced by strings.
+    for dataset_name in [AVG_COLUMN_NAME, *dataset_keys]:
+        task_idx = header.index(dataset_name)
+        max_score = max(row[task_idx] for row in table_list)
+        for row in table_list:
+            if row[task_idx] == max_score:
+                row[task_idx] = f"**{format_score(row[task_idx])}**"
+            else:
+                row[task_idx] = format_score(row[task_idx])
+
+    # add header
+    table_list.insert(0, header)
+    markdown_table = tabulate(table_list, headers="firstrow", tablefmt="pipe")
+    markdown_tables[task_name] = markdown_table
+
+# Step 3: dump the markdown tables to a file.
+with open("leaderboard.md", "w", encoding="utf-8") as f:
+    f.write("# Leaderboard\n")
+    f.write(
+        "This leaderboard shows the results stored under `docs/results`. The scores are all multiplied by 100.\n\n"
+    )
+    for task_name in [SUMMARY_KEY, *TASK_ORDER]:
+        markdown_table = markdown_tables[task_name]
+        f.write(f"## {task_name}\n")
+
+        if task_name == SUMMARY_KEY:
+            f.write("\nThe summary shows the average scores within each task.\n\n")
+
+        f.write(markdown_table)
+        f.write("\n\n")
diff --git a/poetry.lock b/poetry.lock
index 71b1f79..40fbe9f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1454,14 +1454,13 @@ files = [
[[package]]
name = "nvidia-nvjitlink-cu12"
-version = "12.5.40"
+version = "12.5.82"
description = "Nvidia JIT LTO Library"
optional = false
python-versions = ">=3"
files = [
- {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"},
- {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"},
- {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"},
+ {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"},
+ {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"},
]
[[package]]
@@ -2621,6 +2620,20 @@ files = [
[package.dependencies]
mpmath = ">=1.1.0,<1.4.0"
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+description = "Pretty-print tabular data"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
+ {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
+]
+
+[package.extras]
+widechars = ["wcwidth"]
+
[[package]]
name = "tbb"
version = "2021.13.0"
@@ -3389,4 +3402,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<4.0"
-content-hash = "bfff8c9db1f28df560b71b1c09802902ee89f5c39baa84ffda906a9238d38df8"
+content-hash = "a2c9ed2cef63429fda1482752acb674fe3b39b94498bbe2c177d0b8ac9558c44"
diff --git a/pyproject.toml b/pyproject.toml
index c4775df..8c27ffb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,15 +32,17 @@ pytest-mock = "^3.14.0"
tiktoken = "^0.6.0"
numpy = "^1.26"
accelerate = "^0.31.0"
+tabulate = "^0.9.0"
[tool.poetry.group.dev.dependencies]
black = "^23.11.0"
isort = "^5.12.0"
mypy = "^1.7.1"
flake8 = "^7.0.0"
+tabulate = "^0.9.0"
[tool.black]
line-length = 119
[tool.isort]
-profile = "black"
+profile = "black"
\ No newline at end of file