Skip to content

Commit

Permalink
Merge pull request #40 from sbintuitions/leaderboard
Browse files Browse the repository at this point in the history
[Feature] Leaderboard作成
  • Loading branch information
lsz05 authored Jul 10, 2024
2 parents 94dc67a + 25fd21b commit 094bdc5
Show file tree
Hide file tree
Showing 27 changed files with 1,678 additions and 6 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

This is an easy-to-use evaluation script designed for JMTEB evaluation.

The JMTEB leaderboard is [here](leaderboard.md). Submission guidelines are coming soon.

## Quick start

```bash
Expand Down
62 changes: 62 additions & 0 deletions docs/results/MU-Kindai/Japanese-DiffCSE-BERT-base/summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.7809527709426081
},
"amazon_review_classification": {
"macro_f1": 0.5155899232320224
},
"massive_intent_classification": {
"macro_f1": 0.7879373479249787
},
"massive_scenario_classification": {
"macro_f1": 0.8662625888023707
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9095168116460639
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.42314124780036416
},
"jaqket": {
"ndcg@10": 0.36199154051747723
},
"mrtydi": {
"ndcg@10": 0.07810683176415421
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.6077212544951452
},
"nlp_journal_title_abs": {
"ndcg@10": 0.6433890489201118
},
"nlp_journal_title_intro": {
"ndcg@10": 0.39317174536190913
}
},
"STS": {
"jsick": {
"spearman": 0.754165277432144
},
"jsts": {
"spearman": 0.7558202366183716
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.4966545453348478
},
"mewsc16": {
"v_measure_score": 0.3877356318022785
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.6237623762376237
}
}
}
62 changes: 62 additions & 0 deletions docs/results/MU-Kindai/Japanese-MixCSE-BERT-base/summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.776174162517931
},
"amazon_review_classification": {
"macro_f1": 0.5085781180553806
},
"massive_intent_classification": {
"macro_f1": 0.7718541530739129
},
"massive_scenario_classification": {
"macro_f1": 0.8592571786794985
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9100551950168166
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.42368135774043536
},
"jaqket": {
"ndcg@10": 0.37721850397542034
},
"mrtydi": {
"ndcg@10": 0.07878085186566607
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.636999375405723
},
"nlp_journal_title_abs": {
"ndcg@10": 0.6413498649875696
},
"nlp_journal_title_intro": {
"ndcg@10": 0.397250919496823
}
},
"STS": {
"jsick": {
"spearman": 0.7756925231422259
},
"jsts": {
"spearman": 0.7652968548841591
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.5262387436934941
},
"mewsc16": {
"v_measure_score": 0.37277574537292835
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.623321554770318
}
}
}
62 changes: 62 additions & 0 deletions docs/results/MU-Kindai/Japanese-SimCSE-BERT-base-sup/summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.7619809437515043
},
"amazon_review_classification": {
"macro_f1": 0.5205592432502059
},
"massive_intent_classification": {
"macro_f1": 0.7789367871593064
},
"massive_scenario_classification": {
"macro_f1": 0.8490320705866646
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9065584234991577
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.4411487123884245
},
"jaqket": {
"ndcg@10": 0.39613283459361814
},
"mrtydi": {
"ndcg@10": 0.08154879873415645
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.6276035246534508
},
"nlp_journal_title_abs": {
"ndcg@10": 0.5838785018803183
},
"nlp_journal_title_intro": {
"ndcg@10": 0.3489329387182086
}
},
"STS": {
"jsick": {
"spearman": 0.7463567093877269
},
"jsts": {
"spearman": 0.7468283806971927
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.41041888940251137
},
"mewsc16": {
"v_measure_score": 0.45175891401665724
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.6236711552090717
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.7619809437515043
},
"amazon_review_classification": {
"macro_f1": 0.5152108946679324
},
"massive_intent_classification": {
"macro_f1": 0.7895128475562229
},
"massive_scenario_classification": {
"macro_f1": 0.865430249169577
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9115815294581953
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.47387768939865055
},
"jaqket": {
"ndcg@10": 0.3956683977353904
},
"mrtydi": {
"ndcg@10": 0.1144234568266308
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.6416096544574569
},
"nlp_journal_title_abs": {
"ndcg@10": 0.7023477497744102
},
"nlp_journal_title_intro": {
"ndcg@10": 0.4536720868647063
}
},
"STS": {
"jsick": {
"spearman": 0.781770693640686
},
"jsts": {
"spearman": 0.7680617109850311
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.5301620892693397
},
"mewsc16": {
"v_measure_score": 0.4034776723308173
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.6238078417520311
}
}
}
62 changes: 62 additions & 0 deletions docs/results/MU-Kindai/Japanese-SimCSE-BERT-large-sup/summary.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.7725250131648236
},
"amazon_review_classification": {
"macro_f1": 0.5341627023771393
},
"massive_intent_classification": {
"macro_f1": 0.7682863192709365
},
"massive_scenario_classification": {
"macro_f1": 0.8639396658321546
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9094717381883379
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.47038430326303626
},
"jaqket": {
"ndcg@10": 0.44101304795602897
},
"mrtydi": {
"ndcg@10": 0.11429128335865787
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.43434267808785576
},
"nlp_journal_title_abs": {
"ndcg@10": 0.6240651697600803
},
"nlp_journal_title_intro": {
"ndcg@10": 0.3651687833824759
}
},
"STS": {
"jsick": {
"spearman": 0.787528927058734
},
"jsts": {
"spearman": 0.7781413957931619
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.48448646364489634
},
"mewsc16": {
"v_measure_score": 0.43168522818790694
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.6235418875927891
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"Classification": {
"amazon_counterfactual_classification": {
"macro_f1": 0.7635642561809131
},
"amazon_review_classification": {
"macro_f1": 0.5275222511867922
},
"massive_intent_classification": {
"macro_f1": 0.7688060073049678
},
"massive_scenario_classification": {
"macro_f1": 0.8651446837233107
}
},
"Reranking": {
"esci": {
"ndcg@10": 0.9129851570116734
}
},
"Retrieval": {
"jagovfaqs_22k": {
"ndcg@10": 0.5014367709991477
},
"jaqket": {
"ndcg@10": 0.4583812630740073
},
"mrtydi": {
"ndcg@10": 0.13003320802922363
},
"nlp_journal_abs_intro": {
"ndcg@10": 0.5508587506679636
},
"nlp_journal_title_abs": {
"ndcg@10": 0.7497069192695408
},
"nlp_journal_title_intro": {
"ndcg@10": 0.4524300499843447
}
},
"STS": {
"jsick": {
"spearman": 0.7984403024596518
},
"jsts": {
"spearman": 0.7813685476201204
}
},
"Clustering": {
"livedoor_news": {
"v_measure_score": 0.5319881995988209
},
"mewsc16": {
"v_measure_score": 0.4330807170988368
}
},
"PairClassification": {
"paws_x_ja": {
"binary_f1": 0.6226614895870103
}
}
}
Loading

0 comments on commit 094bdc5

Please sign in to comment.