From 28c0d401dc28530b2ab95533dd9272cac5ef6333 Mon Sep 17 00:00:00 2001 From: nvitramble Date: Wed, 29 Jun 2022 17:24:02 -0700 Subject: [PATCH] Add new BERT calibration dataset --- calibration/SQuAD-v1.1/README.md | 7 +- ...tion.txt => bert_calibration_features.txt} | 0 .../SQuAD-v1.1/bert_calibration_qas_ids.txt | 100 ++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) rename calibration/SQuAD-v1.1/{bert-calibration.txt => bert_calibration_features.txt} (100%) create mode 100755 calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt diff --git a/calibration/SQuAD-v1.1/README.md b/calibration/SQuAD-v1.1/README.md index 5f0fad6d3..85b2513d7 100644 --- a/calibration/SQuAD-v1.1/README.md +++ b/calibration/SQuAD-v1.1/README.md @@ -1 +1,6 @@ -The calibration file has 100 randomly selected samples from dev-1.1.json, which contains 10570 samples in total. +The integers in bert_calibration_features.txt correspond to 100 randomly selected indices in the list of features generated from dev-v1.1.json using [convert_examples_to_features()](https://github.com/mlcommons/inference/blob/master/language/bert/create_squad_data.py#L249) with a doc_stride of 128 and a max_seq_len of 384. + +The values in bert_calibration_qas_ids.txt correspond to 100 randomly selected qas ids in the dev-v1.1.json file. + +Please use at most one calibration file from this folder for calibration. 
+ diff --git a/calibration/SQuAD-v1.1/bert-calibration.txt b/calibration/SQuAD-v1.1/bert_calibration_features.txt similarity index 100% rename from calibration/SQuAD-v1.1/bert-calibration.txt rename to calibration/SQuAD-v1.1/bert_calibration_features.txt diff --git a/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt b/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt new file mode 100755 index 000000000..5d6f45989 --- /dev/null +++ b/calibration/SQuAD-v1.1/bert_calibration_qas_ids.txt @@ -0,0 +1,100 @@ +573020f7b2c2fd14005688fa +56beb6f23aeaaa14008c92a1 +5737a5931c456719005744e9 +5725d79e89a1e219009abf91 +56e0d9e0231d4119001ac43f +57281ab63acd2414000df496 +57269fab5951b619008f780b +5726400589a1e219009ac5f0 +572fd264b2c2fd14005684aa +56f85e71a6d7ea1400e175c4 +5728804b4b864d1900164a47 +57264cac708984140094c1b4 +5726bf135951b619008f7ceb +5728848cff5b5019007da298 +572fbf21a23a5019007fc93b +5727448b5951b619008f87a1 +5729e1101d04691400779641 +56e11afbcd28a01900c675c9 +5726642f5951b619008f7159 +56e08d32231d4119001ac2b1 +57265d86f1498d1400e8dd50 +56f7eddca6d7ea1400e172d9 +56de1645cffd8e1900b4b5d1 +5726a5525951b619008f78df +56f851b1a6d7ea1400e1755e +572a18a4af94a219006aa7e2 +57286bb84b864d19001649ca +571bb2269499d21900609cab +56d7251d0d65d214001983cc +56f88eafaef2371900626194 +571cde695efbb31900334e16 +57294279af94a219006aa20a +56bec98e3aeaaa14008c9457 +57269656708984140094cb01 +56be54bdacb8001400a50323 +571c9074dd7acb1400e4c100 +56f8b4d79b226e1400dd0e78 +5710f2e2a58dae1900cd6b73 +572683e6f1498d1400e8e24e +56f7f2e0aef2371900625cb3 +572fadcbb2c2fd1400568329 +5725fabc89a1e219009ac12a +5727aa413acd2414000de924 +56e77da237bdd419002c403d +5729e2b76aef0514001550d2 +57265e11708984140094c3bd +5726bf325951b619008f7d01 +57335fcad058e614000b5973 +572663a9f1498d1400e8ddf2 +57299ec43f37b3190047850f +56f80e1daef2371900625d8d +572689b6dd62a815002e8892 +57264a74708984140094c18c +57274d1cdd62a815002e9ab2 +572871bc4b864d1900164a04 +56d7018a0d65d214001982c5 +57111713a58dae1900cd6c02 
+56bebbbf3aeaaa14008c9317 +57300e2604bcaa1900d770b7 +56f8074faef2371900625d7a +5727c94bff5b5019007d954b +5727ffb5ff5b5019007d9a8d +56e75d5037bdd419002c3ef8 +57273e50dd62a815002e9a05 +5729582b1d046914007792e4 +57290ee2af94a219006aa003 +57286ec63acd2414000df9d4 +572632ceec44d21400f3dc30 +5726f635dd62a815002e9658 +572a1f086aef0514001552c2 +57269344f1498d1400e8e440 +56bec6ac3aeaaa14008c93ff +57283adcff5b5019007d9f96 +5733266d4776f41900660714 +5725d79e89a1e219009abf94 +57280f974b864d1900164372 +570960cf200fba1400367f04 +570d28bdb3d812140066d4a7 +56e1c0f6cd28a01900c67b2e +56bec3153aeaaa14008c938b +57284618ff5b5019007da0ac +571c3e8cdd7acb1400e4c0a7 +5728fb6a1d04691400778ef6 +5726ef12dd62a815002e95a0 +57296f85af94a219006aa404 +572fe288a23a5019007fcadb +5727500f708984140094dbff +572fc659b2c2fd1400568449 +570d3468b3d812140066d545 +572a07c11d046914007796d5 +56e1fc57e3433e140042322c +573098f38ab72b1400f9c5d5 +56e1b355e3433e14004230b2 +57280cac2ca10214002d9cac +57287b4a4b864d1900164a2b +56bf36b93aeaaa14008c9565 +5728202c4b864d19001644ec +5728dab94b864d1900164f99 +57376a1bc3c5551400e51ec5 +57377083c3c5551400e51ee2