diff --git a/ALCF/data-lists/sirius/books.txt b/ALCF/data-lists/sirius/books.txt new file mode 100644 index 0000000000..7567ba5227 --- /dev/null +++ b/ALCF/data-lists/sirius/books.txt @@ -0,0 +1,3 @@ +0.006 /lus/tegu/projects/PolarisAT/foremans/projects/argonne-lcf/Megatron-DeepSpeed/data/books-0000_text_document +0.006 /lus/tegu/projects/PolarisAT/foremans/projects/argonne-lcf/Megatron-DeepSpeed/data/books-0001_text_document +0.006 /lus/tegu/projects/PolarisAT/foremans/projects/argonne-lcf/Megatron-DeepSpeed/data/books-0002_text_document diff --git a/ALCF/data-lists/sunspot/algebraic.txt b/ALCF/data-lists/sunspot/algebraic.txt new file mode 100644 index 0000000000..0f25e30498 --- /dev/null +++ b/ALCF/data-lists/sunspot/algebraic.txt @@ -0,0 +1,16 @@ +0.0018520780893211373 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0000_text_document +0.0017591050606817512 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0001_text_document +0.001459052794333798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0002_text_document +0.0007405667281569194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0003_text_document +0.00019420030110896795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0004_text_document +0.0009008668715801845 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0005_text_document +0.00015115827957143057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0006_text_document +0.0014552844319220648 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0007_text_document +0.0012469861325685161 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0008_text_document +0.00136412011372413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0009_text_document +0.0007064279699221103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0010_text_document +0.0008472240000687427 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0011_text_document +0.0001984375713341955 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0012_text_document +0.0005472773881697123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0013_text_document +0.001815779629850992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0014_text_document +0.0018313600689757324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/algebraic-stack-train-0015_text_document diff --git a/ALCF/data-lists/sunspot/arxiv.txt b/ALCF/data-lists/sunspot/arxiv.txt new file mode 100644 index 0000000000..c50df90503 --- /dev/null +++ b/ALCF/data-lists/sunspot/arxiv.txt @@ -0,0 +1,100 @@ +0.0002583902668716813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0000_text_document +0.0002646575141232155 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0001_text_document +0.0003165521247456758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0002_text_document +0.0002920706460176214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0003_text_document +0.00028396813182810215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0004_text_document +0.00030445161883108107 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0005_text_document +0.00031628781276576474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0006_text_document +0.0003083776568189157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0007_text_document +0.0003176359471472902 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0008_text_document +0.0002536009369131698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0009_text_document +0.0003067491424681363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0010_text_document +0.0002597217257557784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0011_text_document +0.0003788556450109768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0012_text_document +0.0002796563272052598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0013_text_document +0.00033573826524290287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0014_text_document +0.00030523658022800287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0015_text_document +0.00032211552192240096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0016_text_document +0.0003329295675164247 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0017_text_document +0.0003101982186639862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0018_text_document +0.00032361798234223355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0019_text_document +0.0003495541581652915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0020_text_document +0.0002821637448858042 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0021_text_document +0.00030399523537629673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0022_text_document +0.0002955658968247219 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0023_text_document +0.00028942158502924254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0024_text_document +0.00028769546171490733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0025_text_document +0.0002938111057234182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0026_text_document +0.0002711150403010948 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0027_text_document +0.00031130095874747565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0028_text_document +0.0003002996118160777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0029_text_document +0.0003732757901604459 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0030_text_document +0.00026784205751795894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0031_text_document +0.0002799626521661984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0032_text_document +0.00034334276069078164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0033_text_document +0.0003582469803674965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0034_text_document +0.00031094844818418623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0035_text_document +0.0002766228384977191 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0036_text_document +0.00030297116159471485 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0037_text_document +0.00027033888377464685 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0038_text_document +0.00030090862368377933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0039_text_document +0.00028543875802490955 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0040_text_document +0.00027559768459074204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0041_text_document +0.0003182185533962886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0042_text_document +0.0003311392971435837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0043_text_document +0.00028751652060804325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0044_text_document +0.000303466863212589 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0045_text_document +0.00033400462801277524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0046_text_document +0.0002589234031777426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0047_text_document +0.0002913508598466723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0048_text_document +0.0002670572450004856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0049_text_document +0.00032027399105647656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0050_text_document +0.00032188376258379377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0051_text_document +0.0003161585784100882 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0052_text_document +0.0003184249182974135 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0053_text_document +0.00030381336664000807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0054_text_document +0.0003190437442184283 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0055_text_document +0.0002537961798200545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0056_text_document +0.0003017817117223326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0057_text_document +0.00028685268513240224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0058_text_document +0.00031265179094451165 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0059_text_document +0.00034708319096986816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0060_text_document +0.00026650837943080664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0061_text_document +0.00034588832248507335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0062_text_document +0.0002416982248399037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0063_text_document +0.0003089296918222243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0064_text_document +0.00029137184185700827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0065_text_document +0.00026464226846800774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0066_text_document +0.00030545397919456627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0067_text_document +0.0003206778460448875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0068_text_document +0.00030968971641110967 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0069_text_document +0.00023325653928600864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0070_text_document +0.00030526899198338555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0071_text_document +0.00035376719076633584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0072_text_document +0.000290224385981026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0073_text_document +0.000294650083382008 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0074_text_document +0.00028768858128616436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0075_text_document +0.00030856965235527843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0076_text_document +0.00030579942447879054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0077_text_document +0.0002863101084704357 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0078_text_document +0.0002870032092492213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0079_text_document +0.000264182727569885 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0080_text_document +0.0002974012367036449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0081_text_document +0.00032238412143059203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0082_text_document +0.00031683716893819036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0083_text_document +0.00031157434937617524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0084_text_document +0.0003411742735695989 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0085_text_document +0.00026778444816570715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0086_text_document +0.0003037045797275201 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0087_text_document +0.00027746114370081314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0088_text_document +0.00027148285946862043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0089_text_document +0.00028042950114678207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0090_text_document +0.0003235607816590721 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0091_text_document +0.0003086692227306295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0092_text_document +0.00033990349455148105 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0093_text_document +0.00030945053208470265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0094_text_document +0.00027309074552265303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0095_text_document +0.00028737393506316194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0096_text_document +0.0003098868328009879 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0097_text_document +0.0002614229162588409 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0098_text_document +0.0002884388407820923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/arxiv-0099_text_document diff --git a/ALCF/data-lists/sunspot/books.txt b/ALCF/data-lists/sunspot/books.txt new file mode 100644 index 0000000000..7aa37a00d2 --- /dev/null +++ 
b/ALCF/data-lists/sunspot/books.txt @@ -0,0 +1,3 @@ +0.0031025147279277244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/books-0000_text_document +0.003102019887362634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/books-0001_text_document +0.0009996745994661548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/books-0002_text_document diff --git a/ALCF/data-lists/sunspot/c4.txt b/ALCF/data-lists/sunspot/c4.txt new file mode 100644 index 0000000000..9504bcbfe6 --- /dev/null +++ b/ALCF/data-lists/sunspot/c4.txt @@ -0,0 +1,171 @@ +0.0002406272620255565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0000_text_document +0.0002404825539493424 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0001_text_document +0.00024062296575435581 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0002_text_document +0.00024069315766818953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0003_text_document +0.00024055829162263452 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0004_text_document +0.00024062053397343032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0005_text_document +0.0002410715545206964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0006_text_document +0.00024024881846087368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0007_text_document +0.0002407074700790688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0008_text_document +0.00024072141428809043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0009_text_document +0.00024027710230872736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0010_text_document +0.0002409111299205489 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0011_text_document +0.00024081954058275009 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0012_text_document +0.00024086076794990912 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0013_text_document +0.00024098672620832446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0014_text_document +0.00024068622303333862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0015_text_document +0.00024140627024291824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0016_text_document +0.0002414512033594384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0017_text_document +0.00024028742594941463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0018_text_document +0.00024018036089269645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0019_text_document +0.0002398347365034979 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0020_text_document +0.00024006780153485276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0021_text_document +0.00024015620270419213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0022_text_document +0.0002408848259695227 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0023_text_document +0.0002408023185278831 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0024_text_document +0.00024021196580140326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0025_text_document +0.00024077677271297493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0026_text_document +0.00024087392454668027 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0027_text_document +0.0002408071293824126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0028_text_document +0.00024042223828845715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0029_text_document +0.0002411484752360495 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0030_text_document +0.00023605263746465907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0031_text_document +0.00023471222158326908 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0032_text_document +0.00023432138580287644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0033_text_document +0.00023407385623382327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0034_text_document +0.00023487504174367091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0035_text_document +0.0002341843704976313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0036_text_document +0.00023421993170282486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0037_text_document +0.00023445057969132037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0038_text_document +0.0002337681680073047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0039_text_document +0.000234627964808109 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0040_text_document +0.0002338942211888584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0041_text_document +0.00023403849286843386 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0042_text_document +0.00023405641310796305 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0043_text_document +0.00023349169562397965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0044_text_document +0.00023381157386048856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0045_text_document +0.00023388742993790587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0046_text_document +0.00023363103829469813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0047_text_document +0.00023421141834630477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0048_text_document +0.00023420564352232565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0049_text_document +0.00023367463699173143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0050_text_document +0.00023344969163567033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0051_text_document +0.00023372196941547188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0052_text_document +0.00023399207645297834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0053_text_document +0.00023357915605505856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0054_text_document +0.00023337585642190864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0055_text_document +0.00023385005470157914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0056_text_document +0.00023301533534493465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0057_text_document +0.00023377864302541782 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0058_text_document +0.00023323745848621437 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0059_text_document +0.0002330594611151835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0060_text_document +0.0002334149675026783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0061_text_document +0.00023198945902291534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0062_text_document +0.00023023784834634142 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0063_text_document +0.00022985623060187217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0064_text_document +0.0002292605284569516 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0065_text_document +0.00022926593333048894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0066_text_document +0.00022922766406807777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0067_text_document +0.00022898153911167426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0068_text_document +0.0002292473111593315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0069_text_document +0.000228804579400424 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0070_text_document +0.00022865485613513526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0071_text_document +0.00022937426835887895 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0072_text_document +0.00022917388311587372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0073_text_document +0.0002291660582019043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0074_text_document +0.00022907895248360543 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0075_text_document +0.0002294617879920205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0076_text_document +0.0002290452150516566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0077_text_document +0.00022943405619715553 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0078_text_document +0.0002296271421006204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0079_text_document +0.00022854791372910372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0080_text_document +0.00022923123467686557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0081_text_document +0.00022852404355738494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0082_text_document +0.00022847798660086642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0083_text_document +0.0002289604586810316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0084_text_document +0.00022835479834950643 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0085_text_document +0.0002289149402884243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0086_text_document +0.00022806655474763446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0087_text_document +0.00022826296420992974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0088_text_document +0.00022906829636213627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0089_text_document +0.0002287628414466998 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0090_text_document +0.0002282673911253445 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0091_text_document +0.00022869309841939134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0092_text_document +0.0002281540116815451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0093_text_document +0.0002259755756162738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0094_text_document +0.00022562331285233504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0095_text_document +0.0002259061146106053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0096_text_document +0.00022567670836663787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0097_text_document +0.00022573165387587061 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0098_text_document +0.00022508514961670572 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0099_text_document +0.00022564642513773356 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0100_text_document +0.00022563088621998788 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0101_text_document +0.0002250438755373707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0102_text_document +0.00022524465346241134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0103_text_document +0.00022531737657666812 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0104_text_document +0.00022444687519363458 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0105_text_document +0.00022460397498596298 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0106_text_document +0.00022454218976501763 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0107_text_document +0.00022447528843671366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0108_text_document +0.00022501666332178926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0109_text_document +0.00022453752304377972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0110_text_document +0.00022484451871163002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0111_text_document +0.00022465678847154914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0112_text_document +0.00022453180917044732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0113_text_document +0.0002247278486823009 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0114_text_document +0.00022465794828242097 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0115_text_document +0.00022431000701925386 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0116_text_document +0.00022476020248460963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0117_text_document +0.00022467531771795015 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0118_text_document +0.0002236391309945234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0119_text_document +0.00022458764920536007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0120_text_document +0.00022430877426744415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0121_text_document +0.0002247047786127192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0122_text_document +0.0002245298090400035 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0123_text_document +0.0002245648831396188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0124_text_document +0.00022292894729820784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0125_text_document +0.00022236668082957533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0126_text_document +0.0002217622659895442 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0127_text_document +0.00022252452726732609 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0128_text_document +0.00022135333211363678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0129_text_document +0.0002214571757787971 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0130_text_document +0.0002217188139237798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0131_text_document +0.00022144214894640303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0132_text_document +0.00022100172806631854 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0133_text_document +0.00022156392409199052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0134_text_document +0.00022134830143710272 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0135_text_document +0.00022158598922529453 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0136_text_document +0.00022142932483041377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0137_text_document +0.00022120980907786554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0138_text_document +0.00022117917738112441 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0139_text_document +0.00022077089397851235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0140_text_document +0.00022093265074996711 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0141_text_document +0.00022091299741377004 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0142_text_document +0.0002205849150703338 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0143_text_document +0.0002210648204787979 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0144_text_document +0.0002214235747364102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0145_text_document +0.00022083907302221787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0146_text_document +0.0002206334237915964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0147_text_document +0.00022065193929912214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0148_text_document +0.00022079775597767288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0149_text_document +0.00022091492909963518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0150_text_document +0.00022095009987097293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0151_text_document +0.0002208150577180165 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0152_text_document +0.00022085759102772088 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0153_text_document +0.00022073789170129016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0154_text_document +0.00022049322781182384 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0155_text_document +0.00022083270617761285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0156_text_document +0.00021982452827473632 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0157_text_document +0.00021899870446514259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0158_text_document +0.00021890358773356361 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0159_text_document +0.00021875556609042841 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0160_text_document +0.00021861195987201226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0161_text_document +0.00021856782186167455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0162_text_document +0.00021912837771543515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0163_text_document +0.00021900213768517756 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0164_text_document +0.00021871675851390374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0165_text_document +0.0002180537056545586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0166_text_document +0.0002188196714327129 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0167_text_document +0.00021851362624523464 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0168_text_document +0.0002183236795498736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0169_text_document +7.291153618675672e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/c4-0170_text_document diff --git a/ALCF/data-lists/sunspot/cc.txt b/ALCF/data-lists/sunspot/cc.txt new file mode 
100644 index 0000000000..0a2a0ce35b --- /dev/null +++ b/ALCF/data-lists/sunspot/cc.txt @@ -0,0 +1,1108 @@ +0.0003742481815405742 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0000_text_document +0.00038204855962733055 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0001_text_document +0.00038821818392663593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0002_text_document +0.00038723332988783727 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0003_text_document +0.00038916141142149904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0004_text_document +0.00038049542523949033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0005_text_document +0.0003854755539534284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0006_text_document +0.00024202756466512517 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0007_text_document +0.0003915405155008087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0008_text_document +0.0003927382151931033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0009_text_document +0.0003839151202260479 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0010_text_document +0.00040006817468967907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0011_text_document +0.00040318965964443476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0012_text_document +0.0003831013019452741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0013_text_document +0.00039166638383204036 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0014_text_document +0.00039962784023961004 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0015_text_document +0.00039536707853602614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0016_text_document +0.0004204304698247758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0017_text_document +0.00041538899178693555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0018_text_document +0.00039186953333675306 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0019_text_document +0.00038945837196504305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0020_text_document +0.0003919951238929062 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0021_text_document +0.00044377065718528966 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0022_text_document +0.0004407759068603017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0023_text_document +0.0002487811895843715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0024_text_document +0.00039349432045556636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0025_text_document +0.00041223198559462343 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0026_text_document +0.0004036573014830213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0027_text_document +0.0003825982215521807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0028_text_document +0.00040386867133151386 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0029_text_document +0.00024460575279105167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0030_text_document +0.000269029789531335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0031_text_document +0.0003573757493252864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0032_text_document +0.0004600876681392076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0033_text_document +0.0002605354166397086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0034_text_document +0.0003882502452157999 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0035_text_document +0.0002466747612126512 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0036_text_document +0.0004024726105072402 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0037_text_document +0.00040820631128483644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0038_text_document +0.0002691094350403538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0039_text_document +0.00026916830387277267 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0040_text_document +0.0004204663297880574 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0041_text_document +0.00042379698687085554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0042_text_document +0.0004502169227311871 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0043_text_document +0.0002661708937015295 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0044_text_document +0.00031239486948031334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0045_text_document +0.0003109054589936201 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0046_text_document +0.00045873053079760646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0047_text_document +0.00022904931423244635 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0048_text_document +0.0003813462028433663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0049_text_document +0.00039188129256500874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0050_text_document +0.00045124222276983765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0051_text_document +0.00048138658436853695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0052_text_document +0.0003944178776279866 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0053_text_document +0.00039941569676754006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0054_text_document +0.00037952761190240494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0055_text_document +0.0003944870860881476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0056_text_document +0.0003891842411856621 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0057_text_document +0.000387688981934861 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0058_text_document +0.00039197953876258005 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0059_text_document +0.00039007915280311206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0060_text_document +0.0003995520363699188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0061_text_document +0.00039230985654592406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0062_text_document +0.0003929472067173851 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0063_text_document +0.0003924096172671473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0064_text_document +0.0003881636143629905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0065_text_document +0.000389790617937084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0066_text_document +0.00037351762309221023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0067_text_document +0.0003630196170929407 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0068_text_document +0.00033532465765142113 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0069_text_document +0.0003076088685761823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0070_text_document +0.00039463850897720803 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0071_text_document +0.0002843816115231449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0072_text_document +0.0002909175709416474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0073_text_document +0.00028867170997202486 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0074_text_document +0.0002838644617723659 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0075_text_document +0.00029027869525543416 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0076_text_document +0.0002821339567560056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0077_text_document +0.0002922988877045601 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0078_text_document +0.0002866955958315786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0079_text_document +0.0002865271754558126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0080_text_document +0.0002861247475618473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0081_text_document +0.0002826681072408606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0082_text_document +0.0002849746458282827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0083_text_document +0.0002816966633435316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0084_text_document +0.00026255342235948463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0085_text_document +0.0002552895098829678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0086_text_document +0.00025990194083107813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0087_text_document +0.0002524062657685835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0088_text_document +0.0002538577379748611 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0089_text_document +0.0002561415177406761 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0090_text_document +0.00026206253059694905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0091_text_document +0.00026168095406910565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0092_text_document +0.0002601305742008613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0093_text_document +0.00025200823006814814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0094_text_document +0.0003229951981263502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0095_text_document +0.00037289448266476045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0096_text_document +0.0003807825862179898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0097_text_document +0.0003616333738191483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0098_text_document +0.0003665117918907636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0099_text_document +0.0003684186453633228 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0100_text_document +0.0003589330610806066 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0101_text_document +0.00036383861418030395 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0102_text_document +0.000359841363355303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0103_text_document +0.00036431044063050464 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0104_text_document +0.0003668574090358279 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0105_text_document +0.000362768263620199 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0106_text_document +0.0003501888032771077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0107_text_document +0.000352401968221528 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0108_text_document +0.0003541019701869794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0109_text_document +0.0003628121865546891 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0110_text_document +0.0003752582953758773 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0111_text_document +0.00037902046230424966 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0112_text_document +0.0003777927146925147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0113_text_document +0.0003760676130509053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0114_text_document +0.00034046049078755405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0115_text_document +0.0003338847563259091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0116_text_document +0.00033294499102761794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0117_text_document +0.0004912026198265864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0118_text_document +0.00032064363474664014 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0119_text_document +0.00032154190389541214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0120_text_document +0.00032309660151746207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0121_text_document +0.00031181143365304544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0122_text_document +0.00031046092294569104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0123_text_document +0.00031150165249068046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0124_text_document +0.0003041314265988224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0125_text_document +0.0003024834909739394 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0126_text_document +0.0003019936835833604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0127_text_document +0.000292329665283177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0128_text_document +0.0002867061143144972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0129_text_document +0.00028443615610701707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0130_text_document +0.00028462291013755945 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0131_text_document +0.0002793538601205013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0132_text_document +0.00027306573977044246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0133_text_document +0.00027097155673336525 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0134_text_document +0.0002752934202112985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0135_text_document +0.00043042012694697647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0136_text_document +0.00047495648822986177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0137_text_document +0.00047755032493473855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0138_text_document +0.0004706974343933747 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0139_text_document +0.00046682163297771817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0140_text_document +0.0004616765425874178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0141_text_document +0.00030644496751628097 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0142_text_document +0.0002909492555358308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0143_text_document +0.00027272036068261724 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0144_text_document +0.0004101070217315588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0145_text_document +0.0003728914338834357 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0146_text_document +0.00036546911442305647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0147_text_document +0.0003669945482407483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0148_text_document +0.0003715902407424017 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0149_text_document +0.00035837486406683366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0150_text_document +0.0003573318538685469 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0151_text_document +0.0003553784893071916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0152_text_document +0.0004920659809912352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0153_text_document +0.0004533619411303183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0154_text_document +0.00045067066057818706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0155_text_document +0.00044396985139270645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0156_text_document +0.00043198288204468477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0157_text_document +0.00043005174223738454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0158_text_document +0.00041847118430776784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0159_text_document +0.00042952036375796664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0160_text_document +0.00043420594647324267 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0161_text_document +0.0003461123241053012 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0162_text_document +0.0003408581597849182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0163_text_document +0.00033172705422182547 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0164_text_document +0.0003392566490686136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0165_text_document +0.00033578341518385483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0166_text_document +0.0003439196710518844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0167_text_document +0.00034559163447085543 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0168_text_document +0.00033762478642902825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0169_text_document +0.00033215210055107224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0170_text_document +0.00033423579608014966 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0171_text_document +0.0004963355016025102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0172_text_document +0.0004996862761456923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0173_text_document +0.0005000551829325451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0174_text_document +0.0005004212610098755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0175_text_document +0.00027768695585500585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0176_text_document +0.00028395983854338433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0177_text_document +0.00027835826303062254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0178_text_document +0.0002740073176010804 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0179_text_document +0.0002791830529274016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0180_text_document +0.0002796863816194411 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0181_text_document +0.00026697453022672804 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0182_text_document +0.0002594197440280141 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0183_text_document +0.0003779565697649222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0184_text_document +0.00041835823476586606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0185_text_document +0.00043788493575265915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0186_text_document +0.0002731731970096006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0187_text_document +0.000276305847423402 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0188_text_document +0.0002704955773958623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0189_text_document +0.0002629635944827518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0190_text_document +0.000260070956974436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0191_text_document +0.00025661553791456334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0192_text_document +0.00025794727207576157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0193_text_document +0.00025295733980001527 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0194_text_document +0.0003788106407021029 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0195_text_document +0.0004882344027669431 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0196_text_document +0.0003275324309642705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0197_text_document +0.0004803401856640094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0198_text_document +0.00046720138323433943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0199_text_document +0.00043527810307095335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0200_text_document +0.00043905395741627827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0201_text_document +0.00048774175867331425 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0202_text_document +0.00048380704121346737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0203_text_document +0.0004779011848346118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0204_text_document +0.00046255587581908036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0205_text_document +0.00045127922880511576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0206_text_document +0.0004503891485256095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0207_text_document +0.0004450142332303422 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0208_text_document +0.00044630282482516654 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0209_text_document +0.00044325014465743616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0210_text_document +0.0004263874842796447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0211_text_document +0.0004217530913646938 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0212_text_document +0.000415120314341852 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0213_text_document +0.00040987168279144537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0214_text_document +0.00033468337266607834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0215_text_document +0.0003353094464683005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0216_text_document +0.0004833936821707294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0217_text_document +0.00047194878988920935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0218_text_document +0.0004648324126996427 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0219_text_document +0.0004562345003964941 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0220_text_document +0.0004933203505465098 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0221_text_document +0.0003530166075325466 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0222_text_document +0.00035368548192804685 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0223_text_document +0.0004872620828289663 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0224_text_document +0.00048293889392426456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0225_text_document +0.00047936768462267655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0226_text_document +0.00047821013991587545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0227_text_document +0.0004660610308564753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0228_text_document +0.000394683430103437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0229_text_document +0.00039165053441571324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0230_text_document +0.0003906936040164381 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0231_text_document +0.00038074803919159006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0232_text_document +0.0003686529291578143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0233_text_document +0.00035832920428870976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0234_text_document +0.00035929024535947033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0235_text_document +0.0003538226556050544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0236_text_document +0.0003584167868708799 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0237_text_document +0.0003480507542594234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0238_text_document +0.0003413709023543034 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0239_text_document +0.00034001304759361455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0240_text_document +0.00033430532902756514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0241_text_document +0.00046519252660631277 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0242_text_document +0.0002938876402514769 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0243_text_document +0.00028676090994509047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0244_text_document +0.00027296150117506716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0245_text_document +0.00026513502621960483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0246_text_document +0.0002680081327926125 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0247_text_document +0.00025831225828720344 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0248_text_document +0.00026647037295561 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0249_text_document +0.0002525733734572654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0250_text_document +0.00025831708887575375 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0251_text_document +0.00042487627444443476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0252_text_document +0.0004951213245023891 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0253_text_document +0.0004804051413177752 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0254_text_document +0.0004662397611340532 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0255_text_document +0.0004550138655253933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0256_text_document +0.00044494909122746795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0257_text_document +0.0002899112253051385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0258_text_document +0.0004372879736279761 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0259_text_document +0.0004529568099252922 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0260_text_document +0.00045127826158829573 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0261_text_document +0.0004436558176737439 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0262_text_document +0.0004419233237678378 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0263_text_document +0.000434589215880319 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0264_text_document +0.00029153613207706566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0265_text_document +0.0004312458058738854 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0266_text_document +0.00028741854968757313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0267_text_document +0.00046853200754421234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0268_text_document +0.0004949145252030074 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0269_text_document +0.00044459683920483167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0270_text_document +0.0003836095306696336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0271_text_document +0.0003789760237872398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0272_text_document +0.0003749227438304427 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0273_text_document +0.0003628558277173369 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_head-0274_text_document +0.00039468301394041474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0000_text_document +0.00038874701821614864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0001_text_document +0.0004158492456077867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0002_text_document +0.00042360504554060077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0003_text_document +0.00040386729844317623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0004_text_document +0.00027595096702902474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0005_text_document +0.00043638766787829135 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0006_text_document +0.0002218691596850179 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0007_text_document +0.0004437566108089954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0008_text_document +0.0003889996411609667 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0009_text_document +0.00043454421906537704 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0010_text_document +0.0004522564392830988 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0011_text_document +0.00041517835659357416 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0012_text_document +0.0002614360863446896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0013_text_document +0.00037543522111463596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0014_text_document +0.0004386190133514781 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0015_text_document +0.00046358333286115075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0016_text_document +0.00043186261317942404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0017_text_document +0.0002377581602097957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0018_text_document +0.00025973334085074254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0019_text_document +0.00040139099332000796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0020_text_document +0.00043674860686687174 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0021_text_document +0.00040853289309329373 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0022_text_document +0.000242910191729688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0023_text_document +0.0004431071731750582 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0024_text_document +0.0004388092670482523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0025_text_document +0.000381418866255965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0026_text_document +0.0004100117296419717 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0027_text_document +0.00042469230366022745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0028_text_document +0.00041744151905374254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0029_text_document +0.00022835699906752945 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0030_text_document +0.0004380161085387397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0031_text_document +0.00044803212381807456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0032_text_document +0.00040554932796137236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0033_text_document +0.0004234508646347761 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0034_text_document +0.00043341209652360653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0035_text_document +0.00023966604734537185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0036_text_document +0.000259165907316014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0037_text_document +0.0004270653021833602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0038_text_document +0.0004341547032162028 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0039_text_document +0.0004111478117275994 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0040_text_document +0.0004299383567984396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0041_text_document +0.0004241899124590779 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0042_text_document +0.0004502719349364145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0043_text_document +0.00038994621469645615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0044_text_document +0.0003859912398894952 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0045_text_document +0.0004247535950310557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0046_text_document +0.000386982084327716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0047_text_document +0.0004196451040053251 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0048_text_document +0.0004096278509782259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0049_text_document +0.0004373334932695721 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0050_text_document +0.0004180889975240641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0051_text_document +0.00042079636929672745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0052_text_document +0.00038063574611812913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0053_text_document +0.0003817505891515542 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0054_text_document +0.0004420096268860222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0055_text_document +0.00039182670726410623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0056_text_document +0.0003635667850372299 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0057_text_document +0.00041564996472055667 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0058_text_document +0.000400529358757286 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0059_text_document +0.0003939113874958451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0060_text_document +0.00039066622068940996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0061_text_document +0.0004290098538807143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0062_text_document +0.0004240739958197099 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0063_text_document +0.00040775392659215333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0064_text_document +0.0004091634200396925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0065_text_document +0.00042299190476617914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0066_text_document +0.0003701492680344151 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0067_text_document +0.0003807353844384635 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0068_text_document +0.00038813507771983156 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0069_text_document +0.00040072346558408346 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0070_text_document +0.0003603595180423597 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0071_text_document +0.00038799421353112465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0072_text_document +0.00037575235582264926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0073_text_document +0.0004239190342959713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0074_text_document +0.0004606044799136546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0075_text_document +0.00045107950652529253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0076_text_document +0.0004391947201871058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0077_text_document +0.0004457516661123035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0078_text_document +0.0004301297170991686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0079_text_document +0.00044661704164586694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0080_text_document +0.0004438849846114837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0081_text_document +0.0004444205734316823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0082_text_document +0.0004190924165303394 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0083_text_document +0.00043942581131677875 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0084_text_document +0.00021568459798090663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0085_text_document +0.0003814929225407199 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0086_text_document +0.0003217453179359235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0087_text_document +0.00031719591470267974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0088_text_document +0.00032434115726922137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0089_text_document +0.0004079911120371051 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0090_text_document +0.000329492766381148 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0091_text_document +0.0003845916162001633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0092_text_document +0.0003835208964390098 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0093_text_document +0.00037847334157173194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0094_text_document +0.00038296039903791865 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0095_text_document +0.00037896336828472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0096_text_document +0.00037620974396391355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0097_text_document +0.00037420590727111843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0098_text_document +0.000340490625886403 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0099_text_document +0.0003078314411035827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0100_text_document +0.00034153990750656097 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0101_text_document +0.0003308858103982067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0102_text_document +0.0003452640607156025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0103_text_document +0.00033095276418403455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0104_text_document +0.0003116308995860414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0105_text_document +0.00032446713226408477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0106_text_document +0.0003015816821912984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0107_text_document +0.00031612418775706894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0108_text_document +0.0003278516344971041 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0109_text_document +0.00033079446736097217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0110_text_document +0.00032278977146550837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0111_text_document +0.00032065272988207914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0112_text_document +0.0003936696452406576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0113_text_document +0.0003450109536627789 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0114_text_document +0.0003339787189919641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0115_text_document +0.0003284303856176974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0116_text_document +0.00033652677276843477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0117_text_document +0.0003257822443845694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0118_text_document +0.0003293985569149334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0119_text_document +0.0003310360260148262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0120_text_document +0.0003233770986418526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0121_text_document +0.0003172280092149422 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0122_text_document +0.0003160674744292835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0123_text_document +0.00030931090289598506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0124_text_document +0.0003093173886443107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0125_text_document +0.00033167847081104083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0126_text_document +0.00031131501311729723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0127_text_document +0.00031046608876279845 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0128_text_document +0.00030569235942207244 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0129_text_document +0.00030777943671285197 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0130_text_document +0.00029303314290956683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0131_text_document +0.0003045824546400205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0132_text_document +0.00030360880677729793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0133_text_document +0.00031646239964835433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0134_text_document +0.0003129122300603785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0135_text_document +0.00031060464956661433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0136_text_document +0.000311819032500067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0137_text_document +0.0002977872483902282 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0138_text_document +0.0003009448600922438 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0139_text_document +0.00028610292098537774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0140_text_document +0.0002988326876216654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0141_text_document +0.00028550828372819075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0142_text_document +0.0002830381750875739 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0143_text_document +0.0002848495855927156 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0144_text_document +0.0002856443760308144 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0145_text_document +0.00027442895344188584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0146_text_document +0.0002681160554049462 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0147_text_document +0.0003421482544126989 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0148_text_document +0.0004005872948449718 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0149_text_document +0.0003930123959320308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0150_text_document +0.0003867271832275778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0151_text_document +0.000380805140455254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0152_text_document +0.0003814769861947819 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0153_text_document +0.00038025170883282324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0154_text_document +0.0003738026647867475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0155_text_document +0.00018960856915036276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0156_text_document +0.0003697177501953134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0157_text_document +0.00036674194328136693 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0158_text_document +0.00036447406838697555 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0159_text_document +0.00036686410861101255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0160_text_document +0.00035915267825103423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0161_text_document +0.0003624758404026675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0162_text_document +0.0002822812140180794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0163_text_document +0.00030620512946920813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0164_text_document +0.000294249776520589 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0165_text_document +0.00030238536967523434 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0166_text_document +0.00029509593361580754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0167_text_document +0.0002906912701830899 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0168_text_document +0.0002921944165474959 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0169_text_document +0.00028358919691127954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0170_text_document +0.0002813182772323272 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0171_text_document +0.00027442640800299205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0172_text_document +0.0002747820342933984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0173_text_document +0.0002747584403979717 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0174_text_document +0.00027499129634862444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0175_text_document +0.0002712050404257197 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0176_text_document +0.0002616256943143254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0177_text_document +0.00026769938929002815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0178_text_document +0.00038396081322727017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0179_text_document +0.0003863140490027991 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0180_text_document +0.00037702277513203237 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0181_text_document +0.0003633274156107032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0182_text_document +0.0003587473889240435 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0183_text_document +0.0003507672084278415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0184_text_document +0.00033776425499780385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0185_text_document +0.0003377914127574796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0186_text_document +0.00032948015659161326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0187_text_document +0.00033245638541392985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0188_text_document +0.00031080707640648695 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0189_text_document +0.0002976903331149755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0190_text_document +0.0002965121463725523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0191_text_document +0.0002933849695266647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0192_text_document +0.0002837035078508233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0193_text_document +0.00028684569079589323 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0194_text_document +0.0003145192320802359 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0195_text_document +0.0003566937253273515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0196_text_document +0.0003470199109592918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0197_text_document +0.0003060245312041868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0198_text_document +0.0002650817213818789 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0199_text_document +0.0002643604938780134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0200_text_document +0.000299350876031416 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0201_text_document +0.0003178540797697938 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0202_text_document +0.000271850367887767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0203_text_document +0.00031349896596549 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0204_text_document +0.00031749734412765755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0205_text_document +0.0003791137842391209 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0206_text_document +0.0003742334169957992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0207_text_document +0.0003705639757351107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0208_text_document +0.0003126986769797042 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0209_text_document +0.00031038132814561196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0210_text_document +0.00036464437173804883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0211_text_document +0.0003569480488951322 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0212_text_document +0.0003541239221619106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0213_text_document +0.00035315297411308053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0214_text_document +0.0003572451925404141 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0215_text_document +0.0003514986129411253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0216_text_document +0.0003521798298425866 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0217_text_document +0.00034553677439244716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0218_text_document +0.000349004719809412 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0219_text_document +0.0003468247484872769 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0220_text_document +0.0003465822608356558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0221_text_document +0.00035410983132162007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0222_text_document +0.0003487908354969444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0223_text_document +0.0003479024763238147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0224_text_document +0.000341412530646823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0225_text_document +0.00034451316273667034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0226_text_document +0.0002618849993484869 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0227_text_document +0.00026788679978901144 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0228_text_document +0.00027450670773227214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0229_text_document +0.0002661273129899329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0230_text_document +0.00026836569676402957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0231_text_document +0.00026155876975483236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0232_text_document +0.0002609276830117151 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0233_text_document +0.0002644161630512771 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0234_text_document +0.00036789208972872557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0235_text_document +0.00037829849439990513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0236_text_document +0.0003788894943523098 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0237_text_document +0.0003617207777959397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0238_text_document +0.0002541334487248998 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0240_text_document +0.0002707945538071073 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0241_text_document +0.00027046282716455214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0242_text_document +0.0002652443167243215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0243_text_document +0.0002685859923850986 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0244_text_document +0.00025734961751176414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0245_text_document +0.000259041720872915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0246_text_document +0.00025340107274823446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0247_text_document +0.00025757135121837893 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0248_text_document +0.00025617700500574084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0249_text_document +0.0002566931670562857 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0250_text_document +0.0002543871190716101 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0251_text_document +0.00024997565589481713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0252_text_document +0.0002954079779456287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0253_text_document +0.00034890741135252835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0254_text_document +0.0003473298137731525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0255_text_document +0.0003296959618486435 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0256_text_document +0.0003304520061604598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0257_text_document +0.00032377956175729824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0258_text_document +0.00031700696295168713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0259_text_document +0.0003060382346081943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0260_text_document +0.0003012003005056863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0261_text_document +0.0002981074073993884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0262_text_document +0.0002922128825950705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0263_text_document +0.000348901087722931 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0264_text_document +0.0003408286289467841 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0265_text_document +0.0003410649680770183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0266_text_document +0.0003358524215576502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0267_text_document +0.0003343661874989231 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0268_text_document +0.00032810573699389156 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0269_text_document +0.00032261449539097497 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0270_text_document +0.0003162694866049203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0271_text_document +0.0003158381156468853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0272_text_document +0.000317376061083603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0273_text_document +0.0003125788639953052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0274_text_document +0.0003010105041885602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0275_text_document +0.0003065865059090678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0276_text_document +0.0003084275726508053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0277_text_document +0.00030966560718296085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0278_text_document +0.0002957728057853081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0279_text_document +0.00029904164542325336 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0280_text_document +0.0002955358888729187 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0281_text_document +0.00028692976446931544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0282_text_document +0.0002923476214935797 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0283_text_document +0.0002893691697212419 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0284_text_document +0.0002855895211981585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0285_text_document +0.00027968347097626246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0286_text_document +0.0002810783462604979 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0287_text_document +0.00027794080455729715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0288_text_document +0.00034784376461416953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0289_text_document +0.0003488347959010943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0290_text_document +0.00034790583710250724 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0291_text_document +0.000345913166618151 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0292_text_document +0.00033801936268066675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0293_text_document +0.0003290591130212315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0294_text_document +0.00034051399521366823 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0295_text_document +0.00032470943131841784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0296_text_document +0.00031679540050914276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0297_text_document +0.00031814596342422325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0298_text_document +0.0003156466289485036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0299_text_document +0.00029985010879003633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0300_text_document +0.0002905176377776361 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0301_text_document +0.0004206836775460856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0302_text_document +0.00020660449162246918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0303_text_document +0.0003461727254468087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0304_text_document +0.00020592870907067763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0305_text_document +0.00034173505299233005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0306_text_document +0.0004052437256652738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0307_text_document +0.0004080650901351697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0308_text_document +0.00039778184149144276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0309_text_document +0.00039046311464950275 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0310_text_document +0.00039043444911071384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0311_text_document +0.000388575704932843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0312_text_document +0.00019737533145666597 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0313_text_document +0.00037610755595812403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0314_text_document +0.00037315400127598317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0315_text_document +0.00037415028580922163 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0316_text_document +0.00036694041707212337 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0317_text_document +0.00018947219857306515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0318_text_document +0.00037046050826533545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0319_text_document +0.0003587440768559087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0320_text_document +0.00034623936498708903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0321_text_document +0.0003502289592617922 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0322_text_document +0.00034692398063649823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0323_text_document +0.000339340809421849 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0324_text_document +0.0003360510394816983 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0325_text_document +0.0003354673850814145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0326_text_document +0.00032937682875877047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0327_text_document +0.00032844505049317715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0328_text_document +0.00028287199339908627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0329_text_document +0.0002795217197003578 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0330_text_document +0.00028048955601883463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0331_text_document +0.0002769326396439027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0332_text_document +0.0002727090021299243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0333_text_document +0.0002726577841024554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0334_text_document +0.00026663619593455374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0335_text_document +0.00026068042672138127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0336_text_document +0.0002637704114326801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0337_text_document +0.0002593043567100412 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0338_text_document +0.0002599897110113453 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0339_text_document +0.0002435078682758859 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0340_text_document +0.0002450530071379054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0341_text_document +0.00024233331983743606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0342_text_document +0.0002934750947999535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0343_text_document +0.00033241226364044474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0344_text_document +0.00032938406090272075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0345_text_document +0.00032778705403953246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0346_text_document +0.00032184551480398754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0347_text_document +0.00031874002264945737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0348_text_document +0.0003165319685666433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0349_text_document +0.00031307071173376295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0350_text_document +0.00031119524184911957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0351_text_document +0.0003102253344576429 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0352_text_document +0.0003088976240383192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0353_text_document +0.0002951410823077708 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0354_text_document +0.00029772657676757413 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0355_text_document +0.0003056048989909935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0356_text_document +0.00031991305381648026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0357_text_document +0.00030890256978362426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0358_text_document +0.0003109382904091933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0359_text_document +0.00031035798529690644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0360_text_document +0.00030741666395911753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0361_text_document +0.0002989918594861846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0362_text_document +0.00029569635443989434 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0363_text_document +0.0002973992445667285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0364_text_document +0.000293397351001072 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0365_text_document +0.00028737817438047954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0366_text_document +0.00028252738144009747 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0367_text_document +0.0002805511898623541 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0368_text_document +0.0003718020784620472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0369_text_document +0.0003499713845765235 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0370_text_document +0.00034283547445326676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0371_text_document +0.00031464759888838765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0372_text_document +0.00033188946446414833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0373_text_document +0.000326084432195463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0374_text_document +0.0003764568303917893 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0375_text_document +0.0003604955598858414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0376_text_document +0.0003655654554133222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0377_text_document +0.00035762304033750504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0378_text_document +0.00038478883950347103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_middle-0379_text_document +0.00027735714341247454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0000_text_document +0.00028139534607773563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0001_text_document +0.00019777292251713763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0002_text_document +0.000285571704874486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0003_text_document +0.00028543482146244363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0004_text_document +0.00019434234484256758 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0005_text_document +0.00027854908176986763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0006_text_document +0.0002847068039566143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0007_text_document +0.00028672356943064853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0008_text_document +0.00027782687605808177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0009_text_document +0.0002843539634105203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0010_text_document +0.0002894748379090401 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0011_text_document +0.0002868852440186493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0012_text_document +0.0002818504885373851 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0013_text_document +0.00028680112812941034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0014_text_document +0.00019258978168723977 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0015_text_document +0.00028760637934715155 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0016_text_document +0.0002820439443912918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0017_text_document +0.0002831001054410018 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0018_text_document +0.00029001901552467397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0019_text_document +0.00027779449377883156 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0020_text_document +0.00019949837437516796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0021_text_document +0.0002907306472984446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0022_text_document +0.00027814858381318327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0023_text_document +0.00019472790889161432 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0024_text_document +0.00020472626596924125 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0025_text_document +0.0002870045081974301 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0026_text_document +0.00019812241927078482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0027_text_document +0.0002817553333369554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0028_text_document +0.00027829782796642117 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0029_text_document +0.00028289431732284113 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0030_text_document +0.0002795526296717729 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0031_text_document +0.00027682829988044574 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0032_text_document +0.0002895432402719184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0033_text_document +0.0002823174903941811 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0034_text_document +0.00028170972351837796 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0035_text_document +0.00027807915877838826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0036_text_document +0.00028588515681452956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0037_text_document +0.00028112324090816726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0038_text_document +0.00020636178289985485 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0039_text_document +0.00019447255290980535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0040_text_document +0.0002850824220591452 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0041_text_document +0.00027856429520116784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0042_text_document +0.0002820880676635633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0043_text_document +0.00028943902215995714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0044_text_document +0.0002676366291085329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0045_text_document +0.00023806333809954687 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0046_text_document +0.00024526460430233455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0047_text_document +0.00023876876664622726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0048_text_document +0.00023379770334179805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0049_text_document +0.00024175151269138382 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0050_text_document +0.00023386583242595706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0051_text_document +0.00023771797150160827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0052_text_document +0.0002262748967483896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0053_text_document +0.0002408148346432682 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0054_text_document +0.00023398651720444235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0055_text_document +0.00022989433874474592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0056_text_document +0.00023948500543957772 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0057_text_document +0.0002331594076859196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0058_text_document +0.00023375132439600242 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0059_text_document +0.00023923410909668642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0060_text_document +0.00023952796315562954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0061_text_document +0.0002327466076905069 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0062_text_document +0.00023082758956797212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0063_text_document +0.0002240509275524448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0064_text_document +0.00022798879995765268 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0065_text_document +0.000221172516774386 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0066_text_document +0.00021767045123534623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0067_text_document +0.00021982832794804484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0068_text_document +0.00021971626543789102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0069_text_document +0.00022566565206920132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0070_text_document +0.0002181984894194856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0071_text_document +0.00021831417549554653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0072_text_document +0.00021601405421187145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0073_text_document +0.00022275733725519607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0074_text_document +0.00021847734911973986 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0075_text_document +0.0002243591012664014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0076_text_document +0.00021688758139483833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0077_text_document +0.0002182953624789215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0078_text_document +0.00020475155724026002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0079_text_document +0.00021498078062960065 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0080_text_document +0.0002157914337233064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0081_text_document +0.00021781838494967963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0082_text_document +0.00021723242266814558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0083_text_document +0.0002176782686553837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0084_text_document +0.0003486179404943968 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0085_text_document +0.00034882846352857634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0086_text_document +0.00031400868448352596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0087_text_document +0.00030273484020011963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0088_text_document +0.00029895889118145404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0089_text_document +0.00029770764609621714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0090_text_document +0.0002990181332116852 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0091_text_document +0.00029653733972285996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0092_text_document +0.00029624649222942476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0093_text_document +0.00029625609720203576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0094_text_document +0.00029731928930852147 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0095_text_document +0.00029011721326148513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0096_text_document +0.00028849788197494655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0097_text_document +0.00021601278623858145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0098_text_document +0.00021319599281739178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0099_text_document +0.0002153325290600083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0100_text_document +0.00018566946174516558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0101_text_document +0.00020736824394291617 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0102_text_document +0.00020857419820128004 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0103_text_document +0.00020058526129536423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0104_text_document +0.00020745812166665217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0105_text_document +0.00020652171015271702 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0106_text_document +0.00020643808911278608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0107_text_document +0.00020040513914482103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0108_text_document +0.00020598050188272898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0109_text_document +0.0001969184139343296 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0110_text_document +0.0001972748812937012 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0111_text_document +0.0002038556751586195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0112_text_document +0.00020245186011313464 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0113_text_document +0.00019950381422038783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0114_text_document +0.00020837055459665258 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0115_text_document +0.00020371856218246096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0116_text_document +0.00019537612301625791 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0117_text_document +0.00019914984508813857 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0118_text_document +0.0002053787713691309 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0119_text_document +0.00019082100541008637 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0120_text_document +0.00020397153334531813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0121_text_document +0.0002021462693077317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0122_text_document +0.00019609357008124035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0123_text_document +0.00019693256622486236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0124_text_document +0.00020007239732428112 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0125_text_document +0.00020467075741591954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0126_text_document +0.00019584883400022932 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0127_text_document +0.00019135050391176972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0128_text_document +0.0003362829834208298 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0129_text_document +0.00034013691154784095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0130_text_document +0.00033215887031941976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0131_text_document +0.00032681189065396707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0132_text_document +0.0003149138485493094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0133_text_document +0.00030179177307540077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0134_text_document +0.0002923278437581119 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0135_text_document +0.00029470052278994486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0136_text_document +0.0002994095093045731 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0137_text_document +0.00029033525096085037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0138_text_document +0.00029390798852496565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0139_text_document +0.0002916230924130842 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0140_text_document +0.00029419886374594913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0141_text_document +0.0002865469756730764 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0142_text_document +0.00021191292549942086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0143_text_document +0.00021369664817409847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0144_text_document +0.00021612485624266726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0145_text_document +0.00022242192634588478 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0146_text_document +0.00014605095659989698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0147_text_document +0.00022070626106341693 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0148_text_document +0.0002174420774054071 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0149_text_document +0.00021325858963116995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0150_text_document +0.0002124322999488052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0151_text_document +0.0002081218896969054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0152_text_document +0.0002108710211556957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0154_text_document +0.00020686867095978426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0155_text_document +0.00020895752681041895 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0156_text_document +0.00020741922266415738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0157_text_document +0.0002069112657197308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0158_text_document +0.00020644627473468118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0159_text_document +0.00020332991338121604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0160_text_document +0.0003560895677789848 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0161_text_document +0.00032915779111908214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0162_text_document +0.00033810613317040864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0163_text_document +0.00033729626594036923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0164_text_document +0.00033550342864602944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0165_text_document +0.00034173474024556906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0166_text_document +0.000331505340748827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0167_text_document +0.0003270050330117195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0168_text_document +0.00032585275329172556 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0169_text_document +0.0003143383203190604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0170_text_document +0.00031655199110388894 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0171_text_document +0.00030738872158476413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0172_text_document +0.00030838388352699285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0173_text_document +0.0003053596995351888 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0174_text_document +0.00031836304739584593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0175_text_document +0.000315315435873905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0176_text_document +0.0003087116248965243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0177_text_document +0.00030396790625537645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0178_text_document +0.0003335812246032149 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0179_text_document +0.00034570956323095843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0180_text_document +0.00034563035636675786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0181_text_document +0.00033411265479076335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0182_text_document +0.00034439191141692787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0183_text_document +0.0003364483125496565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0184_text_document +0.0003299500453608033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0185_text_document +0.00033163377700074837 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0186_text_document +0.00032638649660627673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0187_text_document +0.00032616167939645234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0188_text_document +0.0003205289298760723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0189_text_document +0.00031939393740815355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0190_text_document +0.00031593164066731296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0191_text_document +0.00031928871111254405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0192_text_document +0.00029670189073175004 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0193_text_document +0.00020517703846735904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0194_text_document +0.00020128418186172073 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0195_text_document +0.00019662723895606717 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0196_text_document +0.0001981157042081407 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0197_text_document +0.00019703489037041608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0198_text_document +0.00019079796331785068 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0199_text_document +0.0001909352306690079 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0200_text_document +0.00018824662295261396 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0201_text_document +0.00019864275319325954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0202_text_document +0.00018818516521649587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0203_text_document +0.00018875694972812844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0204_text_document +0.00018231621170645482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0205_text_document +0.00018349407845798273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0206_text_document +0.00018088971427746906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0207_text_document +0.00018296284236327237 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0208_text_document +0.0001876011825819916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0209_text_document +0.000329052068725176 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0210_text_document +0.00032223616273648536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0211_text_document +0.00031272564089633955 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0212_text_document +0.00031621609908414494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0213_text_document +0.0003117213560911235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0214_text_document +0.00030218064069945934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0215_text_document +0.00030658916600512085 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0216_text_document +0.0002915863534115821 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0217_text_document +0.0002940280138374372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0218_text_document +0.00029067860468866085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0219_text_document +0.00028529228063135635 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0220_text_document +0.00028336893301452256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0221_text_document +0.0002794668089130099 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0222_text_document +0.00021681361378827842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0223_text_document +0.0001484664674497246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0224_text_document +0.00021950558378215133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0225_text_document +0.00021806860758808645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0226_text_document +0.00021819568718852282 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0227_text_document +0.00021626925931585001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0228_text_document +0.0001464536143077762 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0229_text_document +0.00021432777088808917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0230_text_document +0.000213473805865147 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0231_text_document +0.00021397067253964538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0232_text_document +0.00020758957647437263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0233_text_document +0.00020687124337683314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0234_text_document +0.00020630057046511005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0235_text_document +0.0002091166859352538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0236_text_document +0.00020777355025615267 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0237_text_document +0.00020709287641496176 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0238_text_document +0.00020736464660577094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0239_text_document +0.00020062246741862607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0240_text_document +0.00020693207561942915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0241_text_document +0.00021151004871893024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0242_text_document +0.00019930249098689716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0243_text_document +0.00021589710041231824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0244_text_document +0.00021369204789905741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0245_text_document +0.0002147099923936778 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0246_text_document +0.00021077531190389536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0247_text_document +0.0002100509829113836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0248_text_document +0.00021185362601571124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0249_text_document +0.00020722136637339565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0250_text_document +0.00020300093701169531 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0251_text_document +0.00019859737993313477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0252_text_document +0.00019971314372100164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0253_text_document +0.00019549908270269278 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0254_text_document +0.00019649820843534028 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0255_text_document +0.00019619415513498067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0256_text_document +0.00019493006120377898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0257_text_document +0.00019499409035775506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0258_text_document +0.00019252988593634277 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0259_text_document +0.00019440768268686405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0260_text_document +0.00018747161324755577 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0261_text_document +0.0001879575932372779 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0262_text_document +0.00019040707058357506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0263_text_document +0.0001871931095090703 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0264_text_document +0.00020112966223017096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0265_text_document +0.00020516878165311017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0266_text_document +0.00020664735191740533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0267_text_document +0.00021041398572882962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0268_text_document +0.00020397992929690396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0269_text_document +0.0002039978580295561 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0270_text_document +0.00020592785601142126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0271_text_document +0.0001990755527445265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0272_text_document +0.00019729564847798732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0273_text_document +0.00019958182230527032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0274_text_document +0.0001985037302636386 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0275_text_document +0.00020204130355115716 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0276_text_document +0.0002000296401958085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0277_text_document +0.0001983064832295463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0278_text_document +0.00019663108484195617 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0279_text_document +0.00019510678560556523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0280_text_document +0.0001873284057063206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0281_text_document +0.00019311553072495885 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0282_text_document +0.00034652137288816547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0283_text_document +0.0002813690318850024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0284_text_document +0.00027697649713138685 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0285_text_document +0.0002755419092534421 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0286_text_document +0.0002681583054440219 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0287_text_document +0.00026945753192750824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0288_text_document +0.00026169470768245737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0289_text_document +0.00026437008960810825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0290_text_document +0.0002637294838228 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0291_text_document +0.00026491867965088836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0292_text_document +0.00025504483625138986 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0293_text_document +0.0002545040623796586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0294_text_document +0.0002546682814073622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0295_text_document +0.00025545439487142615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0296_text_document +0.0002626896557978271 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0297_text_document +0.00025092040940402784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0298_text_document +0.0002589154885863872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0299_text_document +0.00024106160482721467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0300_text_document +0.0002483289690087987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0301_text_document +0.0002388930282784437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0302_text_document +0.00024006340759273874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0303_text_document +0.00023765248178029045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0304_text_document +0.00023061351965578936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0305_text_document +0.00024954224883546477 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0306_text_document +0.00017861017233018525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0307_text_document +0.00017810832743667658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0308_text_document +0.00017599709170759497 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0309_text_document +0.00017462723516505223 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0310_text_document +0.0002906316527068669 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0311_text_document +0.00033762141066247166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0312_text_document +0.00017170670574152494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0313_text_document +0.00017258674515137717 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0314_text_document +0.0002815386173173926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0315_text_document +0.0002996845935618989 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0316_text_document +0.0002735268488987296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0317_text_document +0.0002971738713071517 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0318_text_document +0.0002942690674002763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0319_text_document +0.0003322222207729567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0320_text_document +0.0003378721656198464 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0321_text_document +0.00018307262621851067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0322_text_document +0.00033956081502775057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0323_text_document +0.00031604820927876276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0324_text_document +0.00028805657681088917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0325_text_document +0.00026312293321215633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0326_text_document +0.00034366936722921455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0327_text_document +0.0002865256504406559 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0328_text_document +0.0003063615195861786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0329_text_document +0.00028412791619666136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0330_text_document +0.00028060835132727154 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0331_text_document +0.00032544974761560506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0332_text_document +0.0002647177833217225 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0333_text_document +0.0003152621884896575 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0334_text_document +0.0003054625140336913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0335_text_document +0.00031183308312292263 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0336_text_document +0.00018175026696621178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0337_text_document +0.00017699918328872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0338_text_document +0.00018222339261441908 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0339_text_document +0.00018348005930964137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0340_text_document +0.0001810735993810541 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0341_text_document +0.00030846441282038914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0342_text_document +0.0002972326889310354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0343_text_document +0.00017433421318235594 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0344_text_document +0.00032799458649525895 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0345_text_document +0.00032482130048512673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0346_text_document +0.00031943465668672475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0347_text_document +0.00029615593630484517 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0348_text_document +0.0002893126939511001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0349_text_document +0.0002849288351723284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0350_text_document +0.00028383906633569267 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0351_text_document +0.00028072526091262615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0352_text_document +0.000284239564292377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0353_text_document +0.0002778903109432523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0354_text_document +0.0002771644389501471 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0355_text_document +0.0002733316182319337 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0356_text_document +0.00026362539185869363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0357_text_document +0.0002636325383220217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0358_text_document +0.00026740622442302886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0359_text_document +0.0002646771971853427 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0360_text_document +0.0002628566720605389 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0361_text_document +0.0002644760695434766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0362_text_document +0.0002623837702310999 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0363_text_document +0.00026088722976772894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0364_text_document +0.0002567065374799158 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0365_text_document +0.00018857382101207726 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0366_text_document +0.00019036580399817203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0367_text_document +0.00018348828065261222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0368_text_document +0.00018491851780345073 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0369_text_document +0.00018904887260080187 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0370_text_document +0.0001875609304251801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0371_text_document +0.00018393034720015817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0372_text_document +0.00018419795526114903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0373_text_document +0.00018699955623404795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0374_text_document +0.00018276256902965128 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0375_text_document +0.00017698045695190812 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0376_text_document +0.00018104650132303642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0377_text_document +0.00017758206731279688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0378_text_document +0.00017131402995103497 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0379_text_document +0.000175944428350446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0380_text_document +0.0003416745727147391 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0381_text_document +0.0003163259373952889 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0382_text_document +0.0002804489269172448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0383_text_document +0.00028748272397403175 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0384_text_document +0.00027603318345630605 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0385_text_document +0.000271638824679648 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0386_text_document +0.0002763761210210942 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0387_text_document +0.00026501984873172717 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0388_text_document +0.00026422486894694714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0389_text_document +0.0002686339100849262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0390_text_document +0.0002610837453940606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0391_text_document +0.000260974343729353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0392_text_document +0.0002599403837029134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0393_text_document +0.0002937273113238609 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0394_text_document +0.0003341790732600504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0395_text_document +0.0002620661576600244 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0396_text_document +0.0003027929169239288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0397_text_document +0.00031944039129326894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0398_text_document +0.00019025676304139009 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0399_text_document +0.00018680910145009907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0400_text_document +0.00034215840419416437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0401_text_document +0.00018618120812119364 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0402_text_document +0.00018605853095599425 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0403_text_document +0.00018120712626096538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0404_text_document +0.00018315079292495327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0405_text_document +0.00018362556449041974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0406_text_document +0.0001780024456718171 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0407_text_document +0.00033296526436178697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0408_text_document +0.0001802398632282846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0409_text_document +0.00017340263100798256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0410_text_document +0.00017755840547238697 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0411_text_document +0.00018419413735260606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0412_text_document +0.00017869518174591322 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0413_text_document +0.00017526271460129484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0414_text_document +0.00017852168597981907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0415_text_document +0.00017566536156787157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0416_text_document +0.00017589867964432936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0417_text_document +0.00017831487394075305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0418_text_document +0.00017837310528935862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0419_text_document +0.00018200908814216548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0420_text_document +0.0001795136627511612 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0421_text_document +0.0003414021775300033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0422_text_document +0.00017177291787788502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0423_text_document +0.0003441900648571877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0424_text_document +0.0003394534597060673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0425_text_document +0.0003236887233114832 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0426_text_document +0.0001639544129688747 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0427_text_document +0.00019137443753211255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0428_text_document +0.00018575146284680153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0429_text_document +0.00019184792863440243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0430_text_document +0.00018966043065679055 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0431_text_document +0.00017968851317035848 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0432_text_document +0.00018479881897661546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0433_text_document +0.0001813642692683015 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0434_text_document +0.0001686449798983066 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0435_text_document +0.00018516104592230446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0436_text_document +0.00031283726601066385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0437_text_document +0.0003248607542883853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0438_text_document +0.00031583241601202365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0439_text_document +0.00031238270857730376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0440_text_document +0.000307150592403979 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0441_text_document +0.00029443829986847044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0442_text_document +0.0002942723732234677 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0443_text_document +0.00023514930666443422 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_en_tail-0444_text_document +0.0020776328951453444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_head-0000_text_document +0.0021768234410538883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_head-0001_text_document +0.002106973549276289 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_head-0002_text_document +0.002110915756171751 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_head-0003_text_document +0.0017032382109816464 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_head-0004_text_document +0.0019047944877712286 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_middle-0000_text_document +0.0019402711744016077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_middle-0001_text_document +0.0006264790011223686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_middle-0002_text_document +0.0017885401938106643 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/cc_news_tail-0000_text_document + diff --git a/ALCF/data-lists/sunspot/data_file_list_books.txt b/ALCF/data-lists/sunspot/data_file_list_books.txt deleted file mode 100644 index 9187565a5e..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_books.txt +++ /dev/null @@ -1,3 +0,0 @@ -0.0031007020167215667 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/books-0000_text_document -0.003100207465277759 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/books-0001_text_document -0.000999090518000674 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/books-0002_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_c4.txt b/ALCF/data-lists/sunspot/data_file_list_c4.txt deleted file mode 100644 index ca7df1839e..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_c4.txt +++ /dev/null @@ -1,86 +0,0 @@ -0.0011545953050729803 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0000_text_document -0.0011570295715413383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0001_text_document -0.001156438391210766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0002_text_document -0.0011556820995190797 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0003_text_document -0.001156780334924253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0004_text_document -0.0011563528368937514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0005_text_document -0.0011574632716369762 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0006_text_document -0.0011577445131424494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0007_text_document -0.0011599182963630329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0008_text_document -0.0011550792360663698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0009_text_document -0.001154948574643344 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0010_text_document -0.0011560157369398198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0011_text_document -0.0011551344387810997 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0012_text_document -0.0011586914190552 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0013_text_document -0.00115559584811127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0014_text_document -0.0011562917764239204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0015_text_document -0.0011582019252872318 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0016_text_document -0.0011585605528399534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0017_text_document -0.0011567600261132287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0018_text_document -0.0011561323235067436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0019_text_document -0.0011568948157687324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0020_text_document -0.0011562184926986983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0021_text_document -0.001155171968076667 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0022_text_document -0.001156245876059478 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0023_text_document -0.0011591826911770261 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0024_text_document -0.0011564400126070828 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0025_text_document -0.0011571005158517765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0026_text_document -0.0011560050453907214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0027_text_document -0.0011559074476966407 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0028_text_document -0.0011567638698290205 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0029_text_document -0.0011558972055942165 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0030_text_document -0.001157532269673901 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0031_text_document -0.0011559883017581377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0032_text_document -0.001155556362078353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0033_text_document -0.0011544735837522018 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0034_text_document -0.0011547315955415466 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0035_text_document -0.0011570980852521353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0036_text_document -0.0011562552591307868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0037_text_document -0.001156640315842092 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0038_text_document -0.0011587257748187634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0039_text_document -0.0011563083526351268 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0040_text_document -0.0011554464046007336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0041_text_document -0.001155442922136426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0042_text_document -0.0011557081619451221 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0043_text_document -0.001156421357082161 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0044_text_document -0.0011562730825316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0045_text_document -0.001157525507046117 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0046_text_document -0.0011552936629887162 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0047_text_document -0.0011578959437852875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0048_text_document -0.0011568910557636293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0049_text_document -0.0011578444955946039 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0050_text_document -0.001157076096248001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0051_text_document -0.0011568459536403974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0052_text_document -0.0011555352450605598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0053_text_document -0.0011557650508322967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0054_text_document -0.0011567625802857914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0055_text_document -0.0011568533734967437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0056_text_document -0.0011562185375437102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0057_text_document -0.0011558740426473278 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0058_text_document -0.0011549825990520978 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0059_text_document -0.0011572314079774744 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0060_text_document -0.0011576031815962752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0061_text_document -0.0011567937670018521 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0062_text_document -0.001154956951193276 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0063_text_document -0.001157226898064118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0064_text_document -0.001156096958730414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0065_text_document -0.001155844223704128 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0066_text_document -0.0011571187084765205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0067_text_document -0.0011573954893981501 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0068_text_document -0.0011566700251641518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0069_text_document -0.0011550051959552815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0070_text_document -0.0011559629359246125 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0071_text_document -0.001157971629210032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0072_text_document -0.0011561725903411443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0073_text_document -0.001157160385935682 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0074_text_document -0.0011568864860569239 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0075_text_document -0.0011576433208715313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0076_text_document -0.0011571382379808948 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0077_text_document -0.0011590178523739284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0078_text_document -0.001156347684201892 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0079_text_document -0.0011552550374817486 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0080_text_document -0.0011570794132840427 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0081_text_document -0.0011570932061148482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0082_text_document -0.0011561938025300182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0083_text_document -0.0011560757016965283 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0084_text_document -0.00019284851714729888 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/c4-0085_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_cc.txt b/ALCF/data-lists/sunspot/data_file_list_cc.txt deleted file mode 100644 index 4b9a797878..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_cc.txt +++ /dev/null @@ -1,2880 +0,0 @@ -0.0002329030984435853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0000_text_document -0.00023018699207949078 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0001_text_document -0.00024373839803694205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0002_text_document -0.00023608269234913788 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0003_text_document -0.00024813091225197464 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0004_text_document -0.00023520818074126314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0005_text_document -0.0002374607329273171 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0006_text_document -0.00023738412849923294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0007_text_document -0.0002443634316582533 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0008_text_document -0.00023847622533166118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0009_text_document -0.00023199871587697545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0010_text_document -0.0002385337709567312 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0011_text_document -0.0002432839071745339 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0012_text_document -0.00023508523674007346 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0013_text_document -0.00032603226617680567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0014_text_document -0.00023789141182395846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0015_text_document -0.0002461407443245122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0016_text_document -0.00023499257215518966 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0017_text_document -0.00024846537508068473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0018_text_document -0.0002386611981191132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0019_text_document -0.0002476214516386151 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0020_text_document -0.00023922963334203518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0021_text_document -0.0002566637890877035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0022_text_document -0.0002480836116312675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0023_text_document -0.0002500957846859012 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0024_text_document -0.00023232303192858133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0025_text_document -0.0002402109920207785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0026_text_document -0.00032458741378655037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0027_text_document -0.00023711130623699136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0028_text_document -0.0002473092752915358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0029_text_document -0.00024517111812673547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0030_text_document -0.00024145261714879915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0031_text_document -0.0002441832095655324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0032_text_document -0.00024533720808111173 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0033_text_document -0.00024615543201451354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0034_text_document -0.00029788578618284437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0035_text_document -0.00026821245945822444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0036_text_document -0.0002451138188102186 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0037_text_document -0.00023812823651070536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0038_text_document -0.00023799603175215714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0039_text_document -0.00024128396884325748 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0040_text_document -0.00024158008848876737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0041_text_document -0.00024722330373436316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0042_text_document -0.00023308404070500205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0043_text_document -0.0002554252556503107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0044_text_document -0.0003132025339147037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0045_text_document -0.00024278622445373792 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0046_text_document -0.0003214585004572529 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0047_text_document -0.0003329131703028111 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0048_text_document -0.0002361664236831262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0049_text_document -0.0002643368247294079 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0050_text_document -0.00024766538637149724 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0051_text_document -0.0002627167479901225 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0052_text_document -0.00025033496855447236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0053_text_document -0.00024160037266449382 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0054_text_document -0.00022926708072112655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0055_text_document -0.00023577632399723273 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0056_text_document -0.00024916378421745264 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0057_text_document -0.00024065956580145883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0058_text_document -0.00032914757231594763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0059_text_document -0.000382735213415281 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0060_text_document -0.00019876415914729903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0061_text_document -0.0002455041228482986 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0062_text_document -0.0002360975192355561 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0063_text_document -0.00035687225557611647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0064_text_document -0.00034010734287544296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0065_text_document -0.00024289772720050695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0066_text_document -0.0002298464162081398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0067_text_document -0.00032731880189343956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0068_text_document -0.00024593154050122983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0069_text_document -0.00024184757636917526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0070_text_document -0.0002619883069796127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0071_text_document -0.00023707630401459816 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0072_text_document -0.0003648802259322563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0073_text_document -0.00034821518419266554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0074_text_document -0.00025687739808269634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0075_text_document -0.00025210376457187776 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0076_text_document -0.00025341417049958763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0077_text_document -0.00026096750660126574 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0078_text_document -0.0002557323323244081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0079_text_document -0.0003306928457892949 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0080_text_document -0.00034038835131844906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0081_text_document -0.00025944099107910257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0082_text_document -0.00011523229485833962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0083_text_document -0.0002577986281049885 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0084_text_document -0.00011763411767853355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0085_text_document -0.00025348268598695737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0086_text_document -0.00032333206004171266 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0087_text_document -0.00030755087408648437 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0088_text_document -0.00023006508933660387 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0089_text_document -0.00023529378653763827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0090_text_document -0.0002316006671871909 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0091_text_document -0.0002467080329046101 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0092_text_document -0.0002812385280195195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0093_text_document -0.0002999655363830447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0094_text_document -0.00030366253916544147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0095_text_document -0.00034483134052353947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0096_text_document -0.0002264669007084511 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0097_text_document -0.0002601377797129039 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0098_text_document -0.000243683175313779 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0099_text_document -0.0002458323373867855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0100_text_document -0.00023061358738763293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0101_text_document -0.0002383240957413279 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0102_text_document -0.00024652411741760106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0103_text_document -0.00024356064371899462 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0104_text_document -0.00023826916720633669 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0105_text_document -0.00023583636824734604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0106_text_document -0.00023310828235332517 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0107_text_document -0.00024133699058477928 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0108_text_document -0.00023757818755491814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0109_text_document -0.00024650642737935284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0110_text_document -0.00023587507176169633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0111_text_document -0.0002394516652010616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0112_text_document -0.00026115753562452 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0113_text_document -0.00023919185015293048 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0114_text_document -0.0002328737948830104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0115_text_document -0.0002449581587150213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0116_text_document -0.00023488566807302266 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0117_text_document -0.0002461692650286432 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0118_text_document -0.00023193321359714746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0119_text_document -0.00024814319189332457 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0120_text_document -0.0002502054369100928 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0121_text_document -0.0002294119999864264 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0122_text_document -0.00023986985689573848 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0123_text_document -0.00023333209217509475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0124_text_document -0.0002268247786450586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0125_text_document -0.0002289098412617007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0126_text_document -0.00023635954118858026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0127_text_document -0.00024647215050850076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0128_text_document -0.00024326708810109974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0129_text_document -0.0002931046025004214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0130_text_document -0.00022529330733557138 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0131_text_document -0.00024288319647667783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0132_text_document -0.0003170441859608398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0133_text_document -0.00032183678547706126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0134_text_document -0.00020557308761968548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0135_text_document -0.00020890924417592562 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0136_text_document -0.00021111297420597103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0137_text_document -0.00021993650550023244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0138_text_document -0.0002123163519100286 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0139_text_document -0.0002103629651549111 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0140_text_document -0.00021370932994199264 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0141_text_document -0.00020399994203827728 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0142_text_document -0.00021563034464531022 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0143_text_document -0.0002119386189866467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0144_text_document -0.00020333697838057754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0145_text_document -0.00020812225502998168 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0146_text_document -0.0002192034455873437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0147_text_document -0.0002146433860256116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0148_text_document -0.00022498320338620924 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0149_text_document -0.00020605974297327904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0150_text_document -0.00020911517614300505 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0151_text_document -0.00022086517759478398 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0152_text_document -0.00021332423639106333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0153_text_document -0.00020576019154376813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0154_text_document -0.00020504347709097317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0155_text_document -0.00020777754226086552 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0156_text_document -0.00021294564928541406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0157_text_document -0.00020775275197134613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0158_text_document -0.00021002644029417448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0159_text_document -0.00021013797882725636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0160_text_document -0.00019076903434985646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0161_text_document -0.00019137766426576477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0162_text_document -0.0001841037351078922 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0163_text_document -0.0001952863228508793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0164_text_document -0.00018602295481911772 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0165_text_document -0.0001931370361427833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0166_text_document -0.0001801085437374987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0167_text_document -0.000188289716886196 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0168_text_document -0.0001852865203803285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0169_text_document -0.00018892492640726607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0170_text_document -0.0001867706345514145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0171_text_document -0.00018688900901065678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0172_text_document -0.00018978617486719294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0173_text_document -0.00019074400515584856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0174_text_document -0.00018895644551080948 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0175_text_document -0.0002014139475504348 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0176_text_document -0.00019178652165604014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0177_text_document -0.00019538713758341256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0178_text_document -0.00019221603071045457 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0179_text_document -0.00018559646736351844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0180_text_document -0.00018839424919962872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0181_text_document -0.00031369302654824313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0182_text_document -0.00029283955302533026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0183_text_document -0.0003003216050130351 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0184_text_document -0.00030560088357585723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0185_text_document -0.00030852297965873606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0186_text_document -0.00030137151200383515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0187_text_document -0.000287675564141583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0188_text_document -0.0002865118305148982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0189_text_document -0.0002942394807592494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0190_text_document -0.0002892999122858095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0191_text_document -0.00029726222843843534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0192_text_document -0.0002865106197035132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0193_text_document -0.0002831383377282607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0194_text_document -0.00029911101649033976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0195_text_document -0.0002879193266837814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0196_text_document -0.000293888834619463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0197_text_document -0.00028471984768159116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0198_text_document -0.0002880090219919074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0199_text_document -0.0002916398711835823 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0200_text_document -0.00029790830243728387 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0201_text_document -0.00028328873748227157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0202_text_document -0.000295084201372288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0203_text_document -0.0002870500420988019 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0204_text_document -0.00028061238206088403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0205_text_document -0.00028268741759946835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0206_text_document -0.0002832900433124873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0207_text_document -0.0002821269671667503 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0208_text_document -0.00028388007298379026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0209_text_document -0.0002811354392519064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0210_text_document -0.0002811576793347316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0211_text_document -0.000291266961761568 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0212_text_document -0.0002930917058536775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0213_text_document -0.00029247722771384336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0214_text_document -0.00030253733431717943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0215_text_document -0.0002988938219536017 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0216_text_document -0.0003002888817617649 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0217_text_document -0.00028686614758997625 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0218_text_document -0.00032046548753382687 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0219_text_document -0.00027752519729998216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0220_text_document -0.00026529350985605245 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0221_text_document -0.0002654493836819182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0222_text_document -0.00026232091015406547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0223_text_document -0.0002599081762104853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0224_text_document -0.0002835817651903514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0225_text_document -0.00026294839748242733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0226_text_document -0.0002610835823452124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0227_text_document -0.000260110886669002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0228_text_document -0.000253371820236557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0229_text_document -0.0002581811396117453 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0230_text_document -0.0002514852630632709 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0231_text_document -0.00025726705673313424 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0232_text_document -0.00025592912496079053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0233_text_document -0.00025012268192543976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0234_text_document -0.00024391340520007348 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0235_text_document -0.0002384383639062725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0236_text_document -0.00023975576001149118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0237_text_document -0.0002338016280970284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0238_text_document -0.0002439200883556984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0239_text_document -0.00024142268942556778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0240_text_document -0.0002427966777591219 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0241_text_document -0.00024280144153436732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0242_text_document -0.00024065658615901044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0243_text_document -0.00024455143739741974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0244_text_document -0.00023239795390635735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0245_text_document -0.0002582911684560293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0246_text_document -0.00024625861259252923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0247_text_document -0.0002391576312805854 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0248_text_document -0.000238078180343909 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0249_text_document -0.00023486425304981024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0250_text_document -0.0002355893518655022 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0251_text_document -0.0002366129403678232 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0252_text_document -0.00023595832035066449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0253_text_document -0.00023327574008525872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0254_text_document -0.00024148789011315923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0255_text_document -0.0002373778500991465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0256_text_document -0.00023955987733466374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0257_text_document -0.000230949882722363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0258_text_document -0.00023691636140836262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0259_text_document -0.0002296963977634624 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0260_text_document -0.0002332661069034444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0261_text_document -0.00023843042502126992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0262_text_document -0.00023511746712743498 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0263_text_document -0.0002347369877896436 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0264_text_document -0.0002323753243697275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0265_text_document -0.00026669348300156857 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0266_text_document -0.00025799845912273273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0267_text_document -0.00027628560903016796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0268_text_document -0.00026519284616643963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0269_text_document -0.00026441815097637077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0270_text_document -0.0002662131391195505 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0271_text_document -0.00027728803868991606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0272_text_document -0.0002769764618252775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0273_text_document -0.00027646939593325287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0274_text_document -0.0002624622460988396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0275_text_document -0.0002597094641937235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0276_text_document -0.00026414993058715923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0277_text_document -0.00027056496256926013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0278_text_document -0.0002594411680362496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0279_text_document -0.00026263805833060905 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0280_text_document -0.0002560343870682032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0281_text_document -0.0002624349038750109 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0282_text_document -0.00025919416325410714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0283_text_document -0.0002611522977423299 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0284_text_document -0.00023679129688303509 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0285_text_document -0.0002424050866477902 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0286_text_document -0.00022701047777126036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0287_text_document -0.00023885339653333248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0288_text_document -0.00024106734540671208 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0289_text_document -0.0002258801520250309 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0290_text_document -0.0003279882524990489 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0291_text_document -0.00033565261995537515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0292_text_document -0.0003289323356607256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0293_text_document -0.0003074095430777535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0294_text_document -0.0003207680812935341 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0295_text_document -0.00031455349141131964 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0296_text_document -0.0003292847953027658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0297_text_document -0.0003336588045388259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0298_text_document -0.00031509118791912046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0299_text_document -0.0003142598967986839 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0300_text_document -0.00030783273695855995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0301_text_document -0.0003180584048660508 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0302_text_document -0.0003132932087805931 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0303_text_document -0.00031883257979717144 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0304_text_document -0.00030944547256766847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0305_text_document -0.00030308947812968015 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0306_text_document -0.00027546560713402303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0307_text_document -0.0002849896883269672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0308_text_document -0.00028854314233644503 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0309_text_document -0.00028915140229591915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0310_text_document -0.00028785031389006415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0311_text_document -0.00029386612956137296 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0312_text_document -0.00027190973100817075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0313_text_document -0.00028482862326451903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0314_text_document -0.00028103519882799385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0315_text_document -0.00027510038584601916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0316_text_document -0.00028413351954904745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0317_text_document -0.0002766838847779375 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0318_text_document -0.00026734717208098886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0319_text_document -0.0002798212098651715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0320_text_document -0.0002747771651023886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0321_text_document -0.0002653649112010507 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0322_text_document -0.0002631895073950362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0323_text_document -0.00027233897055462913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0324_text_document -0.00026295942114759743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0325_text_document -0.00030523368071333024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0326_text_document -0.00022951852300606208 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0327_text_document -0.00022441558532523096 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0328_text_document -0.00022508048810748277 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0329_text_document -0.00021854625167048365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0330_text_document -0.00032578339433634126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0331_text_document -0.0003234065091465547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0332_text_document -0.00031578848940780525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0333_text_document -0.0003211733834987297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0334_text_document -0.00030598592011548813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0335_text_document -0.00030636342203205056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0336_text_document -0.0003057832116313887 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0337_text_document -0.000314036788141844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0338_text_document -0.00030966829419359915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0339_text_document -0.00030590256959722885 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0340_text_document -0.0003098044211320355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0341_text_document -0.00031610551467687426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0342_text_document -0.0003181946275637243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0343_text_document -0.00030594263323826957 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0344_text_document -0.0003126680759448145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0345_text_document -0.0002992280964722656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0346_text_document -0.00029925238994904177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0347_text_document -0.0003002679127100512 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0348_text_document -0.00029525568123898354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0349_text_document -0.0003024653097967333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0350_text_document -0.0002953978348393056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0351_text_document -0.0003002611325611784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0352_text_document -0.0002957202302765213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0353_text_document -0.00029316969879070013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0354_text_document -0.00029927093466316167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0355_text_document -0.00029673566591636904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0356_text_document -0.0002937689672539696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0357_text_document -0.0002973606684406085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0358_text_document -0.0002964111065178358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0359_text_document -0.0003023024169175062 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0360_text_document -0.0003023653161749783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0361_text_document -0.0003041586406248139 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0362_text_document -0.00029561553630767535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0363_text_document -0.00024185982713467274 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0364_text_document -0.00023843085692504566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0365_text_document -0.00024640440430345615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0366_text_document -0.0002514283272863322 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0367_text_document -0.0002428429062712565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0368_text_document -0.00023806417358106035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0369_text_document -0.000241345504518809 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0370_text_document -0.00023475737093303525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0371_text_document -0.00024315922889458298 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0372_text_document -0.0002509834540572025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0373_text_document -0.00025303820591366467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0374_text_document -0.00023678822937901864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0375_text_document -0.00023171129872234371 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0376_text_document -0.00024461347186013167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0377_text_document -0.00023799008209254456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0378_text_document -0.00023090419051131675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0379_text_document -0.0002236725770641727 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0380_text_document -0.00023567214707890686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0381_text_document -0.0002262722125540663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0382_text_document -0.00034312492202384507 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0383_text_document -0.00021814471912144287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0384_text_document -0.00023259303719099642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0385_text_document -0.00031953022508126173 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0386_text_document -0.00023554778297810253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0387_text_document -0.0002460294175476815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0388_text_document -0.0002407153820838108 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0389_text_document -0.0002374237316074476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0390_text_document -0.00023980889380119253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0391_text_document -0.0002511495625217406 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0392_text_document -0.0002455758117178104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0393_text_document -0.00024203242698955926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0394_text_document -0.00024139601603558614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0395_text_document -0.00024286894291167163 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0396_text_document -0.00023208951019510916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0397_text_document -0.0002357404012027918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0398_text_document -0.00023446333528494393 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0399_text_document -0.0002366761658977476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0400_text_document -0.0002382598783135322 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0401_text_document -0.00023065268726624828 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0402_text_document -0.00022821836479753894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0403_text_document -0.00023184541693801962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0404_text_document -0.00023323789396160382 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0405_text_document -0.00022765013762883577 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0406_text_document -0.00023714308028716352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0407_text_document -0.00028689301916209046 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0408_text_document -0.0003409253474017267 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0409_text_document -0.0003375051344730567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0410_text_document -0.0003292176313040109 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0411_text_document -0.00032955022485317955 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0412_text_document -0.0003279397699428092 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0413_text_document -0.0003197789907967984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0414_text_document -0.00031901270687106177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0415_text_document -0.000321273794216131 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0416_text_document -0.0003220857325921838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0417_text_document -0.00031002969769902754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0418_text_document -0.00031282247512778876 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0419_text_document -0.0003087408247659614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0420_text_document -0.0003000588357430778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0421_text_document -0.0003050525128747414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0422_text_document -0.0003038755807622741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0423_text_document -0.00029692774685276133 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0424_text_document -0.0003116160903862434 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0425_text_document -0.00031084101832927995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0426_text_document -0.00030708350656830715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0427_text_document -0.00031743538194191725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0428_text_document -0.00031694261996253895 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0429_text_document -0.0003146446823405206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0430_text_document -0.00030156651655858596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0431_text_document -0.000303240651608455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0432_text_document -0.00032558453868072364 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0433_text_document -0.0002973680179620588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0434_text_document -0.0002971760577119216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0435_text_document -0.0002973002298006474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0436_text_document -0.0002878620791957177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0437_text_document -0.00029632190555443135 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0438_text_document -0.0002946733596926658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0439_text_document -0.00029877307004917556 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0440_text_document -0.00029551091884749816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0441_text_document -0.0002976670701108049 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0442_text_document -0.0002888352867396029 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0443_text_document -0.0002866799361024954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0444_text_document -0.0002859222006630905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0445_text_document -0.00028581831052887173 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0446_text_document -0.00028506927387831265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0447_text_document -0.0002803249093757669 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0448_text_document -0.0002809203104492272 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0449_text_document -0.00028454145587367076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0450_text_document -0.00028584177277598123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0451_text_document -0.00028086934160805217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0452_text_document -0.000270936293938279 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0453_text_document -0.00028304258342716634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0454_text_document -0.00028276074943094315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0455_text_document -0.0002602100764561298 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0456_text_document -0.00028012504824815937 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0457_text_document -0.0002608944608134916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0458_text_document -0.0002845289889094832 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0459_text_document -0.0002717532367216808 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0460_text_document -0.0002643974553814476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0461_text_document -0.0002758213344366294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0462_text_document -0.0002753861114186629 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0463_text_document -0.00031845649723981725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0464_text_document -0.00032153756772406746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0465_text_document -0.0003223378422301534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0466_text_document -0.0002996787108131847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0467_text_document -0.00030486709979224023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0468_text_document -0.00031053773722556385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0469_text_document -0.0003002771838331003 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0470_text_document -0.00029794449770130684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0471_text_document -0.0003033670930430196 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0472_text_document -0.0002965031647098184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0473_text_document -0.0002837085032811094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0474_text_document -0.0002828420727162801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0475_text_document -0.00028941167269403106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0476_text_document -0.00029157564190928313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0477_text_document -0.00029812762761704826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0478_text_document -0.0002961388642406645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0479_text_document -0.0002838466433847451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0480_text_document -0.0002788779144959817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0481_text_document -0.0003402152386086791 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0482_text_document -0.00037332501068667467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0483_text_document -0.0002413675200116708 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0484_text_document -0.0003704235275199961 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0485_text_document -0.0002379466982220781 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0486_text_document -0.00035089333509974934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0487_text_document -0.00023630817154070126 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0488_text_document -0.00023857309295728839 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0489_text_document -0.0002435822475458576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0490_text_document -0.00023387703405383536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0491_text_document -0.00034319854187343774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0492_text_document -0.0003622737409420836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0493_text_document -0.00023570573166970698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0494_text_document -0.00022641527241191097 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0495_text_document -0.00034243292431352653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0496_text_document -0.00024045245535407698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0497_text_document -0.00023676532885361976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0498_text_document -0.00022335363118071338 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0499_text_document -0.00023448598925498735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0500_text_document -0.00033737048365832474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0501_text_document -0.0003357751601882351 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0502_text_document -0.0003383236392673138 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0503_text_document -0.0003397838415177592 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0504_text_document -0.00033705937300296186 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0505_text_document -0.00033448155827902774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0506_text_document -0.00034576892094196856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0507_text_document -0.00033674871522955814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0508_text_document -0.0003328110361659434 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0509_text_document -0.00032432631363958473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0510_text_document -0.00032731656932112217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0511_text_document -0.00032024116066153716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0512_text_document -0.0003040305172335454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0513_text_document -0.00031659687802842567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0514_text_document -0.000303687860573204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0515_text_document -0.0003155611705529593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0516_text_document -0.00030697272991348575 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0517_text_document -0.00032874805540012775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0518_text_document -0.0003195460475675836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0519_text_document -0.00029999019685462926 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0520_text_document -0.0003031992730055188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0521_text_document -0.0003004957313392662 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0522_text_document -0.00029242533089655584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0523_text_document -0.0002940539652538529 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0524_text_document -0.0003042748602544184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0525_text_document -0.00029329988520120374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0526_text_document -0.00028533980088048884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0527_text_document -0.0002995523399640371 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0528_text_document -0.00024445982369612285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0529_text_document -0.0002341949821161716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0530_text_document -0.0002448827406649086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0531_text_document -0.0002464661023748273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0532_text_document -0.0002458273043503861 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0533_text_document -0.000234131092194839 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0534_text_document -0.00023502842288340058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0535_text_document -0.00023472409854696446 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0536_text_document -0.0002353934437680525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0537_text_document -0.00023298716740292522 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0538_text_document -0.00023724345571185632 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0539_text_document -0.0002463911915031484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0540_text_document -0.00023298903026561056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0541_text_document -0.00022884149754863258 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0542_text_document -0.00023103945956545342 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0543_text_document -0.0002444088792883614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0544_text_document -0.00022581722858094737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0545_text_document -0.0002370810502668904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0546_text_document -0.00022632319324174496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0547_text_document -0.00023710168144645038 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0548_text_document -0.00022964923090952467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0549_text_document -0.00023748320722538985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0550_text_document -0.0003222624777361089 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0551_text_document -0.0002939065142920207 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0552_text_document -0.0003163669341858318 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0553_text_document -0.0002875568128154461 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0554_text_document -0.0002891174847690085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0555_text_document -0.0002845830978145091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0556_text_document -0.0002834617830618547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0557_text_document -0.0002825955578364204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0558_text_document -0.0002770681818983043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0559_text_document -0.00027398693963975244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0560_text_document -0.00026761486776881346 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0561_text_document -0.0002709662939745425 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0562_text_document -0.0002715205476986883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0563_text_document -0.0002694875173937183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0564_text_document -0.0002691404382855153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0565_text_document -0.0002555696578650904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0566_text_document -0.00025938400199289785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0567_text_document -0.00025330279781755557 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0568_text_document -0.00025455190919542185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0569_text_document -0.0002596474980952091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0570_text_document -0.0002593765878092823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0571_text_document -0.00026530976177812846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0572_text_document -0.00026521586959931293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0573_text_document -0.00027156192778243744 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0574_text_document -0.00026542489893346987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0575_text_document -0.0002637742757379441 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0576_text_document -0.0002660391549513622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0577_text_document -0.0002622961692249776 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0578_text_document -0.0002668259130904866 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0579_text_document -0.00026393281403990296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0580_text_document -0.0002573087912247817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0581_text_document -0.0002689284845925933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0582_text_document -0.0002587878565641303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0583_text_document -0.0002591277179432351 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0584_text_document -0.00025645748667058553 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0585_text_document -0.0002576834953920859 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0586_text_document -0.0002574007659976351 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0587_text_document -0.00026215195926907863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0588_text_document -0.0002550452573299244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0589_text_document -0.0002580549425113166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0590_text_document -0.0002580184320809385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0591_text_document -0.00026135902243793944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0592_text_document -0.0002499110939933153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0593_text_document -0.00023602977130289638 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0594_text_document -0.0002179537404034863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0595_text_document -0.000217790844069029 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0596_text_document -0.00021511798361299487 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0597_text_document -0.00025422459968044684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0598_text_document -0.00026310640293852807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0599_text_document -0.0003408740036680742 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0600_text_document -0.00025777786217145044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0601_text_document -0.00025244460970438263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0602_text_document -0.00025351648924446906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0603_text_document -0.0003423231978018855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0604_text_document -0.0003423953052478566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0605_text_document -0.0003318569148201118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0606_text_document -0.00032767638761629247 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0607_text_document -0.00033215390937927025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0608_text_document -0.00032618622802635676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0609_text_document -0.00032507622347617733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0610_text_document -0.00031030763419557833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_head-0611_text_document -0.00024643590119480534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0000_text_document -0.0002095902169870633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0001_text_document -0.00021403593088797332 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0002_text_document -0.0002227102409085263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0003_text_document -0.00020197706221244385 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0004_text_document -0.00022874875522106917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0005_text_document -0.00023280550472601052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0006_text_document -0.00022749578163623905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0007_text_document -0.00023802912323224644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0008_text_document -0.00023176496190267302 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0009_text_document -0.0002278986856648695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0010_text_document -0.00021833909531790053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0011_text_document -0.0003080057114591217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0012_text_document -0.00021694016663911526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0013_text_document -0.0002668830492707773 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0014_text_document -0.00024523658363304193 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0015_text_document -0.0002894756615830288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0016_text_document -0.00020347856162111349 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0017_text_document -0.0002134325832786435 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0018_text_document -0.00021673235231198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0019_text_document 
-0.0002654127125833355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0020_text_document -0.0002158672209137081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0021_text_document -0.00023947604851382316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0022_text_document -0.00026152140024106367 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0023_text_document -0.00021518621527788343 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0024_text_document -0.0002439782139658387 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0025_text_document -0.0002905141391659118 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0026_text_document -0.00021642682185339982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0027_text_document -0.00019960430947798375 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0028_text_document -0.00026322267340937706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0029_text_document -0.00022334429465509248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0030_text_document -0.00022855119280875728 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0031_text_document -0.00028578658731994404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0032_text_document -0.0002584277862839571 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0033_text_document -0.00021861958226794765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0034_text_document -0.00026614391185475836 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0035_text_document -0.00028970533715167736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0036_text_document -0.0002235814952215254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0037_text_document -0.00022032188312044515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0038_text_document -0.00022884461811511293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0039_text_document -0.0002551680347396578 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0040_text_document -0.00022883355545520197 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0041_text_document -0.0002232938120141678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0042_text_document -0.0002691617763064546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0043_text_document -0.00023572139842386745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0044_text_document -0.0002552819803341825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0045_text_document -0.00027155660031106415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0046_text_document -0.00021551548292117663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0047_text_document -0.00020620735756494168 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0048_text_document -0.0002166820604491231 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0049_text_document -0.00018501398539579828 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0050_text_document 
-0.00027225222848112053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0051_text_document -0.00023371832644559636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0052_text_document -0.00023566702124489628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0053_text_document -0.00023686334707090557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0054_text_document -0.00022423975285568458 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0055_text_document -0.0002528257228301147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0056_text_document -0.0002561855163693918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0057_text_document -0.00022810786925037496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0058_text_document -0.0002762405538154904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0059_text_document -0.00022261162863844723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0060_text_document -0.00022540915157909426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0061_text_document -0.00022299985657677767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0062_text_document -0.00022755525774778565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0063_text_document -0.00024165856540482104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0064_text_document -0.00025687628451136137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0065_text_document -0.0002231870244226192 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0066_text_document -0.00026580529164370396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0067_text_document -0.00028870521089646587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0068_text_document -0.00021540624754582923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0069_text_document -0.00025778332069476944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0070_text_document -0.00021926796929661694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0071_text_document -0.00026029886649394187 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0072_text_document -0.00022285796310592967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0073_text_document -0.00023080628286139754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0074_text_document -0.00025245808263416443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0075_text_document -0.00022457772027503216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0076_text_document -0.00024435224362284627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0077_text_document -0.00022526086938759533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0078_text_document -0.0002673487094116284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0079_text_document -0.00023263089713557213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0080_text_document -0.00021778225362633044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0081_text_document 
-0.00021409630017652816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0082_text_document -0.00022267016739539933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0083_text_document -0.00020585884947224638 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0084_text_document -0.00022993683686780696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0085_text_document -0.00024242353683668374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0086_text_document -0.0002092411836993767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0087_text_document -0.0002197488902020793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0088_text_document -0.00021875038642425168 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0089_text_document -0.0002494827261520774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0090_text_document -0.00023601123399284122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0091_text_document -0.00021826172481591926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0092_text_document -0.000236632672200321 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0093_text_document -0.00025074570040713444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0094_text_document -0.00022642420961164095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0095_text_document -0.00023812142057551977 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0096_text_document -0.0002428821562055837 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0097_text_document -0.00022488741946885592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0098_text_document -0.00020317409833506262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0099_text_document -0.00021856439903312987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0100_text_document -0.0002106925714107645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0101_text_document -0.00021119826681040816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0102_text_document -0.0002592340274790045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0103_text_document -0.00023255611509461946 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0104_text_document -0.00020894883617804318 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0105_text_document -0.00022615604129768463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0106_text_document -0.000203728797783905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0107_text_document -0.0001989690950208705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0108_text_document -0.00021734356057002846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0109_text_document -0.0002433390106922548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0110_text_document -0.00022031295850762523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0111_text_document -0.00022344289507866802 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0112_text_document 
-0.00022230083290263739 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0113_text_document -0.00021439002065826426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0114_text_document -0.0002041951415667326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0115_text_document -0.00022877491032651992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0116_text_document -0.00021999090587860643 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0117_text_document -0.00025682432698074305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0118_text_document -0.00024400030399295212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0119_text_document -0.00022789294060424558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0120_text_document -0.00021497724986548528 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0121_text_document -0.00023813142494777905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0122_text_document -0.00021895635220322673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0123_text_document -0.00023328497887722523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0124_text_document -0.00022164528342855325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0125_text_document -0.0002484042811809953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0126_text_document -0.00021121568758750245 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0127_text_document -0.00020558498767931708 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0128_text_document -0.00024543621326022564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0129_text_document -0.00019902438240619879 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0130_text_document -0.00023691721805865155 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0131_text_document -0.00021791494779355714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0132_text_document -0.0002240264291639859 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0133_text_document -0.0002473539109425455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0134_text_document -0.0002071473371471445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0135_text_document -0.00021022258828332134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0136_text_document -0.00022311670653909265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0137_text_document -0.00022930107525031038 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0138_text_document -0.0002214421423002716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0139_text_document -0.00021570132519262982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0140_text_document -0.0002197681200389886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0141_text_document -0.0002800029152388595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0142_text_document -0.00026843440765131945 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0143_text_document 
-0.0002849765317975514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0144_text_document -0.00027096319463304773 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0145_text_document -0.00027086227426919104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0146_text_document -0.0002526247335698449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0147_text_document -0.00027363240217034764 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0148_text_document -0.0002623467059155748 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0149_text_document -0.00027346078063921375 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0150_text_document -0.00025920642956814055 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0151_text_document -0.00025705335691494745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0152_text_document -0.00025922805782841715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0153_text_document -0.0002788336705199961 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0154_text_document -0.00024845909125095083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0155_text_document -0.00028656519284339746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0156_text_document -0.00025647131598268287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0157_text_document -0.0002784068234736532 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0158_text_document -0.0002528120161786896 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0159_text_document -0.0002488190053053583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0160_text_document -0.0002704389893183884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0161_text_document -0.00025616941425622545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0162_text_document -0.00026029019534693783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0163_text_document -0.00025685556571703545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0164_text_document -0.00019723833812640722 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0165_text_document -0.0001895418580073486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0166_text_document -0.00019011078486016846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0167_text_document -0.00018779376696334834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0168_text_document -0.00018563641007150188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0169_text_document -0.00018754827458482748 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0170_text_document -0.00019755194962803275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0171_text_document -0.00028610572842390993 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0172_text_document -0.00019902354772130188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0173_text_document -0.00020283251106846995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0174_text_document 
-0.00018722834815639619 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0175_text_document -0.00018348325202476222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0176_text_document -0.0002739432916909774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0177_text_document -0.00014534657139819037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0178_text_document -0.00015282753276716084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0179_text_document -0.0001549244865585569 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0180_text_document -0.0001465220076427807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0181_text_document -0.00015309131688759006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0182_text_document -0.0001462273984264752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0183_text_document -0.00014903597785697923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0184_text_document -0.0001547302246314982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0185_text_document -0.0001486478323505694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0186_text_document -0.00014887945296702178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0187_text_document -0.00014582128695700495 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0188_text_document -0.00015040846513981096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0189_text_document -0.0001492663985213415 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0190_text_document -0.0001491503509128408 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0191_text_document -0.00014485595166153977 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0192_text_document -0.00014471245274265675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0193_text_document -0.0001539836098505113 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0194_text_document -0.00014269340600113259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0195_text_document -0.0001366015589763494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0196_text_document -0.00014275967558886846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0197_text_document -0.00012216291308335102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0198_text_document -9.860253447438225e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0199_text_document -0.00013395002197992724 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0200_text_document -0.00013095775634161855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0201_text_document -0.00013244501748701574 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0202_text_document -0.00013344638268905827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0203_text_document -0.00013599432127141194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0204_text_document -0.0001319495730149868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0205_text_document 
-0.0001286425479982177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0206_text_document -0.0001288175023456875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0207_text_document -0.00014061678080985136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0208_text_document -0.000128553766351679 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0209_text_document -0.00013865417327932483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0210_text_document -0.00012918889813006947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0211_text_document -0.00013369372633056305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0212_text_document -0.00012818148109232114 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0213_text_document -0.00013087168186794624 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0214_text_document -0.00012209941459024034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0215_text_document -0.0001170049632015973 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0216_text_document -0.00013033065279061172 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0217_text_document -0.00012782387759971287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0218_text_document -0.00012594444140907917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0219_text_document -0.00012747350244869554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0220_text_document -0.00011189052700824495 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0221_text_document -0.000118474284791765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0222_text_document -0.00012947220948400783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0223_text_document -0.00011563584378100779 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0224_text_document -0.00012898102925965738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0225_text_document -0.000122859118523654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0226_text_document -0.00013841949453733798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0227_text_document -0.00012735223374055142 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0228_text_document -0.00013005120882648248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0229_text_document -0.000133953509788018 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0230_text_document -0.00012898361006981912 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0231_text_document -0.00012385687424414202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0232_text_document -0.00012495169231715962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0233_text_document -0.0001334287109141697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0234_text_document -0.0001251557347669207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0235_text_document -0.00012458204389205325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0236_text_document 
-0.00013142493999218836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0237_text_document -0.0001234876747521603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0238_text_document -0.00011414056156548952 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0239_text_document -0.00023536944102421793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0240_text_document -0.00020899836320101376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0241_text_document -0.00020694945512603853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0242_text_document -0.0001985515975806629 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0243_text_document -0.00020332234597425947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0244_text_document -0.00019901014809176087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0245_text_document -0.00019730742496077176 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0246_text_document -0.0002086531104287768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0247_text_document -0.00019880240459684486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0248_text_document -0.0001934729054969894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0249_text_document -0.00020006177554040137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0250_text_document -0.0001941325758266985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0251_text_document -0.00020329878081065027 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0252_text_document -0.00020327608562464652 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0253_text_document -0.00019798005487177493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0254_text_document -0.0001954984594242001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0255_text_document -0.0001990223203741723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0256_text_document -0.00019108660381768295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0257_text_document -0.00019716779886134537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0258_text_document -0.0001928475026596504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0259_text_document -0.00019634937526499807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0260_text_document -0.00019298574642019224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0261_text_document -0.00018884134414178089 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0262_text_document -0.00018997833083144106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0263_text_document -0.0001905325885044214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0264_text_document -0.00020263821458910917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0265_text_document -0.0002079379871094917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0266_text_document -0.00019785431238092052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0267_text_document 
-0.00018722610077594935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0268_text_document -0.00019937636744768995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0269_text_document -0.00018558334637361332 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0270_text_document -0.00019000469868035166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0271_text_document -0.0001853064471865308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0272_text_document -0.000189466635918149 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0273_text_document -0.00019109828052136198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0274_text_document -0.00018290456266579745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0275_text_document -0.00017877060456109023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0276_text_document -0.00018344271945962216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0277_text_document -0.0001937669621232641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0278_text_document -0.00019434311583686195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0279_text_document -0.0001805150932807986 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0280_text_document -0.0001914582846585569 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0281_text_document -0.00020025771498172507 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0282_text_document -0.00019924956568197525 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0283_text_document -0.000189496868442045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0284_text_document -0.0001929642820365483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0285_text_document -0.0001903124937955297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0286_text_document -0.00019497565890742164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0287_text_document -0.00018960064504727124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0288_text_document -0.00018568951646616373 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0289_text_document -0.00018239686989629257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0290_text_document -0.00018605553146990633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0291_text_document -0.0001844096767388669 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0292_text_document -0.00017898307999377337 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0293_text_document -0.0001739406120499752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0294_text_document -0.0001911537409150027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0295_text_document -0.00017663348174413226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0296_text_document -0.00017913373123918278 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0297_text_document -0.00017455805527093036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0298_text_document 
-0.00017536417503931625 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0299_text_document -0.00017329247651270448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0300_text_document -0.00017912565587258707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0301_text_document -0.00017228776664782256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0302_text_document -0.0001825947205735245 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0303_text_document -0.0001696263054898423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0304_text_document -0.00017175867341643253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0305_text_document -0.0001668734295531042 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0306_text_document -0.00016312507834781404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0307_text_document -0.0001687262224636195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0308_text_document -0.00017236097186979052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0309_text_document -0.0002586993024691808 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0310_text_document -0.00026219934972577114 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0311_text_document -0.0002566784476550503 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0312_text_document -0.0002530671575343629 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0313_text_document -0.00025526495987018773 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0314_text_document -0.0002510505062545801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0315_text_document -0.00024743741398453804 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0316_text_document -0.00024882602559273036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0317_text_document -0.00024230881628338428 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0318_text_document -0.00025005854915078414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0319_text_document -0.00024477471955617643 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0320_text_document -0.0002480463985551468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0321_text_document -0.00024335328103980772 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0322_text_document -0.00024464696562773777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0323_text_document -0.00023820565587951385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0324_text_document -0.00024537554558786237 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0325_text_document -0.00024052017934692743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0326_text_document -0.00023660347377746528 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0327_text_document -0.00023823292504990384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0328_text_document -0.00023564543049854766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0329_text_document 
-0.0002370415962271789 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0330_text_document -0.00023453319757168757 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0331_text_document -0.000236480621339876 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0332_text_document -0.0002391149628895737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0333_text_document -0.00023165934662137285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0334_text_document -0.00023331169915961683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0335_text_document -0.0002348226454144718 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0336_text_document -0.00023564045570745751 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0337_text_document -0.00016411316830860297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0338_text_document -0.0002007359738791159 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0339_text_document -0.00019930606930833604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0340_text_document -0.00019598670739211644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0341_text_document -0.00019115600211637036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0342_text_document -0.00018957338451495675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0343_text_document -0.0001997256344570198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0344_text_document -0.0001924339501051294 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0345_text_document -0.0001929492409258573 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0346_text_document -0.00019129356692417672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0347_text_document -0.0001927097658307402 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0348_text_document -0.00018744016832935095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0349_text_document -0.00018898826127054628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0350_text_document -0.00019337725386559253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0351_text_document -0.00018434878571055096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0352_text_document -0.00018454731188528818 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0353_text_document -0.00018197801455061398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0354_text_document -0.00018615322144032256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0355_text_document -0.00017981075274777777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0356_text_document -0.00018028813451030057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0357_text_document -0.0001760055343765487 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0358_text_document -0.00018306121836089844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0359_text_document -0.00018110213343756692 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0360_text_document 
-0.00017839531596627688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0361_text_document -0.00017668405792307465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0362_text_document -0.00018382867977972885 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0363_text_document -0.00017812146256462094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0364_text_document -0.00017866992260811773 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0365_text_document -0.00017457542446637375 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0366_text_document -0.00017144357690622488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0367_text_document -0.00017669299438239817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0368_text_document -0.00017721730286035934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0369_text_document -0.0002573630336497748 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0370_text_document -0.00025158500395961657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0371_text_document -0.00025871208953576674 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0372_text_document -0.0002522219361597465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0373_text_document -0.00025035546177162626 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0374_text_document -0.00024714234522261514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0375_text_document -0.00024296206951019436 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0376_text_document -0.00023797488747091152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0377_text_document -0.0002417964809184933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0378_text_document -0.0002317015633644362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0379_text_document -0.00023529081059722227 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0380_text_document -0.00022865050303533797 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0381_text_document -0.00022350627510674308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0382_text_document -0.00022416076407195612 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0383_text_document -0.0002237152481700081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0384_text_document -0.00022673308251184112 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0385_text_document -0.00021988509315558021 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0386_text_document -0.00021791186375379613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0387_text_document -0.00021902394687174658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0388_text_document -0.00022390913224296354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0389_text_document -0.0002159569838456253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0390_text_document -0.000193074631476835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0391_text_document 
-0.00019772710141722856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0392_text_document -0.0001918863050023569 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0393_text_document -0.0001968641761834432 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0394_text_document -0.00019269495646727515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0395_text_document -0.0001986463032193898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0396_text_document -0.0001855871337995234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0397_text_document -0.00019041152711008963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0398_text_document -0.00018277849340888642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0399_text_document -0.00018810546599505484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0400_text_document -0.00018711834399232793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0401_text_document -0.000180125082690484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0402_text_document -0.00023744084906469025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0403_text_document -0.00023803845013258319 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0404_text_document -0.00023586547263857976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0405_text_document -0.00023222402329423718 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0406_text_document -0.00023270999204422837 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0407_text_document -0.00023378783679246331 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0408_text_document -0.00017304047941651873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0409_text_document -0.00017585076104150352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0410_text_document -0.00017101296884180275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0411_text_document -0.00017561096140154923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0412_text_document -0.0001713420333669203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0413_text_document -0.00022603582939637927 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0414_text_document -0.0001703733924033566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0415_text_document -0.0002396801442728503 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0416_text_document -0.0001676400523382032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0417_text_document -0.00017193438273170229 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0418_text_document -0.00017529021040710947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0419_text_document -0.0001630741415909194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0420_text_document -0.00024179471702347313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0421_text_document -0.00016581358754145113 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0422_text_document 
-0.0002456894490564403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0423_text_document -0.0002456073517995372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0424_text_document -0.00024937580109172706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0425_text_document -0.0002457208726475487 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0426_text_document -0.00024399607429757567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0427_text_document -0.00023977003702270238 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0428_text_document -0.0002453131498067917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0429_text_document -0.0001621090466807557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0430_text_document -0.00024557101413066944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0431_text_document -0.00024662307150866836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0432_text_document -0.00015758980646827074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0433_text_document -0.00024391288666874046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0434_text_document -0.00023509503922816786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0435_text_document -0.00023489539281843744 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0436_text_document -0.00023286637378893443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0437_text_document -0.00023379369093964089 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0438_text_document -0.00023205784424428202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0439_text_document -0.00023009948269807432 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0440_text_document -0.00023187584394201576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0441_text_document -0.00023202252759594008 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0442_text_document -0.00022728777233539934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0443_text_document -0.00022582666382743133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0444_text_document -0.00022616733175598707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0445_text_document -0.00022768677294110565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0446_text_document -0.00022367789565066836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0447_text_document -0.00022752055218158585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0448_text_document -0.00021819243338256605 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0449_text_document -0.0002241455531613807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0450_text_document -0.00022437797440403226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0451_text_document -0.00022445007197791702 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0452_text_document -0.00022150502971124016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0453_text_document 
-0.0002225145672731263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0454_text_document -0.00022368982014371355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0455_text_document -0.00022402755606263736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0456_text_document -0.00023016090138940315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0457_text_document -0.0002260342841680707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0458_text_document -0.00022458279279977673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0459_text_document -0.00021839974448010203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0460_text_document -0.0002264409368746725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0461_text_document -0.000223550215762877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0462_text_document -0.00021610601829010048 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0463_text_document -0.00022408120517524368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0464_text_document -0.00021671066876802013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0465_text_document -0.00016072298972169641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0466_text_document -0.0001722371396276357 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0467_text_document -0.00017303760343097654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0468_text_document -0.00016820735177759604 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0469_text_document -0.0001782239553050235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0470_text_document -0.0001749477598265696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0471_text_document -0.0001700037698924768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0472_text_document -0.0001721297434219665 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0473_text_document -0.00017082606704868714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0474_text_document -0.00017400024710211123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0475_text_document -0.00017016210162102983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0476_text_document -0.00016745166973214216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0477_text_document -0.0001684428163376526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0478_text_document -0.0001648685852885396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0479_text_document -0.00017387645508870812 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0480_text_document -0.00016594906144137858 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0481_text_document -0.00016042654972698604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0482_text_document -0.00014860104507835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0483_text_document -0.00016227281398002708 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0484_text_document 
-0.00016502091577582913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0485_text_document -0.00016106235650927743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0486_text_document -0.00015987309712264371 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0487_text_document -0.0001642815421701454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0488_text_document -0.00016531915249024665 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0489_text_document -0.00015833872193897052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0490_text_document -0.00015639158495488916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0491_text_document -0.00015342548972376501 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0492_text_document -0.00015518921543764528 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0493_text_document -0.0001621958240469728 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0494_text_document -0.00015155749799598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0495_text_document -0.00014939896262383117 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0496_text_document -0.00015490092509698006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0497_text_document -0.00017977881778259884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0498_text_document -0.00018596378104021417 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0499_text_document -0.00017898738743182946 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0500_text_document -0.00018286541046512472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0501_text_document -0.00018092409134830376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0502_text_document -0.00017788220095337013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0503_text_document -0.00017903547090898037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0504_text_document -0.0001797342122414524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0505_text_document -0.00018405110997743763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0506_text_document -0.00016587458814992502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0507_text_document -0.00018323507493237133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0508_text_document -0.00017881236669457928 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0509_text_document -0.00017083385044833047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0510_text_document -0.0001730201559992492 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0511_text_document -0.00016901593018907565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0512_text_document -0.00017121838351155997 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0513_text_document -0.0001762157419442059 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0514_text_document -0.00017000047903250774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0515_text_document 
-0.00017628842147757824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0516_text_document -0.0001760014416563697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0517_text_document -0.00017080626611158523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0518_text_document -0.00017077485831581488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0519_text_document -0.0001740210774510124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0520_text_document -0.00017310752988628116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0521_text_document -0.00016563538206915967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0522_text_document -0.0001698038028867437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0523_text_document -0.00022989652913943246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0524_text_document -0.00023802118237282655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0525_text_document -0.00023209291976691602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0526_text_document -0.00023478978296678473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0527_text_document -0.00023185674392304132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0528_text_document -0.0002223151271899996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0529_text_document -0.0002212980337800594 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0530_text_document -0.0002177142043482363 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0531_text_document -0.00022071160791386127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0532_text_document -0.0002155092901614389 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0533_text_document -0.00021709946336410436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0534_text_document -0.0002091085371649664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0535_text_document -0.00021301299764538067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0536_text_document -0.00020514046046681228 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0537_text_document -0.00020554350961511138 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0538_text_document -0.0002032929572669402 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0539_text_document -0.00020017696773262392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0540_text_document -0.0002041760983122544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0541_text_document -0.00019610775249750582 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0542_text_document -0.0001972797535028649 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0543_text_document -0.00019987201182946655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0544_text_document -0.00023221090921479249 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0545_text_document -0.00022866265656078542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0546_text_document 
-0.00022846213721182363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0547_text_document -0.00022028779604045222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0548_text_document -0.00023019534411130514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0549_text_document -0.00021499063838892918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0550_text_document -0.0002238747556640398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0551_text_document -0.000219139079337847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0552_text_document -0.00022466810662919942 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0553_text_document -0.00021354111452743537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0554_text_document -0.0002116352569318229 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0555_text_document -0.00021742490236552721 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0556_text_document -0.00020976053145397075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0557_text_document -0.0002121893598598504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0558_text_document -0.00020611700008662688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0559_text_document -0.00020771394257887023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0560_text_document -0.00020861778045311834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0561_text_document -0.00020549717473124685 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0562_text_document -0.00021168253336591858 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0563_text_document -0.00020292362079976103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0564_text_document -0.0002053579978117472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0565_text_document -0.0002025742316233632 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0566_text_document -0.00019721191770863706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0567_text_document -0.00020263891920926902 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0568_text_document -0.0002047513235561355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0569_text_document -0.0002058192920224309 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0570_text_document -0.00020762611235464895 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0571_text_document -0.00020536767369033477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0572_text_document -0.000208726602681654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0573_text_document -0.00020670689006790867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0574_text_document -0.0001987029852837105 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0575_text_document -0.00019743671572624558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0576_text_document -0.00020347237873346202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0577_text_document 
-0.00019483561225711876 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0578_text_document -0.00019876706376189147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0579_text_document -0.00019418407035646924 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0580_text_document -0.00019094739234588127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0581_text_document -0.00018896169178427298 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0582_text_document -0.00019336957140803166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0583_text_document -0.00019246034436187084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0584_text_document -0.00019234601030075014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0585_text_document -0.00018937638801999214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0586_text_document -0.00019243149393005724 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0587_text_document -0.00018564518487541217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0588_text_document -0.00018349694905090308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0589_text_document -0.00018632405912780405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0590_text_document -0.0001859374743982387 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0591_text_document -0.00018735943662878573 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0592_text_document -0.00018429223346416512 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0593_text_document -0.00018743951405683122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0594_text_document -0.0002231790070545305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0595_text_document -0.00023691491440731282 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0596_text_document -0.00022732583835977663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0597_text_document -0.00023280690754947414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0598_text_document -0.00023098339919576762 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0599_text_document -0.00022742109041848038 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0600_text_document -0.00023387941495424947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0601_text_document -0.00022226509841824269 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0602_text_document -0.00022342786655488707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0603_text_document -0.00022237713376406775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0604_text_document -0.00021379459835981835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0605_text_document -0.00021934823034546768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0606_text_document -0.00022299117012803982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0607_text_document -0.0002249652818475372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0608_text_document 
-0.00021549803647665793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0609_text_document -0.00021082391557018925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0610_text_document -0.0002063290532408184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0611_text_document -0.0002098859538424268 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0612_text_document -0.00020927123951292785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0613_text_document -0.00020988114416198002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0614_text_document -0.00020708947339409333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0615_text_document -0.00020681735599881374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0616_text_document -0.00020862989695824213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0617_text_document -0.00017921617624032585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0618_text_document -0.0001869630178204498 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0619_text_document -0.0001837421970952879 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0620_text_document -0.0002703540624747488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0621_text_document -0.0002773524903329593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0622_text_document -0.00026751943505093036 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0623_text_document -0.00026849089128670544 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0624_text_document -0.00017768273890485142 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0625_text_document -0.00026694167218726514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0626_text_document -0.00026851367038544275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0627_text_document -0.00017178448275206052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0628_text_document -0.00026146356857229295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0629_text_document -0.0002631494175929668 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0630_text_document -0.00026756049947472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0631_text_document -0.0002600735435281443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0632_text_document -0.00026162102069795645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0633_text_document -0.0002546230805208093 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0634_text_document -0.00025384118907342997 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0635_text_document -0.00024898898905737453 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0636_text_document -0.0002560021645785107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0637_text_document -0.00025001876340897294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0638_text_document -0.00024817567624010623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0639_text_document 
-0.00025419118513633326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0640_text_document -0.00025520008446783997 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0641_text_document -0.00024355226527934937 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0642_text_document -0.00024233948860872504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0643_text_document -0.00024413553528635867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0644_text_document -0.00024287456234999737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0645_text_document -0.0002471744870080021 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0646_text_document -0.00024318841473052868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0647_text_document -0.00024268080340573577 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0648_text_document -0.000242363177173413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0649_text_document -0.00025310826613573865 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0650_text_document -0.0002450433802404371 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0651_text_document -0.0002429196089265994 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0652_text_document -0.00023818874203405117 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0653_text_document -0.00023814010078402416 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0654_text_document -0.0002258262625271231 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0655_text_document -0.0002359106231188901 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0656_text_document -0.00023984369117779496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0657_text_document -0.00022677878582898447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0658_text_document -0.00023019334994987196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0659_text_document -0.0002326106169086802 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0660_text_document -0.00023296218608853588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0661_text_document -0.00021930251468821644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0662_text_document -0.00022685746290158792 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0663_text_document -0.00022204375118840136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0664_text_document -0.00022312982876300855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0665_text_document -0.00022347955655196657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0666_text_document -0.00021968416238742178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0667_text_document -0.00022148339454050315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0668_text_document -0.00022133417129237745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0669_text_document -0.00021840518548046784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0670_text_document 
-0.00021501258675160414 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0671_text_document -0.00016302293581967305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0672_text_document -0.00018778970953587786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0673_text_document -0.00018910837173616491 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0674_text_document -0.00019201324078164315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0675_text_document -0.00018432054093123207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0676_text_document -0.0001907749590824511 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0677_text_document -0.00017971831966331778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0678_text_document -0.00018425384289495064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0679_text_document -0.00018353447605936826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0680_text_document -0.00018776194922919426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0681_text_document -0.000181858547251418 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0682_text_document -0.00017663862855632625 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0683_text_document -0.00017879513620194847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0684_text_document -0.00017779569087388971 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0685_text_document -0.00017180299093946108 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0686_text_document -0.00018535528192944938 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0687_text_document -0.0001710147287879884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0688_text_document -0.000173577199328182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0689_text_document -0.0001768576763304655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0690_text_document -0.0001796376911260544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0691_text_document -0.00016782824293218567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0692_text_document -0.00016074324428116396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0693_text_document -0.0001687513348299545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0694_text_document -0.00016077518171436444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0695_text_document -0.00017132313128327624 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0696_text_document -0.00016190186959679132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0697_text_document -0.00016290842504820753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0698_text_document -0.00016156811558387776 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0699_text_document -0.00023555665280084346 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0700_text_document -0.0002284718177796522 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0701_text_document 
-0.00022374123273516798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0702_text_document -0.00021994541999416394 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0703_text_document -0.00022338573100973358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0704_text_document -0.00022049767881647008 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0705_text_document -0.00022416439897413284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0706_text_document -0.00021895546198784436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0707_text_document -0.0002142388294097341 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0708_text_document -0.00022017166748084383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0709_text_document -0.00021104350754308596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0710_text_document -0.00020985391201191366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0711_text_document -0.00021778183924550787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0712_text_document -0.00021271266854227129 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0713_text_document -0.0002086433619903549 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0714_text_document -0.00021568150697937684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0715_text_document -0.00020764802098217656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0716_text_document -0.0002151205404833473 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0717_text_document -0.00020430590834946775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0718_text_document -0.00020866543326050432 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0719_text_document -0.00020818319961436583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0720_text_document -0.00020070798626764516 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0721_text_document -0.00019693995826673832 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0722_text_document -0.00020030234076064843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0723_text_document -0.00019788654054706263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0724_text_document -0.0001993257554824347 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0725_text_document -0.00021386012383904914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0726_text_document -0.00021978412787373083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0727_text_document -0.0002175599344895926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0728_text_document -0.00021091594587352813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0729_text_document -0.0002034137316303627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0730_text_document -0.00021253423082914959 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0731_text_document -0.00020816999471172712 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0732_text_document 
-0.00021853522405647908 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0733_text_document -0.0002051944662085363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0734_text_document -0.00020978726975291983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0735_text_document -0.00020468921406556763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0736_text_document -0.0002007846124143192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0737_text_document -0.00020366090300396152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0738_text_document -0.0001993156168498017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0739_text_document -0.00020150340666889603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0740_text_document -0.00020188286325854645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0741_text_document -0.00020072267667247027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0742_text_document -0.00019591912629771525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0743_text_document -0.00020056463740447396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0744_text_document -0.0001962511050627094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0745_text_document -0.00018969020412060633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0746_text_document -0.00018711981666080213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0747_text_document -0.00019064086480658448 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0748_text_document -0.0001893430509717561 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0749_text_document -0.00018823938035214858 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0750_text_document -0.000191049243153872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0751_text_document -0.00015215085209234548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0752_text_document -0.00013881666461144156 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0753_text_document -0.0001511979467407442 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0754_text_document -0.00015091819106548992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0755_text_document -0.00013896830454629422 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0756_text_document -0.00014286084497610213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0757_text_document -0.00013829013170563417 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0758_text_document -0.00014842506748913496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0759_text_document -0.0001621698681108632 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0760_text_document -0.00025658329333000087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0761_text_document -0.0002625776226522738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0762_text_document -0.00018893904126945972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0763_text_document 
-0.00019173419836462428 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0764_text_document -0.00024972708669590365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0765_text_document -0.0002565621075859928 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0766_text_document -0.0002548091984702725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0767_text_document -0.00024781120449025493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0768_text_document -0.00024190263274768403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0769_text_document -0.00024935480538538027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0770_text_document -0.00024565807926820224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0771_text_document -0.00024335665926774057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0772_text_document -0.0002407471035651234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0773_text_document -0.00024409063432302957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0774_text_document -0.00025048184051844287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0775_text_document -2.7431736503196682e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_middle-0776_text_document -0.0001542652540558753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0000_text_document -0.0001414689533672357 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0001_text_document -0.00014218991553196462 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0002_text_document -0.00014380616486339045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0003_text_document -0.00014537826992690233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0004_text_document -0.00015240156803853129 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0005_text_document -0.0001508299161037807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0006_text_document -0.0001645724380011881 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0007_text_document -0.0001636434127327491 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0008_text_document -0.0001425695379726649 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0009_text_document -0.00015038309042278246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0010_text_document -0.00015551331010771582 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0011_text_document -0.00014395190746068794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0012_text_document -0.00014572155617954775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0013_text_document -0.00014985257363654754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0014_text_document -0.00016517178815597176 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0015_text_document -0.00015368391453534256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0016_text_document -0.00013802907993189142 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0017_text_document -0.0001438832947332681 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0018_text_document -0.0001453654604013201 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0019_text_document -0.00015126685069470999 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0020_text_document -0.00014666492015973732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0021_text_document -0.00015372684675786069 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0022_text_document -0.0001466694423156705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0023_text_document -0.00014645983052842166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0024_text_document -0.00014464707855314855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0025_text_document -0.00014224079429035223 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0026_text_document -0.00015150561574001976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0027_text_document -0.00014869251464718684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0028_text_document -0.00014975351070572874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0029_text_document -0.00014693519813853656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0030_text_document -0.00015177096878176463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0031_text_document -0.0001541385774188545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0032_text_document -0.00014686140972307025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0033_text_document -0.00014836061485888312 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0034_text_document -0.00015908940031748178 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0035_text_document -0.00014335960523511807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0036_text_document -0.00014014336145596836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0037_text_document -0.00014804788542816872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0038_text_document -0.00014447262570766296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0039_text_document -0.0001490836674378867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0040_text_document -0.00015491171627451768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0041_text_document -0.00014704465686983656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0042_text_document -0.00015578029994136968 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0043_text_document -0.00014442509556094932 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0044_text_document -0.00016401352835433973 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0045_text_document -0.0001426617272165932 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0046_text_document -0.00014952006301290383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0047_text_document -0.00014858509055287383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0048_text_document -0.0001452147802800582 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0049_text_document -0.00014648995026373163 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0050_text_document -0.000150292569067835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0051_text_document -0.00015359505638013499 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0052_text_document -0.00014342220561517732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0053_text_document -0.00015037020981817882 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0054_text_document -0.0001442503228598675 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0055_text_document -0.00015512168691210362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0056_text_document -0.000141978855262853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0057_text_document -0.0001433238477981227 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0058_text_document -0.0001522852753554881 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0059_text_document -0.00015750021259583146 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0060_text_document -0.0001620583984355833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0061_text_document -0.00014425968431250636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0062_text_document -0.00015502607180742606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0063_text_document -0.00014808719854384823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0064_text_document -0.00014037741406088144 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0065_text_document -0.00014415351915599912 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0066_text_document -0.00014669998038063754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0067_text_document -0.00014168851942590583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0068_text_document -0.00016615444649487683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0069_text_document -0.00017314227247280456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0070_text_document -0.00014511886160872687 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0071_text_document -0.0001589885117911034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0072_text_document -0.0001468857466370262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0073_text_document -0.00014409172483178647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0074_text_document -0.00017524066610798787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0075_text_document -0.0001423201779575328 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0076_text_document -0.00014813204150867026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0077_text_document -0.0001426495065609589 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0078_text_document -0.00015198519700337085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0079_text_document -0.0001407239353962083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0080_text_document -0.00015564799275992607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0081_text_document -0.00014044706039573722 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0082_text_document -0.00014271692599994692 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0083_text_document -0.000145622079855115 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0084_text_document -0.0001420329587382314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0085_text_document -0.00014388823447845187 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0086_text_document -0.0001386395317413269 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0087_text_document -0.00014615498445222442 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0088_text_document -0.00014100731560794867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0089_text_document -0.0001412468938663676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0090_text_document -0.0001448361986040564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0091_text_document -0.00015041376595655126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0092_text_document -0.00015393889374199827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0093_text_document -0.0001424230223910099 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0094_text_document -0.00013832238850082653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0095_text_document -0.00014573052620396468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0096_text_document -0.00014871061906625763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0097_text_document -0.0001474653563212365 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0098_text_document -0.00014332440162216428 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0099_text_document -0.00013995360169386805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0100_text_document -0.0001396957447740551 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0101_text_document -0.00014451429874557317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0102_text_document -0.00014667057760559536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0103_text_document -0.00014311302174425863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0104_text_document -0.0001486303888676766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0105_text_document -0.00014984904337848564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0106_text_document -0.00014471364010783683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0107_text_document -0.00014422564733335141 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0108_text_document -0.00014833706425660122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0109_text_document -0.0001547519654335586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0110_text_document -0.00016861028196725518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0111_text_document -0.00014655906054430117 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0112_text_document -0.00016230445673145143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0113_text_document -0.0001608744287595928 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0114_text_document -0.00014838797263124772 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0115_text_document -0.00013772432541929463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0116_text_document -0.00014338391080519997 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0117_text_document -0.00013969596121954725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0118_text_document -0.00014433977111903384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0119_text_document -0.00013940910504621967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0120_text_document -0.00013841015875212353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0121_text_document -0.00015084460181936482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0122_text_document -0.00015609034169658813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0123_text_document -0.00014131566380676185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0124_text_document -0.0001489310284479002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0125_text_document -0.000142611271970708 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0126_text_document -0.00013893968956373896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0127_text_document -0.00014729183194546773 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0128_text_document -0.00013844600256987405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0129_text_document -0.00014038359448051134 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0130_text_document -0.00014148398954188355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0131_text_document -0.00014453817241187933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0132_text_document -0.00014513564218102443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0133_text_document -0.00013715869534969562 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0134_text_document -0.00013954027841855143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0135_text_document -0.0001412204761634212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0136_text_document -0.00014007212448120704 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0137_text_document -0.00013479800901952131 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0138_text_document -0.00014295404043242684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0139_text_document -0.00013573518591642275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0140_text_document -0.00013489486257742554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0141_text_document -0.00014869208126259815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0142_text_document -0.0001475290332523071 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0143_text_document -0.00013460777613768496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0144_text_document -0.00013367855194670696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0145_text_document -0.00014765624643721848 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0146_text_document -0.000134270744838921 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0147_text_document -0.0001482262234332188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0148_text_document -0.00013864007544648005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0149_text_document -0.00014100224826604942 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0150_text_document -0.00013048509121512907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0151_text_document -0.00012998584056022605 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0152_text_document -0.00013999889926826433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0153_text_document -0.00015896524130927037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0154_text_document -0.00013250422088217822 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0155_text_document -0.00013171712561130657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0156_text_document -0.0001353745598377907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0157_text_document -0.00014385989862913682 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0158_text_document -0.0001337953809308385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0159_text_document -0.00013398910556960017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0160_text_document -0.00013559702582181232 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0161_text_document -0.0001479395819777683 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0162_text_document -0.00013591800338063272 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0163_text_document -0.0001378006151746279 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0164_text_document -0.0001348466006623147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0165_text_document -0.0001333909410523815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0166_text_document -0.0001395413623736275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0167_text_document -0.0001340932768114764 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0168_text_document -0.0001401874805500622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0169_text_document -0.00013331732046238236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0170_text_document -0.00013697107190707125 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0171_text_document -0.00014902856836260464 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0172_text_document -0.0001360841851454116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0173_text_document -0.0001430140955418351 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0174_text_document -0.00013769551793105646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0175_text_document -0.00013655894858384475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0176_text_document -0.00013428329448183135 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0177_text_document -0.00013659792851661152 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0178_text_document -0.0001390035871359384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0179_text_document -0.00013373343260207954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0180_text_document -0.0001377384027675603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0181_text_document -0.00013688101750180593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0182_text_document -0.00013942483868376482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0183_text_document -0.00013069676947684327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0184_text_document -0.00013248181223347942 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0185_text_document -0.00013404120081582244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0186_text_document -0.0001361765930560515 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0187_text_document -0.00012895481023244784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0188_text_document -0.0001269948854413741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0189_text_document -0.00012923062571125647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0190_text_document -0.00013146444734116587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0191_text_document -0.00012866221788337398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0192_text_document -0.00012734919091675074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0193_text_document -0.00012491017673902597 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0194_text_document -0.00012532940253474304 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0195_text_document -0.00012942822588429847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0196_text_document -0.00012954876208363892 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0197_text_document -0.00012757889363363662 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0198_text_document -0.00012925483823692497 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0199_text_document -0.00012887114306702046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0200_text_document -0.00014305235456549959 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0201_text_document -0.00012924991650829868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0202_text_document -0.00013939886744592149 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0203_text_document -0.00013473816912159447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0204_text_document -0.00012443870588817695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0205_text_document -0.00012352413384768962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0206_text_document -0.00012363992848397884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0207_text_document -0.00012876521187895858 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0208_text_document -0.00012998676310669105 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0209_text_document -0.00013573707197851088 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0210_text_document -0.00012914628304832383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0211_text_document -0.00012135846145074816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0212_text_document -0.0001272491158502837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0213_text_document -0.00014048669089899133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0214_text_document -0.00012821863542952837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0215_text_document -0.00012843614908145614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0216_text_document -0.00012566972592748682 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0217_text_document -0.00012623965035462757 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0218_text_document -0.00012745682281848042 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0219_text_document -0.00012684031670531754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0220_text_document -0.00013734922167929733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0221_text_document -0.00012364311692105248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0222_text_document -0.00012150014908859676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0223_text_document -0.00013255947544281956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0224_text_document -0.00013080450775030287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0225_text_document -0.00012642072366799 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0226_text_document -0.00012748944981690917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0227_text_document -0.0001272640012288133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0228_text_document -0.00012676436334132504 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0229_text_document -0.00011463874381385243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0230_text_document -0.0001259763726722257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0231_text_document -0.00013265355691888996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0232_text_document -0.00012800075083395775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0233_text_document -0.00012600035320386608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0234_text_document -0.00012796669337022373 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0235_text_document -0.0001281363666451258 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0236_text_document -0.00013103924202277517 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0237_text_document -0.00013710099201804686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0238_text_document -0.00012390494315996567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0239_text_document -0.00012375130141281296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0240_text_document -0.00012654460329615904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0241_text_document -0.00013347917998097572 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0242_text_document -0.00012957465780002206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0243_text_document -0.00012365300899833007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0244_text_document -0.00012759104863989702 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0245_text_document -0.00012669826503428652 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0246_text_document -0.00019585621938937627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0247_text_document -0.00017199211271798405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0248_text_document -0.00017151003159557893 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0249_text_document -0.0001704765251017538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0250_text_document -0.00016915887010107177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0251_text_document -0.000164507074917777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0252_text_document -0.0001707345009802067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0253_text_document -0.00016235282921392888 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0254_text_document -0.0001603312806389334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0255_text_document -0.00016063927887228715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0256_text_document -0.0001682293216120587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0257_text_document -0.00016945118701893779 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0258_text_document -0.00016510575549830714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0259_text_document -0.00015878514261762818 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0260_text_document -0.00016058925849180358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0261_text_document -0.00016806270202025228 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0262_text_document -0.0001601743221175851 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0263_text_document -0.00016728028661189246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0264_text_document -0.00016271828570438892 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0265_text_document -0.0001663197659329172 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0266_text_document -0.00016211838369998094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0267_text_document -0.00016174818095722866 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0268_text_document -0.00016660916885770873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0269_text_document -0.00016749279166083448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0270_text_document -0.00015990162967327836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0271_text_document -0.00016050019425679443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0272_text_document -0.00015826664805809287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0273_text_document -0.00015906002765230277 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0274_text_document -0.00016496336225309003 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0275_text_document -0.00015969348413764765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0276_text_document -0.00015888249989873604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0277_text_document -0.0001588217905168081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0278_text_document -0.0001579176192128451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0279_text_document -0.0001599592014593771 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0280_text_document -0.00015860202306757735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0281_text_document -0.00015475539919197688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0282_text_document -0.0001606154789998261 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0283_text_document -0.00015967691482799697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0284_text_document -0.00015467004809542842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0285_text_document -0.00015681467419158087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0286_text_document -0.0001622263618651377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0287_text_document -0.00016071879902106084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0288_text_document -0.00015926245724996415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0289_text_document -0.00015865169965265541 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0290_text_document -0.0001558589009989086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0291_text_document -0.00015834413702510978 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0292_text_document -0.00015984235618630313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0293_text_document -0.00015906347325722462 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0294_text_document -0.0001540401129832678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0295_text_document -0.00015709268423517463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0296_text_document -0.00016150611616707217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0297_text_document -0.0001575761279522917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0298_text_document -0.00015145845456253164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0299_text_document -0.00015531545597525365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0300_text_document -0.00015290580088858923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0301_text_document -0.00015077381822016696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0302_text_document -0.00016026706987479596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0303_text_document -0.00015143811781794564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0304_text_document -0.00015335594803302406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0305_text_document -0.00015760769888428818 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0306_text_document -0.00016811053178478525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0307_text_document -0.00021456946285616728 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0308_text_document -0.00021300214303968855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0309_text_document -0.00020349194545531642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0310_text_document -0.00021281325399560017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0311_text_document -0.00020973400589848146 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0312_text_document -0.00020126033912157333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0313_text_document -0.00020674507357011296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0314_text_document -0.00021222543863325493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0315_text_document -0.0002050723383820817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0316_text_document -0.00021804813803312056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0317_text_document -0.0002008803314227051 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0318_text_document -0.0002150047024098784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0319_text_document -0.00020318723314588857 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0320_text_document -0.00020021945595806058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0321_text_document -0.00020351797666608406 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0322_text_document -0.00020832621085218548 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0323_text_document -0.0002035206051090622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0324_text_document -0.00020272338181805027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0325_text_document -0.00020460676190716195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0326_text_document -0.00020717814792849565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0327_text_document -0.0001955827435950214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0328_text_document -0.00020417807396352577 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0329_text_document -0.0002011029760914888 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0330_text_document -0.00020292765823625672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0331_text_document -0.00020035339845060027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0332_text_document -0.0002019662525247444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0333_text_document -0.000206838061219021 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0334_text_document -0.0001941713531348939 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0335_text_document -0.00010283055875342613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0336_text_document -0.00010052128921034293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0337_text_document -0.00021410992316202177 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0338_text_document -0.0002019050315219438 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0339_text_document -0.0001986035353671086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0340_text_document -0.00019334420113344198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0341_text_document -0.0002040134561840194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0342_text_document -0.00019786749210973914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0343_text_document -0.00021292248961774976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0344_text_document -0.000198975254462317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0345_text_document -0.00019270601369753864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0346_text_document -0.0001938662101557011 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0347_text_document -0.0002029610545170106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0348_text_document -0.0002024962737322469 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0349_text_document -0.0002075197885043544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0350_text_document -0.00019871985248356538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0351_text_document -0.0001949694696029141 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0352_text_document -0.00020180408203543252 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0353_text_document -0.00019545199817763088 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0354_text_document -0.00019734611243298183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0355_text_document -0.00021047242956266074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0356_text_document -0.0001968562822164333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0357_text_document -0.00019972266049102106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0358_text_document -0.00020126122390730825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0359_text_document -0.00019799642896091898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0360_text_document -0.0002021712802087185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0361_text_document -0.0001941903201275054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0362_text_document -0.00019307283352311706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0363_text_document -0.00016598318480128866 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0364_text_document -0.00016504803365649659 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0365_text_document -0.00016630327313193533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0366_text_document -0.00016601923469884076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0367_text_document -0.0001681694501234557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0368_text_document -0.00016859564709291555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0369_text_document -0.00016845101707974437 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0370_text_document -0.0001643037792913447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0371_text_document -0.00016186624765418046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0372_text_document -0.00016697344045101027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0373_text_document -0.00016669715111205908 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0374_text_document -0.00016364850623567704 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0375_text_document -0.0001634811496926281 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0376_text_document -0.00016825687707295152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0377_text_document -0.0001627585946667742 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0378_text_document -0.00016582351614544805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0379_text_document -0.0001630893218980273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0380_text_document -0.0001568416151151013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0381_text_document -0.00017273044852059518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0382_text_document -0.00016016530273273665 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0383_text_document -0.00015777742226002822 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0384_text_document -0.00016385370668116144 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0385_text_document -0.00016954547679602915 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0386_text_document -0.0001676626705219338 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0387_text_document -0.00016250610371947111 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0388_text_document -0.00016004510983519738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0389_text_document -0.000161815220320894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0390_text_document -0.00016744693680716642 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0391_text_document -0.00015604191096880147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0392_text_document -0.0001636895622681933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0393_text_document -0.000158886517344257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0394_text_document -0.0001558972054341701 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0395_text_document -0.0001591533045533395 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0396_text_document -0.0001657955386528658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0397_text_document -0.00016060726764524156 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0398_text_document -0.00016167923208527019 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0399_text_document -0.00015484394662326808 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0400_text_document -0.00016052047349647775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0401_text_document -0.0001582576585363055 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0402_text_document -0.0001545777833300399 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0403_text_document -0.00016303475566860345 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0404_text_document -0.0001627904173369714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0405_text_document -0.0001567550665344843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0406_text_document -0.0001587287727580368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0407_text_document -0.0001606889088117574 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0408_text_document -0.00016206324217472778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0409_text_document -0.00015712668987045555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0410_text_document -0.0001607143430081059 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0411_text_document -0.00015230600229428526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0412_text_document -0.00016067822548676263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0413_text_document -0.00015993580979768466 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0414_text_document -0.00016379843410396262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0415_text_document -0.0001533135627240871 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0416_text_document -0.00016861285265852845 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0417_text_document -0.0001632799417656467 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0418_text_document -0.00015962871905586431 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0419_text_document -0.00015014915949133304 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0420_text_document -0.00015059096546324844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0421_text_document -0.00015841934874861877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0422_text_document -0.000152377097357806 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0423_text_document -0.00014942797865989248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0424_text_document -0.00015640838403734855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0425_text_document -0.0001557305888039896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0426_text_document -0.00014992907934376868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0427_text_document -0.00015847297170019638 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0428_text_document -0.0001563057066889321 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0429_text_document -0.00015425884830587555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0430_text_document -0.00015294599138593887 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0431_text_document -0.00015307387809393826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0432_text_document -0.00016021533866175615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0433_text_document -0.00015819924688246454 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0434_text_document -0.00014854336050366525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0435_text_document -0.00015428039783626384 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0436_text_document -0.00015380539006369472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0437_text_document -0.00015543551510602353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0438_text_document -0.00015792640857808265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0439_text_document -0.00015591945366146652 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0440_text_document -0.00014809559672766608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0441_text_document -0.00015190843215388426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0442_text_document -0.00014890757113683386 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0443_text_document -0.0001610286090290533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0444_text_document -0.00015061787553649923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0445_text_document -0.00014811603935037767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0446_text_document -0.00015254163073097444 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0447_text_document -0.00015300211863900935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0448_text_document -0.00015063192030688013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0449_text_document -0.00015300622789493292 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0450_text_document -0.00015096280425750327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0451_text_document -0.00015205454100558358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0452_text_document -0.00015121161958027361 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0453_text_document -0.0001493611157597698 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0454_text_document -0.00015838957873196607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0455_text_document -0.0001497669779590609 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0456_text_document -0.00015173657097785533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0457_text_document -0.0001542516903028995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0458_text_document -0.000149139532833868 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0459_text_document -0.00014644441551246194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0460_text_document -0.00015166787754612994 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0461_text_document -0.00014923555170687534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0462_text_document -0.00015589324574035403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0463_text_document -0.00015022803227804745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0464_text_document -0.00015127324533861265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0465_text_document -0.00014783676790095657 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0466_text_document -0.00014927753645591052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0467_text_document -0.00014753911610765252 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0468_text_document -0.00014886425094132403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0469_text_document -0.00014432622711023067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0470_text_document -0.00015087353567030766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0471_text_document -0.00015318739523991737 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0472_text_document -0.00014716603935377532 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0473_text_document -0.00015032310787320853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0474_text_document -0.00014425315738264723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0475_text_document -0.0001507311940067415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0476_text_document -0.0001735562949386336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0477_text_document -0.0001664225151007615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0478_text_document -0.00017016223341338198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0479_text_document -0.0001686337558140661 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0480_text_document -0.00018737654520115072 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0481_text_document -0.00016696818282464752 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0482_text_document -0.00017542891864931188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0483_text_document -0.000168925038877694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0484_text_document -0.0001769097096293462 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0485_text_document -0.00017465563985682533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0486_text_document -0.0001704723163845607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0487_text_document -0.00017113194080906855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0488_text_document -0.00017056770492485763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0489_text_document -0.0001736825492971628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0490_text_document -0.00017060994856935613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0491_text_document -0.00017539355807018588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0492_text_document -0.00017512560274649157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0493_text_document -0.00017536288179601056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0494_text_document -0.00017214679473623093 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0495_text_document -0.00017372473469635212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0496_text_document -0.00016968876198424372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0497_text_document -0.00017328658337078598 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0498_text_document -0.00016545006523949998 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0499_text_document -0.0001712623636560391 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0500_text_document -0.00017259544872761246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0501_text_document -0.00016731532955664165 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0502_text_document -0.00017234554920296389 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0503_text_document -0.00016824263782247044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0504_text_document -0.00017046154865322805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0505_text_document -0.00016701775451880861 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0506_text_document -0.0001640723558698162 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0507_text_document -0.00016912021224512063 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0508_text_document -0.00016148128416798815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0509_text_document -0.00017033021559990035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0510_text_document -0.00016742449903581303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0511_text_document -0.00016604941440707502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0512_text_document -0.00016168218680070063 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0513_text_document -0.00016545734985198287 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0514_text_document -0.00016617264790719555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0515_text_document -0.00016903898126379064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0516_text_document -0.00016251470403425602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0517_text_document -0.00016741321573477316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0518_text_document -0.00016314387702135404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0519_text_document -0.00016261766224352778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0520_text_document -0.00016043765927930694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0521_text_document -0.0001581188444159775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0522_text_document -0.0001675593630876091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0523_text_document -0.00016225811098829194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0524_text_document -0.00016027854790273813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0525_text_document -0.00015477514040295668 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0526_text_document -0.00016132027735084922 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0527_text_document -0.00016144543812901825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0528_text_document -0.00016356924967160763 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0529_text_document -0.00016721507926064277 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0530_text_document -0.0001623283758093546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0531_text_document -0.00016540060361910116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0532_text_document -0.00016618517731232895 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0533_text_document -0.0001661140965633334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0534_text_document -0.00016521134906101744 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0535_text_document -0.0001605250452596446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0536_text_document -0.00016158626615495202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0537_text_document -0.00016348402666537893 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0538_text_document -0.00015887094758334445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0539_text_document -0.00016216761850919694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0540_text_document -0.00016125922688833952 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0541_text_document -0.00015719662175540762 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0542_text_document -0.00016177908132776304 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0543_text_document -0.0001616654955707841 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0544_text_document -0.0001575744247706023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0545_text_document -0.00016594502227726776 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0546_text_document -0.00016680360478028852 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0547_text_document -0.00016969508752354227 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0548_text_document -0.00018702211879271686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0549_text_document -0.00019358085009705273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0550_text_document -0.0001871367897387826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0551_text_document -0.00018452058370522755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0552_text_document -0.0001850164319455863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0553_text_document -0.00018589455402222413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0554_text_document -0.00018848818876445855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0555_text_document -0.00018677441309244695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0556_text_document -0.00018806266359047162 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0557_text_document -0.00018742615490284408 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0558_text_document -0.00018308658912909244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0559_text_document -0.00017917024956722993 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0560_text_document -0.0001796815083811096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0561_text_document -0.00018830762534435366 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0562_text_document -0.0001850705756497164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0563_text_document -0.00018620607609678367 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0564_text_document -0.00018735293561315315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0565_text_document -0.00018406055855123805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0566_text_document -0.00018296049025592247 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0567_text_document -0.00018407127494772196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0568_text_document -0.0001809459590066732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0569_text_document -0.00018206921683271417 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0570_text_document -0.0001823423927624476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0571_text_document -0.00017843504198196598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0572_text_document -0.0001849074668186014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0573_text_document -0.0001812163144813499 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0574_text_document -0.00018309068999374263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0575_text_document -0.00018500613289155086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0576_text_document -0.00017930403632760822 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0577_text_document -0.0001846380543749688 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0578_text_document -0.0001805411790348431 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0579_text_document -0.00017815258406988848 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0580_text_document -0.00017771149209661494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0581_text_document -0.000179212119800064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0582_text_document -0.0001770710081666354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0583_text_document -0.00018076802304233783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0584_text_document -0.00018266780486243524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0585_text_document -0.00017952537023013302 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0586_text_document -0.00017482592939671484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0587_text_document -0.00017479307237867526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0588_text_document -0.00017947982239834899 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0589_text_document -0.00017800230944457152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0590_text_document -0.0001768045667273756 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0591_text_document -0.00018432659029891628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0592_text_document -0.00017860310980883306 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0593_text_document -0.00017352563618741148 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0594_text_document -0.000177967402241009 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0595_text_document -0.0001761394507080597 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0596_text_document -0.0001727461411889822 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0597_text_document -0.00017520765607261058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0598_text_document -0.00017389963918978602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0599_text_document -0.00017297383567671195 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0600_text_document -0.00017186248654837811 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0601_text_document -0.00018016764298215066 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0602_text_document -0.00017252933018279703 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0603_text_document -0.0001720498259217191 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0604_text_document -0.00017208910794032673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0605_text_document -0.0001638288329725128 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0606_text_document -0.00015774370365565657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0607_text_document -0.00015428183891406193 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0608_text_document -0.0001579263490987627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0609_text_document -0.00015679781661701012 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0610_text_document -0.00015686067490532405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0611_text_document -0.00015476043642401294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0612_text_document -0.0001538144005636655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0613_text_document -0.00015471809257783847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0614_text_document -0.00014950254548936378 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0615_text_document -0.00015189343275275787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0616_text_document -0.00016808135779534307 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0617_text_document -0.00015331380459020154 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0618_text_document -0.00015025506525877266 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0619_text_document -0.00015705079524537657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0620_text_document -0.00014843144411648014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0621_text_document -0.0001536670204340525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0622_text_document -0.00014701650982417206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0623_text_document -0.0001470830903826265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0624_text_document -0.00014669457615379322 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0625_text_document -0.00015327731341039172 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0626_text_document -0.00016421071093813112 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0627_text_document -0.00014320086554259857 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0628_text_document -0.00014733292080267092 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0629_text_document -0.00014574339323444963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0630_text_document -0.00014508510524362508 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0631_text_document -0.0001510667294376284 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0632_text_document -0.00014448955337404646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0633_text_document -0.00015189242851477872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0634_text_document -0.0001408976680729981 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0635_text_document -0.00014495438771487836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0636_text_document -0.00014607129482780071 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0637_text_document -0.0001425703250247454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0638_text_document -0.00014772556798043487 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0639_text_document -0.0001454755294743558 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0640_text_document -0.00014604759342940054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0641_text_document -0.000144987966876031 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0642_text_document -0.00014159362399631978 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0643_text_document -0.00015166107543186514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0644_text_document -0.00013872638536941069 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0645_text_document -0.00014392691133816916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0646_text_document -0.00014527538230304764 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0647_text_document -0.0001445241296159157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0648_text_document -0.00014566980102669863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0649_text_document -0.00014105957349679274 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0650_text_document -0.00014407711883329926 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0651_text_document -0.00014304333666146412 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0652_text_document -0.00014480474786471068 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0653_text_document -0.00014513562095603888 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0654_text_document -0.00014216954843071324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0655_text_document -0.0001472056417215835 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0656_text_document -0.0001411732545194045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0657_text_document -0.00014472737242668624 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0658_text_document -0.0001412212585262607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0659_text_document -0.00020834639482623596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0660_text_document -0.00019484913874296875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0661_text_document -0.00019400182473285833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0662_text_document -0.000192581173021768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0663_text_document -0.0001958163408499538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0664_text_document -0.00019017201894348343 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0665_text_document -0.00018748712836308062 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0666_text_document -0.00019398325978096153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0667_text_document -0.00018740362852951608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0668_text_document -0.00018769931256921782 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0669_text_document -0.00018841740417805205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0670_text_document -0.0001897879160564146 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0671_text_document -0.00018663113185306689 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0672_text_document -0.00018894652949372258 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0673_text_document -0.0001929378648272062 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0674_text_document -0.00019134942047365448 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0675_text_document -0.00018699153383533985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0676_text_document -0.00018610331853766602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0677_text_document -0.0001863160274451902 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0678_text_document -0.00018636405144302115 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0679_text_document -0.00018489348621678148 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0680_text_document -0.0001860176372198307 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0681_text_document -0.00018315031813541827 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0682_text_document -0.00019049993633217256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0683_text_document -0.00018374255446481207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0684_text_document -0.00017918235151102646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0685_text_document -0.00018078078222027994 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0686_text_document -0.00018377134048126254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0687_text_document -0.00018119048712916442 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0688_text_document -0.00018226290667237163 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0689_text_document -0.00018539016766122422 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0690_text_document -0.00018304864675259609 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0691_text_document -0.00018006283819913595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0692_text_document -0.00017853375396011673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0693_text_document -0.0001806080666151815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0694_text_document -0.00018287085590792935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0695_text_document -0.00018102703894508278 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0696_text_document -0.00017985249563069855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0697_text_document -0.00018055111208127884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0698_text_document -0.00017436715651687287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0699_text_document -0.0001750410902836745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0700_text_document -0.0001755658852086883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0701_text_document -0.00017704710809249836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0702_text_document -0.00017563712144304312 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0703_text_document -0.00017646118668991032 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0704_text_document -0.0001738273848965312 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0705_text_document -0.00017355052248297015 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0706_text_document -0.00017182494917422235 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0707_text_document -0.0001796801127149085 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0708_text_document -0.0001535678074475219 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0709_text_document -0.00016509131806569352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0710_text_document -0.0001660762988129014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0711_text_document -0.00017181117317139103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0712_text_document -0.00016385189811495075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0713_text_document -0.00016321938466065 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0714_text_document -0.0001627668114510062 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0715_text_document -0.0001667874841569603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0716_text_document -0.0001647336272051215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0717_text_document -0.00015927038206724374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0718_text_document -0.000163069807004626 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0719_text_document -0.00016643362662749963 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0720_text_document -0.0001598347201275479 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0721_text_document -0.00016414824852047793 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0722_text_document -0.00016387374849716915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0723_text_document -0.00016218986007283508 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0724_text_document -0.00016170100645242406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0725_text_document -0.00016794279442600715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0726_text_document -0.00016410407241508566 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0727_text_document -0.00016663924614304762 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0728_text_document -0.0001610334643678992 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0729_text_document -0.00016082817926927476 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0730_text_document -0.00016483710320531984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0731_text_document -0.00015950564573034403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0732_text_document -0.00016176598872010603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0733_text_document -0.00016374799045777884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0734_text_document -0.00016207070843359862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0735_text_document -0.000161310121195263 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0736_text_document -0.0001590930806312555 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0737_text_document -0.00015872700071854542 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0738_text_document -0.0001601426608559989 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0739_text_document -0.0001592737504230903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0740_text_document -0.0001599609389465664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0741_text_document -0.0001573951015313392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0742_text_document -0.00015918138446881715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0743_text_document -0.00016063409035052854 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0744_text_document -0.00015479247307168076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0745_text_document -0.0001590206266750552 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0746_text_document -0.00016413616409963463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0747_text_document -0.00015909403254717725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0748_text_document -0.00015912638065916792 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0749_text_document -0.00015509170291798033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0750_text_document -0.00015668221053756931 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0751_text_document -0.00015993661313870757 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0752_text_document -0.00015986553041529475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0753_text_document -0.0001551253906720823 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0754_text_document -0.0001569044427999477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0755_text_document -0.00015512319487328638 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0756_text_document -0.00016021882869106635 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0757_text_document -0.00015415106017838012 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0758_text_document -0.00015711650631982987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0759_text_document -0.00015512670736159294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0760_text_document -0.00016200410442893923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0761_text_document -0.00015949285619573655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0762_text_document -0.0001625616727060612 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0763_text_document -0.00016316486655764686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0764_text_document -0.0001571167311565954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0765_text_document -0.00016128213234978153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0766_text_document -0.00015535324730882956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0767_text_document -0.0001579934311592013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0768_text_document -0.00015195311864613838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0769_text_document -0.0001615190125670139 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0770_text_document -0.00015867133202388371 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0771_text_document -0.00015932910049616658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0772_text_document -0.00015735730575532447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0773_text_document -0.00016192787415292593 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0774_text_document -0.00015443514945271916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0775_text_document -0.00015290872574095856 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0776_text_document -0.0001586657525675075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0777_text_document -0.0001561292345081933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0778_text_document -0.0001584146414910674 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0779_text_document -0.00015282231142071527 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0780_text_document -0.0001561252202711004 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0781_text_document -0.00015508367049496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0782_text_document -0.00015211947613405347 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0783_text_document -0.00014976529550875275 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0784_text_document -0.00015418186133444713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0785_text_document -0.00015777360151582686 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0786_text_document -0.000152640262498424 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0787_text_document -0.00015418142572863903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0788_text_document -0.00015502601134089746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0789_text_document -0.00015405733434421877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0790_text_document -0.00015484459497253604 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0791_text_document -0.0001541867208689297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0792_text_document -0.00015014404352940876 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0793_text_document -0.00015357544967633106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0794_text_document -0.00015037823631794736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0795_text_document -0.00015025795679285078 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0796_text_document -0.00014876992710553488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0797_text_document -0.00015032669711698612 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0798_text_document -0.00015596697517010466 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0799_text_document -0.00015498394440674378 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0800_text_document -0.00014757314272111684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0801_text_document -0.00014919071614611802 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0802_text_document -0.00014686280514246915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0803_text_document -0.00015882771228777683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0804_text_document -0.00014763597756322578 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0805_text_document -0.00014785441795725526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0806_text_document -0.00015313024795352964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0807_text_document -0.0001497627986113246 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0808_text_document -0.00014499607432690722 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0809_text_document -0.0001461719027401259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0810_text_document -0.00014839933441537366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0811_text_document -0.0001475840995029022 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0812_text_document -0.00015065512711375653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0813_text_document -0.00015285087358760883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0814_text_document -0.00014861957547794477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0815_text_document -0.00014996949492468605 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0816_text_document -0.0001472998668365096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0817_text_document -0.0001464012147691964 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0818_text_document -0.00015227635617231567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0819_text_document -0.0001491494017117428 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0820_text_document -0.00014464475787246092 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0821_text_document -0.00014410767861685618 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0822_text_document -0.000144919516791233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0823_text_document -0.00014507990635617585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0824_text_document -0.0001468797342896656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0825_text_document -0.0001422000420712919 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0826_text_document -0.00014228987139298954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0827_text_document -0.00014481016912090385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0828_text_document -0.000142802473797815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0829_text_document -0.00014812295450003065 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0830_text_document -0.00014697991622146685 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0831_text_document -0.000143946325289488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0832_text_document -0.0001418544716646782 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0833_text_document -0.00014706985092768576 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0834_text_document -0.0001411487598988699 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0835_text_document -0.0001583983550166893 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0836_text_document -0.00015370277071378533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0837_text_document -0.0001574284524004961 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0838_text_document -0.00016033599900258183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0839_text_document -0.00016159470012508268 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0840_text_document -0.00015624921021983388 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0841_text_document -0.0001603288323615303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0842_text_document -0.00016421653645625842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0843_text_document -0.00016136751182857813 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0844_text_document -0.0001644008542307843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0845_text_document -0.00016320230298972016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0846_text_document -0.00016176830866038722 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0847_text_document -0.00015883945834286212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0848_text_document -0.00015854734059433728 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0849_text_document -0.00015424048326372636 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0850_text_document -0.00015913631543321879 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0851_text_document -0.00016242367155204024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0852_text_document -0.00016352898883564303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0853_text_document -0.00016283852574114027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0854_text_document -0.0001597064012689706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0855_text_document -0.00015723207463854053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0856_text_document -0.00016082454091186785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0857_text_document -0.00015148430437371348 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0858_text_document -0.00015699196205345046 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0859_text_document -0.00016323993834433252 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0860_text_document -0.00015419189482936103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0861_text_document -0.00014984592429281824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0862_text_document -0.0001540327550705441 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0863_text_document -0.00015559458082419316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0864_text_document -0.00015809601003355687 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0865_text_document -0.00015561437781246056 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0866_text_document -0.00015650965510707114 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0867_text_document -0.00015654223175785975 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0868_text_document -0.00015966194232830576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0869_text_document -0.0001542791440813034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0870_text_document -0.00016358133853488976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0871_text_document -0.0001610108148402946 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0872_text_document -0.0001567861463301872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0873_text_document -0.00015916579076809533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0874_text_document -0.00015834187212170972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0875_text_document -0.00015492852942470005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0876_text_document -0.0001565761307746086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0877_text_document -0.00016111787860345758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0878_text_document -0.00015262185821473176 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0879_text_document -0.00015609313599061615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0880_text_document -0.00015265109415151545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0881_text_document -0.00015596676711588585 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0882_text_document -0.00015602244000618423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0883_text_document -0.00015533087814847594 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0884_text_document -0.000148761688602713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0885_text_document -0.00015124065708812265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0886_text_document -0.00015177148904071277 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0887_text_document -0.00015551510213818192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0888_text_document -0.00015328016792414618 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0889_text_document -0.00014826652573194586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0890_text_document -0.00015618973632950672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0891_text_document -0.00016465597460827412 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0892_text_document -0.00017729797829003265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0893_text_document -0.00017645710877786075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0894_text_document -0.000173993320599559 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0895_text_document -0.0001752697954262395 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0896_text_document -0.00017545831920313468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0897_text_document -0.00017512052874093406 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0898_text_document -0.00017596295211949001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0899_text_document -0.0001763343681416489 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0900_text_document -0.00016737628055788186 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0901_text_document -0.00017659674006013248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0902_text_document -0.00017521085067973818 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0903_text_document -0.00018110203496350606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0904_text_document -0.00016887408015540739 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0905_text_document -0.0001730418383091983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0906_text_document -0.00017084812178309202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0907_text_document -0.00016928946570955264 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0908_text_document -0.00017272373105947043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0909_text_document -0.00016793546933797045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0910_text_document -0.00016510473373737477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0911_text_document -0.0001656625036518595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0912_text_document -0.00016849674877913583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0913_text_document -0.00017492155042464418 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0914_text_document -0.00017092357710033054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0915_text_document -0.00016970730743877006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0916_text_document -0.00016573665091766286 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0917_text_document -0.00016358480536479716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0918_text_document -0.0001653802811890403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0919_text_document -0.00017231807148475074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0920_text_document -0.00017361608596973323 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0921_text_document -0.00017404933358323055 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0922_text_document -0.00016371945617952907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0923_text_document -0.00017000836658266155 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0924_text_document -0.00017142976487027857 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0925_text_document -0.00017006281434704977 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0926_text_document -0.0001751965302313473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0927_text_document -0.00016954848753554936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0928_text_document -0.0001683555446267139 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0929_text_document -0.00016921278107076727 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0930_text_document -0.00016808682594394623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0931_text_document -0.00017711704047105475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0932_text_document -0.0001675247295876393 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0933_text_document -0.00017061773073498863 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0934_text_document -0.0001644856648306077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0935_text_document -0.00016530682645009105 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0936_text_document -0.00016993430076157017 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0937_text_document -0.00016716870217360928 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0938_text_document -0.0001672477045314564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0939_text_document -0.00016150529456268964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0940_text_document -0.0001642955368396883 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0941_text_document -0.0001650135010986092 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0942_text_document -0.0001719916971031507 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0943_text_document -0.0001663860254017646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0944_text_document -0.00016810785027934324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0945_text_document -0.00016663511368772123 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0946_text_document -0.00017120237493641126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0947_text_document -0.0001651698100366788 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0948_text_document -0.00016069571413445028 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0949_text_document -0.0001631772602215936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0950_text_document -0.00016994484266892867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0951_text_document -0.00016821930169126347 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0952_text_document -0.0001680542144940534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0953_text_document -0.00015807234911071054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0954_text_document -0.00016287290799651364 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0955_text_document -0.00016674360421415713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0956_text_document -0.0001663549971877126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0957_text_document -0.0001699417467826641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0958_text_document -0.0001661066433849769 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0959_text_document -0.00016736976350010906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0960_text_document -0.00016160049405253383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0961_text_document -0.0001625500850979611 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0962_text_document -0.00016172349111618741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0963_text_document -0.00016041582790085466 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0964_text_document -0.00016369413378455798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0965_text_document -0.00016245798272839223 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0966_text_document -0.00016458727969573578 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0967_text_document -0.0001618972714257936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0968_text_document -0.00016149423535800886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0969_text_document -0.00015886933917368354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0970_text_document -0.00015721961433801126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0971_text_document -0.00015609496997744904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0972_text_document -0.0001608435755282705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0973_text_document -0.00015730100598754584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0974_text_document -0.00015955845719642757 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0975_text_document -0.00015469663090901824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0976_text_document -0.00015812452037199733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0977_text_document -0.00015443940925795885 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0978_text_document -0.00015678701926941855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0979_text_document -0.00015787925332384637 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0980_text_document -0.00015669644312439214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0981_text_document -0.00015342587917756964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0982_text_document -0.00015642024238741553 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0983_text_document -0.0001540823378708023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0984_text_document -0.00015238224416999995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0985_text_document -0.0001522695061784323 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0986_text_document -0.00020085620305657233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0987_text_document -0.00014698197479826313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0988_text_document -0.00014796924883111914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0989_text_document -0.0001483800966807953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0990_text_document -0.00014550940307048242 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0991_text_document -0.00015052597307667803 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0992_text_document -0.00014866583878918362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0993_text_document -0.00014440801314961302 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0994_text_document -0.00014295564464645108 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0995_text_document -0.00014903049761507035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0996_text_document -0.00014820091066353183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0997_text_document -0.0001429454882440627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0998_text_document -0.00015048550764172483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-0999_text_document -0.0001430543312039796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1000_text_document -0.00014661342883839465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1001_text_document -0.00014721354013103223 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1002_text_document -0.00014780017824708586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1003_text_document -0.0001463184859455721 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1004_text_document -0.00014654870719379106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1005_text_document -0.00020943212095457075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1006_text_document -0.00021205821955900777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1007_text_document -0.00014176730212983274 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1008_text_document -0.00014026276433980122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1009_text_document -0.00013570196535880505 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1010_text_document -0.00014776685378575983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1011_text_document -0.00014138218982193943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1012_text_document -0.0001412602382122253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1013_text_document -0.00013944232659104602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1014_text_document -0.00014570617769030735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1015_text_document -0.00014233071172042007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1016_text_document -0.00014016762901851798 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1017_text_document -0.0001434413757259645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1018_text_document -0.00014003324697133565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1019_text_document -0.00014567282904236987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1020_text_document -0.00013992559507863123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1021_text_document -0.00021096883039305026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1022_text_document -0.00014274603730164107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1023_text_document -0.00013914595792215918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1024_text_document -0.00013666688380542608 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1025_text_document -0.00014001152690065646 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1026_text_document -0.00021392615254787925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1027_text_document -0.00014251166508793392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1028_text_document -0.00013886942449587415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1029_text_document -0.0002078004025575127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1030_text_document -0.00020928673622040174 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1031_text_document -0.00020558733131260538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1032_text_document -0.0002036663760886078 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1033_text_document -0.00014592860566679667 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1034_text_document -0.00014346325128200297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1035_text_document -0.00014068142446497316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1036_text_document -0.000142996292961803 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1037_text_document -0.00020633185839414136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1038_text_document -0.00013684538988274547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1039_text_document -0.0002033768324865864 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1040_text_document -0.000200593087523188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1041_text_document -0.0002297294147093001 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1042_text_document -0.00022971372080690233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1043_text_document -0.00023092966691083417 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1044_text_document -0.00015159247973379415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1045_text_document -0.00015257723761865372 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1046_text_document -0.00015750287090187065 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1047_text_document -0.00015557071949799488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1048_text_document -0.00015138603787345713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1049_text_document -0.00014966823068820163 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1050_text_document -0.00015481393029806212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1051_text_document -0.0001521335747073047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1052_text_document -0.00015447866363472483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1053_text_document -0.0001564823000495303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1054_text_document -0.00015484698673224505 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1055_text_document -0.00022305811126444646 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1056_text_document -0.00015308102523761935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1057_text_document -0.00022494528198789627 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1058_text_document -0.0002206911435725598 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1059_text_document -0.00021440132246946592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1060_text_document -0.00014934935094772055 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1061_text_document -0.00015275047150828305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1062_text_document -0.00021692931968428998 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1063_text_document -0.00023057843831795596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1064_text_document -0.00022061661869945533 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1065_text_document -0.0001475889972917192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1066_text_document -0.00014965255899799802 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1067_text_document -0.000146325773766483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1068_text_document -0.00021849119850040293 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1069_text_document -0.00021649545481859658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1070_text_document -0.00014463616989778393 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1071_text_document -0.00014301572221485565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1072_text_document -0.00014804643324427358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1073_text_document -0.0002143783669071859 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1074_text_document -0.0001479303814401362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1075_text_document -0.00015068744684349907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1076_text_document -0.00021658806091136903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1077_text_document -0.00021333945668012075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1078_text_document -0.000142221472149436 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1079_text_document -0.0002158096794842747 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1080_text_document -0.00021541031163695796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1081_text_document -0.0002160301031804424 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1082_text_document -0.00014484879119054217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1083_text_document -0.00014717950537309672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1084_text_document -0.00021016132927298846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1085_text_document -0.00021433713539833563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1086_text_document -0.0001438233936284062 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1087_text_document -0.0001447086593934949 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1088_text_document -0.00021440017582664183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1089_text_document -0.00020841624205804798 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1090_text_document -0.000213227136771408 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1091_text_document -0.00020931414236598925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1092_text_document -0.0002134545412666026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1093_text_document -0.0002126803251195216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1094_text_document -0.00014114550507201583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1095_text_document -0.00016444080384922814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1096_text_document -0.0001542515002652382 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1097_text_document -0.0001608177523717217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1098_text_document -0.0001577693965006662 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1099_text_document -0.0001615213258436368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1100_text_document -0.00014975169893108998 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1101_text_document -0.00015902857074290308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1102_text_document -0.00015523901418979132 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1103_text_document -0.00015842052994374488 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1104_text_document -0.0001543439686424067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1105_text_document -0.0001559141331005536 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1106_text_document -0.0001558557495821586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1107_text_document -0.00016108187362389814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1108_text_document -0.0001605357063724452 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1109_text_document -0.0001588416921491903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1110_text_document -0.00015452564563384654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1111_text_document -0.0001575925464658241 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1112_text_document -0.000155416389913229 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1113_text_document -0.00015834897089216795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1114_text_document -0.00015376802717866433 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1115_text_document -0.00015257616131444455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1116_text_document -0.00015333466381495513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1117_text_document -0.00015356006723825613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1118_text_document -0.00015392513748333956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1119_text_document -0.00015808193589371923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1120_text_document -0.00015572715307115401 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1121_text_document -0.00015677288071421776 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1123_text_document -0.00015564703516755468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1124_text_document -0.00015473730933423342 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1125_text_document -0.00015227152970932222 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1126_text_document -0.00015062363935408713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1127_text_document -0.0001608838990519831 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1128_text_document -0.00016058746991656767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1129_text_document -0.00015232158785053588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1130_text_document -0.00015216796930278597 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1131_text_document -0.00015531087359959403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1132_text_document -0.00017455174602057423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1133_text_document -0.00015220395996782025 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1134_text_document -0.00022536045257736233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1135_text_document -0.00023391977994072452 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1136_text_document -0.00022316737354122904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1137_text_document -0.00023097409031198833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1138_text_document -0.0001536444602488289 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1139_text_document -0.00015290857223001657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1140_text_document -0.00015053717764782956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1141_text_document -0.0001487906308449292 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1142_text_document -0.00022796481136694752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1143_text_document -0.00022388054021300896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1144_text_document -0.00015633876287631285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1146_text_document -0.00015683128496399404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1147_text_document -0.0001498588984354396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1148_text_document -0.00015466674094651695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1149_text_document -0.00015104328866230663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1150_text_document -0.0001510288850415886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1151_text_document -0.00015453329995596143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1152_text_document -0.0001717890160140908 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1153_text_document -0.00016303689223488152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1154_text_document -0.00017438742884609578 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1155_text_document -0.00017195307231868866 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1156_text_document -0.00016630614911747752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1157_text_document -0.0001738954845222655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1158_text_document -0.00016759158755171884 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1159_text_document -0.00017061259922452842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1160_text_document -0.00017196072417278202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1161_text_document -0.00016824585118656202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1162_text_document -0.00016301309236242047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1163_text_document -0.0001718575393991296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1164_text_document -0.00017003663826341565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1165_text_document -0.00017018328983305946 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1166_text_document -0.00017218141861091656 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1167_text_document -0.00016559619112054818 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1168_text_document -0.00016284882257395627 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1169_text_document -0.0001617104078870124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1170_text_document -0.00016849349395228177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1171_text_document -0.00016378319727916067 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1172_text_document -0.00017114019486042634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1173_text_document -0.0001726823065513329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1174_text_document -0.00016244897469644304 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1175_text_document -0.0001613681046473606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1176_text_document -0.00018118661924575096 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1177_text_document -0.00016563345750593493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1178_text_document -0.00016790014898759615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1179_text_document -0.0001629142142864177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1180_text_document -0.00016191717527939525 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1181_text_document -0.0001671004065869619 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1182_text_document -0.0001675370141650324 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1183_text_document -0.00016799445480682778 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1184_text_document -0.0001719736620354862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1185_text_document -0.00016261057260474936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1186_text_document -0.00015865991174764644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1187_text_document -0.00015739800441831657 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1188_text_document -0.00016171134746282626 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1189_text_document -0.00016720238820009615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1190_text_document -0.00016497201020069133 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1191_text_document -0.00016081080933342493 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1192_text_document -0.0001598451415954535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1193_text_document -0.00016189725587725768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1194_text_document -0.00015376149407875128 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1195_text_document -0.00015923032632387212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1196_text_document -0.000161420662154024 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1197_text_document -0.00015926844960634996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1198_text_document -0.000156372807999939 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1199_text_document -0.00016050285429044874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1200_text_document -0.00015617925982472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1201_text_document -0.00016514079794945202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1202_text_document -0.00016522274070820443 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1203_text_document -0.0001597381170738336 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1204_text_document -0.0001616744058690789 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1205_text_document -0.00016029435854255644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1206_text_document -0.0001600416279503584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1207_text_document -0.0001607379715998696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1208_text_document -0.0001593514911283079 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1209_text_document -0.00015864317782095664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1210_text_document -0.00015911735436385907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1211_text_document -0.0001556275795066712 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1212_text_document -0.0001656764173702947 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1213_text_document -0.00015679155524627255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1214_text_document -0.00016376988600479205 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1215_text_document -0.0001581538165285075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1216_text_document -0.0001610240227045592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1217_text_document -0.00015776131940645536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1218_text_document -0.00015818231748846595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1219_text_document -0.0001625550897521123 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1220_text_document -0.0001547371099180901 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1221_text_document -0.00015414283944531357 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1222_text_document -0.00016266088273096592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1223_text_document -0.00016083169545961368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1224_text_document -0.0001573027086756309 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1225_text_document -0.00015728313997935927 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1226_text_document -0.00016781226249248295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1227_text_document -0.00014976228995207784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1228_text_document -0.00015444629923379175 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1229_text_document -0.00015203154472094758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1230_text_document -0.00015416974359531256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1231_text_document -0.00015545110214308707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1232_text_document -0.0001510309557116906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1233_text_document -0.000150151986610048 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1234_text_document -0.00014833490597173326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1235_text_document -0.00014730918386476007 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1236_text_document -0.00014903663558472915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1237_text_document -0.00014834903218682616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1238_text_document -0.00015322537809196756 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1239_text_document -0.0001511230642513134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1240_text_document -0.00015357591909403477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1241_text_document -0.00015295542934724653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1242_text_document -0.00015013958035919124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1243_text_document -0.00015023610122778707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1244_text_document -0.00014784318253583398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1245_text_document -0.00015065966876706016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1246_text_document -0.0001481405433493943 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1247_text_document -0.00014721741369089534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1248_text_document -0.00014730057861393202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1249_text_document -0.00015235999841072513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1250_text_document -0.00014541040677624616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1251_text_document -0.00014639042630648248 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1252_text_document -0.00015068532335773535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1253_text_document -0.00015516053357170532 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1254_text_document -0.00014515004876336832 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1255_text_document -0.0001488593805475465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1256_text_document -0.0001506759742452044 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1257_text_document -0.0001429840653957083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1258_text_document -0.00014437998012654534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1259_text_document -0.0001428860592717282 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1260_text_document -0.0001475220383855572 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1261_text_document -0.00014640582972274082 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1262_text_document -0.0001505350968588391 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1263_text_document -0.00014784485165882563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1264_text_document -0.00014770697193146622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1265_text_document -0.0001433464625266231 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1266_text_document -0.00014139730694769496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1267_text_document -0.00014139435371307747 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1268_text_document -0.00014164383589527758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1269_text_document -0.0001429075740030123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1270_text_document -0.00014605872692153072 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1271_text_document -0.0001424796215298057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1272_text_document -0.00014112515203848743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1273_text_document -0.00014039188160335826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1274_text_document -0.00014502736267043328 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1275_text_document -0.00014184146815260007 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1276_text_document -0.0001453216584479987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1277_text_document -0.00014226985746562565 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1278_text_document -0.00013903471234323833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1279_text_document -0.00014633669945119654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1280_text_document -0.00015567823959834718 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1281_text_document -0.00016711998145328748 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1282_text_document -0.00016716820782888765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1283_text_document -0.00016788189624042867 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1284_text_document -0.00016762149528397544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1285_text_document -0.00016394982452183396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1286_text_document -0.00017499487929449305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1287_text_document -0.00017285598246362648 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1288_text_document -0.0001813127546456402 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1289_text_document -0.00016923644001919636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1290_text_document -0.00016671545149204298 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1291_text_document -0.0001691584149978932 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1292_text_document -0.00016279240063910965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1293_text_document -0.00016581675179191334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1294_text_document -0.00016709742151486606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1295_text_document -0.00016462921631835026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1296_text_document -0.0001635773235573904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1297_text_document -0.0001629499633321397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1298_text_document -0.00016244603775076793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1299_text_document -0.00016565874682941692 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1300_text_document -0.00016704769334813707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1301_text_document -0.00016527793060668047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1302_text_document -0.0001614670182628741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1303_text_document -0.00016090321773766912 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1304_text_document -0.00016205158644923216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1305_text_document -0.00016115649647745916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1306_text_document -0.00016750884342636079 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1307_text_document -0.0001593023982303325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1308_text_document -0.00015894512446540672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1309_text_document -0.00016391499925658774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1310_text_document -0.0001615310219600013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1311_text_document -0.00016109142610140696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1312_text_document -0.0001622135071747606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1313_text_document -0.00016686311075489617 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1314_text_document -0.00016322992039795453 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1315_text_document -0.00015923727775344227 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1316_text_document -0.00016528070219491 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1317_text_document -0.00016089805290891765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1318_text_document -0.00016142731643379644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1319_text_document -0.00016164621217780662 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1320_text_document -0.00015738061325748116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1321_text_document -0.0001591233926254462 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1322_text_document -0.00016649327648776514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1323_text_document -0.00016299925243783037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1324_text_document -0.00016490993699004063 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1325_text_document -0.0001589061309585213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1326_text_document -0.00015701373074415468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1327_text_document -0.00015755460137450403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1328_text_document -0.00016368403834230255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1329_text_document -0.0001619141257919363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1330_text_document -0.0002274793692927606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1331_text_document -0.0001567633247814788 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1332_text_document -0.00022905033511751312 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1333_text_document -0.0001548301064518758 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1334_text_document -0.000226605319945327 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1335_text_document -0.00022667037674726058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1336_text_document -0.00022923961805784498 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1337_text_document -0.00014906828549341607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1338_text_document -0.00015829222539969273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1339_text_document -0.0001509036911919305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1340_text_document -0.00022536653378252486 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1341_text_document -0.00015104016760222197 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1342_text_document -0.00015099364342110257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1343_text_document -0.00022777331115603203 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1344_text_document -0.00021580582739619934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1345_text_document -0.0001492017484493636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1346_text_document -0.0002232038326367584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1347_text_document -0.0002173110715340058 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1348_text_document -0.0002106853410947563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1349_text_document -0.00021523392953900664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1350_text_document -0.00021996424976477582 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1351_text_document -0.00021735745725911482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1352_text_document -0.00014743618479981591 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1353_text_document -0.00021587099328468655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1354_text_document -0.00021669175360386172 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1355_text_document -0.00021667379282364665 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1356_text_document -0.0002192120523189847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1357_text_document -0.00021547193097844086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1358_text_document -0.00021621049112421326 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1359_text_document -0.00021196265801039842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1360_text_document -0.00021115416894129982 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1361_text_document -0.00021548122875612305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1362_text_document -0.0002167839127379268 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1363_text_document -0.00021388435981092266 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1364_text_document -0.00021247309275187394 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1365_text_document -0.00020865156988970925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1366_text_document -0.00021232420243985875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1367_text_document -0.00020288941772275403 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1368_text_document -0.00020534370920083462 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1369_text_document -0.00014906807620518648 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1370_text_document -0.0002110153701227056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1371_text_document -0.00020709542453451886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1372_text_document -0.00020465988557797482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1373_text_document -0.000195974694790701 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1374_text_document -0.0002006410964660873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1375_text_document -0.00020083864604468702 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1376_text_document -0.00020640909562295756 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1377_text_document -0.0002009390668809768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1378_text_document -0.00019660322090934407 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1379_text_document -0.0002031382964736789 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1380_text_document -0.00019629671755665872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1381_text_document -0.00019754174238439996 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1382_text_document -0.0002056909946356413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1383_text_document -0.0001979138566098626 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1384_text_document -0.0001932131948461709 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1385_text_document -0.00020416546879013 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1386_text_document -0.00020460391232945065 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1387_text_document -0.00019389888059130955 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1388_text_document -0.00019783854863351214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1389_text_document -0.000200961415063147 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1390_text_document -0.0001956818423121531 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1391_text_document -0.00020637040765714317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1392_text_document -0.00020119793791085526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1393_text_document -0.0002019159752232148 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1394_text_document -0.00020709690510066213 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1395_text_document -0.00019733093804912572 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1396_text_document -0.0001880608678579731 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1397_text_document -0.0002016375431479316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1398_text_document -0.00019179791527764437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1399_text_document -0.00018506553224762644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1400_text_document -0.00019958850500821938 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1401_text_document -0.0001963985599733761 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1402_text_document -0.00019686962952391687 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1403_text_document -0.00019466431453041557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1404_text_document -0.00019423474723069192 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1405_text_document -0.00018645004940802463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1406_text_document -0.0001957563417646353 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1407_text_document -0.00019567310057973193 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1408_text_document -0.00019820964060443815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1409_text_document -0.0001922448994056278 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1410_text_document -0.00018809380854194413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1411_text_document -0.00019183325882742152 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1412_text_document -0.00018979529371331087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1413_text_document -0.00018194205843788177 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1414_text_document -0.000185326810832552 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1415_text_document -0.00018768967790659056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1416_text_document -8.238038512980449e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1417_text_document -7.037628876350043e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1418_text_document -7.031761895460266e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1419_text_document -6.852561440270574e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1420_text_document -7.163053214543125e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1421_text_document -6.965337217248569e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1422_text_document -7.217926984135532e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1423_text_document -6.887448282655111e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1424_text_document -7.065036798913058e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1425_text_document -7.00112034634854e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1426_text_document -7.22545398101735e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1427_text_document -7.06581990215903e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1428_text_document -8.704526082745054e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1429_text_document -7.647895905010174e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1430_text_document -6.667599117230014e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1431_text_document -9.175439580281598e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1432_text_document -6.936196694178977e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1433_text_document -6.994579003243415e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1434_text_document -6.85501978720171e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1435_text_document -6.733846418731063e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1436_text_document -6.760126406073544e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1437_text_document -6.979877393600358e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1438_text_document -6.866399513844505e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1439_text_document -6.599386727589954e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1440_text_document -7.022110351565428e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1441_text_document -6.889110495186351e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1442_text_document -7.249533430962498e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1443_text_document -7.061312850517899e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1444_text_document -7.026495137417699e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1445_text_document -7.053710208774785e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1446_text_document -7.079302654666706e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1447_text_document -7.142821385554296e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1448_text_document -6.884074447800683e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1449_text_document -6.775299728680366e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1450_text_document -6.935640081273007e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1451_text_document -7.071164131398859e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1452_text_document -7.251697614402021e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1453_text_document -0.00012391766284956256 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1454_text_document -6.876051279861284e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1455_text_document -7.000563116437178e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1456_text_document -7.021430732464126e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1457_text_document -7.320305084935923e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1458_text_document -6.93854906426365e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1459_text_document -7.268065730933861e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1460_text_document -0.00015861223006440801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1461_text_document -0.00018316051674097559 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1462_text_document -0.00018895109829526356 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1463_text_document -0.0001814266629730391 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1464_text_document -0.00019157095403912478 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1465_text_document -0.0001816102282477865 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1466_text_document -0.00018024098024327291 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1467_text_document -0.00018881439877582162 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1468_text_document -0.00018219752655961166 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1469_text_document -0.00018211134990984607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1470_text_document -0.00018153994220173833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1471_text_document -0.00018033319169939537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1472_text_document -0.00017832883141386175 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1473_text_document -0.00018126942359083546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1474_text_document -0.0001780495979719729 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1475_text_document -0.00017938729404825616 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1476_text_document -0.00017959022144586935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1477_text_document -0.00017639465376427234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1478_text_document -0.00017486395149030808 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1479_text_document -0.0001808240528085561 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1480_text_document -0.00017274905690967175 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1481_text_document -0.00017486036693577152 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1482_text_document -0.00017698269026135074 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1483_text_document -0.00016841434450034874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1484_text_document -0.00016863721573351308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1485_text_document -0.00017483420640067329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1486_text_document -0.0001732828833128397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1487_text_document -0.00017295290907496933 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1488_text_document -0.00017133232046800912 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1489_text_document -0.00016977827391836668 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1490_text_document -0.0001740935435005184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1491_text_document -3.6341265268857285e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/cc_en_tail-1492_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_peS2o.txt b/ALCF/data-lists/sunspot/data_file_list_peS2o.txt deleted file mode 100644 index dff1249cf5..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_peS2o.txt +++ /dev/null @@ -1,26 +0,0 @@ -0.001258392312111664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0000_text_document -0.0012624388132337304 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0001_text_document -0.0012626279540316713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0002_text_document -0.0012611745647392154 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0003_text_document -0.001261744080471196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0004_text_document -0.0012540395730196387 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0005_text_document -0.0012674794155994474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0006_text_document -0.0015181894311854882 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0007_text_document -0.003353431842116585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0008_text_document -0.0033457523561418873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0009_text_document -0.0035360737173355393 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0010_text_document -0.0035328021064248917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0011_text_document -0.003537853575841124 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0012_text_document -0.0033495442948704096 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0013_text_document -0.0033515559656802623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0014_text_document -0.0035292441286648877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0015_text_document -0.0033472466636064995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0016_text_document -0.003347244907254542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0017_text_document -0.003361109976122766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0018_text_document -0.003527949940706846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0019_text_document -0.0033629959027952918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0020_text_document -0.003534363177394335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0021_text_document -0.0033534091101340303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0022_text_document -0.003362863367631581 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0023_text_document -0.0035295619700253587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0024_text_document -0.00039091085286111746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/pes2o_v2-0025_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_reddit.txt b/ALCF/data-lists/sunspot/data_file_list_reddit.txt deleted file mode 100644 index 644d717021..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_reddit.txt +++ /dev/null @@ -1,78 +0,0 @@ -0.0011541728836721287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0000_text_document -0.001194214065746794 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0001_text_document -0.0012074645870644872 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0002_text_document -0.0011669676257397446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0003_text_document -0.0011730429598479002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0004_text_document -0.0011829645036126979 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0005_text_document -0.0011882713613863669 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0006_text_document -0.0011409601969657492 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0007_text_document -0.0011370779956530767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0008_text_document -0.0011566277261230336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0009_text_document -0.0011377530435595722 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0010_text_document -0.0011422212106036002 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0011_text_document -0.0011359648236479313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0012_text_document -0.0011422819300771266 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0013_text_document -0.0011613422088431185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0014_text_document 
-0.001149222546698594 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0015_text_document -0.0011520428345756523 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0016_text_document -0.0011408015787470732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0017_text_document -0.001145413257179254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0018_text_document -0.0011543340882314167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0019_text_document -0.0011397083750923865 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0020_text_document -0.001163788652940794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0021_text_document -0.0011441686420414542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0022_text_document -0.0011429505546541332 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0023_text_document -0.00117471168582067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0024_text_document -0.0011456585273133617 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0025_text_document -0.0011738665177335344 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0026_text_document -0.0011646176186295262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0027_text_document -0.0011629386473461694 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0028_text_document -0.0011421097688385183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0029_text_document -0.0011459477142114253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0030_text_document -0.0011756431096178663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0031_text_document -0.0011482680809577622 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0032_text_document -0.0011445710176100962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0033_text_document -0.001142534803152167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0034_text_document -0.0011422043218494292 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0035_text_document -0.0011678344410475695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0036_text_document -0.0011562147470581413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0037_text_document -0.0011468122833549663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0038_text_document -0.0011532706690152916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0039_text_document -0.0011292882378850658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0040_text_document -0.0011300177059999066 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0041_text_document 
-0.0011287171558685828 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0042_text_document -0.0011295841562723513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0043_text_document -0.0011279954847952854 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0044_text_document -0.0011283817109930107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0045_text_document -0.001128286479630481 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0046_text_document -0.0011276081740353844 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0047_text_document -0.0011268985652144736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0048_text_document -0.0011261863340342809 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0049_text_document -0.0011248860240274238 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0050_text_document -0.0011253794147731645 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0051_text_document -0.0011242857628861397 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0052_text_document -0.0011228472942657042 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0053_text_document -0.00112269047698053 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0054_text_document -0.0011234938283922757 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0055_text_document -0.0011230927745087202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0056_text_document -0.0011247141749506225 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0057_text_document -0.0011241207913742775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0058_text_document -0.0011220187728072355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0059_text_document -0.0011227320045060405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0060_text_document -0.0011217839100677303 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0061_text_document -0.0011210875921360617 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0062_text_document -0.0011221651716921029 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0063_text_document -0.0011248396609954611 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0064_text_document -0.0012275703827670792 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0065_text_document -0.0011056036331311184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0066_text_document -0.001107902944963784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0067_text_document -0.0010968114497626087 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0068_text_document 
-0.0011027306309299484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0069_text_document -0.0010853624892717291 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0070_text_document -0.0011051858405711837 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0071_text_document -0.0010808015771539223 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0072_text_document -0.0010855928806935572 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0073_text_document -0.0010442141182932184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0074_text_document -0.0011804749731815143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0075_text_document -0.0011670805522744465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0076_text_document -0.0008366052616529944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/reddit-v5-dedupe-pii-nsfw-toxic-0077_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_stack.txt b/ALCF/data-lists/sunspot/data_file_list_stack.txt deleted file mode 100644 index cbaf3cedde..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_stack.txt +++ /dev/null @@ -1,149 +0,0 @@ -0.0010659025986423038 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0000_text_document -0.001089820700651703 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0001_text_document -0.0010894690468995446 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0002_text_document -0.0010893103153582777 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0003_text_document -0.001092968830569157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0004_text_document -0.0010927822953669655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0005_text_document -0.0010948538530423937 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0006_text_document -0.0010914947459084862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0007_text_document -0.0011531345061061805 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0008_text_document -0.0009273732822541429 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0009_text_document -0.0009298094568342398 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0010_text_document -0.0009269985376241653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0011_text_document -0.0009299414467502114 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0012_text_document -0.0009281292496915194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0013_text_document -0.0009300797305068478 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0014_text_document -0.0009575658299825903 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0015_text_document -0.001124706364232967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0016_text_document -0.0011201757618238954 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0017_text_document -0.001126433347327465 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0018_text_document -0.0011299837668245817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0019_text_document -0.001127851225271931 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0020_text_document -0.0011265589698280143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0021_text_document -0.0011227970380980016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0022_text_document -0.001131300918127052 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0023_text_document -0.00112588381546472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0024_text_document -0.0011692456277892793 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0025_text_document -0.0011330744556493294 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0026_text_document -0.001041946972706877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0027_text_document -0.0010493121881969634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0028_text_document -0.0009912570469629923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0029_text_document -0.0012717963903526445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0030_text_document -0.0014051955824199262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0031_text_document -0.0011248653480876683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0032_text_document -0.0015096975127629315 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0033_text_document -0.001056885183600456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0034_text_document -0.0010523010671513575 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0035_text_document -0.001055691055690255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0036_text_document -0.0012434898779499373 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0037_text_document -0.0009615620261395163 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0038_text_document -0.0011689290747945063 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0039_text_document -0.0012610288149681123 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0040_text_document -0.0012183045747008489 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0041_text_document -0.0012232394891956877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0042_text_document -0.0012316862572191265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0043_text_document -0.001171858466558184 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0044_text_document -0.0009288715082322405 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0045_text_document -0.0009096255640660796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0046_text_document -0.0009098493089021282 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0047_text_document -0.000908428701094243 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0048_text_document -0.0009115948236386599 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0049_text_document -0.0009109761446993803 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0050_text_document -0.0009097199236925156 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0051_text_document -0.0009103946801923116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0052_text_document -0.0009109038594994949 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0053_text_document -0.0009098133932243314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0054_text_document -0.0009111744494635876 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0055_text_document -0.0008961257268851344 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0056_text_document -0.0008499219991848833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0057_text_document -0.000848817192629684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0058_text_document -0.0008469931268429987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0059_text_document -0.0008487804660301039 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0060_text_document -0.0008535293627452302 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0061_text_document -0.0008508082359285502 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0062_text_document -0.000847764423021283 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0063_text_document -0.0008661814491784624 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0064_text_document -0.0012598427266996145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0065_text_document -0.0015411645064455006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0066_text_document -0.0015500690406153115 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0067_text_document -0.0010431702414192465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0068_text_document -0.0010103298065465376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0069_text_document -0.0009173697763272889 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0070_text_document -0.0009149081716719212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0071_text_document -0.0009223001515794829 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0072_text_document -0.0009231205497115238 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0073_text_document -0.0009205400022638854 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0074_text_document -0.000921891356231865 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0075_text_document -0.0009206550523916788 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0076_text_document -0.000919101114727538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0077_text_document -0.0009189314293443922 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0078_text_document -0.0009187845413397615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0079_text_document -0.0009212488966514148 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0080_text_document -0.0009193937503280587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0081_text_document -0.0013803871878583557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0082_text_document -0.0009950213666737198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0083_text_document -0.000927893134699511 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0084_text_document -0.0009256115426841411 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0085_text_document -0.0009245248815034989 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0086_text_document -0.0009239324963431647 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0087_text_document -0.00093017264782812 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0088_text_document -0.0009246774971430524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0089_text_document -0.0009246651817682976 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0090_text_document -0.0009220962135479767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0091_text_document -0.0009218191222144196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0092_text_document -0.0009271314108370893 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0093_text_document -0.0011393174361636815 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0094_text_document -0.0010056046636817732 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0095_text_document -0.000985188940051775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0096_text_document -0.0009834908338499898 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0097_text_document -0.0009841221104671695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0098_text_document -0.0009846688252964021 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0099_text_document -0.0009846837273836892 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0100_text_document -0.000983200779763785 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0101_text_document -0.000983626091844726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0102_text_document -0.0009227550215195058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0103_text_document -0.0008517634745985513 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0104_text_document -0.0009820984183696825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0105_text_document -0.001062956613371643 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0106_text_document -0.0009446580160861343 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0107_text_document -0.000849273787178016 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0108_text_document -0.0010838798124933814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0109_text_document -0.0016259767652594482 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0110_text_document -0.0009261166233974987 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0111_text_document -0.0013044836937627727 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0112_text_document -0.0017111272224419217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0113_text_document -0.0017274616815008634 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0114_text_document -0.0017204942871235126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0115_text_document -0.0017119592701771347 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0116_text_document -0.0016979912192342588 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0117_text_document -0.001701886248500233 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0118_text_document -0.0017227272126357288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0119_text_document -0.0017014517255794117 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0120_text_document -0.0016995002579026628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0121_text_document -0.0016958447424626011 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0122_text_document -0.0017111887981161064 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0123_text_document -0.0017172926007805738 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0124_text_document -0.0016938659465618113 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0125_text_document -0.0016877576226485259 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0126_text_document -0.0017144361080061983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0127_text_document -0.0017173753931755767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0128_text_document -0.001713308056226134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0129_text_document -0.0017126769067653286 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0130_text_document -0.0017129095633438736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0131_text_document -0.001704961253905759 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0132_text_document -0.0009282082505873367 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0133_text_document -0.0007973220067601047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0134_text_document -0.0008407445714413182 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0135_text_document -0.0008403726198530843 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0136_text_document -0.0008371632157580058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0137_text_document -0.0013060325919558903 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0138_text_document -0.0014100060700040244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0139_text_document -0.0008750222172256031 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0140_text_document -0.0016918433420911735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0141_text_document -0.001838605753011377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0142_text_document -0.0016004536814984726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0143_text_document -0.0011738110086663097 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0144_text_document -0.0011269892510041232 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0145_text_document -0.0011251329530758676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0146_text_document -0.0011788404279377853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0147_text_document -0.0007876495048700586 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/stack-v4-train-0148_text_document diff --git a/ALCF/data-lists/sunspot/data_file_list_wiki.txt b/ALCF/data-lists/sunspot/data_file_list_wiki.txt deleted file mode 100644 index 65169566eb..0000000000 --- a/ALCF/data-lists/sunspot/data_file_list_wiki.txt +++ /dev/null @@ -1,2 +0,0 @@ -0.0035577638528123345 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/en_simple_wiki_v0-0000_text_document -0.0018422361471876658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_Llama2Tokenizer/en_simple_wiki_v0-0001_text_document diff --git a/ALCF/data-lists/sunspot/falcon.txt b/ALCF/data-lists/sunspot/falcon.txt new file mode 100644 index 
0000000000..0b2fd6d43f --- /dev/null +++ b/ALCF/data-lists/sunspot/falcon.txt @@ -0,0 +1,501 @@ +0.0003547982093445404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0000_text_document +0.00035934014428504944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0001_text_document +0.00035707704501371544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0002_text_document +0.00035287930712815354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0003_text_document +0.00035977166728996823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0004_text_document +0.0003581675664109838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0005_text_document +0.0003548617059697185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0006_text_document +0.0003639582000286208 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0007_text_document +0.00035375839698688127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0008_text_document +0.0003743722020080678 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0009_text_document +0.0003530399715341242 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0010_text_document +0.00035511875882752406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0011_text_document +0.0003618733574783154 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0012_text_document +0.00035185243285420104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0013_text_document +0.0003541503739732106 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0014_text_document +0.0003631679485751914 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0015_text_document +0.00035748045578182274 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0016_text_document +0.0003606490690555877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0017_text_document +0.0003626383296610091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0018_text_document +0.00035442644361264756 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0019_text_document +0.00035978370170539796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0020_text_document +0.0003585562375341541 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0021_text_document +0.0003601958372888019 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0022_text_document +0.000350277765402227 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0023_text_document +0.0003616521184211704 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0024_text_document +0.0003620625543608188 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0025_text_document +0.0003560781983850704 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0026_text_document +0.0003553209610592676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0027_text_document +0.00035905348643915075 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0028_text_document +0.00034744258805696526 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0029_text_document +0.00035462784035661496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0030_text_document +0.00034768186175100895 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0031_text_document +0.0003568534635532736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0032_text_document +0.00035586511544371234 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0033_text_document +0.0003524567827568137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0034_text_document +0.0003512453770426313 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0035_text_document +0.0003591792726468799 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0036_text_document +0.0003514024529343127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0037_text_document +0.0003584880112586934 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0038_text_document +0.00035133552916418045 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0039_text_document +0.0003600811981350215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0040_text_document +0.0003571663974228119 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0041_text_document +0.00035768103378874214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0042_text_document +0.00035939205561113694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0043_text_document +0.00035186773916029825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0044_text_document +0.0003542829672490847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0045_text_document +0.0003592783642898726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0046_text_document +0.0003556367340099302 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0047_text_document +0.00035391392271377027 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0048_text_document +0.00035486725707484836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0049_text_document +0.00034866743396828035 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0050_text_document +0.0003517219808644735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0051_text_document +0.00034874458549673823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0052_text_document +0.000355773136961014 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0053_text_document +0.00035611750387841917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0054_text_document +0.00035305602013916315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0055_text_document +0.0003578207127071924 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0056_text_document +0.00035514635841943707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0057_text_document +0.00034816946212866206 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0058_text_document +0.0003512707269761496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0059_text_document +0.0003483392117980654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0060_text_document +0.0003572169607204321 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0061_text_document +0.00035139153281660794 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0062_text_document +0.00035536422129036537 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0063_text_document +0.000352017164107143 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0064_text_document +0.000351889550179365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0065_text_document +0.000358759689953589 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0066_text_document +0.0003569286079869268 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0067_text_document +0.0003657752958602099 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0068_text_document +0.00035396127934790697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0069_text_document +0.0003618565071224743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0070_text_document +0.00035146051531973204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0071_text_document +0.00036107135765783567 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0072_text_document +0.00035019554279994576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0073_text_document +0.00035567858879904983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0074_text_document +0.0003504753174793183 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0075_text_document +0.00035931140831329194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0076_text_document +0.0003502967866002823 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0077_text_document +0.0003532911801041972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0078_text_document +0.0003583543013070199 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0079_text_document +0.0003566243489931224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0080_text_document +0.0003468752314799221 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0081_text_document +0.0003597840618138091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0082_text_document +0.00035128822484768084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0083_text_document +0.00035889496943437507 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0084_text_document +0.000352400524650424 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0085_text_document +0.0003518689536768735 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0086_text_document +0.00035866864741303467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0087_text_document +0.0003454687659106334 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0088_text_document +0.00035348007259317576 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0089_text_document +0.0003539752270940644 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0090_text_document +0.00035146495994081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0091_text_document +0.00035397212846310423 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0092_text_document +0.00035208246467162587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0093_text_document +0.0003490843168676626 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0094_text_document +0.00035299633658644394 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0095_text_document +0.00034868327466167065 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0096_text_document +0.00035941351365601583 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0097_text_document +0.0003545343062735255 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0098_text_document +0.0003528956380445978 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0099_text_document +0.0003553355770443352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0100_text_document +0.0003644224004937743 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0101_text_document +0.00035234291036216907 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0102_text_document +0.0003596237469847771 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0103_text_document +0.0003531996065735989 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0104_text_document +0.0003547177054106099 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0105_text_document +0.0003575586499260483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0106_text_document +0.00035262635135283667 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0107_text_document +0.0003624191962188944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0108_text_document +0.0003488398052948616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0109_text_document +0.0003598294093147917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0110_text_document +0.00035583006534466323 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0111_text_document +0.00035403139653225103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0112_text_document +0.00036134702642187156 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0113_text_document +0.0003573689927162834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0114_text_document +0.0003577141131435527 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0115_text_document +0.00035208814419277406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0116_text_document +0.00035996720683665625 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0117_text_document +0.00035415304658912596 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0118_text_document +0.00036353353029443546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0119_text_document +0.0003537326003150983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0120_text_document +0.00036053976358299083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0121_text_document +0.000352380489373494 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0122_text_document +0.00036154661616900994 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0123_text_document +0.00035959332325963614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0124_text_document +0.0003597954667189692 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0125_text_document +0.0003563108270597542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0126_text_document +0.0003582891940460143 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0127_text_document +0.0003497728210484297 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0128_text_document +0.0003549834902179354 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0129_text_document +0.0003529828233484542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0130_text_document +0.00034627483903285777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0131_text_document +0.00035569006572589215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0132_text_document +0.00035449377946910314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0133_text_document +0.00035802844396194623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0134_text_document +0.0003617277809353208 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0135_text_document +0.00035034118898654814 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0136_text_document +0.000351091193908611 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0137_text_document +0.0003527914342210668 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0138_text_document +0.00035028288369781376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0139_text_document +0.00035775745592780506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0140_text_document +0.0003449630690661468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0141_text_document +0.0003583490698830361 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0142_text_document +0.0003476995746684122 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0143_text_document +0.0003535632505019212 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0144_text_document +0.00035640180641147417 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0145_text_document +0.000361731045691765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0146_text_document +0.0003534082129597368 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0147_text_document +0.0003550344149828664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0148_text_document +0.00035363002411364057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0149_text_document +0.0003537265579677396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0150_text_document +0.00034950531383577937 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0151_text_document +0.00035008511827347514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0152_text_document +0.00035594533400871325 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0153_text_document +0.00035266312861335946 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0154_text_document +0.00035280268794863923 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0155_text_document +0.0003565470391528536 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0156_text_document +0.0003588492322689137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0157_text_document +0.00035469909697832775 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0158_text_document +0.00034712082813410526 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0159_text_document +0.000348701157101807 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0160_text_document +0.0003500192014479944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0161_text_document +0.00035120560544669755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0162_text_document +0.00035403656850437445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0163_text_document +0.00035852376560749366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0164_text_document +0.0003534754068111774 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0165_text_document +0.00035591740046720765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0166_text_document +0.000348522354782563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0167_text_document +0.0003533533959664415 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0168_text_document +0.00035631425964030697 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0169_text_document +0.0003485886551574741 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0170_text_document +0.00035917652631065777 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0171_text_document +0.0003482975272111288 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0172_text_document +0.00035580661277480167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0173_text_document +0.0003492290722955348 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0174_text_document +0.00034989284450240613 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0175_text_document +0.0003545677216162781 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0176_text_document +0.00034622286859463484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0177_text_document +0.00036070626989861965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0178_text_document +0.00035518365036320786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0179_text_document +0.00035272907057848406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0180_text_document +0.0003547343638218734 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0181_text_document +0.0003496450144966242 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0182_text_document +0.0003537407829294287 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0183_text_document +0.0003489722653985685 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0184_text_document +0.00035057186899911295 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0185_text_document +0.0003507566548933051 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0186_text_document +0.00035630360179023747 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0187_text_document +0.00035631362503416367 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0188_text_document +0.0003490204248026821 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0189_text_document +0.00035761724058371226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0190_text_document +0.00035037664777467137 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0191_text_document +0.000353402110481068 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0192_text_document +0.00034524163568371745 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0193_text_document +0.00035528523728570974 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0194_text_document +0.00034784916132431703 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0195_text_document +0.00034928476408048925 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0196_text_document +0.00034989205973784984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0197_text_document +0.00034201664404094254 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0198_text_document +0.0003529676016338611 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0199_text_document +0.00034643433682346637 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0200_text_document +0.0003511666373001904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0201_text_document +0.00034828669066575333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0202_text_document +0.0003494625207264413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0203_text_document +0.0003458957535879216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0204_text_document +0.0003543020478990003 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0205_text_document +0.00034754384069014956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0206_text_document +0.0003598856392240133 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0207_text_document +0.0003503335458553846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0208_text_document +0.00035919595619778716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0209_text_document +0.00035767737970754404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0210_text_document +0.00035197152783998165 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0211_text_document +0.0003549609834422404 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0212_text_document +0.0003568184100569753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0213_text_document +0.0003512652818651935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0214_text_document +0.00035912648958665754 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0215_text_document +0.00034764526964056546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0216_text_document +0.000352439784960359 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0217_text_document +0.00035295886560764226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0218_text_document +0.0003518132693658672 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0219_text_document +0.00035589987915465713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0220_text_document +0.00034923863317385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0221_text_document +0.0003457987267929692 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0222_text_document +0.0003560928663480501 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0223_text_document +0.0003529603811204932 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0224_text_document +0.0003524438555443043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0225_text_document +0.0003438847030263783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0226_text_document +0.00035981978898461613 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0227_text_document +0.0003446342778566972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0228_text_document +0.00035529584995236537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0229_text_document +0.00034855740895831116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0230_text_document +0.00034932634912802544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0231_text_document +0.00035805518303064666 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0232_text_document +0.0003497941877073061 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0233_text_document +0.00035774398685405447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0234_text_document +0.0003560421780316607 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0235_text_document +0.0003508844468369392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0236_text_document +0.00035731928892270107 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0237_text_document +0.0003557884626314314 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0238_text_document +0.00034992996760289355 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0239_text_document +0.000360752554360921 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0240_text_document +0.0003452321668708545 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0241_text_document +0.0003591745226131023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0242_text_document +0.00035256981433229084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0243_text_document +0.00035378123159712034 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0244_text_document +0.000350464354895999 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0245_text_document +0.00035074625557389677 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0246_text_document +0.00035025894701994667 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0247_text_document +0.00035437902514857614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0248_text_document +0.0003514684519732232 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0249_text_document +0.00035449717909633905 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0250_text_document +0.0003436816402714221 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0251_text_document +0.00035139158071782116 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0252_text_document +0.0003509424079843335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0253_text_document +0.000343894618577506 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0254_text_document +0.0003500789770661659 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0255_text_document +0.0003407788080680086 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0256_text_document +0.0003581908175239701 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0257_text_document +0.0003465541618780918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0258_text_document +0.00034600228792437736 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0259_text_document +0.00034416738982773204 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0260_text_document +0.0003519900340150641 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0261_text_document +0.000343369616864659 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0262_text_document +0.0003544993883274688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0263_text_document +0.0003504441365073392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0264_text_document +0.00034859160702727056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0265_text_document +0.00035355909532647185 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0266_text_document +0.0003471900922691849 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0267_text_document +0.0003563015508709187 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0268_text_document +0.0003487888744148821 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0269_text_document +0.00034711767548688336 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0270_text_document +0.0003530734609369085 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0271_text_document +0.00035123969242560935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0272_text_document +0.0003517127620891489 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0273_text_document +0.00035232835416868673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0274_text_document +0.0003524437481912308 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0275_text_document +0.0003525996167005602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0276_text_document +0.00035064770545242043 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0277_text_document +0.00035311558274981226 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0278_text_document +0.00034952204800569914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0279_text_document +0.0003541471367344846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0280_text_document +0.00035418812454561825 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0281_text_document +0.0003528951372900714 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0282_text_document +0.0003542338042975688 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0283_text_document +0.00034937738939942796 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0284_text_document +0.0003522182190878447 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0285_text_document +0.0003501406466507449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0286_text_document +0.00034973079877492633 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0287_text_document +0.0003485274567713538 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0288_text_document +0.00034999308679368985 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0289_text_document +0.0003570051724707296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0290_text_document +0.00034567230462019706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0291_text_document +0.00035529000940160696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0292_text_document +0.00034956512308671755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0293_text_document +0.0003496962834028953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0294_text_document +0.0003468745282493457 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0295_text_document +0.0003502717155809202 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0296_text_document +0.0003556240880896514 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0297_text_document +0.0003515109488424343 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0298_text_document +0.0003563156688192592 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0299_text_document +0.00035040277363989817 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0300_text_document +0.0003481408593290717 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0301_text_document +0.0003624575124332874 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0302_text_document +0.0003522684124250313 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0303_text_document +0.00035286996027653544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0304_text_document +0.00034967623997256725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0305_text_document +0.00035182649587602765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0306_text_document +0.0003524892557026489 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0307_text_document +0.0003507642477451811 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0308_text_document +0.00036190408389835666 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0309_text_document +0.00035102739424880766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0310_text_document +0.00035239718753257265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0311_text_document +0.00035298076121821316 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0312_text_document +0.0003478704389752654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0313_text_document +0.0003503109191567942 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0314_text_document +0.00035143250975654426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0315_text_document +0.0003480663923069012 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0316_text_document +0.00035691540219998623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0317_text_document +0.000348815437166351 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0318_text_document +0.00035202073257766225 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0319_text_document +0.0003491569096274706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0320_text_document +0.00035277390475511834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0321_text_document +0.0003524972090026609 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0322_text_document +0.0003504854249750236 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0323_text_document +0.00034740238025423914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0324_text_document +0.00034968015462277606 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0325_text_document +0.0003493798632762674 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0326_text_document +0.0003488202537862122 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0327_text_document +0.0003525461864643725 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0328_text_document +0.00034903815232825664 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0329_text_document +0.00035536982539258216 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0330_text_document +0.00034858083265155483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0331_text_document +0.0003505014973608067 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0332_text_document +0.00035327984042622104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0333_text_document +0.0003503286677453136 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0334_text_document +0.00035835274842442816 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0335_text_document +0.00034970302660275595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0336_text_document +0.000357929573140149 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0337_text_document +0.0003517238649788585 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0338_text_document +0.00036097027318848475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0339_text_document +0.0003502734074110026 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0340_text_document +0.00035801510806036273 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0341_text_document +0.0003568006373479869 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0342_text_document +0.00036128108717454636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0343_text_document +0.0003563436883111686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0344_text_document +0.00035559725321852463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0345_text_document +0.00035089656006854944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0346_text_document +0.000359453964362057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0347_text_document +0.00035629498059104033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0348_text_document +0.0003622207707090437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0349_text_document +0.0003540946784512821 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0350_text_document +0.0003594750565232011 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0351_text_document +0.0003566007415086991 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0352_text_document +0.0003562142599126134 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0353_text_document +0.0003569948186744601 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0354_text_document +0.00035166554847920186 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0355_text_document +0.00035047994419295137 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0356_text_document +0.0003561578193739437 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0357_text_document +0.00035470866838811544 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0358_text_document +0.00034216920464876335 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0359_text_document +0.0003550021513075795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0360_text_document +0.0003488045105938729 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0361_text_document +0.0003513340720840151 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0362_text_document +0.0003448558566387584 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0363_text_document +0.0003460966026953241 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0364_text_document +0.0003488157616036459 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0365_text_document +0.0003446120387842362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0366_text_document +0.000351528602987427 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0367_text_document +0.00035661118227454713 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0368_text_document +0.0003551342699877457 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0369_text_document +0.0003478953397924445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0370_text_document +0.00034625782458988215 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0371_text_document +0.0003527515447405871 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0372_text_document +0.00034823744889805696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0373_text_document +0.00034823314560254406 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0374_text_document +0.00035162668292961944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0375_text_document +0.0003477307716074623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0376_text_document +0.0003446457989477787 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0377_text_document +0.00034782916273767795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0378_text_document +0.0003517249130302248 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0379_text_document +0.0003449873430908556 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0380_text_document +0.00034841291749669877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0381_text_document +0.0003466028498941749 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0382_text_document +0.0003486436831199424 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0383_text_document +0.0003478279234211838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0384_text_document +0.0003495903653274374 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0385_text_document +0.00034896893881218957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0386_text_document +0.000348941645312426 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0387_text_document +0.0003474221308416894 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0388_text_document +0.0003462621543839385 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0389_text_document +0.0003669373860863891 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0390_text_document +0.00034691156268163006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0391_text_document +0.0003527774103765281 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0392_text_document +0.00034684565672734663 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0393_text_document +0.0003454250599604457 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0394_text_document +0.0003541536557159006 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0395_text_document +0.000345735737037366 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0396_text_document +0.0003524669816385214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0397_text_document +0.0003441817133096468 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0398_text_document +0.0003519093265859089 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0399_text_document +0.00035080085480352095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0400_text_document +0.00035285227929327434 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0401_text_document +0.00034354836346901676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0402_text_document +0.00034789770937373467 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0403_text_document +0.000343665920520102 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0404_text_document +0.0003490884931060568 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0405_text_document +0.00034380029463398654 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0406_text_document +0.00034874768005099945 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0407_text_document +0.0003457058510967673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0408_text_document +0.00034644265227023904 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0409_text_document +0.00035008339858594957 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0410_text_document +0.0003462377193296194 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0411_text_document +0.0003620491787114201 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0412_text_document +0.000348717011044469 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0413_text_document +0.00034370072363913706 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0414_text_document +0.0003551981066775649 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0415_text_document +0.0003500119496799342 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0416_text_document +0.0003485082952669081 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0417_text_document +0.0003508155580978919 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0418_text_document +0.00035311375163251416 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0419_text_document +0.00034945972003423253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0420_text_document +0.0003474220353789879 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0421_text_document +0.0003536443686585001 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0422_text_document +0.0003560350489042953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0423_text_document +0.0003493655927914396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0424_text_document +0.0003528423977146383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0425_text_document +0.00035255554724471217 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0426_text_document +0.0003479760010190111 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0427_text_document +0.00035458598862501956 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0428_text_document +0.0003458990560538315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0429_text_document +0.00035157946422379875 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0430_text_document +0.00034736860650169996 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0431_text_document +0.0003529152313394119 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0432_text_document +0.00034586294329524465 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0433_text_document +0.00035707214923794877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0434_text_document +0.0003509580363496512 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0435_text_document +0.00035244176725524474 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0436_text_document +0.0003467539557999047 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0437_text_document +0.00034919687962275546 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0438_text_document +0.00035094031731719953 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0439_text_document +0.0003484309008351352 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0440_text_document +0.0003485409424916253 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0441_text_document +0.0003499590776117838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0442_text_document +0.0003492842758957848 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0443_text_document +0.0003529712275178912 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0444_text_document +0.0003566141287087449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0445_text_document +0.0003649496522047409 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0446_text_document +0.0003563218912208234 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0447_text_document +0.00035614782126966145 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0448_text_document +0.0003531944298453266 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0449_text_document +0.0003535950949566616 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0450_text_document +0.0003544295554928795 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0451_text_document +0.0003519908503740376 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0452_text_document +0.00035752817626134463 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0453_text_document +0.0003515322689589972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0454_text_document +0.0003486893890307115 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0455_text_document +0.0003446520464889867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0456_text_document +0.0003509421562481707 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0457_text_document +0.00035335015702909084 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0458_text_document +0.0003490178167345008 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0459_text_document +0.0003520497821155174 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0460_text_document +0.0003549762618908944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0461_text_document +0.00035072190850833103 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0462_text_document +0.0003542458638526423 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0463_text_document +0.000352419194572916 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0464_text_document +0.0003545102564672614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0465_text_document +0.0003495437992331806 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0466_text_document +0.0003542843376993964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0467_text_document +0.000352827529313958 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0468_text_document +0.00035442506093223886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0469_text_document +0.0003496970719044257 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0470_text_document +0.0003553096424442362 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0471_text_document +0.00034986845565067564 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0472_text_document +0.000352131055186658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0473_text_document +0.0003527021708198983 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0474_text_document +0.00034905885414547214 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0475_text_document +0.0003583433842468394 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0476_text_document +0.00034409435202828383 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0477_text_document +0.00034846410520871483 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0478_text_document +0.0003554459991927314 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0479_text_document +0.00035310507471843076 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0480_text_document +0.000350028910786098 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0481_text_document +0.00035049727458009896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0482_text_document +0.0003519047735925826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0483_text_document +0.0003513027429919726 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0484_text_document +0.0003626947260354396 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0485_text_document +0.0003500087324849783 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0486_text_document +0.0003618315726725285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0487_text_document +0.0003535385113938023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0488_text_document +0.0003487064058517615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0489_text_document +0.0003618709124780938 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0490_text_document +0.00035040070335625915 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0491_text_document +0.0003506279032267829 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0492_text_document +0.0003498435310527524 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0493_text_document +0.0003554634749821431 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0494_text_document +0.00035091209738758963 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0495_text_document +0.00035034103678978573 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0496_text_document +0.00035398931854386146 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0497_text_document +0.00035495529304989485 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0498_text_document +0.00036067883473356603 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/falcon-0499_text_document + diff --git a/ALCF/data-lists/sunspot/megawiki.txt b/ALCF/data-lists/sunspot/megawiki.txt new file mode 100644 index 0000000000..9fc9ca5dab --- /dev/null +++ b/ALCF/data-lists/sunspot/megawiki.txt @@ -0,0 +1,262 @@ +6.322825248625475e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0000_text_document +2.4432314037946264e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0001_text_document +5.6313888721313454e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0002_text_document +2.4208171781595055e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0003_text_document +2.325811856369237e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0004_text_document +2.4010790356322705e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0005_text_document +5.36773610843632e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0006_text_document +1.360574433501002e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0007_text_document +1.3076540344853244e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0008_text_document +1.3386534334886313e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0009_text_document +1.2498103719605153e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0010_text_document +1.403763836949682e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0011_text_document +1.3636756723495417e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0012_text_document +1.2242489446940814e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0013_text_document +1.2398255818973339e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0014_text_document +1.2972616994216281e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0015_text_document +1.3947809855914134e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0016_text_document +1.3144843787829514e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0017_text_document +1.1693809976572487e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0018_text_document +1.3677252682893802e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0019_text_document +1.3940876719849597e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0020_text_document +1.4222245138730965e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0021_text_document +1.3201677767919704e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0022_text_document +1.1421717796486169e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0023_text_document +1.2890514724498703e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0024_text_document +1.3649507648749037e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0025_text_document +1.2400732563490717e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0026_text_document +1.1557681453277616e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0027_text_document +1.2294483595964517e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0028_text_document +1.2137484472122283e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0029_text_document +1.3299663426456e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0030_text_document +1.2461984216479532e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0031_text_document +1.4666434217609636e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0032_text_document +1.1876997894686238e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0033_text_document +1.2939155338964078e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0034_text_document +1.3859590039728515e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0035_text_document +1.317917848615668e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0036_text_document +1.1335281536110342e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0037_text_document +1.2889923952861426e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0038_text_document +1.3471671647053326e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0039_text_document +1.2221720014475102e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0040_text_document +1.2632647276287541e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0041_text_document +1.28276219004076e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0042_text_document +1.36213704321643e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0043_text_document +1.2414858625261553e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0044_text_document +1.3173700421883744e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0045_text_document +1.295597796725686e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0046_text_document +1.242783936442904e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0047_text_document +1.2417374088427464e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0048_text_document +1.2134479405400744e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0049_text_document +1.3090040663304255e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0050_text_document +1.2713470581614905e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0051_text_document +5.5750231378906594e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0052_text_document +5.777597358425469e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0053_text_document +5.349786767471258e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0054_text_document 
+5.675165050453583e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0055_text_document +5.482611216158831e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0056_text_document +5.065421899890121e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0057_text_document +5.384718357480146e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0058_text_document +4.872037363236061e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0059_text_document +4.532709250783155e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0060_text_document +5.7257963030489613e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0061_text_document +4.9014365579652036e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0062_text_document +5.722863552770969e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0063_text_document +6.149911636146833e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0064_text_document +5.2178057608273506e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0065_text_document +4.990228161160431e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0066_text_document +5.866186875255134e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0067_text_document +5.004185734360719e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0068_text_document +4.79401853705107e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0069_text_document +5.435219965052376e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0070_text_document +5.035997225792266e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0071_text_document +5.622401774211625e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0072_text_document +5.028826157387559e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0073_text_document +5.596379470128795e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0074_text_document +6.027824493191489e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0075_text_document +5.5358270009931474e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0076_text_document +5.9839051807685496e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0077_text_document +5.1221077499249595e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0078_text_document +5.517228560620279e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0079_text_document +5.1687858285052305e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0080_text_document +5.684188244145645e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0081_text_document +5.212693275535878e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0082_text_document +4.8551007022784084e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0083_text_document +5.4888506639203145e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0084_text_document +5.345098688527242e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0085_text_document 
+4.8506420625516594e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0086_text_document +5.132168603397676e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0087_text_document +5.719476795114223e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0088_text_document +5.7448621149792696e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0089_text_document +4.9068410568059265e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0090_text_document +5.382937299647678e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0091_text_document +4.8288432136304634e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0092_text_document +5.841703200305416e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0093_text_document +5.1589611587885584e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0094_text_document +6.031113829732574e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0095_text_document +5.4558202844532094e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0096_text_document +5.341852317196142e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0097_text_document +5.1402942738369954e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0098_text_document +5.735421384377395e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0099_text_document +5.473629863586958e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0100_text_document +5.4708993245733936e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0101_text_document +4.931161863634078e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0102_text_document +5.104173022127248e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0103_text_document +5.510157161510824e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0104_text_document +5.652501401782597e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0105_text_document +5.7273656573031666e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0106_text_document +5.638363224821738e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0107_text_document +5.6128115396668704e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0108_text_document +5.00304877998141e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0109_text_document +5.596120554779096e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0110_text_document +5.5280923889040006e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0111_text_document +5.223477917938408e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0112_text_document +5.29472809986569e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0113_text_document +2.205682378243213e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0114_text_document +1.4367563720603185e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0115_text_document +3.5506193487931076e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0116_text_document 
+3.0442910855821778e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0117_text_document +2.2540042508019627e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0118_text_document +2.6880163202623216e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0119_text_document +2.534473148048727e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0120_text_document +2.6560945431318916e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0121_text_document +2.547470248967691e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0122_text_document +2.5248825388073738e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0123_text_document +2.5828729575000054e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0124_text_document +2.4026583817957736e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0125_text_document +2.3930425429834413e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0126_text_document +2.5037365362599724e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0127_text_document +2.6696745470595603e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0128_text_document +2.140323051341762e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0129_text_document +2.617354786691592e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0130_text_document +1.538359101762691e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0131_text_document +1.2871029252377856e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0132_text_document +2.255195411289217e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0133_text_document +2.4832313897952067e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0134_text_document +9.303873918189968e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0135_text_document +2.179532302620228e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0136_text_document +1.9750517506901206e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0137_text_document +2.7740420380648435e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0138_text_document +2.7813714782319335e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0139_text_document +4.1595357937609806e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0140_text_document +2.741365122389175e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0141_text_document +2.117451071361901e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0142_text_document +1.7132649760565998e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0143_text_document +1.7492547092602047e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0144_text_document +1.7499951097392276e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0145_text_document +1.6632444789170958e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0146_text_document +1.6678802252361607e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0147_text_document +1.5519208704558896e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0148_text_document +1.652420992967167e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0149_text_document +1.6119931034508755e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0150_text_document +1.6638882076736552e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0151_text_document +1.7198076782652946e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0152_text_document +1.572927860565175e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0153_text_document +1.5194822618169918e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0154_text_document +1.6677776832669846e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0155_text_document +1.595612492245688e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0156_text_document +1.682350633181197e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0157_text_document +1.663983380609724e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0158_text_document +1.710187842689243e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0159_text_document +1.5733697527539038e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0160_text_document +1.6972104757911438e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0161_text_document +1.6610142847616577e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0162_text_document +1.61094882403031e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0163_text_document +1.4789207305138325e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0164_text_document +1.639299617676302e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0165_text_document +1.3241204512116132e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0166_text_document +8.582260726625535e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0167_text_document +8.213000975576739e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0168_text_document +9.549247732811947e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0169_text_document +9.17242785339013e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0170_text_document +7.632868223725218e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0171_text_document +8.674401118222175e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0172_text_document +9.124384255505347e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0173_text_document +8.344222222417358e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0174_text_document +8.992299957499065e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0175_text_document +8.76689497361025e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0176_text_document +7.973396239586015e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0177_text_document 
+9.006935606644125e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0178_text_document +8.725545954955498e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0179_text_document +1.215449694669174e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0180_text_document +3.3041720284158646e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0181_text_document +2.0593512412624502e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0182_text_document +1.893608946986248e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0183_text_document +1.737111666788535e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0184_text_document +1.4915923449873955e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0185_text_document +2.289370239067605e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0186_text_document +2.8615335689614638e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0187_text_document +8.847283630883125e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0188_text_document +1.8175470362373804e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0189_text_document +1.8152226683368038e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0190_text_document +1.789149655314284e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0191_text_document +1.7690523036477663e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0192_text_document +1.8333732213753644e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0193_text_document +1.8794105687718654e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0194_text_document +1.721841156706417e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0195_text_document +2.0612008685724796e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0196_text_document +1.9297370681336376e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0197_text_document +2.0188440409661018e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0198_text_document +5.1741216329695265e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0199_text_document +1.3417913926038429e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0200_text_document +1.1010813016469651e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0201_text_document +1.1252416134320087e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0202_text_document +1.2801744104313002e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0203_text_document +1.3041514955795817e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0204_text_document +1.3428837580879075e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0205_text_document +1.320809382267804e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0206_text_document +1.3451566676555968e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0207_text_document +1.228284926657501e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0208_text_document +1.2410599573923043e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0209_text_document +1.3815343367377182e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0210_text_document +1.3895126265148832e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0211_text_document +1.2306773644401741e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0212_text_document +1.32981021906281e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0213_text_document +1.101337469221607e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0214_text_document +1.513094184404692e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0215_text_document +1.1073759547073234e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0216_text_document +1.2879348765857567e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0217_text_document +9.619595770228435e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0218_text_document +1.2384340836286436e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0219_text_document +1.1766667232211577e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0220_text_document +1.2871049236196452e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0221_text_document +1.2010645926497744e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0222_text_document +1.3971428231518597e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0223_text_document +1.2283733550547932e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0224_text_document +1.2659530508255308e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0225_text_document +1.551775613074462e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0226_text_document +1.1169413343776979e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0227_text_document +1.1433700593712463e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0228_text_document +4.964773647323492e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0229_text_document +1.0995586595687313e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0230_text_document +1.2957393071411267e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0231_text_document +2.75899247407709e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0232_text_document +2.8269344597344854e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0233_text_document +2.329108187246831e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0234_text_document +2.4231761430460284e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0235_text_document +1.2434140512230442e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0236_text_document +1.638718338352859e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0237_text_document +3.272953556801187e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0238_text_document +6.061314500486327e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0239_text_document +1.2465979731210292e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0240_text_document +1.2737557327967737e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0241_text_document +1.038428658075627e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0242_text_document +2.61666472045566e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0243_text_document +3.6506873212272224e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0244_text_document +1.5066359138295701e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0245_text_document +1.1166290872121178e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0246_text_document +1.5546966228590285e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0247_text_document +1.2583434625014828e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0248_text_document +1.3398826881300862e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0249_text_document +1.2944933160515968e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0250_text_document +1.0971437399901365e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0251_text_document +1.2787922795775774e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0252_text_document +1.404979227816985e-05 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0253_text_document +1.3344734431324463e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0254_text_document +4.886031157107555e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0255_text_document +3.277261443596394e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0256_text_document +3.5057957685786495e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0257_text_document +3.287625301718589e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0258_text_document +3.1370056372668855e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0259_text_document +3.186092015785841e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0260_text_document +7.271819324142512e-06 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/megawika-0261_text_document diff --git a/ALCF/data-lists/sunspot/open-web-math-train.txt b/ALCF/data-lists/sunspot/open-web-math-train.txt new file mode 100644 index 0000000000..b36e9977c0 --- /dev/null +++ b/ALCF/data-lists/sunspot/open-web-math-train.txt @@ -0,0 +1,13 @@ +0.001451215788905126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0000_text_document +0.0014486847196258788 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0001_text_document +0.0008861032722895899 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0002_text_document +0.0018119590809459816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0003_text_document +0.0008916937917547129 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0004_text_document +6.960128832809415e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0005_text_document +0.002008403651063623 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0006_text_document +0.0014374900742131454 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0007_text_document +0.00180213596996716 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0008_text_document +0.001956178877532413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0009_text_document +0.0008829547017667033 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0010_text_document +0.0008910853619157279 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0011_text_document +0.0018260998845299973 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/open-web-math-train-0012_text_document diff --git a/ALCF/data-lists/sunspot/pes2o.txt b/ALCF/data-lists/sunspot/pes2o.txt new file mode 100644 index 0000000000..63f805c06d --- /dev/null +++ b/ALCF/data-lists/sunspot/pes2o.txt @@ -0,0 +1,26 @@ +0.0012499632072059553 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0000_text_document +0.00125398260359913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0001_text_document +0.0012541704774729071 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0002_text_document +0.0012527268234360602 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0003_text_document +0.0012532925243737164 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0004_text_document +0.0012456396241204315 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0005_text_document +0.0012589894424352072 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0006_text_document +0.001508020123999618 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0007_text_document +0.00333096950781965 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0008_text_document +0.0033233414614415547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0009_text_document +0.003512387990689828 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0010_text_document +0.0035091382940513126 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0011_text_document +0.003514155927147005 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0012_text_document +0.003327108000579638 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0013_text_document +0.003329106196589836 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0014_text_document +0.003505604148738077 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0015_text_document +0.003324825759567855 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0016_text_document +0.0033248240149804913 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0017_text_document +0.0033385962112851358 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0018_text_document +0.0035043186296553615 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0019_text_document +0.003340469505431529 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0020_text_document +0.0035106889084796276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0021_text_document +0.0033309469281030167 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0022_text_document +0.003340337858029757 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0023_text_document +0.003505919861097801 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0024_text_document +0.0003882924098240512 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/pes2o-0025_text_document diff --git a/ALCF/data-lists/sunspot/reddit.txt b/ALCF/data-lists/sunspot/reddit.txt new file mode 100644 index 0000000000..59eafce1ee --- /dev/null +++ b/ALCF/data-lists/sunspot/reddit.txt @@ -0,0 +1,78 @@ +0.0005759963691850877 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0000_text_document +0.0005959971675332674 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0001_text_document +0.0006026179290353799 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0002_text_document +0.0005824184320784846 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0003_text_document +0.0005854598548616037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0004_text_document +0.0005903767055633473 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0005_text_document +0.0005930306490982049 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0006_text_document +0.000569425602700746 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0007_text_document +0.0005675060415179408 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0008_text_document +0.0005772431621253389 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0009_text_document +0.0005678026053826858 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0010_text_document +0.0005700398263483378 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0011_text_document +0.0005669467963528824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0012_text_document +0.0005701015953324305 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0013_text_document +0.0005795907287413296 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0014_text_document +0.0005735602737531164 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0015_text_document +0.0005749862745842101 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0016_text_document +0.0005693257015931971 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0017_text_document +0.0005716568794795563 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0018_text_document +0.0005761083919774021 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0019_text_document +0.0005688343169797355 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0020_text_document +0.0005807913190929842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0021_text_document +0.0005710229258078636 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0022_text_document +0.0005704083039826862 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0023_text_document +0.0005862132348308056 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0024_text_document +0.0005717662049559556 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0025_text_document +0.0005858155213694451 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0026_text_document +0.0005812012281792392 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0027_text_document +0.0005803981414588498 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0028_text_document +0.0005700102108287723 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0029_text_document +0.0005719243459052329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0030_text_document +0.0005867253401661752 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0031_text_document +0.0005731087218860733 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0032_text_document +0.0005712197789109317 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0033_text_document +0.0005702376926310089 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0034_text_document +0.0005700411527742972 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0035_text_document +0.0005828090098178196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0036_text_document +0.0005770140826168056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0037_text_document +0.0005723509664597896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0038_text_document +0.0005755499231836962 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0039_text_document +0.0005636407438471367 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0040_text_document +0.0005640281556500104 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0041_text_document +0.0005633159058766496 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0042_text_document +0.0005638034311151449 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0043_text_document +0.0005630066273073224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0044_text_document +0.0005631803831128559 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0045_text_document +0.0005631228881679657 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0046_text_document +0.0005628178701487633 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0047_text_document +0.0005624448092256196 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0048_text_document +0.0005620957024062329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0049_text_document +0.0005614201504177484 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0050_text_document +0.0005616890951464056 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0051_text_document +0.0005611348559279058 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0052_text_document +0.0005604238061828518 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0053_text_document +0.0005603301490194237 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0054_text_document +0.0005607291294548833 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0055_text_document +0.0005605234569930727 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0056_text_document +0.0005613778566640694 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0057_text_document +0.0005610248539992471 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0058_text_document +0.0005599977416780475 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0059_text_document +0.0005603632562116935 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0060_text_document +0.0005599177479509897 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0061_text_document +0.0005595202318298379 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0062_text_document +0.0005600975633499175 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0063_text_document +0.0005614075491213365 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0064_text_document +0.000612563885043477 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0065_text_document +0.0005515469909644413 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0066_text_document +0.0005526782014946906 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0067_text_document +0.0005472463408095445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0068_text_document +0.0005502284746004587 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0069_text_document +0.0005414514790555363 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0070_text_document +0.0005513499500134784 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0071_text_document +0.0005391391454105187 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0072_text_document +0.0005415836910001838 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0073_text_document +0.0005208132468536551 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0074_text_document +0.0005889827143132871 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0075_text_document +0.0005822520817765276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0076_text_document +0.0004173155230758696 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/reddit-0077_text_document diff --git a/ALCF/data-lists/sunspot/stack.txt b/ALCF/data-lists/sunspot/stack.txt new file mode 100644 index 0000000000..297783ac22 --- /dev/null +++ b/ALCF/data-lists/sunspot/stack.txt @@ -0,0 +1,26 @@ +0.0009994361338078242 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0000_text_document +0.001087156194657966 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0001_text_document +0.0010667737163656816 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0002_text_document +0.0009602877882124873 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0003_text_document +0.0008968956271971105 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0004_text_document +0.0009198034843762967 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0005_text_document +0.0009423901016715341 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0006_text_document +0.0009674094553686345 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0007_text_document +0.0009858331322519164 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0008_text_document +0.0009970593645879198 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0009_text_document +0.0010027035193731686 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0010_text_document +0.0010128291154221853 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0011_text_document +0.0010215631382631918 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0012_text_document +0.0010288663771461238 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0013_text_document +0.0010346219929285867 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0014_text_document +0.00104544019940344 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0015_text_document +0.0010525172676724333 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0016_text_document +0.0010609529620775127 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0017_text_document +0.0010725892748610153 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0018_text_document +0.0010818563598181568 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0019_text_document +0.0010992760196793917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0020_text_document +0.0011178992762079917 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0021_text_document +0.001124687532085676 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0022_text_document +0.001118303661267191 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0023_text_document +0.0010206825575416534 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0024_text_document +0.0005512280117499715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/stackexchange-0025_text_document diff --git a/ALCF/data-lists/sunspot/starcoder.txt b/ALCF/data-lists/sunspot/starcoder.txt new file mode 100644 index 0000000000..37e6333de5 --- /dev/null +++ b/ALCF/data-lists/sunspot/starcoder.txt @@ -0,0 +1,50 @@ +0.004474659408857016 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0000_text_document +0.00409944473890653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0001_text_document +0.005137179939941845 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0002_text_document +0.005143172251066109 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0003_text_document +0.005206134363352808 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0004_text_document +0.004892747858974329 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0005_text_document +0.004844731352552902 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0006_text_document +0.005308320169123755 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0007_text_document +0.005124709815666577 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0008_text_document +0.005424710744483826 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0009_text_document +0.00538244648861977 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0010_text_document +0.0029107284679086853 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0011_text_document +0.0026825258998444705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0012_text_document +0.0026904503191419243 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0013_text_document +0.002687906577174073 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0014_text_document +0.002850165346048818 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0015_text_document +0.005322698571717847 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0016_text_document +0.004450334290869719 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0017_text_document +0.004700990083440683 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0018_text_document +0.003903568556500995 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0019_text_document +0.00390561515396931 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0020_text_document +0.0039046402900912262 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0021_text_document +0.003907454839379547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0022_text_document +0.0038583224578603824 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0023_text_document +0.0037914116657695 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0024_text_document +0.003786665266798682 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0025_text_document +0.003792000802430658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0026_text_document 
+0.00319266847466091 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0027_text_document +0.0032658716699838944 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0028_text_document +0.0034801959532460023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0029_text_document +0.0028307012092022594 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0030_text_document +0.0028420360878146276 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0031_text_document +0.0028410455248484914 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0032_text_document +0.00283497183526842 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0033_text_document +0.002840187195459487 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0034_text_document +0.0028398709431369834 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0035_text_document +0.004364722843422023 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0036_text_document +0.004093255713117101 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0037_text_document +0.004092331079566252 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0038_text_document +0.004005326985579649 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0039_text_document +0.0036205502856964207 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0040_text_document +0.003625316793034984 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0041_text_document +0.003604743435602363 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0042_text_document +0.0035405823343673125 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0043_text_document +0.0041601413517253945 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0044_text_document +0.005886303658937057 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0045_text_document +0.003600909532810332 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0046_text_document +0.0034941365817168658 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0047_text_document +0.0004992164842980224 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/starcoder-0048_text_document + diff --git a/ALCF/data-lists/sunspot/tulu.txt b/ALCF/data-lists/sunspot/tulu.txt new file mode 100644 index 0000000000..2b75802501 --- /dev/null +++ b/ALCF/data-lists/sunspot/tulu.txt @@ -0,0 +1,66 @@ +0.00032927705604725614 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0000_text_document +0.0002860154190878753 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0001_text_document +0.0002845217585425619 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0002_text_document +0.0002743528685497456 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0003_text_document +0.00026025323737738766 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0004_text_document +0.00023493876414603155 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0005_text_document +0.00029665994994226705 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0006_text_document +0.00031808102075993956 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0007_text_document +0.00031813573046011285 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0008_text_document +0.0002711905171855542 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0009_text_document +0.00028892513401817095 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0010_text_document +0.00030003908676979083 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0011_text_document +0.00026839878771944684 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0012_text_document +0.00029155935002690497 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0013_text_document +0.0002998624927624209 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0014_text_document +0.0003091705447974841 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0015_text_document +0.00026873195794309786 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0016_text_document +0.00027721873498527547 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0017_text_document +0.0002841662554024377 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0018_text_document +0.0002839461156551537 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0019_text_document +0.0002861705604659811 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0020_text_document +0.0002460995649635886 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0021_text_document +0.00019420142619795496 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0022_text_document +0.00021967677816173628 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0023_text_document +0.0002620283200480949 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0024_text_document +0.0002433390542188936 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0025_text_document +0.00021254976608350767 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0026_text_document +0.00022094815569522115 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0027_text_document +0.000342862378668244 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0028_text_document +0.00033784225259118157 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0029_text_document +0.0003367278459543952 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0030_text_document +0.00029843279042852765 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0031_text_document +0.0002926583661257988 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0032_text_document +0.00029320337282010673 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0033_text_document +0.00029281450669483455 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0034_text_document +0.0002915338187002653 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0035_text_document +0.0002864226923084572 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0036_text_document +0.00028643439083586396 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0037_text_document +0.00028253710956299054 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0038_text_document +0.0002810856078805806 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0039_text_document +0.00031474941344656715 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0040_text_document +0.0002139130222205655 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0041_text_document +0.0003084648871862831 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0042_text_document +0.0003309477872140129 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0043_text_document +0.0003360096824695161 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0044_text_document +0.0003355452655196557 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0045_text_document +0.00038119390366386037 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0046_text_document +0.00038078927630086064 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0047_text_document +0.0003386200917551554 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0048_text_document +0.0002158905159938882 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0049_text_document +0.00021621682877018768 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0050_text_document +0.00021553306942740535 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0051_text_document +0.00021581563462722296 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0052_text_document +0.0002157694110556169 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0053_text_document +0.000215643699847159 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0054_text_document +0.00021532716715168094 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0055_text_document +0.00021531221326022472 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0056_text_document +0.0002831801179028896 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0057_text_document +0.0002514844936507595 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0058_text_document +0.00031638782778107964 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0059_text_document +0.0002749197545278445 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0060_text_document +0.00026159721512464495 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0061_text_document +0.0002630052420096968 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0062_text_document +0.00031106811228913666 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0063_text_document +0.0002852973415334161 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0064_text_document +3.7555372465932136e-05 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/tulu_flan-0065_text_document diff --git a/ALCF/data-lists/sunspot/wiki.txt b/ALCF/data-lists/sunspot/wiki.txt new file mode 100644 index 0000000000..52af00d57b --- /dev/null +++ b/ALCF/data-lists/sunspot/wiki.txt @@ -0,0 +1,2 @@ +0.003548077173506675 
/gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/wiki-0000_text_document +0.0018372203137874265 /gila/Aurora_deployment/AuroraGPT/datasets/dolma/data_v1.7_Llama2Tokenizer/wiki-0001_text_document diff --git a/ALCF/helpers.sh b/ALCF/helpers.sh index 216798876b..fdd42b56b8 100644 --- a/ALCF/helpers.sh +++ b/ALCF/helpers.sh @@ -1,20 +1,24 @@ #!/bin/bash --login +# +# set -euxo pipefail if [[ -n "${PBS_O_WORKDIR}" ]]; then WORKING_DIR="${PBS_O_WORKDIR}" elif [[ -n "${SLURM_SUBMIT_DIR}" ]]; then WORKING_DIR="${SLURM_SUBMIT_DIR}" else - WORKING_DIR="$(realpath $(pwd))" + echo "Unable to detect PBS or SLURM working directory info..." + WORKING_DIR=$(python3 -c 'import os; print(os.getcwd())') + echo "Using ${WORKING_DIR} as working directory..." fi export WORKING_DIR="${WORKING_DIR}" -printf "Using WORKING_DIR: %s" ${WORKING_DIR} +printf "Using WORKING_DIR: %s\n" ${WORKING_DIR} printJobInfo() { echo "++++++++++++++++++++++++++++++++++++++++++++++++++" - echo "- MPICH_DIR=$MPICH_DIR" + echo "- MPICH_DIR=${MPICH_DIR:-${MPI_ROOT}}" echo "- Using $(which python3)" echo "- WORLD_SIZE:${WORLD_SIZE}" echo "- NCCL: ${NCCL:-nccl}" @@ -24,7 +28,7 @@ printJobInfo() { } -function setupSrun() { +setupSrun() { if [[ $(hostname) == login* || $(hostname) == nid* ]]; then export NHOSTS="${SLURM_NNODES:-1}" export NGPU_PER_HOST="${SLURM_GPUS_ON_NODE:-$(nvidia-smi -L | wc -l)}" @@ -36,7 +40,20 @@ function setupSrun() { } -function setDSlauncher() { +setupLauncher() { + # outdir=$1 + if [[ -n "${DIST_LAUNCH}" && ${LAUNCH_CMD:-"MPICH"} != "deepspeed" ]]; then + export LAUNCH_CMD="${DIST_LAUNCH} --cpu-bind depth -d 16 python3 -Wignore ${EXEC}" + else + # Assert `./hostfile_deepspeed` exists + export hfds="${WORKING_DIR}/hostfile_deepspeed" && [ -f "${hfds}" ] || exit + export LAUNCH_CMD="deepspeed --hostfile $hfds --launcher MPICH ${EXEC}" + fi + printf "%s" "$(printRed 'Launching with:')" + printf " %s" "$(printMagenta ${LAUNCH_CMD})" +} + +setDSlauncher() { # 
launcher setting outdir=$1 export hfds="$outdir/hostfile_deepspeed" @@ -53,19 +70,23 @@ function setDSlauncher() { setParams() { LLAMA_ARGS="" - # ---- [Parallelism Settings] -------------------------------------------- - # -------- [Aurora] ---- || ----- [SunSpot] ------------ + # +----[Parallelism Settings] -------------------------------------------+ + # +------[Aurora]--------||-------[SunSpot]-------------+ if [[ $(hostname) == x4* || $(hostname) == x1* ]]; then TP=${TP:-1} # TP = 1 export CCL=${CCL:-ccl} # CCL export BE="${CCL}" # BE = CCL export DTYPE=${DTYPE:-bf16} # DTYPE: bf16 MICRO_BATCH=${MICRO_BATCH:-4} # MICRO_BATCH = 4 - # export WORKING_DIR="${PBS_O_WORKDIR}" - if [[ -z "${NO_FLASH_ATTN}" ]]; then - LLAMA_ARGS="${LLAMA_ARGS} --use-flash-attn" + ####################################################### + # if NO_FLASH_ATTN is NON-empty; then NO FLASH ATTN !! + if [[ -n "${NO_FLASH_ATTN-}" ]]; then + echo "Not using flash-attn!!" + else + LLAMA_ARGS="${LLAMA_ARGS} --use-flash-attn-builder" fi - # -------- [Polaris] ----------------------------------- + ####################################################### + # +--------[Polaris]-----------------------------------+ elif [[ $(hostname) == x3* ]]; then TP=${TP:-2} # TP = 2 export NCCL=${NCCL:-nccl} # NCCL @@ -73,54 +94,64 @@ setParams() { # export DTYPE=${DTYPE:-bf16} # DTYPE: BF16 ?? export DTYPE=${DTYPE:-fp16} # DTYPE: FP16 MICRO_BATCH=${MICRO_BATCH:-8} # MICRO_BATCH = 8 - # export WORKING_DIR="${PBS_O_WORKDIR}" - if [[ -z "${NO_FLASH_ATTN}" ]]; then + if [[ -n "${NO_FLASH_ATTN-}" ]]; then + echo "Not using flash-attn!!" 
+ else LLAMA_ARGS="${LLAMA_ARGS} --use-flash-attn-v2" fi - # -------- [Perlmutter] --------------------------------- + # +--------[Perlmutter]---------------------------------+ elif [[ $(hostname) == login* || $(hostname) == nid* ]]; then TP="${TP:-2}" export NCCL="${NCCL:-nccl}" export BE="${NCCL}" export DTYPE="${DTYPE:-bf16}" MICRO_BATCH="${MICRO_BATCH:-8}" - # export WORKING_DIR="${SLURM_SUBMIT_DIR}" - if [[ -z "${NO_FLASH_ATTN}" ]]; then + if [[ -n "${NO_FLASH_ATTN-}" ]]; then + echo "Not using flash-attn!!" + else LLAMA_ARGS="${LLAMA_ARGS} --use-flash-attn-v2" fi fi - # ------------------------------------------------------------------------ + # +----------------------------------------------------------------------+ export TP="${TP}" export PP="${PP:-1}" export DTYPE="${DTYPE:-bf16}" export OPT="${OPT:-adamw}" export HOSTFILE="${HOSTFILE:-${PBS_NODEFILE}}" - export WORLD_SIZE=${WORLD_SIZE:-$(wc -l < "${HOSTFILE}")} - # ---- Llama2 7B Config ------------------------------ + NHOSTS=$(wc -l < "${HOSTFILE}") + if [[ -z "${NGPU_PER_HOST-}" ]]; then + NGPU_PER_HOST=$(python3 -c 'import ezpz as ez; print(ez.get_gpus_per_node())') + fi + export WORLD_SIZE="${WORLD_SIZE:-$(( NHOSTS * NGPU_PER_HOST ))}" + # export WORLD_SIZE="${WORLD_SIZE:-${NGPUS:-$(( ))}}" + # export WORLD_SIZE=${WORLD_SIZE:-$(wc -l < "${HOSTFILE}")} + # +---[Llama2 7B Config]-----------------------------+ export MODEL_KEY="Llama-7B" export HEADS=${HEADS:-${NHEADS:-32}} export NLAYERS=${NLAYERS:-${NUM_LAYERS:-32}} export HIDDEN=${HIDDEN:-4096} export NUM_KV_HEAD=${NUM_KV_HEAD:-8} export FFN_HIDDEN_SIZE=${FFN_HIDDEN_SIZE:-11008} - # ---- Run Settings ---------------------------------- - export LR=${LR:-0.0003} + # +---[Run Settings]------------------------------------------------------+ + export LR=${LR:-0.0003} # LEARNING_RATE export SEQ=${SEQ:-4096} # SEQ_LEN: 4096 - export ZERO_STAGE=${ZERO_STAGE:-2} - export MICRO_BATCH=${MICRO_BATCH:-8} - export GRAD_ACC_STEPS=${GRAD_ACC_STEPS:-1} - export 
EVAL_ITERS="${EVAL_ITERS:-10}" - export TRAIN_ITER=${TRAIN_ITER:-317892} - export EVAL_INTERVAL="${EVAL_INTERVAL:-50000}" - export SAVE_INTERVAL=${SAVE_INTERVAL:-200} - export USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING:-1} - export GLOBAL_BATCH_MAX=$(( $WORLD_SIZE * $MICRO_BATCH * $GRAD_ACC_STEPS / $TP / $PP )) - export GLOBAL_BATCH="${GLOBAL_BATCH:-${GLOBAL_BATCH_MAX}}" - tm="${WORKING_DIR}/ALCF/tokenizer.model" - export TOKENIZER_MODEL="${TOKENIZER_MODEL:-${tm}}" - export MODEL_TYPE="llama-seq${SEQ}-pp${PP}-tp${TP}-${NLAYERS}layers-${HEADS}heads-${HIDDEN}hidden" + export ZERO_STAGE=${ZERO_STAGE:-2} # ZERO OFFLOADING STAGE + export MICRO_BATCH=${MICRO_BATCH:-8} # MICRO BATCH SIZE + export GRAD_ACC_STEPS=${GRAD_ACC_STEPS:-1} # GRADIENT ACCUMULATION STEPS + export EVAL_ITERS="${EVAL_ITERS:-10}" # NUMBER OF EVAL ITERS TO RUN + export TRAIN_ITER=${TRAIN_ITER:-317892} # NUMBER OF TRAIN ITERS + export EVAL_INTERVAL="${EVAL_INTERVAL:-50000}" # HOW FREQUENTLY TO RUN EVAL + export SAVE_INTERVAL=${SAVE_INTERVAL:-200} # HOW FREQUENTLY TO SAVE CKPTS + export TIMING_LOG_LEVEL="${TIMING_LOG_LEVEL:-1}" # TIMING VERBOSITY IN LOGS + export USE_ACTIVATION_CHECKPOINTING=${USE_ACTIVATION_CHECKPOINTING:-1} # USE ACTIVATION CHECKPOINTING ? 
+ export GLOBAL_BATCH_MAX=$(( $WORLD_SIZE * $MICRO_BATCH * $GRAD_ACC_STEPS / $TP / $PP )) # MAX GLOBAL BATCH SIZE + export GLOBAL_BATCH="${GLOBAL_BATCH:-${GLOBAL_BATCH_MAX}}" # WILL USE MAX IF NOT SET IN ENVIRONMENT + tm="${WORKING_DIR}/ALCF/tokenizer.model" # fallback: Megatron-DeepSpeed/ALCF/tokenizer.model + export TOKENIZER_MODEL="${TOKENIZER_MODEL:-${tm}}" # USE TOKENIZER_MODEL from env, else fallback from ^ + export MODEL_TYPE="llama-seq${SEQ}-pp${PP}-tp${TP}-${NLAYERS}layers-${HEADS}heads-${HIDDEN}hidden" # STRING FOR IDENTIFYING MODEL + # +----[ADDITIONAL LLAMA SPECIFIC ARGUMENTS]------------------------------ export LLAMA_ARGS="${LLAMA_ARGS} --no-query-key-layer-scaling --use-rotary-position-embeddings --untie-embeddings-and-output-weights --swiglu --normalization rmsnorm --disable-bias-linear" - # ---------------------------------------------------- + # +----------------------------------------------------------------------+ } @@ -154,27 +185,43 @@ setArgs() { export gpt_args } + +make_ds_hostfile() { + export GPUS_PER_NODE="${GPUS_PER_NODE:-${NGPU_PER_HOST:-${SLURM_GPUS_ON_NODE:-$(nvidia-smi -L | wc -l)}}}" + # ---- Make MPICH hostfile ---------------- + hf="${HOSTFILE:-${PBS_NODEFILE}}" + export hostfile_mpich=hostfile_mpich + cat "${hf}" > "${hostfile_mpich}" + # ---- Make DeepSpeed hostfile ------------------- + export hostfile_deepspeed=hostfile_deepspeed + cat "${hf}" > "${hostfile_deepspeed}" + sed -e "s/$/ slots=${GPUS_PER_NODE}/" -i "${hostfile_deepspeed}" +} + # +---------------------------------------+ # | 1. Git clone ezpz (if not found) | # | 2. Install ezpz (if not installed) | # +---------------------------------------+ ezpz() { - if [[ ! -d "${PBS_O_WORKDIR}/deps/ezpz" ]]; then - mkdir -p "${PBS_O_WORKDIR}/deps" - git clone https://github.com/saforem2/ezpz "${PBS_O_WORKDIR}/deps" + if [[ ! 
-d "${WORKING_DIR}/deps/ezpz" ]]; then + mkdir -p "${WORKING_DIR}/deps" + git clone https://github.com/saforem2/ezpz "${WORKING_DIR}/deps/ezpz" else echo "Found ezpz!" fi - if python3 -c 'import ezpz; print(ezpz.__file__)' 2> '/dev/null'; then + echo "Done with clone. Now, checking if ezpz is installed..." + # if python3 -c 'import ezpz; print(ezpz.__file__)' 2> '/dev/null'; then + if python3 -c "import sys; any(['ezpz' in s for s in sys.path])" 2> '/dev/null'; then echo "Has ezpz installed. Nothing to do." else echo "Does not have ezpz installed. Installing..." echo "Using $(which python3) to install ezpz:" - python3 -m pip install -e "${PBS_O_WORKDIR}/edps/ezpz" # > ezpz-install.log 2>&1 + python3 -m pip install -e "${WORKING_DIR}/deps/ezpz" # > ezpz-install.log 2>&1 fi echo "Done with ezpz." - source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/savejobenv" > /tmp/savejobenv.log 2>&1 || exit - source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/getjobenv" || exit + source ${WORKING_DIR}/deps/ezpz/src/ezpz/bin/savejobenv > /dev/null 2>&1 #> /tmp/savejobenv.log 2>&1 || exit + source ${WORKING_DIR}/deps/ezpz/src/ezpz/bin/getjobenv || exit + make_ds_hostfile || exit } # +------------------------------------------------------------------------+ @@ -196,7 +243,8 @@ saveDSenv() { setOutput() { # ---- Specify output location -------------------------------- export OUTPUT_PREFIX="ds_stage${ZERO_STAGE}_nl${NLAYERS}_hs${HIDDEN}_mb${MICRO_BATCH}_seq${SEQ}_gb${GLOBAL_BATCH}_pp${PP}_tp${TP}_${DTYPE}_opt${OPT}" - OUTPUT_DIR="logs/${OUTPUT_PREFIX}/$(date +%m%d%H%M%S)_${HOSTNAME}" + # OUTPUT_DIR="logs/${OUTPUT_PREFIX}/$(date +%m%d%H%M%S)_${HOSTNAME}" + OUTPUT_DIR="logs/${OUTPUT_PREFIX}/$(date +%Y%m%d-%H%M%S)_${WORLD_SIZE}_${HOSTNAME}" export OUTPUT_DIR="${OUTPUT_DIR}" export OUTPUT_LOG="${OUTPUT_DIR}/output.log" export CKPT_DIR="checkpoints/${OUTPUT_PREFIX}" @@ -207,11 +255,14 @@ setOutput() { buildDSconfig() { # ---- Build DeepSpeed Config --------------------------------- - export 
DS_CONFIG="ds_stage${ZERO_STAGE}_mb${MICRO_BATCH}_gb${GLOBAL_BATCH}_pp${PP}_${DTYPE}.json" + export CPU_OPTIMIZER="${CPU_OPTIMIZER:-0}" + export DS_CONFIG="${WORKING_DIR}/ds-configs/ds_stage${ZERO_STAGE}_mb${MICRO_BATCH}_gb${GLOBAL_BATCH}_pp${PP}_${DTYPE}.json" + mkdir -p $(dirname "${DS_CONFIG}") echo "DS_CONFIG: ${DS_CONFIG}" - printf "ZS: %s, CPU_OPTIMIZER: %s, MB: %s, GB: %s, PP: %s, DTYPE: %s" "${ZERO_STAGE}" "${CPU_OPTIMIZER}" "${MICRO_BATCH}" "${GLOBAL_BATCH}" "${PP}" "${DTYPE}" - working_dir="${PBS_O_WORKDIR:-${SLURM_SUBMIT_DIR:-$(pwd)}}" - bash "${working_dir}/generate_config.sh" "${DS_CONFIG}" + printf "ZS: %s, , MB: %s, GB: %s, PP: %s, DTYPE: %s" "${ZERO_STAGE}" "${CPU_OPTIMIZER}" "${MICRO_BATCH}" "${GLOBAL_BATCH}" "${PP}" "${DTYPE}" + # working_dir="${PBS_O_WORKDIR:-${SLURM_SUBMIT_DIR:-$(pwd)}}" + generateDSconfig "${DS_CONFIG}" + # bash "${WORKING_DIR}/ALCF/generate_ds_config.sh" "${DS_CONFIG}" # ------------------------------------------------------------- } @@ -234,16 +285,19 @@ sumFiles() { setEnv() { # ---- [SunSpot] ------- || ---- [Aurora] -------------- if [[ $(hostname) == x1* || $(hostname) == x4* ]]; then - PBS_PARENT=$(dirname ${PBS_O_WORKDIR}) - echo "Sourcing ${PBS_PARENT}/setenv.sh..." - source "${PBS_PARENT}/setenv.sh" || exit + # PBS_PARENT=$(dirname ${PBS_O_WORKDIR}) + # echo "Sourcing ${PBS_PARENT}/setenv.sh..." + # source "${PBS_PARENT}/setenv.sh" || exit + source "${WORKING_DIR}/ALCF/sunspot-env.sh" || exit # ----- [Aurora] ----------------------------------- - if [[ $(hostname) == x4* ]]; then - eval "$(/home/foremans/miniconda3/bin/conda shell.zsh hook)" && conda activate anl_release_q4v2 - # ----- [SunSpot] ---------------------------------- - elif [[ $(hostname) == x1* ]]; then - echo "Running on SunSpot !!" 
- eval "$(/home/foremans/miniconda3/bin/conda shell.zsh hook)" && conda activate q4-drop + if [[ -z "${CONDA_PREFIX}" && -z "${VIRTUAL_ENV}" ]]; then + if [[ $(hostname) == x4* ]]; then + eval "$(conda shell.zsh hook)" && conda activate anl_release_q4v2 + # ----- [SunSpot] ---------------------------------- + elif [[ $(hostname) == x1* ]]; then + echo "Running on SunSpot !!" + eval "$(/home/foremans/miniconda3/bin/conda shell.zsh hook)" && conda activate q4-drop + fi fi # ----- [Polaris] --------------------------------------- elif [[ $(hostname) == x3* ]]; then @@ -268,18 +322,15 @@ setEnv() { echo "[python] Using: $(which python3)" } + makeHostfiles() { - # source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/savejobenv" || exit #> /tmp/savejobenv.log 2>&1 & - # source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/getjobenv" || exit - export GPUS_PER_NODE="${GPUS_PER_NODE:-${NGPU_PER_HOST:-${SLURM_GPUS_ON_NODE:-$(nvidia-smi -L | wc -l)}}}" - # ---- Make MPICH hostfile ---------------- - hf="${HOSTFILE:-${PBS_NODEFILE}}" - export hostfile_mpich=hostfile_mpich - cat "${hf}" > "${hostfile_mpich}" - # ---- Make DeepSpeed hostfile ------------------- - export hostfile_deepspeed=hostfile_deepspeed - cat "${hf}" > "${hostfile_deepspeed}" - sed -e "s/$/ slots=${GPUS_PER_NODE}/" -i "${hostfile_deepspeed}" + if [[ -n "${HOSTFILE}" ]]; then + printf "!! USING CUSTOM HOSTFILE FROM: %s" "${HOSTFILE}" + else + make_ds_hostfile + # source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/savejobenv" || exit #> /tmp/savejobenv.log 2>&1 & + # source "${WORKING_DIR}/deps/ezpz/src/ezpz/bin/getjobenv" || exit + fi } setData() { # ---- [dfl: abbrv. for DATA_FILE_LIST] ------------------------- @@ -302,8 +353,8 @@ setData() { # ---- [dfl: abbrv. 
for DATA_FILE_LIST] ------------------------- ndocs=$(wc -l < "${dfl}") ws=$(sumWeights "${dfl}") dfl_stem=$(echo "${dfl}" | tr "\/" "\t" | awk '{print $NF}' | sed "s/\.txt//g") - dcp="${HERE}/.cache/${dfl_stem}/index-cache" - mkdir -p dcp + dcp=".cache/${dfl_stem}/index-cache" + # mkdir -p dcp export DATA_FILE_LIST="${dfl}" export NUM_DOCS="${ndocs}" export WEIGHT_SUM="${ws}" @@ -319,6 +370,168 @@ setData() { # ---- [dfl: abbrv. for DATA_FILE_LIST] ------------------------- echo "--------------------" } +generateDSconfig() { + for v in "$GLOBAL_BATCH" "$MICRO_BATCH" "$GRAD_ACC_STEPS" "$ZERO_STAGE" \ + "$PP" "$DTYPE" + do + if [ -z $v ]; then + echo "Please export required envs before execute $0" + exit 1 + fi + done + if [ $# -ne 1 ]; then + echo "Usage: $0 config_file" + exit 1 + fi + # \"optimizer\": { + # \"type\": \"AdamW\", + # \"params\": { + # \"lr\": ${LR}, + # \"beta1\": 0.9, + # \"beta2\": 0.95, + # \"eps\": 1e-5, + # \"weight_decay\": 1e-1 + # } + # }, + # \"scheduler\": { + # \"type\": \"WarmupLR\", + # \"params\": { + # \"warmup_min_lr\": 0.00003, + # \"warmup_max_lr\": 0.0003, + # \"warmup_num_steps\": 5000 + # } + # }, + extra="" + common="\ + \"train_batch_size\": $GLOBAL_BATCH, + \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, + \"steps_per_print\": 1, + \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, + \"zero_allow_untested_optimizer\": true, + \"gradient_clipping\": 1.0, + \"activation_checkpointing\": { + \"partition_activations\": true, + \"contiguous_memory_optimization\": false + }, + \"wall_clock_breakdown\": false," + flops_profiler="\ + \"flops_profiler\": { + \"enabled\": true, + \"profile_step\": 4, + \"module_depth\": -1, + \"top_modules\": 1, + \"detailed\": true, + \"output_file\": null + }" + if [[ $DTYPE == "bf16" ]]; then + dtype="\ + \"communication_data_type\": \"bf16\", + \"fp16\": { + \"enabled\": false, + \"loss_scale\": 0, + \"loss_scale_window\": 1000, + \"hysteresis\": 2, + \"min_loss_scale\": 1 + }, + \"bfloat16\": { 
+ \"enabled\": true, + \"loss_scale\": 1.0 + }," + elif [[ $DTYPE == "fp16" ]]; then + dtype="\ + \"communication_data_type\": \"fp16\", + \"fp16\": { + \"enabled\": true, + \"loss_scale\": 0, + \"loss_scale_window\": 1000, + \"hysteresis\": 2, + \"min_loss_scale\": 1 + }, + \"bfloat16\": { + \"enabled\": false, + \"loss_scale\": 1.0 + }," + else + dtype="\"communication_data_type\": \"fp32\"," + fi + if [ $ZERO_STAGE == 3 ]; then + zero="\ + \"zero_optimization\": { + \"stage\": 3, + \"reduce_scatter\": false, + \"mics_shard_size\": 4, + \"mics_hierarchical_params_gather\": true, + \"stage3_max_live_parameters\": 3e9, + \"stage3_max_reuse_distance\": 3e9, + \"stage3_param_persistence_threshold\": 1e5, + \"stage3_prefetch_bucket_size\": 5e7, + \"contiguous_gradients\": true, + \"overlap_comm\": true, + \"reduce_bucket_size\": 90000000, + \"sub_group_size\": 1e9, + \"offload_optimizer\": { + \"device\": \"none\", + \"buffer_count\": 4, + \"pipeline_read\": false, + \"pipeline_write\": false, + \"pin_memory\": true + } + }," + # elif [[ $ZERO_STAGE == 2 ]]; then + elif [ "${ZERO_STAGE}" == 2 ] || [ "${ZERO_STAGE}" == 1 ]; then + # if [[ -n "${CPU_OPTIMIZER}" ]]; then + if [[ "${CPU_OPTIMIZER}" != 0 ]]; then + echo "!!!! CAUGHT CPU_OPTIMIZER !!!!" + zero="\ + \"zero_optimization\": { + \"stage\": $ZERO_STAGE, + \"offload_optimizer\": { + \"device\": \"cpu\" + } + }," + else + zero="\ + \"zero_optimization\": { + \"stage\": $ZERO_STAGE + }," + fi + # elif [[ $ZERO_STAGE == 1 ]]; then + if [[ $PP > 1 ]]; then + extra="\ + \"data_types\": { + \"grad_accum_dtype\": \"fp32\" + }, + \"comms_logger\": { + \"enabled\": true, + \"verbose\": false, + \"prof_all\": true, + \"debug\": false + }," + else + # echo 'please add the config for zero_stage 1 without pipeline-parallelism' + extra="\ + \"comms_logger\": { + \"enabled\": true, + \"verbose\": false, + \"prof_all\": true, + \"debug\": false + }," + fi + else + echo 'Please add the correct config set!!!' 
+ fi +# flops_profiler must at the end because no ',' is allowed at the end +cat < $1 +{ +$common +$zero +$dtype +$extra +$flops_profiler +} +EOT +} + printBlack() { printf "\e[1;30m%s\e[0m\n" "$@" } diff --git a/ALCF/sunspot-env.sh b/ALCF/sunspot-env.sh new file mode 100644 index 0000000000..e97545aafe --- /dev/null +++ b/ALCF/sunspot-env.sh @@ -0,0 +1,27 @@ +#!/bin/bash --login + +export CCL_OP_SYNC=1 # Required by current oneCCL (HPCS-8067) +export CCL_PROCESS_LAUNCHER=pmix # Required by Aurora mpich +export FI_PROVIDER=cxi # Required by Aurora mpich +export PALS_PMI=pmix # Required by Aurora mpich +export CCL_ATL_TRANSPORT=mpi # Required by Aurora mpich +export FI_MR_CACHE_MONITOR=disabled # Required by Aurora mpich (HPCS-6501) +export CCL_SKIP_SCHEDULER=1 # Required by current oneCCL, will remove when set by default +export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 +export CCL_TOPO_COLOR="card:{0,1},{2,3},{4,5},{6,7},{8,9},{10,11};plane:{0,3,4,6,8,11},{1,2,5,7,9,10}" +export UR_L0_IN_ORDER_BARRIER_BY_SIGNAL=0 # Required by current oneCCL + + +export LLM_DK_DIR=/home/$(whoami)/q4-drop_sunspot/llm.devkit + +module load oneapi/release/2023.12.15.001 +unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE +unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE +unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE + + +module use /home/ftartagl/graphics-compute-runtime/modulefiles +module load graphics-compute-runtime/agama-ci-devel-736.9 +source /home/$(whoami)/q4-drop_sunspot/llm.devkit/torch-ccl/third_party/oneCCL/build/_install/env/vars.sh +module load gcc/12.1.0 +module unload intel_compute_runtime/release/agama-devel-647 diff --git a/ALCF/test_sirius.sh b/ALCF/test_sirius.sh new file mode 100755 index 0000000000..6199c5c157 --- /dev/null +++ b/ALCF/test_sirius.sh @@ -0,0 +1,66 @@ +#!/bin/bash --login +# +# Run complete test of +# https://github.com/argonne-lcf/Megatron-DeepSpeed +# on Sirius @ ALCF +# to launch (inside an interactive `qsub -I` job) on 
Sirius: +# +# ```bash +# $ git clone https://github.com/argonne-lcf/Megatron-DeepSpeed +# $ cd Megatron-DeepSpeed/ALCF +# $ bash test_sirius.sh +# ``` + +# EXIT ON ERROR(s) +set -euxo pipefail + +NOW="$(date "+%Y-%m-%d-%H%M%S")" + +######################################################## +# Setup / activate micromamba environment, +# mine is called 2024-04-23 +######################################################## +setup_conda() { + export MAMBA_ROOT_PREFIX=/lus/tegu/projects/PolarisAT/foremans/micromamba + shell_name="${SHELL##*/}" + eval "$("${MAMBA_ROOT_PREFIX}/bin/micromamba" shell hook --shell "${shell_name}")" + micromamba activate 2024-04-23 +} + + +######################################## +# Make sure ${OUTDIR}/Megatron-DeepSpeed +# does not already exist +######################################## +setup_megatron_deepspeed() { + OUTDIR="OUTPUTS/test-sirius-${NOW}" && mkdir -p "${OUTDIR}" && cd "${OUTDIR}" + echo "Running test in: ${OUTDIR}" + echo "WORKING DIRECTORY: $(realpath .)" + if [[ -d "Megatron-DeepSpeed" ]]; then + # rm -rfv Megatron-DeepSpeed/ + echo "Found existing Megatron-DeepSpeed. + Remove existing directory to run test."
+ exit 1 # non-zero status: the test did not run + fi + git clone https://github.com/argonne-lcf/Megatron-DeepSpeed && cd Megatron-DeepSpeed + git checkout remove-apex-deps +} + + +main() { + setup_conda + setup_megatron_deepspeed + export DEBUG=1 + export PBS_O_WORKDIR="$(pwd)" + export DATA_FILE_LIST=./ALCF/data-lists/sirius/books.txt + # LR=0.0008 + # GRAD_ACC_STEPS=8 + export ZERO_STAGE=1 + export NUM_LAYERS=10 + export MICRO_BATCH=8 + export TRAIN_ITERS=20 + export TIMING_LOG_LEVEL=1 + bash train_llama_alcf.sh |& tee "test-sirius-${NOW}".log +} + +main diff --git a/ALCF/test_sunspot.sh b/ALCF/test_sunspot.sh new file mode 100755 index 0000000000..a8a4a21f32 --- /dev/null +++ b/ALCF/test_sunspot.sh @@ -0,0 +1,48 @@ +#!/bin/bash --login +# +# Run complete test of +# https://github.com/argonne-lcf/Megatron-DeepSpeed +# on Sunspot @ ALCF + +# EXIT ON ERROR(s) +set -euxo pipefail + +######################################################## +# Setup / activate conda environment, +# mine is called q4-drop +######################################################## +setup_conda() { + if [[ "${SHELL}" = "/bin/zsh" ]]; then + eval "$(~/miniconda3/bin/conda shell.zsh hook)" + else + eval "$(~/miniconda3/bin/conda shell.bash hook)" + fi + conda activate q4-drop +} + + +######################################## +# Make sure ./tmp/Megatron-DeepSpeed +# does not already exist +######################################## +setup_megatron_deepspeed() { + mkdir -p tmp && cd tmp + if [[ -d "Megatron-DeepSpeed" ]]; then + # rm -rfv Megatron-DeepSpeed/ + echo "Found existing Megatron-DeepSpeed. + Remove existing directory to run test."
+ exit 1 # non-zero status: the test did not run + fi + git clone https://github.com/argonne-lcf/Megatron-DeepSpeed && cd Megatron-DeepSpeed + git checkout remove-apex-deps +} + + +main() { + setup_conda + setup_megatron_deepspeed + # NOTE: to use OPT=adamwschedulefree, you will need to pip install schedulefree + DEBUG=1 PBS_O_WORKDIR="$(pwd)" DATA_FILE_LIST=./ALCF/data-lists/sunspot/books.txt LR=0.0008 GRAD_ACC_STEPS=8 ZERO_STAGE=1 NUM_LAYERS=10 MICRO_BATCH=8 OPT=adamwschedulefree TIMING_LOG_LEVEL=1 bash train_llama_alcf.sh +} + +main diff --git a/generate_config.sh b/generate_config.sh deleted file mode 100644 index b164b5e610..0000000000 --- a/generate_config.sh +++ /dev/null @@ -1,172 +0,0 @@ -#!/bin/bash --login - -for v in "$GLOBAL_BATCH" "$MICRO_BATCH" "$GRAD_ACC_STEPS" "$ZERO_STAGE" \ - "$PP" "$DTYPE" -do - if [ -z $v ]; then - echo "Please export required envs before execute $0" - exit 1 - fi -done - -if [ $# -ne 1 ]; then - echo "Usage: $0 config_file" - exit 1 -fi - -# \"optimizer\": { -# \"type\": \"AdamW\", -# \"params\": { -# \"lr\": ${LR}, -# \"beta1\": 0.9, -# \"beta2\": 0.95, -# \"eps\": 1e-5, -# \"weight_decay\": 1e-1 -# } -# }, -# \"scheduler\": { -# \"type\": \"WarmupLR\", -# \"params\": { -# \"warmup_min_lr\": 0.00003, -# \"warmup_max_lr\": 0.0003, -# \"warmup_num_steps\": 5000 -# } -# }, - -extra="" -common="\ - \"train_batch_size\": $GLOBAL_BATCH, - \"train_micro_batch_size_per_gpu\": $MICRO_BATCH, - \"steps_per_print\": 1, - \"gradient_accumulation_steps\": $GRAD_ACC_STEPS, - \"zero_allow_untested_optimizer\": true, - \"gradient_clipping\": 1.0, - \"activation_checkpointing\": { - \"partition_activations\": true, - \"contiguous_memory_optimization\": false - }, - \"wall_clock_breakdown\": false," - -flops_profiler="\ - \"flops_profiler\": { - \"enabled\": true, - \"profile_step\": 4, - \"module_depth\": -1, - \"top_modules\": 1, - \"detailed\": true, - \"output_file\": null - }" - -if [[ $DTYPE == "bf16" ]]; then -dtype="\ - \"communication_data_type\": \"bfp16\", - \"fp16\": { -
\"enabled\": false, - \"loss_scale\": 0, - \"loss_scale_window\": 1000, - \"hysteresis\": 2, - \"min_loss_scale\": 1 - }, - \"bfloat16\": { - \"enabled\": true, - \"loss_scale\": 1.0 - }," -elif [[ $DTYPE == "fp16" ]]; then -dtype="\ - \"communication_data_type\": \"fp16\", - \"fp16\": { - \"enabled\": true, - \"loss_scale\": 0, - \"loss_scale_window\": 1000, - \"hysteresis\": 2, - \"min_loss_scale\": 1 - }, - \"bfloat16\": { - \"enabled\": false, - \"loss_scale\": 1.0 - }," -else - dtype="\"communication_data_type\": \"fp32\"," -fi - -if [ $ZERO_STAGE == 3 ]; then -zero="\ - \"zero_optimization\": { - \"stage\": 3, - \"reduce_scatter\": false, - \"mics_shard_size\": 4, - \"mics_hierarchical_params_gather\": true, - \"stage3_max_live_parameters\": 3e9, - \"stage3_max_reuse_distance\": 3e9, - \"stage3_param_persistence_threshold\": 1e5, - \"stage3_prefetch_bucket_size\": 5e7, - \"contiguous_gradients\": true, - \"overlap_comm\": true, - \"reduce_bucket_size\": 90000000, - \"sub_group_size\": 1e9, - \"offload_optimizer\": { - \"device\": \"none\", - \"buffer_count\": 4, - \"pipeline_read\": false, - \"pipeline_write\": false, - \"pin_memory\": true - } - }," - -# elif [[ $ZERO_STAGE == 2 ]]; then -elif [ "${ZERO_STAGE}" == 2 ] || [ "${ZERO_STAGE}" == 1 ]; then - -if [[ -n "${CPU_OPTIMIZER}" ]]; then -echo "!!!! CAUGHT CPU_OPTIMIZER !!!!" 
- -zero="\ - \"zero_optimization\": { - \"stage\": $ZERO_STAGE, - \"offload_optimizer\": { - \"device\": \"cpu\" - } - }," - -else -zero="\ - \"zero_optimization\": { - \"stage\": $ZERO_STAGE - }," -fi - -# elif [[ $ZERO_STAGE == 1 ]]; then -if [[ $PP > 1 ]]; then - extra="\ - \"data_types\": { - \"grad_accum_dtype\": \"fp32\" - }, - \"comms_logger\": { - \"enabled\": true, - \"verbose\": false, - \"prof_all\": true, - \"debug\": false - }," -else - # echo 'please add the config for zero_stage 1 without pipeline-parallelism' - extra="\ - \"comms_logger\": { - \"enabled\": true, - \"verbose\": false, - \"prof_all\": true, - \"debug\": false - }," -fi -else - echo 'Please add the correct config set!!!' -fi - -# flops_profiler must at the end because no ',' is allowed at the end -cat <<EOT > $1 -{ -$common -$zero -$dtype -$extra -$flops_profiler -} -EOT diff --git a/megatron/model/__init__.py b/megatron/model/__init__.py index 2306749fcb..141c901ffa 100644 --- a/megatron/model/__init__.py +++ b/megatron/model/__init__.py @@ -1,12 +1,18 @@ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-from deepspeed.accelerator.real_accelerator import get_accelerator -if get_accelerator().device_name() == 'cuda': +# from deepspeed.accelerator.real_accelerator import get_accelerator +# if get_accelerator().device_name() == 'cuda': +try: from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm from apex.normalization import MixedFusedRMSNorm as RMSNorm -else: + HAS_APEX = True +except Exception: + HAS_APEX = False from .rmsnorm import RMSNorm from torch.nn import LayerNorm +# else: +# from .rmsnorm import RMSNorm +# from torch.nn import LayerNorm from .distributed import DistributedDataParallel from .bert_model import BertModel diff --git a/megatron/optimizer/clip_grads.py b/megatron/optimizer/clip_grads.py index afec8f220c..b5141d0059 100644 --- a/megatron/optimizer/clip_grads.py +++ b/megatron/optimizer/clip_grads.py @@ -8,10 +8,14 @@ except ModuleNotFoundError: from torch import inf as inf -from deepspeed.accelerator import get_accelerator -if get_accelerator().device_name() == 'cuda': +from deepspeed.accelerator import get_accelerator # still called in clip_grad_norm_fp32 +# if get_accelerator().device_name() == 'cuda': +try: from apex.multi_tensor_apply import multi_tensor_applier import amp_C + HAS_APEX = True +except Exception: + HAS_APEX = False from megatron.model.module import param_is_not_shared from megatron.core.tensor_parallel import param_is_not_tensor_parallel_duplicate @@ -71,7 +75,7 @@ def clip_grad_norm_fp32(parameters, grads_for_norm, else: if norm_type == 2.0: - if get_accelerator().device_name() == 'cuda': + if get_accelerator().device_name() == 'cuda' and HAS_APEX: dummy_overflow_buf = torch.cuda.IntTensor([0]) # Use apex's multi-tensor applier for efficiency reasons.
# Multi-tensor applier takes a function and a list of list diff --git a/megatron/optimizer/distrib_optimizer.py b/megatron/optimizer/distrib_optimizer.py index 1aeeac3444..10331607d9 100644 --- a/megatron/optimizer/distrib_optimizer.py +++ b/megatron/optimizer/distrib_optimizer.py @@ -2,10 +2,11 @@ """Megatron distributed optimizer.""" -from deepspeed.accelerator import get_accelerator -if get_accelerator().device_name() == 'cuda': +# from deepspeed.accelerator import get_accelerator +# if get_accelerator().device_name() == 'cuda': +try: from apex.optimizers import FusedAdam as Adam -else: +except Exception: from torch.optim import Adam import math diff --git a/megatron/utils.py b/megatron/utils.py index 97294070af..f6a293281a 100644 --- a/megatron/utils.py +++ b/megatron/utils.py @@ -10,8 +10,12 @@ from deepspeed.accelerator import get_accelerator if get_accelerator().device_name() == 'cuda': - from apex.multi_tensor_apply import multi_tensor_applier - import amp_C + try: + from apex.multi_tensor_apply import multi_tensor_applier + import amp_C + HAS_APEX = True + except Exception: + HAS_APEX = False from megatron import ( get_args, @@ -74,15 +78,14 @@ def calc_params_l2_norm(model): # Calculate norm dummy_overflow_buf = get_accelerator().IntTensor([0]) - if get_accelerator().device_name() == 'cuda': - + if get_accelerator().device_name() == 'cuda' and HAS_APEX: norm, _ = multi_tensor_applier( amp_C.multi_tensor_l2norm, dummy_overflow_buf, [params_data], False # no per-parameter norm ) - else : + else: norm = torch.norm(params_data,p=2.0) norm_2 = norm * norm # Sum across all model-parallel GPUs. 
diff --git a/pretrain_gpt_alcf.py b/pretrain_gpt_alcf.py index 0139330277..6530340c19 100644 --- a/pretrain_gpt_alcf.py +++ b/pretrain_gpt_alcf.py @@ -39,20 +39,19 @@ import ezpz as ez -# ---- SETUP DISTRIBUTED COMMS ---- -# RANK = ez.setup_torch( -# backend='deepspeed', -# port='5432', -# ) -RANK = ez.get_rank() +# ---- [SETUP COMMS] ------------------------ +RANK = ez.setup_torch(backend="deepspeed") +# RANK = ez.get_rank() WORLD_SIZE = ez.get_world_size() +LOCAL_RANK = ez.get_local_rank() DEVICE = ez.get_torch_device() - -# --- TURN OFF LOGGER ON ALL RANK != 0 ---- +if torch.cuda.is_available(): + torch.cuda.set_device(LOCAL_RANK) +# ------------------------------------------- +# --- [TURN OFF LOGGER ON ALL RANK != 0] ---- log = get_logger(__name__) log.setLevel("INFO") if RANK == 0 else log.setLevel("CRITICAL") - -# ---- SETUP WANDB FROM RANK 0 ---------------- +# ---- [SETUP WANDB FROM RANK 0] -------------- WANDB_MODE = os.environ.get('WANDB_MODE', None) DISABLE_WANDB = ( WANDB_MODE is not None and str(WANDB_MODE).lower() == 'disabled' @@ -70,7 +69,7 @@ print('--------------------------------------------------') print(f"Setting up W&B from: {RANK} with {project_name}") print('--------------------------------------------------') - ez.setup_wandb(project_name=project_name) + _ = ez.setup_wandb(project_name=project_name) def model_provider(pre_process=True, post_process=True): @@ -163,6 +162,12 @@ def model_provider(pre_process=True, post_process=True): print_rank_0(80 * '-') see_memory_usage("After Building Model", force=True) if wandb.run is not None: + tbdir = args.tensorboard_dir + # tbdir = args.getattr('tensorboard_dir', None) + if tbdir is not None: + log.info(f'Patching tensorboard from {tbdir}') + wandb.tensorboard.patch(root_logdir=tbdir) + wandb.run.config.update({'num_params': num_params}) if "args" not in wandb.run.config: log.info( diff --git a/train_llama_alcf.sh b/train_llama_alcf.sh index ce18842850..4aac1153c7 100644 --- a/train_llama_alcf.sh 
+++ b/train_llama_alcf.sh @@ -5,6 +5,11 @@ #PBS -l select=48 #PBS -l filesystems=eagle:home +if [[ -n "${DEBUG-}" ]]; then + printf "\e[1;31m%s\e[0m\n" "!! RUNNING IN DEBUG MODE !!" + set -euxo pipefail +fi + function sourceFile() { fp="$1" echo "source-ing ${fp}" @@ -20,32 +25,34 @@ function sourceFile() { cd "${PBS_O_WORKDIR}" || exit HERE=$(python3 -c 'import os; print(os.getcwd())') export HERE + # ----[1. Assert `./pretrain_gpt_alcf.py` exists:]----------------------------- export EXEC="${HERE}/pretrain_gpt_alcf.py" [ -f "${EXEC}" ] || exit + # ----[2. `source ./ALCF/helpers_alcf.sh`:]------------------------------------ sourceFile "${HERE}/ALCF/helpers.sh" || exit + # ----[3. Call fns from `./ALCF/helpers_alcf.sh`]------------------------------ setEnv || exit # 1. load `conda` environment -saveDSenv || exit # 2. save env vars to `.deepspeed_env` +# saveDSenv || exit # 2. save env vars to `.deepspeed_env` ezpz || exit # 3. determine WORLD_SIZE, etc. from `PBS_*` vars -makeHostfiles || exit # 4. create `deepspeed` hostfile from `$PBS_NODEFILE` + +# if [[ -z "${HOSTFILE}" ]]; then +# makeHostfiles || exit # 4. create `deepspeed` hostfile from `$PBS_NODEFILE` +# else +# echo "!! USING CUSTOM HOSTFILE FROM: ${HOSTFILE}" +# fi setParams || exit # 5. set command line arguments to pass to `"${EXEC}"` buildDSconfig || exit # 6. create `deepspeed_config.json` from runtime params from ^ setOutput || exit # 7. specify output directory for {logs, checkpoints, etc.} setArgs || exit # 8. specify additional `deepspeed` arguments setData "${DATA_FILE_LIST}"|| exit # 9. specify `DATA_FILE_LIST` for dolma dataset -setDSlauncher "${HERE}" || exit # 10. set `launcher` args for `deepspeed ${launcher} ${EXEC} ${args}` +# setDSlauncher "${HERE}" || exit # 10. set `launcher` args for `deepspeed ${launcher} ${EXEC} ${args}` printJobInfo || exit # 11. 
print job info +setupLauncher || exit # ----------------------------------------------------------------------------- -# Take custom args -custom_args=" $@" - -# Assert `./hostfile_deepspeed` exists -export hfds="${HERE}/hostfile_deepspeed" && [ -f "${hfds}" ] || exit -TBDIR="${CKPT_DIR}/tensorboard" -mkdir -p "${TBDIR}" # TORCH_DEVICE=$(python3 -c 'import ezpz as ez; print(ez.get_torch_device())') # printf %s "Using TORCH_DEVICE=${TORCH_DEVICE}" @@ -56,20 +63,27 @@ mkdir -p "${TBDIR}" # fi -# source "${HERE}/venvs/polaris/2024-03-14/bin/activate" || exit -# echo "Using $(which python3)" -# --launcher_args='--pmi=pmix' -# deepspeed --hostfile $hfds --launcher ${LAUNCHER} ${EXEC} \ -# ${launch_cmd} \ -# --use-flash-attn-v2 \ -# --num-workers 0 \ +# export MPICH_GPU_SUPPORT_ENABLED=1 +# export CUDA_DEVICE_MAX_CONNECTIONS=1 +# export NCCL_DEBUG=INFO +# +# +# Assert TBDIR exists inside our $CKPT_DIR +TBDIR="${CKPT_DIR}/tensorboard" +mkdir -p "${TBDIR}" + +data_cache_path="${CKPT_DIR}/${DATA_CACHE_PATH}" +mkdir -p "${data_cache_path}" +module list - # aprun -n "${NGPUS}" -N "${NGPU_PER_HOST}" --pmi=pmix ${PBS_O_WORKDIR}/local_rank.sh - # ${DIST_LAUNCH} $(which python3) ${EXEC} \ -# yeet="${DIST_LAUNCH} ./local_rank.sh" +# Take custom args +custom_args=" $@" + + # --log-num-zeros-in-grad \ + # --log-memory-to-tensorboard \ run_cmd=" - deepspeed --hostfile $hfds --launcher MPICH ${EXEC} \ - --$DTYPE \ + ${LAUNCH_CMD} \ + --${DTYPE} \ --optimizer ${OPT} \ --split 100,0,0 \ --log-interval 1 \ @@ -103,9 +117,12 @@ run_cmd=" --global-batch-size ${GLOBAL_BATCH} \ --pipeline-model-parallel-size ${PP} \ --num-key-value-heads ${NUM_KV_HEAD} \ - --data-cache-path ${DATA_CACHE_PATH} \ + --data-cache-path ${data_cache_path} \ --ffn-hidden-size ${FFN_HIDDEN_SIZE} \ --tokenizer-model ${TOKENIZER_MODEL} \ + --timing-log-level ${TIMING_LOG_LEVEL} \ + --log-timers-to-tensorboard \ + --log-optimizer-states-to-tensorboard \ ${LLAMA_ARGS} \ $ds_args \ ${gpt_args[*]} \ @@ -113,7 +130,8 @@ 
run_cmd=" |& tee ${OUTPUT_LOG} " -echo "! Using $(which deepspeed)" +# ds_exec +# echo "! Using $(which deepspeed)" ds_report echo "${run_cmd}"