From b4813e38eada6ea27c61c41674c76ede56582b11 Mon Sep 17 00:00:00 2001 From: Anna Date: Wed, 13 Feb 2019 12:32:38 -0500 Subject: [PATCH 1/9] Ignore OSX system files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index aea0bd6..0ebd11d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ src/*.o src/*.so inst/doc +*.DS_Store +*._* +*.swp From da3eaf0b40e389559686967d8dfd61529ffcf2f1 Mon Sep 17 00:00:00 2001 From: Anna Date: Wed, 13 Feb 2019 12:33:29 -0500 Subject: [PATCH 2/9] Add test cases for reading 10x bed files --- inst/extdata/test_x10_bed.tsv | 1000 ++++++++++++++++++++++++++++++ tests/testthat/test_get_counts.R | 20 +- 2 files changed, 1011 insertions(+), 9 deletions(-) create mode 100644 inst/extdata/test_x10_bed.tsv diff --git a/inst/extdata/test_x10_bed.tsv b/inst/extdata/test_x10_bed.tsv new file mode 100644 index 0000000..2373c24 --- /dev/null +++ b/inst/extdata/test_x10_bed.tsv @@ -0,0 +1,1000 @@ +chr1 10066 10198 CTCTCAGTCCGGGCAT-1 3 +chr1 10073 10279 CTTGCCACAGCGTGAA-1 2 +chr1 10073 10333 ACTAACGCAGATGGCA-1 1 +chr1 10085 10284 ATTTGTCCAATGCCAT-1 1 +chr1 10089 10267 TACTGCCGTCTCTGCT-1 1 +chr1 10090 10536 CCGTACTTCTGGCCAG-1 1 +chr1 10096 10315 TAACTTCGTTTCGATG-1 2 +chr1 10096 10543 TGAGCCGGTTAGGCTT-1 1 +chr1 10097 10352 CACCTTGCACACATGT-1 1 +chr1 10132 10261 TGTGGCGTCTGAAAGA-1 1 +chr1 10157 10204 TACGGATCACTCAAGT-1 6 +chr1 10167 10437 CTGAATGAGTCTCTAG-1 1 +chr1 10168 10405 TTAGCGAGTAAAGGCC-1 1 +chr1 10215 10533 TCAAAGCGTTAGAGAT-1 1 +chr1 10216 10271 GGGACCTAGGATATCA-1 1 +chr1 10216 10334 AATGGAAGTCAAACAA-1 2 +chr1 10233 10339 TATCTGTTCTTGTCAT-1 1 +chr1 10234 10589 TGCTCACAGTACCCAT-1 3 +chr1 10235 10569 GAACCGCCATGCGCTG-1 1 +chr1 10239 10497 ACCAAACCAGCAAACG-1 2 +chr1 10247 10309 GAGGCTCAGAACGTCG-1 3 +chr1 10279 10401 CACTAAGAGTTACCAC-1 1 +chr1 10284 10615 TCGCCTACAACACGGA-1 1 +chr1 10289 10589 TGCATTTCACTCAAGT-1 1 +chr1 10308 10344 GGAATCTCAGGAGCAT-1 1 +chr1 10308 10351 CGCTAGGAGCGATACG-1 2 +chr1 10314 10357 TTACCCGGTTTAGGAA-1 1 +chr1 10315 10357 TTACCCGGTTTAGGAA-1 1 +chr1 10369 10617 TGTACGAGTAACCGAG-1 3 +chr1 10373 10543 GACCCAGAGATATGTG-1 4 +chr1 10374 10587 CTCTACGGTTGTGACT-1 1 +chr1 10385 10587 CTTCCAGTCAGTGGTT-1 1 +chr1 10387 10565 GATGATCCACGTTACA-1 1 +chr1 10393 10565 GGGACCTCACAACGGA-1 2 +chr1 10393 10581 GAGCGCTCAACAACTC-1 2 +chr1 10394 10589 GTCACAAAGACGCCCT-1 2 +chr1 10394 10601 GCTGCGAGTATCTGCA-1 1 +chr1 10431 10625 TAATCGGCACGCGTTG-1 3 +chr1 10437 10594 TGGGTTACATGGGTGA-1 4 +chr1 10444 10472 GCTTGCTTCTGATCCC-1 1 +chr1 10444 10548 TTTGCGCTCTATCTAC-1 1 +chr1 10449 10606 ATTACCTTCCCAGCGA-1 1 +chr1 10450 10543 CCCTGATCAAAGCATA-1 2 +chr1 10450 10609 TCAGGTATCCGTGCGA-1 1 +chr1 10498 10543 TGTGTCCAGGATGCCA-1 1 +chr1 10517 10569 ATGTACGAGTATGCTC-1 1 +chr1 10530 10584 CCGCATTTCTTCATAC-1 2 +chr1 16205 16248 ACTACCCTCCAGGTAT-1 1 +chr1 16215 16253 AAGGTTCCAACGAGGT-1 2 +chr1 16226 16261 ACCGAATCAACGGGTA-1 1 +chr1 16236 16274 AAAGGGCCACAAACGG-1 1 +chr1 17100 17517 CCGTGAGAGACCATAA-1 1 +chr1 17123 17515 GAACCGCGTTTGCCAA-1 1 +chr1 17144 17501 GCACGGTCACCCATCT-1 1 +chr1 17170 17525 CCGTGAGAGACCATAA-1 1 +chr1 17183 17517 TAAGCCATCAAGTTGC-1 1 +chr1 17310 17527 TTCATTGAGAGACTCG-1 1 +chr1 17356 17515 CTAGGATAGTCGAAAT-1 1 +chr1 17468 17649 ACAATCGCACCCTTTG-1 5 +chr1 17477 17499 GCGCCAATCATCGCCT-1 1 +chr1 17489 17692 AAACTGCGTGAGCTAG-1 1 +chr1 20115 20165 GTCGTAAAGAGCGAAA-1 2 +chr1 28948 29347 GAACCGCTCAGCACGC-1 4 +chr1 28955 29336 GAGCGCTGTGCCCGAT-1 2 +chr1 28956 29321 AGCCCGAAGTAATGTG-1 1 +chr1 51475 51802 GACCGACCATAGTCCA-1 1 +chr1 54343 54507 ATTACCTTCTGTTTAC-1 1 +chr1 54452 54497 TTACTCACATCCCAAA-1 2 +chr1 55285 55673 GATGATCGTAGTTTGC-1 1 +chr1 56447 56645 TTGAGTGTCAGCACTA-1 1 +chr1 56447 56648 GATGATCGTAGTTTGC-1 1 +chr1 56854 57066 GGGTTATAGCAGAAAG-1 1 +chr1 61998 62107 GCTTGCTAGTAACATG-1 2 +chr1 62152 62743 TTTGTGTTCGACCCGA-1 1 +chr1 62896 63084 GACCGACCATAGTCCA-1 1 +chr1 64627 64799 TTAACGGTCCGAGAGA-1 1 +chr1 64627 64824 CTCCCAACAACTACTG-1 2 +chr1 64629 64827 CAAGGCCTCCTAGAGT-1 2 +chr1 68280 68496 CAAAGCTTCAATTCCT-1 2 +chr1 73212 73403 GTTGGGCGTGTCCCAG-1 1 +chr1 73216 73249 CGCAGGTTCTACCCGT-1 1 +chr1 79073 79241 GTGACATCAGCAACAG-1 4 +chr1 80266 80465 GCAGATTAGTAGGTCG-1 2 +chr1 83500 83842 CAGCTAAGTATCTGCA-1 2 +chr1 83723 83912 GTGACATCAGCAACAG-1 4 +chr1 85709 85946 AACTGGTAGCTCCGGT-1 1 +chr1 85999 86185 GTGACATCAGCAACAG-1 3 +chr1 86211 86342 TTGAGTGTCAGCACTA-1 1 +chr1 86950 87140 GTGACATCAGCAACAG-1 1 +chr1 86970 87172 TCACAAGGTCAAGACG-1 1 +chr1 87070 87224 CTCAACCCACATTGCA-1 1 +chr1 87144 87312 CTAACTTAGAGTCCGA-1 1 +chr1 87504 87868 CAGCTAAGTATCTGCA-1 1 +chr1 88289 88576 CAGCTAAGTATCTGCA-1 1 +chr1 89737 89929 AACTGGTAGCTCCGGT-1 1 +chr1 91231 91483 AGCCCGAGTCTGGTCG-1 2 +chr1 91509 91883 CTTTGCGGTATTGTCG-1 1 +chr1 94970 95122 CAGCTAAGTATCTGCA-1 1 +chr1 96232 96599 CCTCCCTAGGGAGATA-1 1 +chr1 96577 96624 ATATTCCGTATGCTAC-1 1 +chr1 96585 96629 CAAGAAACAAGGCGTA-1 1 +chr1 98372 98413 AGATTCGCAATGGTAA-1 1 +chr1 99608 99754 CTTCCAGAGCTTTCCC-1 1 +chr1 102928 102991 TTACTCATCTGTTGCC-1 2 +chr1 104154 104621 GGTGCTGAGTACGCGA-1 1 +chr1 104156 104329 AAAGGGCCAACTAGAA-1 1 +chr1 115608 115762 ACTCGGGAGGTCCTCG-1 2 +chr1 115675 115741 GAGACTTTCTCATATC-1 1 +chr1 123756 123790 GAGATTCCATTCGTCC-1 1 +chr1 125997 126157 TTAGCTTGTCTGGTTA-1 1 +chr1 127515 127553 ACTACGAGTGGCCTTG-1 2 +chr1 127763 127796 ATCCTGCGTCAAACAA-1 1 +chr1 127778 127825 GGCGTTGCACCTATTT-1 1 +chr1 128663 128693 GAACCTAAGAGTCCGA-1 1 +chr1 133387 133449 GCGGGTTCAACGACAG-1 2 +chr1 133834 133867 ATGGATCCAGGCAAGT-1 1 +chr1 135085 135217 ACTCGGGAGGTCCTCG-1 1 +chr1 135178 135359 GACTAACCACTTATGG-1 2 +chr1 137969 137990 ATGTACGGTCTGGGCT-1 1 +chr1 138312 138532 CCGTGAGAGACCATAA-1 2 +chr1 138532 138564 AATGGAAGTAATGCAA-1 2 +chr1 138916 139063 GAGATTCCAGTAGTCT-1 2 +chr1 139298 139640 CTCAACCCAAGCAACG-1 1 +chr1 139431 139629 GCACCTTTCCAACGCG-1 1 +chr1 150411 150588 GAGCGCTCATAGCCAT-1 1 +chr1 229753 229905 GCATTGACACGTACAT-1 1 +chr1 232996 233491 TTCATTGCAAGCGTCC-1 1 +chr1 234345 234519 GCGGTGTCATCGGCCA-1 4 +chr1 234356 234733 AATGCCATCGGTCGAC-1 1 +chr1 235631 235678 CTGGCAGTCTGTCGGG-1 3 +chr1 235705 236070 AACAAAGAGGTACATA-1 1 +chr1 235898 235960 TTACGTTCAATAGTGA-1 3 +chr1 237078 237124 TAGCACAGTCGTCGCA-1 1 +chr1 237094 237143 AACCTTTAGTCTCGAT-1 1 +chr1 237103 237138 GTGCTGGCATCCCTTG-1 2 +chr1 237712 237756 ATCCCTGCAAGCAACG-1 4 +chr1 237712 237767 TTAGCGAAGGCCTCTG-1 1 +chr1 237712 237774 ACTAACGAGCTACGCC-1 1 +chr1 237712 237788 GGCATTACATCATGTG-1 3 +chr1 237712 237789 GCAGCCAAGCAACGGT-1 2 +chr1 237714 237777 CCAGAATAGAAGGGCG-1 1 +chr1 237716 237751 ATTGTCTGTTTGGATC-1 4 +chr1 237716 237756 CTCAACCCAAGCAACG-1 1 +chr1 237716 237767 CTCTAGCCACTGATTG-1 2 +chr1 237717 237751 AACCTTTAGTCTCGAT-1 4 +chr1 237719 237762 CTGTTCGAGAATCAAC-1 3 +chr1 237721 237756 GACTAGTTCTCTGTTA-1 2 +chr1 237721 237774 AACTGTGGTAGGGTTG-1 1 +chr1 237722 237764 CGCGCAACATGCACTA-1 2 +chr1 237722 237784 TCGGGACGTTCACGTA-1 1 +chr1 237724 237782 AAACGAAGTGGACGAT-1 4 +chr1 237724 237788 ACTACGACACGAACGA-1 2 +chr1 237726 237757 CAAGAAATCCAATAGC-1 2 +chr1 237726 237772 ATCCCTGCATTACTTC-1 1 +chr1 237726 237782 CTGCGTTGTAACTGAA-1 1 +chr1 237727 237762 CAGCCTTCATGGCCGT-1 2 +chr1 237731 237788 CTAGCGGCAAAGGAAG-1 1 +chr1 237736 237772 GCCCAGACATTTGGCA-1 1 +chr1 237738 237783 CTTGCTGCAAGGGTAC-1 1 +chr1 237738 237794 TTCATTGGTTAGTAGA-1 1 +chr1 237741 237775 CGCTATCGTGCACTTA-1 1 +chr1 237743 237767 GGAGTAGGTGGACTGA-1 1 +chr1 237743 237788 TGGACATTCTATCTAC-1 1 +chr1 237746 237774 CAGGGCTAGGGCGAAG-1 3 +chr1 237746 237779 TGCGTAATCTTACCTA-1 1 +chr1 237746 237782 AACGGGAAGTGGTGTG-1 1 +chr1 237746 237788 CCCTCTCCAACATAAG-1 1 +chr1 237746 237789 CGCTATCTCTTCTTCC-1 1 +chr1 237746 237793 AGACAAACATGCTATG-1 2 +chr1 237748 237782 GTCGTAAAGAGCGAAA-1 1 +chr1 237749 237794 AGGCGTCGTTCATTTC-1 1 +chr1 237751 237774 GAACTTGAGGGTCCCT-1 1 +chr1 237751 237777 CCCACATTCGGAAAGC-1 1 +chr1 237751 237782 CTCTACGGTTCCCTTG-1 2 +chr1 237751 237783 TGTGACAAGCCTTTGA-1 4 +chr1 237751 237788 TCGATTTGTAACAATG-1 1 +chr1 237751 237794 GCGCCAAAGGATCCTT-1 2 +chr1 237867 237999 GGAACTTCAATTGTGC-1 1 +chr1 239693 239751 TGTAGCATCTCGGCGA-1 2 +chr1 240753 240805 GAACCGCAGTCGAGCA-1 1 +chr1 240764 240815 ACCGCAGAGGTACACG-1 1 +chr1 240779 240820 TTTGAGGAGGTAAGCC-1 2 +chr1 241008 241091 CCCACATAGAAAGGGT-1 4 +chr1 241008 241230 GCGAGTCTCGATAGCT-1 1 +chr1 241023 241057 AGTGCGCGTGGAAAGA-1 1 +chr1 241030 241205 TTTGAGGAGGTAAGCC-1 1 +chr1 241032 241096 TTTGGTTCATGGGAAC-1 2 +chr1 241032 241265 AAGGAGCAGGAGTACC-1 2 +chr1 241039 241095 TGCTCACGTACGTATC-1 2 +chr1 241125 241217 TACCTCGCAAGGATGC-1 3 +chr1 241312 241464 TAGACTGAGAGTTCGG-1 1 +chr1 243671 243833 TTACCCGTCTCTATCA-1 2 +chr1 244120 244186 TGTGGCGTCACTCGGG-1 1 +chr1 244122 244193 TGCTTCGCAAGTAATG-1 1 +chr1 244130 244193 TGACAACGTCTGTTGA-1 1 +chr1 244132 244174 GTAGGAGTCTCAGATG-1 4 +chr1 244132 244185 AACGTACAGTTAGAGT-1 1 +chr1 244132 244197 AATGGCTAGGATATCA-1 1 +chr1 244145 244185 TACATGGCAACGTCGC-1 6 +chr1 245441 245473 GGCACGTCACCACAGC-1 3 +chr1 247572 248027 TCAGCTCCAACTCGAT-1 1 +chr1 247803 247970 GGACACCTCAAGGCAG-1 1 +chr1 247819 247985 GATTGACTCCTGAAAC-1 1 +chr1 250272 250514 TTGTTCAAGTTGTCCA-1 2 +chr1 250306 250498 TGCGTAACACTGTCAA-1 1 +chr1 250342 250380 GGACACCTCAAGGCAG-1 1 +chr1 250465 250598 GTGCACGAGGCAAGCT-1 5 +chr1 250471 250936 GGTGCTGAGTACGCGA-1 1 +chr1 255389 255434 CCTCCCTAGCCGCTGT-1 1 +chr1 256572 256736 TCTCTGGAGTGAAGGA-1 1 +chr1 256594 256649 AGATAGAAGTAGCAAT-1 1 +chr1 261999 262037 CAGCTGGAGATATGTG-1 2 +chr1 266701 266762 GGACACCTCAAGGCAG-1 1 +chr1 267251 267302 GTTGGTACACATCATG-1 1 +chr1 362890 363094 TACGCAATCGCTTGAA-1 1 +chr1 521544 521591 TAGCTTTTCGCATACA-1 1 +chr1 521545 521589 CGCTATCTCTTCTTCC-1 1 +chr1 521547 521591 TGAGCCGGTTGTGACT-1 2 +chr1 521547 521599 GTTCAAGAGCTTTCCC-1 1 +chr1 521547 521601 CGCTAGGAGAGAACCC-1 1 +chr1 521547 521611 CTGAATGAGCAGAATT-1 3 +chr1 521550 521599 TCAAGCAAGAATGCTA-1 4 +chr1 521552 521599 CAAAGCTGTCTGATTG-1 1 +chr1 521555 521596 CGGACCAGTGGACTGA-1 1 +chr1 521555 521611 CATTCCGAGGCGATTG-1 5 +chr1 521560 521611 AACATCGAGACCGCAA-1 1 +chr1 521565 521590 ACTTTCAGTCCTTATT-1 1 +chr1 521565 521596 TCTCAGCCACGATATC-1 1 +chr1 521565 521601 TGCCTGTGTGGACCAA-1 1 +chr1 521565 521611 GCAACCGGTTGAATAG-1 2 +chr1 521571 521599 ACTGTCCTCTACCCGT-1 1 +chr1 521574 521617 TAATCGGTCGATGTGT-1 1 +chr1 521577 521606 ACCCAAACATCTCTCG-1 1 +chr1 521577 521611 ACAAAGAAGAACGCCA-1 2 +chr1 523750 523793 TGCACCTTCGGCTATA-1 1 +chr1 523773 523820 TTACTTGTCTCCTTGG-1 1 +chr1 525660 525709 GCTGTTCGTCCGTGCA-1 2 +chr1 525861 526074 GCCTACTAGTTGCTTG-1 1 +chr1 525871 526074 GAAATGACATCCTCGT-1 1 +chr1 526561 526982 GTAGGAGTCCTGAAAC-1 1 +chr1 526717 526915 GCCTACTAGTTGCTTG-1 1 +chr1 527052 527180 CAGTATGGTCACCCGA-1 1 +chr1 528003 528186 GTGTCAATCCACGCTT-1 1 +chr1 531908 532087 CCGTGAGAGACCATAA-1 1 +chr1 532046 532091 TTCGATTGTAAAGCTA-1 1 +chr1 532431 532592 ATCCAGAGTCAGCAAG-1 1 +chr1 532894 533086 CAGTATGGTCACCCGA-1 1 +chr1 533975 534350 TCACAAGGTCAAGACG-1 1 +chr1 534306 534474 CCTTAATCAGGCATTT-1 1 +chr1 534527 534566 AACCTTTTCCGTTAGA-1 1 +chr1 536004 536036 CACAACATCTCTTCCT-1 1 +chr1 537169 537203 TTGCTATCACATAAAG-1 3 +chr1 537603 537641 CACCACTGTTCGGGAA-1 1 +chr1 538584 538754 CACAACATCTCTTCCT-1 3 +chr1 538910 538944 CCGTGAGAGACCATAA-1 1 +chr1 540608 540652 GATCATGCAATTGGCT-1 1 +chr1 540614 540998 AACTGTGGTAGGGTTG-1 2 +chr1 540617 540820 CGATGATCATTCGTCC-1 3 +chr1 540627 540668 TTTGTGTCAAGGATGC-1 1 +chr1 540627 540991 GTGCCAGTCGGTTCCT-1 2 +chr1 540708 540812 AACCTGACACACATTG-1 2 +chr1 540767 540902 CAAGGCCCAGGTGTGA-1 1 +chr1 540876 540996 ACAGACTTCACCCGGA-1 2 +chr1 540929 540977 ATTTGTCCAATGCCAT-1 1 +chr1 540936 540973 CGGTGCATCTCATATC-1 1 +chr1 540941 540973 GCACGGTGTCTGCACG-1 2 +chr1 540947 540973 GGACACCAGGCTGGAT-1 1 +chr1 540948 540973 TTTGCGCTCTATCTAC-1 2 +chr1 540952 541185 GAACCTAAGGTTCTCA-1 1 +chr1 540960 541013 GTTACTTAGCATGATA-1 1 +chr1 540971 541003 GTTACTTCAGCAACAG-1 1 +chr1 540971 541005 AACAAAGGTCCAAGTT-1 4 +chr1 540971 541006 TGGTCCTCACAAGGGT-1 2 +chr1 540972 540998 CTGCTCATCTATCTCA-1 1 +chr1 542677 542708 TCAAGGTCACAGTTCA-1 1 +chr1 544494 544531 CTTCCAGTCCCGCAAG-1 1 +chr1 549339 549584 ACCTGCTCACTGAAGG-1 1 +chr1 549537 549567 AACCTGACACACATTG-1 1 +chr1 553331 553361 TTGACGACATTGTGAC-1 1 +chr1 561443 561463 AGTCCGGAGCTGAAAT-1 1 +chr1 563394 563443 CCGTAGGGTCGATTAC-1 1 +chr1 563396 563492 CCTGCTAAGTCTCGAT-1 1 +chr1 564294 564443 TTCGATTGTAAAGCTA-1 1 +chr1 564334 564647 CAACGTAGTTCTGAAC-1 1 +chr1 564661 564864 ATTGTGGTCTCGCGTT-1 1 +chr1 564694 564864 TTTGCGCCAATTGTGC-1 1 +chr1 564739 564894 GTGCACGTCCTTCGAC-1 2 +chr1 564841 565027 TGGGTGCGTGAGCTAG-1 1 +chr1 564977 565020 CCGAAGCCAACCTCCT-1 1 +chr1 565088 565471 ACGTGGCCAAAGAAGG-1 2 +chr1 565288 565322 AACTGGTCACTGTCAA-1 8 +chr1 565288 565323 CCGCATTTCTAGCTGA-1 1 +chr1 565288 565336 TACGGATAGGATTAAC-1 1 +chr1 565288 565337 TACATGGCAATCCATG-1 2 +chr1 565288 565342 ACTAGGTAGATTCCGT-1 2 +chr1 565288 565343 TCAGTCCGTTGGTAAA-1 1 +chr1 565288 565344 TGCTCGTGTTTGTGAC-1 3 +chr1 565288 565348 TTAGCGACAGGTAGGT-1 1 +chr1 565288 565350 ACTAACGAGACCGCAA-1 3 +chr1 565288 565353 TGCATGAAGTAGCAAT-1 4 +chr1 565288 565355 ACGTGGCGTTGGCTAT-1 2 +chr1 565288 565356 CTTCCAGTCGGGAAAC-1 2 +chr1 565288 565358 CATGTTTAGCGAGAAA-1 1 +chr1 565288 565359 GACCAATGTCCGTAGC-1 1 +chr1 565289 565360 ATAGTCGGTCATAGAA-1 2 +chr1 565290 565330 CGTTCCATCCCACTTG-1 1 +chr1 565290 565348 ACTTCCGGTCTCTGGG-1 1 +chr1 565290 565350 AAGATAGCAAGGAAGA-1 2 +chr1 565290 565361 TAGCTTTAGTCAGCCC-1 1 +chr1 565291 565360 GGTGAAGCATCGACCG-1 1 +chr1 565292 565360 TACAGCAAGGGATCCA-1 3 +chr1 565293 565319 AATGGCTAGTGCTAGG-1 1 +chr1 565293 565322 CCCGTTAAGGCAGTAC-1 12 +chr1 565293 565324 AAACTCGGTCAGTGCC-1 1 +chr1 565293 565335 GGAGTAGCACTCGGAC-1 1 +chr1 565293 565336 ACTACGACAAAGGAAG-1 3 +chr1 565293 565337 TTCAACTCACCCTTAC-1 6 +chr1 565293 565339 CGGACTGAGTAGTTCC-1 1 +chr1 565293 565340 CCTAAAGCAAACAGTA-1 1 +chr1 565293 565341 AAACGAAAGTAACATG-1 3 +chr1 565293 565342 TTGAGCAAGACCAATA-1 11 +chr1 565293 565343 GTTATGGTCCTATCCG-1 5 +chr1 565293 565344 GACTAACTCTATTTCG-1 1 +chr1 565293 565346 CGGTGCATCCGGGCAT-1 2 +chr1 565293 565347 AATGTCGGTATCTAGC-1 4 +chr1 565293 565348 AGCTGTAAGCCATGGA-1 3 +chr1 565293 565350 GTTACTTTCATCGCCT-1 2 +chr1 565293 565351 TAGCTTTTCGCATACA-1 1 +chr1 565293 565352 CTGTATTAGATGTTCC-1 1 +chr1 565293 565353 GGTGCTGGTCCAAGAG-1 16 +chr1 565293 565358 CAGGATTAGCGTTGCC-1 22 +chr1 565293 565360 CACGTTCTCGTATAGC-1 2 +chr1 565293 565361 GTCGTAAGTATCATGC-1 11 +chr1 565294 565322 CCTTGCAAGGCAGTGT-1 1 +chr1 565294 565330 CTGTATTGTGCTTCCT-1 2 +chr1 565294 565350 GCAAAGGAGAATCAAC-1 2 +chr1 565294 565353 CCGTACTTCTCGCGGA-1 11 +chr1 565295 565322 CTTGCCATCCTGGAAT-1 1 +chr1 565295 565337 AGCTGATTCTGGTACA-1 3 +chr1 565295 565342 TACATGGGTCAACAGG-1 4 +chr1 565295 565347 ACATGCATCGGCATAT-1 3 +chr1 565295 565348 TCACAAGGTTTCTTAC-1 1 +chr1 565295 565355 GTGTGATCAAGTAACA-1 1 +chr1 565295 565358 GCTGTTCAGGTGAACC-1 2 +chr1 565295 565360 CTTCCAGTCAAGGCAG-1 2 +chr1 565296 565353 GAAGAGCAGGGACGTT-1 1 +chr1 565298 565337 ATTCGTTTCCTCTCTT-1 1 +chr1 565298 565342 CCGTACTTCGATGTGT-1 9 +chr1 565298 565347 GGATGAGCAGGTCCTG-1 3 +chr1 565298 565353 GGGAACATCTCGGCGA-1 12 +chr1 565298 565358 GGAATCTAGTGTCGCC-1 12 +chr1 565298 565360 ATCGAGTTCAACACGT-1 6 +chr1 565300 565353 GTCACAACAGGTTATC-1 1 +chr1 565300 565358 TAGGTCAAGTCTTAGC-1 1 +chr1 565301 565322 CTTTGCGAGACCAATA-1 1 +chr1 565301 565353 AGGCCCATCTGGGCGT-1 2 +chr1 565302 565350 TGCTTTAGTGTCCAGC-1 1 +chr1 565303 565337 GGTCATAAGTGTCGCC-1 2 +chr1 565303 565346 CCACAGGGTACATGGG-1 1 +chr1 565303 565347 GTTACGACAGATGGCA-1 4 +chr1 565303 565348 AACTTGGGTGGACTGA-1 1 +chr1 565303 565351 GTTATTCCAATTGCCA-1 4 +chr1 565303 565353 AACTTGGGTTATAGAG-1 3 +chr1 565303 565358 ACTAGGTAGTCCAGAG-1 2 +chr1 565303 565360 GCAAAGGAGAATCAAC-1 3 +chr1 565304 565350 GCTTTCGAGTTGCGCC-1 1 +chr1 565305 565347 TTCGCGTTCCATGTTT-1 1 +chr1 565305 565353 AGCGATTCAAGTCCTA-1 5 +chr1 565305 565360 TACCTATGTACGGTTT-1 3 +chr1 565306 565342 GCGATTAGTAGCTGTT-1 1 +chr1 565306 565347 CGATGATCATTCGTCC-1 1 +chr1 565306 565350 TGTAAGCTCCTGTAGA-1 1 +chr1 565306 565355 TGATTTCAGCCTTTGA-1 1 +chr1 565306 565358 ATAGGCTTCCCGGGTA-1 3 +chr1 565306 565360 AGCTGTAAGCCATGGA-1 1 +chr1 565307 565337 GGAGGATTCGTCCCAT-1 1 +chr1 565307 565351 CTCATGCAGCAGTAGC-1 1 +chr1 565307 565353 GCACGCAAGCTGAGGT-1 1 +chr1 565307 565358 AACTTGGGTTAGGAGC-1 1 +chr1 565308 565330 TACTCGCGTGCGTTTA-1 1 +chr1 565308 565335 GTCACCTAGCTTCAAC-1 1 +chr1 565308 565337 TTGCTATCAAGGAAGA-1 1 +chr1 565308 565342 TAGCGGCCAAGAAACT-1 6 +chr1 565308 565350 TCGCCTAGTTTGACCA-1 1 +chr1 565308 565351 GGAATCTTCGATGCAT-1 1 +chr1 565308 565353 GTCCATCGTCGCTACG-1 7 +chr1 565308 565358 GCTTGCTCAAGCGACA-1 3 +chr1 565308 565360 TCAGGGCAGGGAAGCG-1 3 +chr1 565309 565342 ACCATCCTCACCACAA-1 4 +chr1 565310 565343 ATCCAGACAGGTAGGT-1 1 +chr1 565310 565350 CAGCTAATCTCATATC-1 1 +chr1 565310 565358 GAAATGACAAACGACG-1 1 +chr1 565310 565360 AACGAGGCATGTAGAA-1 1 +chr1 565311 565358 ACTGTCCAGGTAGGAA-1 1 +chr1 565312 565354 CCCACATTCACCGCGA-1 1 +chr1 565312 565358 TTAGGTGGTTTAGAAG-1 3 +chr1 565313 565353 GGTGAAGGTGCTTGAT-1 6 +chr1 565313 565358 CGGTGCAAGGACCGAT-1 2 +chr1 565313 565360 TTCTAACAGGGCTCTC-1 7 +chr1 565314 565342 TTGCCCACATCTCTCG-1 1 +chr1 565314 565347 TCCGACTTCCATGACA-1 1 +chr1 565314 565358 TTGAGTGCAAGTAATG-1 1 +chr1 565314 565361 AAACTGCAGCTGCTCG-1 2 +chr1 565315 565348 TGGTCAAGTTCCCAAA-1 2 +chr1 565315 565353 AGACAAAGTAAGGTCG-1 8 +chr1 565315 565358 TTAGCGAGTTGGCTAT-1 6 +chr1 565315 565360 CTAGCGGAGCCATTCA-1 5 +chr1 565316 565346 CTCATGCGTTTCGTTT-1 2 +chr1 565316 565347 CTGTTCGCAAACAGTA-1 2 +chr1 565316 565350 TTCGGTCGTTTGCGAT-1 1 +chr1 565316 565351 CACAACAAGGAGACTC-1 2 +chr1 565316 565353 CAGGGCTAGCGCAATG-1 4 +chr1 565316 565358 CCTAAAGCAAGTCTCA-1 1 +chr1 565316 565361 ACTGTCCAGTAACACA-1 1 +chr1 565317 565341 TGTGACAAGTAGAAGG-1 1 +chr1 565317 565342 TTTGCGCTCATTCGGA-1 3 +chr1 565317 565350 CGTAAACAGCGGACTA-1 2 +chr1 565317 565351 TGATCAGGTGCTAGTT-1 6 +chr1 565317 565355 GTCGTAATCCCAGCGA-1 2 +chr1 565317 565356 GGTGCTGAGTACGCGA-1 1 +chr1 565317 565358 CTCTCGACAACTTGGT-1 2 +chr1 565317 565360 TCCAGAAGTCTGGTTA-1 5 +chr1 565318 565360 TCAGGTAAGGATGTCG-1 3 +chr1 565319 565353 ACTGTCCTCATTCTTG-1 1 +chr1 565319 565360 GTCTACCGTGTGCTTA-1 1 +chr1 565406 565437 AACGAGGAGTGGACGT-1 1 +chr1 565437 565612 TCAAGCAAGATATGTG-1 1 +chr1 565697 565904 GTTATTCTCCAGTACA-1 1 +chr1 565894 566154 CAGCTAAAGAAACGCC-1 1 +chr1 565894 566261 ACTACCCTCCCGGGTA-1 1 +chr1 565935 566079 CTCAACCCAAGCAACG-1 1 +chr1 565989 566022 TTCTGTAAGCCTCGCA-1 1 +chr1 565997 566138 CCGTGAGAGACCATAA-1 1 +chr1 566007 566191 TGCTCGTGTTCTGAGT-1 1 +chr1 566530 566579 GTGTCAATCTTAACGG-1 1 +chr1 566532 566595 TCGGGACCAGTTAAAG-1 1 +chr1 566538 566579 ACTTCCGGTAGAAGCC-1 1 +chr1 566540 566574 GAGCGCTTCCCAATGA-1 1 +chr1 566540 566614 AGATAGAAGGCAAGGG-1 1 +chr1 566550 566595 AGATAGAAGGCAAGGG-1 1 +chr1 566550 566597 GCGATTAGTACGTAGG-1 1 +chr1 566555 566602 TACATTCTCTCCAACC-1 1 +chr1 566558 566583 GCTCAGGTCTTGTCGC-1 1 +chr1 566560 566585 TGGGTGCTCCGGAAAG-1 1 +chr1 566561 566593 ATCCTGCTCTTCGTTA-1 1 +chr1 566569 566595 TCGGGACAGCGAATAT-1 1 +chr1 566574 566608 GAGACTTAGCGATACG-1 1 +chr1 567176 567233 ATCGAGTGTCACAGTT-1 1 +chr1 567218 567267 GAAGTCTGTCAAACAA-1 2 +chr1 567225 567271 CCCACATCAGTAAGAT-1 1 +chr1 567231 567269 TCAAGACAGAAAGCAG-1 1 +chr1 567656 567820 ACAGCGCAGTGTCCCG-1 2 +chr1 568162 568239 GAACCTACATACAACC-1 1 +chr1 568246 568377 CCGTGAGAGACCATAA-1 1 +chr1 568710 568906 GGGTCTGCAGATACAA-1 1 +chr1 568721 568872 CAACGTATCCTATCAT-1 2 +chr1 568906 569286 GGGTCTGCAGATACAA-1 1 +chr1 568931 568973 CCACAGGGTCATAGCT-1 2 +chr1 569055 569256 AAATGCCCACGCCGAT-1 1 +chr1 569230 569438 TGGGTGCGTGAGCTAG-1 2 +chr1 569253 569417 CTCAGAATCCCGATTC-1 1 +chr1 569364 569414 CTGTTCGGTTAGGAAT-1 1 +chr1 569364 569417 ATATTCCTCGAGTGTT-1 1 +chr1 569364 569418 GGGACCTGTGCATTCA-1 5 +chr1 569364 569424 TGCATGATCCTCCATG-1 5 +chr1 569364 569437 ACATGCATCGGCATAT-1 1 +chr1 569364 569438 ACTAACGAGACCGCAA-1 8 +chr1 569364 569449 GACCAATGTCCGTAGC-1 1 +chr1 569365 569410 TTGAGCACATACCCGG-1 2 +chr1 569365 569412 GTCGTAATCACATCCC-1 1 +chr1 569365 569413 CGCTAGGAGCGATACG-1 2 +chr1 569365 569415 CCTTAATTCGGTCCGA-1 1 +chr1 569365 569417 AACCTGAGTCCACCAG-1 3 +chr1 569365 569418 TTGAGCAAGACCAATA-1 5 +chr1 569365 569419 CACTAAGGTCGCGCTA-1 1 +chr1 569365 569432 CACCTGTGTGCAAGAC-1 1 +chr1 569365 569436 GTTACGAAGATGCGAC-1 1 +chr1 569365 569438 AATGCCATCGGTCGAC-1 7 +chr1 569365 569444 GCGATCGCACACACAT-1 1 +chr1 569366 569417 TCCATCGAGGTCACTT-1 1 +chr1 569367 569409 GTTACTTTCCAGAATC-1 3 +chr1 569367 569412 GCACGGTCAACACGGA-1 3 +chr1 569367 569413 CAAGCTAGTGCATTCA-1 5 +chr1 569367 569417 AGGCCCAGTGGCGCTT-1 2 +chr1 569367 569418 GCTCGAGTCGCGTTCT-1 2 +chr1 569367 569419 TATTGCTTCACATCCC-1 2 +chr1 569367 569421 CCAGAATTCACATTCT-1 1 +chr1 569367 569424 GCTCCTAAGGTACATA-1 1 +chr1 569367 569437 ACTACCCCATCCCGGA-1 3 +chr1 569367 569438 CTCCCAACATTGCACA-1 4 +chr1 569368 569416 CCCTCTCCATTGTTCT-1 4 +chr1 569368 569417 GAGGTCCCACGCGATC-1 1 +chr1 569369 569410 CCCACATCATATTGGC-1 2 +chr1 569369 569413 TTCATCACAAACGTTC-1 1 +chr1 569369 569414 GATGGCCAGTCTAGAA-1 2 +chr1 569369 569417 GTGTCAAGTGCAACAG-1 4 +chr1 569369 569418 CACCACTAGGATGCCA-1 10 +chr1 569369 569419 AAACTGCGTTACTACG-1 4 +chr1 569369 569434 GGAGAACTCACATTCT-1 1 +chr1 569369 569443 GTCCATCTCCAATCCC-1 3 +chr1 569369 569444 CGTAAACAGTTCCCGG-1 1 +chr1 569369 569451 ACTGTCCGTAGGTAAT-1 3 +chr1 569370 569417 GCATTCCAGGTACTCT-1 6 +chr1 569370 569430 TTACTCAAGAGTCCGA-1 5 +chr1 569371 569417 GCTCACTAGGTCTTGT-1 1 +chr1 569372 569409 GTCTACCCAAGTCCTA-1 1 +chr1 569372 569413 ATAGGCTCACCCTTTG-1 1 +chr1 569372 569414 ACAGCGCGTGTCGTCG-1 5 +chr1 569372 569415 AGCGATTCAAGCCTTA-1 1 +chr1 569372 569416 GGACACCTCCCACTAC-1 3 +chr1 569372 569417 GACTAGTTCTCTGTTA-1 13 +chr1 569372 569418 TATGTTCCAAACGTTC-1 10 +chr1 569372 569419 AAGGAGCAGCTACGCC-1 9 +chr1 569372 569421 TGATGCAAGAGATTAC-1 4 +chr1 569372 569424 CAACGGCAGTTGCGCC-1 1 +chr1 569372 569426 GACTAGTCAGGCATTT-1 4 +chr1 569372 569438 TCAGGGCCAGTATACC-1 3 +chr1 569372 569442 AGTGCCGCATTGTGAC-1 1 +chr1 569372 569443 AACGTACCAAGTGGCA-1 2 +chr1 569372 569447 GTGCACGGTATCAGCT-1 3 +chr1 569372 569448 CAGCTAATCTGAGTAC-1 3 +chr1 569372 569449 GTTATTCTCTTAATCC-1 9 +chr1 569372 569451 AACGGGATCGGTCAGC-1 1 +chr1 569372 569453 CTGAATGAGTTCGCGC-1 2 +chr1 569373 569410 TACTGCCCATCGCCTT-1 3 +chr1 569373 569411 CCGAAGCGTAACGTAA-1 2 +chr1 569373 569415 TGCCTCAAGTAGAAGG-1 1 +chr1 569373 569416 CAAGCTAGTTACTACG-1 1 +chr1 569373 569417 GTCCATCGTCCTATTT-1 4 +chr1 569373 569418 ACTAACGTCGGGATCC-1 7 +chr1 569373 569424 TACTAGGCACCACCAG-1 2 +chr1 569373 569432 ACGGATTGTATTGTCG-1 2 +chr1 569373 569443 ATGTTTCGTCCAAGAG-1 3 +chr1 569373 569448 TCAGGGCAGGGAAGCG-1 5 +chr1 569373 569449 ACTACGAAGAGCACTG-1 1 +chr1 569374 569409 ACAAACCCAAGCCTTA-1 13 +chr1 569374 569410 TTGCGGGAGCAGAATT-1 9 +chr1 569374 569412 GCGTAGCTCGGTTCCT-1 2 +chr1 569374 569414 AGCGTGCCAATTCAGC-1 1 +chr1 569374 569415 TGCTATTGTGTGTGTT-1 17 +chr1 569374 569416 AACTGTGCATACCCGG-1 3 +chr1 569374 569417 GGTAGGATCACGATTG-1 42 +chr1 569374 569418 CTCTACGTCGGTTAGT-1 50 +chr1 569374 569419 GCCTACTGTTAGCTCA-1 8 +chr1 569374 569421 CTCTAGCGTGACAAGC-1 5 +chr1 569374 569424 AATGGAATCTATCTAC-1 6 +chr1 569374 569425 CAAGAAAAGTAGGAAG-1 1 +chr1 569374 569426 GTCCATCGTATTCGCA-1 6 +chr1 569374 569430 TCCAGAAGTAAGTCTC-1 4 +chr1 569374 569431 TTGAGCAAGACCAATA-1 3 +chr1 569374 569432 CCACAGGAGTAATCAG-1 4 +chr1 569374 569436 TTCGCGTGTGTACGCC-1 3 +chr1 569374 569438 CCCGTTAAGATAGGTT-1 8 +chr1 569374 569439 GTTGGTAGTGGCCTTG-1 2 +chr1 569374 569443 GTGTGATTCCCAGCAG-1 4 +chr1 569374 569444 GCAACCGGTTCCCAAA-1 2 +chr1 569374 569448 CAACGTAGTCACCGCA-1 6 +chr1 569374 569449 GAAGTCTGTCAAACAA-1 11 +chr1 569374 569453 TTCGGTCTCATTCGGA-1 13 +chr1 569376 569418 CTAACTTCACTGTCAA-1 3 +chr1 569378 569432 AAAGATGAGAGGCCTA-1 3 +chr1 569378 569443 ATTACTCGTATGGGTG-1 4 +chr1 569379 569409 GCGTAGCGTCTGATTG-1 1 +chr1 569379 569410 ATAGTCGCAGTTGGAG-1 1 +chr1 569379 569412 TTGCGGGAGCAGAATT-1 1 +chr1 569379 569413 GAGTGAGTCCCTACCA-1 7 +chr1 569379 569414 ATGTACGAGGCACCTC-1 2 +chr1 569379 569415 GGAATCTAGCGTCTGC-1 1 +chr1 569379 569417 TAGGAGGAGTGTCGGA-1 33 +chr1 569379 569419 CCCAGAGCAGTTAAAG-1 2 +chr1 569379 569424 GTCGTAATCGCACCTT-1 3 +chr1 569379 569426 TTAGCGAAGGGCTTCC-1 6 +chr1 569379 569430 GTGTCAAGTAAACCCT-1 2 +chr1 569379 569434 CATTGGACAGACTAAA-1 1 +chr1 569379 569438 GAGAACGAGTCTCCAA-1 3 +chr1 569379 569448 AAAGGGCTCACTAGGT-1 6 +chr1 569379 569449 CCCTGATCAAAGCATA-1 3 +chr1 569380 569409 CTTGCTGGTCATGAGG-1 3 +chr1 569380 569410 TACATTCTCAGGGTTT-1 2 +chr1 569380 569412 CCACAGGTCCTATCCG-1 2 +chr1 569380 569413 AACCGATAGCAGAAAG-1 2 +chr1 569380 569415 TCCCACAAGAGTTTGA-1 1 +chr1 569380 569417 CTTGTCGTCCCACTAC-1 9 +chr1 569380 569418 TTTACGTAGATGCGCA-1 10 +chr1 569380 569421 TTCTAACTCAGGAAGC-1 2 +chr1 569380 569423 CGGACCACATTCACGA-1 1 +chr1 569380 569424 TACGCAAGTAGACACG-1 7 +chr1 569380 569425 CCTTGCATCGTCGCAG-1 8 +chr1 569380 569426 GCAAAGGAGAATCAAC-1 3 +chr1 569380 569432 AGCCCGACACTAGGAG-1 1 +chr1 569380 569438 GCATTCCGTCTAAAGA-1 4 +chr1 569380 569443 GCAACCGGTTACTACG-1 1 +chr1 569380 569448 CTAACTTCACTGTCAA-1 5 +chr1 569380 569449 AGCTGTAGTTTGATCG-1 3 +chr1 569380 569453 GAGGTCCGTTCATTTC-1 3 +chr1 569381 569418 GGCGAAACAAGACTTC-1 1 +chr1 569381 569432 TGTGACAAGGATGCCA-1 5 +chr1 569381 569436 TAACAGCGTTAGAGAT-1 1 +chr1 569381 569438 CATTCATGTCCCGTGA-1 1 +chr1 569383 569418 CGTACAAAGTAGCAAT-1 2 +chr1 569384 569409 CCCTCTCCAAATTGAG-1 1 +chr1 569384 569410 TTCGTTAGTTCCTATT-1 5 +chr1 569384 569411 ACATGCATCACAAGCT-1 1 +chr1 569384 569412 TTGCACCGTAGCAGCA-1 3 +chr1 569384 569413 AGACAAACATGCTATG-1 18 +chr1 569384 569414 TGCCTGTGTCCTTATT-1 6 +chr1 569384 569415 AAACGAACATGCTATG-1 11 +chr1 569384 569416 GCTCGAGTCGCGTTCT-1 1 +chr1 569384 569417 TTGGTCCGTCCTCAGG-1 41 +chr1 569384 569418 CATAACGGTGGATTCT-1 39 +chr1 569384 569419 TTGCCCAGTGTCGTCG-1 9 +chr1 569384 569421 CCTTAATCAGAACTTC-1 1 +chr1 569384 569425 AACTGTGGTAACGGAC-1 2 +chr1 569384 569426 TTGTTCAGTAAGTGCG-1 6 +chr1 569384 569430 TCAGTTTGTCTGGGAA-1 3 +chr1 569384 569431 GACCAATGTCCGTAGC-1 1 +chr1 569384 569432 AATGTCGCACAGCTTA-1 18 +chr1 569384 569436 TGCATTTAGCCAGAAC-1 4 +chr1 569384 569438 CAATCCCCAATTGCCA-1 4 +chr1 569384 569439 TCAAGCAGTATTCTCT-1 7 +chr1 569384 569442 ACGGATTCAGCTTACA-1 1 +chr1 569384 569443 GCTTGCTAGTAGTGTA-1 4 +chr1 569384 569444 GCGGAAACATAGAATG-1 1 +chr1 569384 569448 GCAACCGCACAGTTCA-1 4 +chr1 569384 569449 GCGTAGCTCGCTACCT-1 8 +chr1 569384 569451 TGGGTGCCAACTCCCT-1 1 +chr1 569384 569453 TCAGTTTTCCCTACGT-1 8 +chr1 569385 569412 GTGACATCAAGGCTTT-1 1 +chr1 569385 569413 GGAGAACTCGTCGCAG-1 5 +chr1 569385 569414 TCAAGGTTCGCTTCAC-1 2 +chr1 569385 569415 GACCAATAGTGCTAGG-1 5 +chr1 569385 569416 GACCAATGTCCGTAGC-1 2 +chr1 569385 569417 GCATGATCACACTAGT-1 11 +chr1 569385 569418 TGGTCAAGTTCCCAAA-1 12 +chr1 569385 569419 CCACAGGCATACTGCA-1 7 +chr1 569385 569420 TAAGCCAAGGATTTGA-1 2 +chr1 569385 569423 CTCTACGGTTACGGAG-1 2 +chr1 569385 569425 TTAGCGACAAGATTAG-1 2 +chr1 569385 569430 CATGTTTTCCAACGCG-1 3 +chr1 569385 569432 AAGATAGAGCTCGTTA-1 4 +chr1 569385 569438 GACCAATGTCCGTAGC-1 2 +chr1 569385 569443 CCCACATGTATTCACG-1 1 +chr1 569385 569448 TATTGCTGTCCTCAGG-1 2 +chr1 569385 569449 TACTAGGGTATTGTGC-1 1 +chr1 569386 569413 GACCCAGGTCAGTGTT-1 1 +chr1 569386 569416 GTAGGAGGTAATGCAA-1 1 +chr1 569386 569438 GTGCCAGGTCAGAAAT-1 2 +chr1 569388 569413 GGCGAAAGTCTCGGGT-1 1 +chr1 569388 569418 TACTAGGAGTATCTGC-1 1 +chr1 569389 569414 CTTGCCAGTTCAGAAA-1 1 +chr1 569389 569415 GTAATCGCACATTCTT-1 1 +chr1 569389 569417 CCGTGAGCAATCCATG-1 1 +chr1 569389 569418 TCAATTCTCTTCTCTC-1 3 +chr1 569389 569448 TCAGGTACAAGCAATA-1 2 +chr1 569390 569413 CTGAATGAGCAGAATT-1 1 +chr1 569390 569414 TACCTATTCTGAACGT-1 2 +chr1 569390 569415 GTGTCCTTCAATGCAC-1 3 +chr1 569390 569416 TAGACTGAGTAACACA-1 1 +chr1 569390 569417 TTCAACTAGGAGAACA-1 16 +chr1 569390 569418 CCACGTTGTCAGAAGC-1 10 +chr1 569390 569419 TAATCGGCAGGTGTCC-1 2 +chr1 569390 569421 CATTCATGTACTTGAC-1 2 +chr1 569390 569425 GAAGTCTCAACGCACC-1 1 +chr1 569390 569426 CGTGGCAGTAAACCCT-1 1 +chr1 569390 569432 GCGGAAACATAGAATG-1 3 +chr1 569390 569438 GTTATGGCACTGCTTC-1 5 +chr1 569390 569443 GGTCATAAGACGTCAG-1 3 +chr1 569391 569413 TAGGAGGTCTAAACGC-1 1 +chr1 569391 569414 TATCTGTAGACAGCTG-1 2 +chr1 569391 569415 CCTTGCAGTCCCGTGA-1 1 +chr1 569391 569416 CTGAATGGTTTAAGGA-1 1 +chr1 569391 569417 GGGACCTAGGATATCA-1 7 +chr1 569391 569418 GCAACCGGTCATTGCA-1 4 +chr1 569391 569419 TTCATCAAGCTAACAA-1 6 +chr1 569391 569420 GTGATCAGTTGGCTTA-1 1 +chr1 569391 569425 GCAGCCAAGAGAGTTT-1 1 +chr1 569391 569426 TGGGTGCGTGGACAGT-1 1 +chr1 569391 569431 GGACACCTCAAGGCAG-1 2 +chr1 569391 569432 GTTACGAAGATGCGAC-1 2 +chr1 569391 569436 TTACGGATCCACGCTT-1 2 +chr1 569391 569438 ACCGCAGGTCGAGGTA-1 2 +chr1 569391 569443 TGGTCCTGTGTACGCC-1 1 +chr1 569391 569448 TGGAAGGGTACGGAGT-1 7 +chr1 569391 569449 TAACAGCGTAATGTAG-1 10 +chr1 569392 569413 GGGTGTCCACTCCACT-1 1 +chr1 569392 569415 GCTCACTTCGTCCCTA-1 1 +chr1 569392 569421 ACAGACTAGTATACCC-1 2 +chr1 569392 569425 GAACGTTAGTCTGTGT-1 1 +chr1 569392 569432 CCCACATCACATGATC-1 1 +chr1 569394 569417 GAGATTCTCCAGAGAG-1 2 +chr1 569394 569418 GACCAATCAAGCCTTA-1 3 +chr1 569394 569419 CTTTGCGGTAAACCCT-1 1 +chr1 569394 569424 TAGCATGGTATGGTTC-1 2 +chr1 569394 569438 TATCTGTAGACAGCTG-1 1 +chr1 569394 569443 GAACCGCTCGAGTTAC-1 1 +chr1 569394 569449 TTCAACTTCCGTACGG-1 1 +chr1 569395 569417 CAGTGTACAACACGGA-1 1 +chr1 569395 569421 GCTGTTCCAAGATGCG-1 1 +chr1 569395 569432 GAAGTGGCAGTCAGCC-1 3 +chr1 569395 569448 CGCTAGGTCATGTTCT-1 1 +chr1 569396 569414 GTCACCTTCTTTGCAT-1 1 +chr1 569396 569417 TCCATCGAGAGTGGAA-1 5 +chr1 569396 569418 ACAGCGCAGCCCATGC-1 9 +chr1 569396 569419 CATGCCTGTAGAGAGA-1 4 +chr1 569396 569421 AAACGAACAATCAGGG-1 2 +chr1 569396 569424 TCGATTTAGCAATAAC-1 3 +chr1 569396 569426 TCAAGCACAGCGTGAA-1 5 +chr1 569396 569430 GACCAATGTCCGTAGC-1 1 +chr1 569396 569431 CATTGGAGTTCCCTTG-1 6 +chr1 569396 569432 AGTTTGGCATGATTGC-1 9 +chr1 569396 569436 GAGGTCCGTTCATTTC-1 2 +chr1 569396 569438 TGAGTCAGTACTTGAC-1 13 +chr1 569396 569443 TGCTTTAGTGTCCAGC-1 3 +chr1 569396 569444 AGCCTGGGTCAGAAAT-1 1 +chr1 569396 569447 GGTGTCGGTGCACCCA-1 1 +chr1 569396 569448 AAACGAACATGCTATG-1 16 +chr1 569396 569449 GCCTACTGTTAGCTCA-1 11 +chr1 569396 569451 TAGCCCTTCTCGGCGA-1 4 +chr1 569396 569453 CTCAACCGTGGTTCTA-1 7 +chr1 569397 569417 AGCGTATCAGGTCCTG-1 1 +chr1 569397 569432 GTAGGAGTCTCAGATG-1 3 +chr1 569397 569438 CTAGGGCGTGAAACAT-1 1 +chr1 569397 569443 TCAGGTAAGACACAAT-1 1 +chr1 569397 569448 GAACCGCCATTCACCC-1 3 +chr1 569397 569449 TTAGCGACATCGTACA-1 3 +chr1 569397 569453 GAGGTCCGTTCATTTC-1 4 +chr1 569398 569417 TCACCTGGTAGGGTTG-1 1 +chr1 569398 569418 CCCGTTATCGTGGAAG-1 1 +chr1 569398 569419 CGGACTGAGCCATCAT-1 4 +chr1 569398 569420 GACCGACAGGGTAATT-1 1 +chr1 569398 569424 GTCACAACATGGATGG-1 1 +chr1 569398 569425 CAAAGCTGTCTGATTG-1 2 +chr1 569398 569426 AGACAAATCTCGGCGA-1 9 +chr1 569398 569430 AGGCGTCTCCCTAAAG-1 6 +chr1 569398 569432 TGTAAGCTCGGTCGAC-1 14 +chr1 569398 569436 CGCTATCTCAAACCCA-1 5 +chr1 569398 569443 CACCACTGTGTGACCC-1 7 +chr1 569398 569446 CAGGGCTCATCAACTG-1 3 +chr1 569398 569448 TCAGTTTTCCTTCGAC-1 8 +chr1 569398 569449 CGGACCATCGTCAACA-1 13 +chr1 569398 569451 AGCGTATCACAGCTTA-1 7 +chr1 569398 569452 TATGTGGAGCCGCAAA-1 1 +chr1 569398 569453 AAGGAGCAGGATGTAT-1 26 +chr1 569399 569426 TTGCTTAGTACATGGG-1 3 +chr1 569399 569431 CCTTGCAGTAGCAGGT-1 1 +chr1 569399 569436 CCGCATTTCCAAGTCA-1 7 +chr1 569399 569448 AAACGAATCGCAAGCC-1 3 +chr1 569399 569449 GGCATTAGTTTCTCTA-1 5 +chr1 569400 569430 GCGGAAACATAGAATG-1 2 +chr1 569400 569432 TACAGCAGTTTGACCA-1 3 +chr1 569400 569433 TACCTCGTCCCAGTAA-1 1 +chr1 569400 569437 GAACGTTTCGAGGTAG-1 3 +chr1 569400 569442 TGCATTTCATAGACGG-1 2 +chr1 569400 569449 GTCACAACATGGATGG-1 6 +chr1 569401 569443 TTAGCTTGTGCTGTCG-1 1 +chr1 569401 569449 CCAATGAGTTCAGAAA-1 1 +chr1 569402 569438 AGCGATTTCGCAAGCC-1 1 +chr1 569402 569443 GCGTTGGGTAAGGTCG-1 2 +chr1 569403 569448 AGCTGATGTGTCGTGC-1 2 +chr1 569403 569449 AATACGCAGGCAAGCT-1 1 +chr1 569404 569448 GAACGTTCACGGCCAT-1 1 +chr1 569404 569449 AAAGGGCTCCATCATT-1 1 +chr1 569405 569430 ACCCAAAGTGAGTTGG-1 1 +chr1 569405 569436 GCGCCAATCGATTACG-1 3 +chr1 569405 569438 TCAGTCCTCCCTACGT-1 1 +chr1 569405 569449 GGAGTAGAGGCGTCCT-1 1 +chr1 569405 569451 TTGCAGAAGGGCATTG-1 1 +chr1 569405 569453 CTCAGAAAGGATCCTT-1 4 +chr1 569406 569438 GTGGCGTGTCGATTAC-1 2 +chr1 569407 569448 CCTGGGACACAAACAA-1 1 +chr1 569409 569432 CCCGTTAAGGCAGTAC-1 2 +chr1 569409 569438 CGCTAGGTCTTTATCG-1 2 +chr1 569417 569615 GTTGGGCGTAACTCCA-1 1 +chr1 569586 569611 TGACAACGTCAACTGT-1 1 +chr1 569626 569821 ACAAGCTCACAGCCAC-1 1 +chr1 569840 569981 TTCTGTAGTACGGTTT-1 1 +chr1 569956 570112 TCAAGACCAAACAGTA-1 1 +chr1 569963 570197 CGCTGGAGTTGCAGAG-1 1 +chr1 569970 570332 GTTATTCTCCAGTACA-1 1 +chr1 570072 570247 GAGGCTCCAAGGAACC-1 1 +chr1 570081 570111 CCTAAAGGTTCTCGAA-1 1 +chr1 570083 570111 ATGTTTCAGCACACCC-1 1 +chr1 570085 570111 CACATGACATTAGCAC-1 3 +chr1 570086 570109 ACAGACTCATCGTGAT-1 1 +chr1 570086 570111 GCCAGACAGGCTTTAC-1 1 +chr1 570086 570116 AAGGTTCCAATCATCG-1 1 +chr1 570088 570108 GGGTGTCTCGCGTTCT-1 1 +chr1 570088 570111 AAGGAGCTCCAGTTAG-1 1 +chr1 570088 570117 AACGTACAGATCTCAC-1 1 +chr1 570089 570111 TAATCGGCAGGTGTCC-1 1 +chr1 570096 570125 TCACAGATCTGTCGGG-1 1 +chr1 570096 570126 TAGTCCCCATCCCTTG-1 3 +chr1 570096 570132 GCGGAAACATAGAATG-1 10 +chr1 570098 570126 TAACTTCTCTATCTCA-1 2 +chr1 570098 570127 CGCGCAAGTTTGTCTT-1 3 +chr1 570098 570130 ATAGTCGCAGGCTACC-1 2 +chr1 570098 570132 ATCCTGCTCAAGGCAG-1 6 +chr1 570098 570133 GTCACTCTCAGGCGCT-1 2 +chr1 570098 570134 GGATGAGCAGGCAGAT-1 1 +chr1 570098 570135 AACCTGAAGAGCCACA-1 7 +chr1 570149 570207 TGGAAGGAGTCGACCC-1 1 +chr1 570164 570197 TGACAACGTCAACTGT-1 1 +chr1 570201 570426 AAATGCCCACGCCGAT-1 1 +chr1 570267 570646 TTGCCCATCGCGTTCT-1 3 +chr1 573789 574178 TAATCGGGTTCTCGAA-1 1 +chr1 574118 574277 TCACAAGGTCAAGACG-1 1 +chr1 574315 574486 CAACGTATCCTATCAT-1 1 +chr1 574327 574503 GAGATTCGTATCCTTT-1 1 +chr1 585820 585882 AAGGTTCCAACGAGGT-1 3 +chr1 586168 586203 GCGCCAATCCTCCTGA-1 1 +chr1 586170 586329 CAACGTATCCTATCAT-1 2 +chr1 592081 592112 TTCGATTGTCATCGTA-1 1 +chr1 600846 600880 ACGGATTAGAATGCTA-1 1 +chr1 601063 601108 TAGGTGTGTGGACCAA-1 2 +chr1 601355 601557 ACAAACCCAGTTACAC-1 1 +chr1 601368 601555 GGAGTAGTCAGGTTTG-1 1 +chr1 601524 601690 TACCTCGCATGGGAAC-1 2 +chr1 601632 601676 TCAGCTCAGTTAGAGT-1 1 +chr1 601779 602014 GACTAACAGCAACGGT-1 2 +chr1 601820 602008 GCACCTTTCCAACGCG-1 1 +chr1 601824 602006 GGATGAGCAAGGAACC-1 1 +chr1 601847 602022 GAAGAGCTCGTGGTAT-1 1 +chr1 601973 602115 CTACAGACATGCGTTA-1 1 +chr1 601975 602118 TAGGTGTGTGGACCAA-1 1 +chr1 602794 602836 TGCACCTTCGGCTATA-1 1 +chr1 603448 603639 GGAGTAGTCAGGTTTG-1 3 +chr1 603703 603736 CTACAGACATGCGTTA-1 3 +chr1 610167 610219 TCGATTTCAACTCCCT-1 2 +chr1 610202 610350 TGAGTCATCACACGTA-1 1 +chr1 610209 610231 AACTTGGGTTATAGAG-1 1 +chr1 626589 626776 CAAGGCCAGGTTCGAG-1 4 +chr1 636663 636803 CAGCCTTTCTAAACGC-1 1 +chr1 636844 637012 TAGGTGTGTGGACCAA-1 1 +chr1 637554 637774 GCTGCGAGTAACGTAA-1 1 +chr1 652572 652700 CCGTAGGCATCCCTTG-1 2 +chr1 657533 657723 GCTCCTAAGGTACATA-1 1 +chr1 666180 666474 AAATGAGCAGATGGCA-1 3 +chr1 666185 666447 TTCGCGTAGCCTGTAT-1 2 +chr1 672809 672845 AACTGGTAGCTCCGGT-1 1 +chr1 693130 693176 GTCACAAGTGCGCTCA-1 2 +chr1 700367 700548 CCTTAATCAACTACTG-1 1 +chr1 701629 701820 GGAGTAGCAAGGTTCT-1 1 +chr1 701646 701846 TCCATCGTCTGGCGCA-1 1 +chr1 701683 701837 TCACAAGGTGAGCTAG-1 1 +chr1 702037 702208 TCACAAGGTGAGCTAG-1 1 +chr1 702055 702264 TCTAGTTGTTCTCGAA-1 3 +chr1 704144 704326 GATTGACAGTTAGCAA-1 5 +chr1 704154 704326 CGCTATCAGCTCCGGT-1 1 +chr1 704534 704563 AACGTACAGCAATAAC-1 2 +chr1 704622 704665 AAACGAACAACAAACA-1 1 +chr1 705828 705916 TAGCACAGTAGTCTGT-1 3 +chr1 706293 706425 TCACAAGGTGAGCTAG-1 1 +chr1 707847 708045 GCAGCTGCACGCGTTG-1 1 +chr1 707853 708032 TACGGATAGTAACTCC-1 3 +chr1 710174 710368 GGAGTAGCAAGGTTCT-1 2 +chr1 710406 710452 CCGTGAGAGACCATAA-1 1 +chr1 710420 710470 CACCTGTTCTACTGCC-1 1 +chr1 710501 710629 CGCTATCAGCTCCGGT-1 1 +chr1 710577 710651 TGTACGAGTCTGGATT-1 1 +chr1 710668 710947 TACTGCCCAGATACAA-1 1 +chr1 711721 711845 TGACAACGTATGTCCA-1 1 +chr1 711734 711927 CCTTAATTCTTCAGAG-1 1 +chr1 712303 712546 CTTTGCGGTAAACCCT-1 1 +chr1 712335 712780 GAGATTCCACTGAAGG-1 1 +chr1 712341 712581 TGTTAGGGTAACCGAG-1 2 +chr1 712350 712523 GTGCTGGTCACGTGTA-1 1 +chr1 712484 712682 CCCTAGTTCACGTCAA-1 1 +chr1 712510 712546 TAGCCCTAGACACTTC-1 2 +chr1 712546 712875 TAGCCCTAGACACTTC-1 1 +chr1 712569 712897 CTTTGCGGTAAACCCT-1 1 +chr1 712633 712784 ACAATCGGTGACGCAA-1 2 +chr1 712697 712905 ACGTGGCCATCGTACA-1 4 +chr1 712799 713287 TCACCTGGTAGGGTTG-1 1 +chr1 712851 713033 AGCTATGTCCCACTTG-1 1 +chr1 712877 713035 CCTCCCTGTAACGGCA-1 2 +chr1 712966 713295 TTTGCGCGTCTGGGAA-1 2 +chr1 712998 713300 AAACTGCGTTACTACG-1 1 +chr1 713004 713180 AGACAAACAAGGATGC-1 1 +chr1 713004 713199 CGCTAGGCATCCCTCA-1 2 +chr1 713006 713043 AACGGGAGTCCAACCG-1 1 +chr1 713008 713501 TCCGACTTCGCATAAC-1 1 +chr1 713011 713157 CGATGATGTGGACCAA-1 1 +chr1 713042 713302 AACGGGAGTCCAACCG-1 5 +chr1 713078 713284 ATTACTCAGGATGCCA-1 2 +chr1 713078 713444 GAAGTGGCATAGCCAT-1 1 +chr1 713145 713285 CGCTATCAGCTCCGGT-1 1 +chr1 713161 713380 TAGCACAGTAGTCTGT-1 1 +chr1 713180 713482 TTCTAACAGAGGTCCA-1 2 +chr1 713185 713660 GCTGTTCCATACTGCA-1 1 +chr1 713239 713431 TCAGGTAAGAGCAGCT-1 3 +chr1 713285 713333 GATCATGTCTCCTTGG-1 1 +chr1 713355 713751 AACTGGTGTATCGCGC-1 2 +chr1 713380 713756 GTGCTGGTCGCGGCAT-1 1 +chr1 713388 713555 AGCGTGCCAGGTCCTG-1 1 +chr1 713436 713955 GATCGTACATCCCGGA-1 1 +chr1 713491 713996 CACATGAGTAACACTC-1 1 +chr1 713501 713970 TCCAGAATCGACTGGC-1 2 +chr1 713501 714042 TTGAGCAAGATGGCAC-1 1 +chr1 713506 713990 TTGCCCACATCCCGGA-1 1 +chr1 713510 714017 ATAGGCTGTAAGCCTT-1 1 +chr1 713511 714011 AGCCAGCGTCAGCAAG-1 1 +chr1 713521 713972 CTTTGCGGTAAACCCT-1 1 +chr1 713531 714035 GGCGAAACACGTTGTA-1 1 +chr1 713542 713974 CGGACCACACTCAAGT-1 1 +chr1 713542 714003 GCACCTTGTAATGCCT-1 4 +chr1 713563 714017 ATTTGTCGTATTCTTC-1 4 +chr1 713563 714040 CGGACCACAATGCACT-1 1 +chr1 713563 714043 CAACGGCTCCTGTAGA-1 2 +chr1 713565 713955 TGGGTTACATGGGTGA-1 1 +chr1 713572 713945 CTGTATTTCCCAGCGA-1 2 +chr1 713575 713996 AAGATAGGTGACCAGA-1 1 +chr1 713577 714011 TTAGCTTCACATGATC-1 4 +chr1 713580 713944 TTGCCCACATCCCGGA-1 2 +chr1 713581 714017 GTCACTCAGCTCCATA-1 2 +chr1 713581 714043 ACAATCGTCCTTCGAC-1 1 +chr1 713581 714119 TCAGCTCTCGCGCCAA-1 1 +chr1 713585 713982 CACAGATAGGCTTTAC-1 1 +chr1 713601 713979 CTTGAAGCATGTGAGG-1 2 +chr1 713603 713783 GACTAGTTCTCTGTTA-1 1 +chr1 713606 713783 CACATGATCCTGGAAT-1 2 +chr1 713611 713801 TGATCAGCAGGGTACA-1 1 +chr1 713611 713945 ACAGACTCATCGTGAT-1 6 +chr1 713611 713972 TAATCGGGTTCTCGAA-1 2 +chr1 713611 713980 TATGTTCAGCGTCAAG-1 1 +chr1 713611 713996 GTGCTGGGTTCTGAGT-1 1 +chr1 713611 714017 CAACGTATCTACCCGT-1 2 +chr1 713611 714121 GCTTAAGCAACATAAG-1 1 +chr1 713611 714135 AAACTCGAGACTCGGA-1 1 +chr1 713614 713936 CGTACAACACAGGAAC-1 1 +chr1 713614 713972 ATTTGTCGTATTCTTC-1 2 +chr1 713614 714040 CCGTACTTCTGGCCAG-1 2 +chr1 713616 713826 TGCTATTGTATTCACG-1 1 +chr1 713616 714040 ACTAACGCAGCAACGA-1 2 +chr1 713619 714040 CTGGCAGGTAATGCAA-1 1 +chr1 713619 714053 AGGCCTGCATCCCAAA-1 1 +chr1 713621 713816 AGGCGTCTCCAATAGC-1 3 +chr1 713621 714011 TGTGACAAGAGGAACA-1 1 +chr1 713621 714040 CAGCTAAGTATCTGCA-1 6 +chr1 713622 713809 CCCTCTCCATTGTTCT-1 1 +chr1 713627 713801 CAAGCTATCTTCCGTG-1 1 +chr1 713627 713974 CTGTTCGGTTAGGAAT-1 1 +chr1 713627 713977 ATCCCTGCACAGCCAC-1 1 +chr1 713627 713992 AGCGTGCTCGGTTGTA-1 2 +chr1 713629 713799 TGGAAGGGTTTAGGAA-1 2 +chr1 713629 714040 TTGGTCCGTAAGCCGA-1 3 +chr1 713655 714135 GGTGCTGGTCCAAGAG-1 1 +chr1 713659 713957 GCTCAGGTCGCGTGAC-1 2 +chr1 713665 713993 GAGTGAGAGCTACGTT-1 3 +chr1 713666 714004 CTGTATTTCCCAGCGA-1 2 +chr1 713666 714015 GTGGCGTGTCGATTAC-1 1 +chr1 713671 713972 GTGGCGTGTCGATTAC-1 2 +chr1 713671 714125 CCCTAGTAGAAGAGTG-1 1 +chr1 713678 714017 TCACCTGGTCTCGGGT-1 2 +chr1 713679 713934 TGTAAGCAGACACAAT-1 2 +chr1 713679 713993 TTGGTCCGTTAAGGGC-1 2 +chr1 713680 714038 CCTTGCACATTTGTTC-1 5 +chr1 713681 714040 CAGTATGGTGCTTGAT-1 2 +chr1 713683 713980 GCTTAAGCAACATAAG-1 1 +chr1 713686 714043 GGTGAAGAGAACGTCG-1 1 +chr1 713693 713987 GGATGAGCAGGCAGAT-1 2 +chr1 713693 714127 CTGGCAGGTAATGCAA-1 2 +chr1 713698 713980 TACCTATGTATCAGCT-1 3 +chr1 713698 714000 GCACGCAAGCTGAGGT-1 3 +chr1 713698 714017 GCACGGTGTCGCGCTA-1 2 +chr1 713710 714040 TTGCTATAGCAACTGG-1 1 +chr1 713729 714120 GAGATTCAGGCTGGAT-1 2 +chr1 713731 714130 CATAACGGTCGAGAAC-1 2 +chr1 713733 713985 CTCAGCTGTGCTTCCT-1 1 +chr1 713739 713990 CCCAGAGGTTAGGCTT-1 1 +chr1 713739 714017 TATTGCTTCACATCCC-1 1 +chr1 713740 713967 GCGGTGTTCGACTGGC-1 1 +chr1 713740 714040 TCAGGTAAGATACCAA-1 1 +chr1 713742 713990 GATTGACAGTTAGCAA-1 2 +chr1 713745 714040 AGCCCGAGTCTGGTCG-1 1 +chr1 713745 714043 GGGTTATGTGTTGTTG-1 2 +chr1 713745 714067 TGTAAGCCAAAGAAGG-1 4 +chr1 713746 714013 TGCTTTAGTGTCCAGC-1 4 +chr1 713746 714188 GCTGAGCTCAGTGGTT-1 2 +chr1 713747 713965 TTTGGTTAGAAGGGCG-1 2 +chr1 713747 713973 TGGTCCTTCGAACCGC-1 2 +chr1 713750 713935 GCTTAAGCAATGTAAG-1 4 +chr1 713750 713944 TACGCAACAACTCGAT-1 2 +chr1 713750 713989 ATTCGTTGTTTCCTAT-1 1 +chr1 713750 714120 AACCTGAAGTGCTCGC-1 3 +chr1 713750 714135 CTCTACGGTTCCCTTG-1 1 +chr1 713750 714182 TGGGTGCTCAAAGTAG-1 3 +chr1 713751 713937 TAGGTGTGTTTGCCCT-1 2 +chr1 713751 713950 GAACCGCCATGCGCTG-1 2 +chr1 713751 713970 ATTGTCTAGCTGCTCG-1 1 +chr1 713751 713979 GAGTGAGTCCATACGA-1 7 +chr1 713751 713980 GTGACATCATGCGCTG-1 1 +chr1 713751 713982 TCGGGACCACCAAGGA-1 3 +chr1 713751 713990 GCACGGTGTCTGCACG-1 6 +chr1 713751 713992 GAGGCTCAGAACGTCG-1 1 +chr1 713751 713997 GGGACCTAGATTGACA-1 4 +chr1 713751 714001 TTGTTGTGTCTGGTTA-1 2 +chr1 713751 714015 GTTACTTCACGAACGA-1 3 +chr1 713751 714017 TCAATTCTCACGGGTC-1 2 +chr1 713751 714018 GTAGGAGTCGATGCAT-1 2 +chr1 713751 714040 CCCTCTCTCCTTACGC-1 4 diff --git a/tests/testthat/test_get_counts.R b/tests/testthat/test_get_counts.R index 28bcc92..8590322 100644 --- a/tests/testthat/test_get_counts.R +++ b/tests/testthat/test_get_counts.R @@ -9,6 +9,15 @@ test_bam3 <- system.file("extdata", "test_single3.bam", package = "chromVAR") peaks_file <- system.file("extdata", "test_bed.txt", package = "chromVAR") test_peaks <- getPeaks(peaks_file, sort = TRUE) test_bed <- system.file("extdata", "test_reads.bed", package = "chromVAR") +test_bed_10x <- system.file("extdata", "test_x10_bed.tsv", package = "chromVAR") + +# Test 10x bed file ------------------------------------------------------------ + +test_that("can read in 10x", { + counts <- getCounts(test_bed_10x, test_peaks, x10 = TRUE, format = "bed", + paired = TRUE, colData = DataFrame(cell_name = c("Test"))) + expect_is(counts, "RangedSummarizedExperiment") +}) # Test fragment counts with RG ___________-------------------------------------- @@ -24,7 +33,7 @@ test_that("can count fragments using RG tags", { # Test fragment counts with multiple bam -------------------------------------- test_that("can count fragments with multiple bams", { - counts <- getCounts(c(test_bam1, test_bam2, test_bam3), test_peaks, + counts <- getCounts(c(test_bam1, test_bam2, test_bam3), test_peaks, by_rg = FALSE, paired = TRUE) expect_is(counts, "RangedSummarizedExperiment") expect_equal(assays(counts)$counts[11,3][[1]],1) @@ -37,7 +46,7 @@ test_that("can count fragments with multiple bams", { # Test fragment counts with bed file ------------------------------------------- test_that("can count fragments with bed file", { - counts <- getCounts(test_bed, test_peaks, by_rg = FALSE, paired = FALSE, + counts <- getCounts(test_bed, test_peaks, by_rg = FALSE, paired = FALSE, format = "bed") expect_is(counts, "RangedSummarizedExperiment") expect_equal(assays(counts)$counts[2,1][[1]], 2) @@ -45,10 +54,3 @@ test_that("can count fragments with bed file", { expect_equal(colData(counts)$depth, 4) expect_equal(getTotalFragments(counts),3) }) - - - - - - - From e6553df0599f60de8a8732d327f97e9f16c6f4f6 Mon Sep 17 00:00:00 2001 From: Anna Date: Wed, 13 Feb 2019 12:34:42 -0500 Subject: [PATCH 3/9] Add new functionality to getCounts. Read 10x bed files. --- R/get_inputs.R | 401 +++++++++++++++++++++++++++++-------------------- 1 file changed, 240 insertions(+), 161 deletions(-) diff --git a/R/get_inputs.R b/R/get_inputs.R index 5a4b2dc..cbfb977 100644 --- a/R/get_inputs.R +++ b/R/get_inputs.R @@ -13,21 +13,21 @@ #' chromosome, second is assumed to be start of peak (0-based), and third is #' assumed to be end of peak (1-based). Note that in output GenomicRanges #' output, start and end indices are both 1-based. Extra columns can be added -#' as metadata or strand information if provided, but the user must indicate +#' as metadata or strand information if provided, but the user must indicate #' column index and name using named vector for extra_cols. -#' @seealso \code{\link{getCounts}}, \code{\link{filterPeaks}}, +#' @seealso \code{\link{getCounts}}, \code{\link{filterPeaks}}, #' \code{\link{readNarrowpeaks}} #' @export -#' @examples +#' @examples #' peaks_file <- system.file("extdata", "test_bed.txt", package = "chromVAR") #' peaks <- getPeaks(peaks_file, sort = TRUE) getPeaks <- function(filename, extra_cols = c(), sort_peaks = FALSE) { if (is.installed("readr")) { bed <- as.data.frame( - suppressMessages(readr::read_tsv(file = filename, + suppressMessages(readr::read_tsv(file = filename, col_names = FALSE)[,c(1:3, extra_cols)])) } else { - bed <- read.delim(file = filename, header = FALSE, sep = "\t", + bed <- read.delim(file = filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE)[, c(1:3, extra_cols)] } colnames(bed) <- c("chr", "start", "end", names(extra_cols)) @@ -61,34 +61,34 @@ getPeaks <- function(filename, extra_cols = c(), sort_peaks = FALSE) { } #' readNarrowpeaks -#' -#' Reads in peaks in narrowpeaks format, as output by macs2. Uses summit as -#' center of peak, and makes peak the given 'width'. By default removes +#' +#' Reads in peaks in narrowpeaks format, as output by macs2. Uses summit as +#' center of peak, and makes peak the given 'width'. By default removes #' overlapping peaks to get set of peaks with no overlaps #' @param filename filename #' @param width desired width of peaks #' @param non_overlapping remove overlapping peaks #' @return \code{\link[GenomicRanges]{GRanges-class}} #' @export -readNarrowpeaks <- function(filename, - width = 500, +readNarrowpeaks <- function(filename, + width = 500, non_overlapping = TRUE) { - cn <- c("chr", "start", "end", "name", "score", "strand", "fc", + cn <- c("chr", "start", "end", "name", "score", "strand", "fc", "pval", "qval", "summit") if (is.installed("readr")) { bed <- as.data.frame(readr::read_tsv(file = filename, col_names = cn)) } else { - bed <- read.delim(file = filename, - header = FALSE, - sep = "\t", - stringsAsFactors = FALSE, + bed <- read.delim(file = filename, + header = FALSE, + sep = "\t", + stringsAsFactors = FALSE, col.names = cn) } bed[, "summit"] <- bed[, "start"] + bed[, "summit"] bed <- as(bed, "DataFrame") - bed <- makeGRangesFromDataFrame(bed[, c("chr", "summit", "score", - "qval", "name")], - start.field = "summit", end.field = "summit", + bed <- makeGRangesFromDataFrame(bed[, c("chr", "summit", "score", + "qval", "name")], + start.field = "summit", end.field = "summit", keep.extra.columns = TRUE) bed <- resize(bed, width = width, fix = "center") if (non_overlapping) { @@ -99,15 +99,15 @@ readNarrowpeaks <- function(filename, chr_names <- as.character(seqnames(bed[keep_peaks])) starts <- start(bed[keep_peaks]) ends <- end(bed[keep_peaks]) - overlap_next <- - intersect(which(chr_names[seq_len(length(keep_peaks) - 1)] == + overlap_next <- + intersect(which(chr_names[seq_len(length(keep_peaks) - 1)] == chr_names[seq_len(length(keep_peaks) - 1) + 1]), - which(ends[seq_len(length(keep_peaks) - 1)] >= + which(ends[seq_len(length(keep_peaks) - 1)] >= starts[seq_len(length(keep_peaks) - 1) + 1])) overlap_previous <- overlap_next + 1 - overlap_comparison <- bed[keep_peaks[overlap_previous]]$qval > + overlap_comparison <- bed[keep_peaks[overlap_previous]]$qval > bed[keep_peaks[overlap_next]]$qval - discard <- keep_peaks[c(overlap_previous[!overlap_comparison], + discard <- keep_peaks[c(overlap_previous[!overlap_comparison], overlap_next[overlap_comparison])] keep_peaks <- keep_peaks[keep_peaks %ni% discard] } @@ -127,10 +127,10 @@ readNarrowpeaks <- function(filename, #' @importMethodsFrom BiocGenerics counts #' @export #' @return Matrix of counts -#' @examples +#' @examples #' data(mini_counts, package = "chromVAR") #' fragment_counts <- counts(mini_counts) -setMethod("counts", signature(object = "SummarizedExperiment"), +setMethod("counts", signature(object = "SummarizedExperiment"), function(object) { stopifnot("counts" %in% assayNames(object)) assays(object)$counts @@ -142,8 +142,8 @@ setMethod("counts", signature(object = "SummarizedExperiment"), #' @aliases counts<-,SummarizedExperiment-method #' @importMethodsFrom BiocGenerics counts<- #' @exportMethod 'counts<-' -setReplaceMethod("counts", signature(object = "SummarizedExperiment", - value = "MatrixOrmatrix"), +setReplaceMethod("counts", signature(object = "SummarizedExperiment", + value = "MatrixOrmatrix"), function(object, value) { assays(object)[["counts"]] <- value validObject(object) @@ -154,7 +154,7 @@ setReplaceMethod("counts", signature(object = "SummarizedExperiment", #' getCounts #' -#' makes matrix of fragment counts in peaks using one or multiple bam or bed +#' makes matrix of fragment counts in peaks using one or multiple bam or bed #' files #' @param alignment_files filenames for bam or bed files with aligned reads #' @param peaks GRanges object with peaks @@ -164,60 +164,64 @@ setReplaceMethod("counts", signature(object = "SummarizedExperiment", #' @param colData sample annotation DataFrame #' @return \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}} #' object -#' @seealso \code{\link{getSampleDepths}}, \code{\link{getPeaks}}, +#' @seealso \code{\link{getSampleDepths}}, \code{\link{getPeaks}}, #' \code{\link{filterSamples}} #' @export -#' @examples -#' +#' @examples +#' #' # First we'll read in some peaks #' peaks_file <- system.file("extdata", "test_bed.txt", package = "chromVAR") #' test_peaks <- getPeaks(peaks_file, sort = TRUE) -#' +#' #' # With single bam with RG tags (can also give multiple bams with RG) #' test_rg <- system.file("extdata", "test_RG.bam", package = "chromVAR") -#' test_counts <- getCounts(test_rg, peaks = test_peaks, by_rg = TRUE, -#' paired = TRUE, +#' test_counts <- getCounts(test_rg, peaks = test_peaks, by_rg = TRUE, +#' paired = TRUE, #' colData = S4Vectors::DataFrame(condition ="A")) -#' -#' +#' +#' #' # Multiple bams without RG tags #' test_bam1 <- system.file("extdata", "test_single1.bam", package = "chromVAR") #' test_bam2 <- system.file("extdata", "test_single2.bam", package = "chromVAR") #' test_bam3 <- system.file("extdata", "test_single3.bam", package = "chromVAR") -#' test_counts2 <- getCounts(c(test_bam1, test_bam2,test_bam3), -#' peaks = test_peaks, by_rg = FALSE, -#' paired = TRUE, -#' colData = S4Vectors::DataFrame(celltype = -#' c("A","B","C"))) -#' +#' test_counts2 <- getCounts(c(test_bam1, test_bam2,test_bam3), +#' peaks = test_peaks, by_rg = FALSE, +#' paired = TRUE, +#' colData = S4Vectors::DataFrame(celltype = +#' c("A","B","C"))) +#' #' # Bed file with reads (can give multiple bed files, here we will just read 1) #' test_bed <- system.file("extdata", "test_reads.bed", package = "chromVAR") -#' test_counts3 <- getCounts(test_bed, test_peaks, by_rg = FALSE, -#' paired = FALSE, -#' format = "bed") -getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, - format = c("bam", "bed"), colData = NULL) { - +#' test_counts3 <- getCounts(test_bed, test_peaks, by_rg = FALSE, +#' paired = FALSE, +#' format = "bed") +getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, + format = c("bam", "bed"), colData = NULL, x10 = FALSE) { + format <- match.arg(format) if (format == "bam") { return(get_counts_from_bams(alignment_files, peaks, paired, by_rg, colData)) - } else { - return(get_counts_from_beds(alignment_files, peaks, paired, colData)) + } else if (format == "bed"){ + if (x10 == TRUE) { + return(get_counts_from_x10_beds(alignment_files, peaks, paired, colData, x10)) + } else { + return(get_counts_from_beds(alignment_files, peaks, paired, colData)) + } } } get_counts_from_bams <- function(bams, peaks, paired, by_rg = FALSE, sample_annotation = NULL) { - + if (by_rg) { tmp <- lapply(bams, getFragmentCountsByRG, peaks = peaks, paired = paired) if (!is.null(sample_annotation) && nrow(sample_annotation) == length(bams)){ sample_annotation <- as(sample_annotation, "DataFrame") l <- vapply(tmp, function(x) length(x$depths), 0) - sample_annotation <- - do.call(rbind, - lapply(seq_along(l), + sample_annotation <- + do.call(rbind, + lapply(seq_along(l), function(x){ rep(sample_annotation[x, ,drop = FALSE], l[x])})) } @@ -226,12 +230,12 @@ get_counts_from_bams <- function(bams, peaks, paired, by_rg = FALSE, } else { mat <- matrix(nrow = length(peaks), ncol = length(bams)) depths <- vector("numeric", length(bams)) - + for (i in seq_along(bams)) { message("Reading in file: ", bams[i]) fragments <- bamToFragments(bams[i], paired = paired) depths[i] <- length(fragments) - mat[, i] <- countOverlaps(peaks, fragments, type = "any", + mat[, i] <- countOverlaps(peaks, fragments, type = "any", ignore.strand = TRUE) } colnames(mat) <- basename(bams) @@ -242,31 +246,105 @@ get_counts_from_bams <- function(bams, peaks, paired, by_rg = FALSE, } else { sample_annotation$depth <- depths } - out <- SummarizedExperiment(assays = list(counts = counts_mat), - rowRanges = peaks, + out <- SummarizedExperiment(assays = list(counts = counts_mat), + rowRanges = peaks, colData = sample_annotation) return(out) } +get_counts_from_x10_beds <- function(beds, peaks, paired, colData = NULL, x10 = FALSE) { + if(length(beds == NULL)){ + beds <- list(beds) + } + print(length(beds)) + # for each bed file + # if 10x, the output is a list (bed) of lists (cells) + results <- lapply(beds, function(i) { + # read in alignments from bed file. If 10x, readAlignmentFromBed will add + # barcode information in a metadata field + + fragment <- readAlignmentFromBed(i, paired = paired, x10 = TRUE) + if (paired) { + left <- resize(fragment, width = 1, fix = "start", ignore.strand = TRUE) + right <- resize(fragment, width = 1, fix = "end", ignore.strand = TRUE) + fragments <- left_right_to_grglist(left, right) + } else { + fragments <- resize(fragment, width = 1, ignore.strand = FALSE) + } + unique_barcodes <- unique(fragment$barcodes) + cell_list <- lapply(unique_barcodes, function(i) { + # subset data based on barcode + current_barcode <- fragments[mcols(fragments)$barcodes == i] + if (!isTRUE(all.equal(sort(seqlevels(current_barcode)), sort(seqlevels(peaks))))) { + merged_seq <- unique(c(seqlevels(current_barcode), seqlevels(peaks))) + seqlevels(current_barcode) <- merged_seq + seqlevels(peaks) <- merged_seq + } + return(list(counts = countOverlaps(peaks, current_barcode, + type = "any", ignore.strand = TRUE), + depth = length(current_barcode), barcodes = i)) + }) + }) + + mat <- lapply(results, function(x) vapply(x, function(x) x[["counts"]], rep(0, length(peaks)))) + depths <- lapply(results, function(x) vapply(x, function(x) x[["depth"]], 0)) + # get barcodes here + codes <- lapply(results, function(result) { barcodes <- unlist(lapply(result, function(x) x$barcodes))}) + + # if no colData was given, set colData$depths to depths and set names of + if (is.null(colData)){ + colData <- DataFrame(depth = depths) + mat<- lapply(seq_along(mat), function(x) { colnames(mat[[x]]) =codes[[x]]; mat[[x]]}) + } else { + mat<- lapply(seq_along(mat), function(x) { colnames(mat[[x]]) =paste(codes[[x]], colData[[x,1]], sep = "_"); mat[[x]]}) + save <- character() + for(j in 1:length(colData[,1])){ + save <- c(save, rep(colData[j,1], ncol(mat[[j]]))) + } + colData <- c() + colData$cell_name <- save + colData$depth <- unlist(depths) + } + + mat <- t(do.call(rbind, lapply(mat, function(x) if (length(x) == 1L && is.na(x)) NULL else t(x)))) + + counts_mat <- Matrix::Matrix(mat) + + out <- SummarizedExperiment(assays = list(counts = counts_mat), + rowRanges = peaks, + colData = colData) + return(out) +} + get_counts_from_beds <- function(beds, peaks, paired, colData = NULL) { - - + + results <- bplapply(seq_along(beds), function(i) { - fragments <- readAlignmentFromBed(beds[i], paired = paired) + + fragment <- readAlignmentFromBed(beds[i], paired = paired) + + if (paired) { + left <- resize(fragment, width = 1, fix = "start", ignore.strand = TRUE) + right <- resize(fragment, width = 1, fix = "end", ignore.strand = TRUE) + fragments <- left_right_to_grglist(left, right) + } else { + fragments <- resize(fragment, width = 1, ignore.strand = FALSE) + } + if (!isTRUE(all.equal(sort(seqlevels(fragments)), sort(seqlevels(peaks))))){ merged_seq <- unique(c(seqlevels(fragments), seqlevels(peaks))) seqlevels(fragments) <- merged_seq seqlevels(peaks) <- merged_seq } - return(list(counts = countOverlaps(peaks, fragments, type = "any", - ignore.strand = TRUE), + return(list(counts = countOverlaps(peaks, fragments, type = "any", + ignore.strand = TRUE), depth = length(fragments))) }) - + mat <- vapply(results, function(x) x[["counts"]], rep(0, length(peaks))) depths <- vapply(results, function(x) x[["depth"]], 0) - + colnames(mat) <- basename(beds) counts_mat <- Matrix::Matrix(mat) if (is.null(colData)) { @@ -274,8 +352,8 @@ get_counts_from_beds <- function(beds, peaks, paired, colData = NULL) { } else { colData$depth <- depths } - out <- SummarizedExperiment(assays = list(counts = counts_mat), - rowRanges = peaks, + out <- SummarizedExperiment(assays = list(counts = counts_mat), + rowRanges = peaks, colData = colData) return(out) } @@ -285,20 +363,26 @@ get_counts_from_beds <- function(beds, peaks, paired, colData = NULL) { # Helper functions for reading in counts from bam ------------------------------ -readAlignmentFromBed <- function(filename, paired) { +readAlignmentFromBed <- function(filename, paired, x10 = FALSE) { if (is.installed("readr")) { tmp <- suppressMessages(readr::read_tsv(file = filename, col_names = FALSE)) } else { - tmp <- read.delim(file = filename, col.names = FALSE, sep = "\t", + tmp <- read.delim(file = filename, col.names = FALSE, sep = "\t", stringsAsFactors = FALSE) } - strand_col <- which(apply(tmp[seq_len(min(100, nrow(tmp))), ], 2, + strand_col <- which(apply(tmp[seq_len(min(100, nrow(tmp))), ], 2, function(x) all(x %in% c("+", "-", "*")))) - if (length(strand_col) == 1) { + if (x10) { + colnames(tmp) <- c("chr", "start", "end", "barcodes", "num_pcr") + tmp[, "start"] <- tmp[, "start"] + 1 + tmp_tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end)) + values(tmp_tmp) <- DataFrame(barcodes = tmp$barcodes) + tmp <- tmp_tmp + } else if (length(strand_col) == 1) { tmp <- tmp[, c(1:3, strand_col)] colnames(tmp) <- c("chr", "start", "end", "strand") tmp[, "start"] <- tmp[, "start"] + 1 - tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end), + tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end), strand = tmp$strand) } else { tmp <- tmp[, 1:6] @@ -306,133 +390,131 @@ readAlignmentFromBed <- function(filename, paired) { tmp[, "start"] <- tmp[, "start"] + 1 tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end)) } - if (paired) { - left <- resize(tmp, width = 1, fix = "start", ignore.strand = TRUE) - right <- resize(tmp, width = 1, fix = "end", ignore.strand = TRUE) - out <- left_right_to_grglist(left, right) - } else { - out <- resize(tmp, width = 1, ignore.strand = FALSE) - } - return(out) + return(tmp) } #' @importFrom IRanges PartitioningByEnd #' @importFrom BiocGenerics relist -left_right_to_grglist <- function(left, right) { +left_right_to_grglist <- function(left, right, x10 = FALSE) { stopifnot(length(left) == length(right)) if (length(left) == 0) { return(GenomicRangesList()) } - x <- c(left, right)[as.vector(matrix(seq_len(2L * length(left)), nrow = 2L, + x <- c(left, right)[as.vector(matrix(seq_len(2L * length(left)), nrow = 2L, byrow = TRUE))] p <- PartitioningByEnd(cumsum(rep(2, length(x)/2))) out <- relist(x, p) + # if 10x, get every other barcode + if(x10) { + codes <- x$barcodes[c(TRUE, FALSE)] + mcols(out)$barcodes <- codes + } return(out) } bamToFragmentsByRG <- function(bamfile, paired) { - + if (paired) { - scanned <- scanBam(bamfile, + scanned <- scanBam(bamfile, param = ScanBamParam( - flag = scanBamFlag(isMinusStrand = FALSE, + flag = scanBamFlag(isMinusStrand = FALSE, isProperPair = TRUE), what = c("rname", "pos", "isize"), tag = "RG"))[[1]] RG_tags <- mxsort(unique(scanned$tag$RG)) - + out <- bplapply(RG_tags, function(RG) { match_RG <- which(scanned$tag$RG == RG) - scanned_left <- GRanges(seqnames = scanned$rname[match_RG], - IRanges(start = scanned$pos[match_RG], + scanned_left <- GRanges(seqnames = scanned$rname[match_RG], + IRanges(start = scanned$pos[match_RG], width = 1), strand = "+") - scanned_right <- GRanges(seqnames = scanned$rname[match_RG], - IRanges(start = scanned$pos[match_RG] + - abs(scanned$isize[match_RG]) - 1, + scanned_right <- GRanges(seqnames = scanned$rname[match_RG], + IRanges(start = scanned$pos[match_RG] + + abs(scanned$isize[match_RG]) - 1, width = 1), strand = "-") return(left_right_to_grglist(scanned_left, scanned_right)) }) } else { scanned <- scanBam(bamfile, - param = ScanBamParam(what = c("rname", + param = ScanBamParam(what = c("rname", "pos", "strand", "qwidth"), tag = "RG"))[[1]] RG_tags <- mxsort(unique(scanned$tag$RG)) - + out <- bplapply(RG_tags, function(RG) { match_RG <- which(scanned$tag$RG == RG) - return(GRanges(seqnames = scanned$rname[match_RG], - IRanges(start = ifelse(scanned$strand[match_RG] == "-", - scanned$pos[match_RG] + - scanned$qwidth[match_RG] - 1, - scanned$pos[match_RG]), + return(GRanges(seqnames = scanned$rname[match_RG], + IRanges(start = ifelse(scanned$strand[match_RG] == "-", + scanned$pos[match_RG] + + scanned$qwidth[match_RG] - 1, + scanned$pos[match_RG]), width = 1))) }) } - + names(out) <- RG_tags - + return(out) } bamToFragments <- function(bamfile, paired) { if (paired) { - scanned <- scanBam(bamfile, - param = - ScanBamParam(flag = - scanBamFlag(isMinusStrand = FALSE, + scanned <- scanBam(bamfile, + param = + ScanBamParam(flag = + scanBamFlag(isMinusStrand = FALSE, isProperPair = TRUE), what = c("rname", "pos", "isize")))[[1]] - scanned_left <- GRanges(seqnames = scanned$rname, - IRanges(start = scanned$pos, + scanned_left <- GRanges(seqnames = scanned$rname, + IRanges(start = scanned$pos, width = 1), strand = "+") - scanned_right <- GRanges(seqnames = scanned$rname, - IRanges(start = scanned$pos + + scanned_right <- GRanges(seqnames = scanned$rname, + IRanges(start = scanned$pos + abs(scanned$isize) - 1, width = 1), strand = "-") out <- left_right_to_grglist(scanned_left, scanned_right) } else { scanned <- scanBam(bamfile, - param = ScanBamParam(what = c("rname", - "pos", - "strand", + param = ScanBamParam(what = c("rname", + "pos", + "strand", "qwidth")))[[1]] - out <- GRanges(seqnames = scanned$rname, - IRanges(start = ifelse(scanned$strand == "-", - scanned$pos + scanned$qwidth - 1, + out <- GRanges(seqnames = scanned$rname, + IRanges(start = ifelse(scanned$strand == "-", + scanned$pos + scanned$qwidth - 1, scanned$pos), width = 1)) } return(out) - + } getFragmentCountsByRG <- function(bam, peaks, paired) { message("Reading in file: ", bam) rg_fragments <- bamToFragmentsByRG(bam, paired) - + tmpfun <- function(frags) { - overlaps <- as.data.frame(findOverlaps(peaks, - frags, - type = "any", + overlaps <- as.data.frame(findOverlaps(peaks, + frags, + type = "any", ignore.strand = TRUE)) return(overlaps) } - + all_overlaps <- bplapply(rg_fragments, tmpfun) counts_mat <- sparseMatrix( i = do.call(rbind, all_overlaps)$queryHits, - j = unlist(lapply(seq_along(all_overlaps), - function(y) rep(y, - nrow(all_overlaps[[y]]))), + j = unlist(lapply(seq_along(all_overlaps), + function(y) rep(y, + nrow(all_overlaps[[y]]))), use.names = FALSE), - x = 1, - dims = c(length(peaks), length(rg_fragments)), + x = 1, + dims = c(length(peaks), length(rg_fragments)), dimnames = list(NULL, names(rg_fragments))) - - return(list(counts = counts_mat, + + return(list(counts = counts_mat, depths = vapply(rg_fragments, length, 0))) } @@ -448,25 +530,25 @@ getFragmentCountsByRG <- function(bam, peaks, paired) { #' @return numeric vector #' @seealso \code{\link{getCounts}}, \code{\link{filterSamples}} #' @export -#' @examples -#' +#' @examples +#' #' # With single bam with RG tags (can also give multiple bams with RG) #' test_rg <- system.file("extdata", "test_RG.bam", package = "chromVAR") -#' test_counts <- getSampleDepths(test_rg, by_rg = TRUE, +#' test_counts <- getSampleDepths(test_rg, by_rg = TRUE, #' paired = TRUE) -#' -#' +#' +#' #' # Multiple bams without RG tags #' test_bam1 <- system.file("extdata", "test_single1.bam", package = "chromVAR") #' test_bam2 <- system.file("extdata", "test_single2.bam", package = "chromVAR") #' test_bam3 <- system.file("extdata", "test_single3.bam", package = "chromVAR") -#' test_counts2 <- getSampleDepths(c(test_bam1, test_bam2,test_bam3), -#' by_rg = FALSE, -#' paired = TRUE) -#' -getSampleDepths <- function(alignment_files, - paired = TRUE, - by_rg = FALSE, +#' test_counts2 <- getSampleDepths(c(test_bam1, test_bam2,test_bam3), +#' by_rg = FALSE, +#' paired = TRUE) +#' +getSampleDepths <- function(alignment_files, + paired = TRUE, + by_rg = FALSE, format = c("bam", "bed")) { format <- match.arg(format) if (format == "bam") { @@ -482,13 +564,13 @@ get_sample_depths_from_bams <- function(bams, paired = TRUE, by_rg = FALSE) { out <- do.call(c, lapply(bams, getSampleDepthsByRG, paired = paired)) } else { if (paired) { - out <- vapply(bams, - function(x) + out <- vapply(bams, + function(x) countBam(x, - param = + param = ScanBamParam( - scanBamFlag(isMinusStrand = FALSE, - isProperPair = TRUE)))$records, + scanBamFlag(isMinusStrand = FALSE, + isProperPair = TRUE)))$records, 0) } else { out <- vapply(bams, function(x) countBam(x)$records, 0) @@ -500,18 +582,18 @@ get_sample_depths_from_bams <- function(bams, paired = TRUE, by_rg = FALSE) { get_sample_depths_from_beds <- function(beds) { if (is.installed("readr")) { - out <- - do.call(c, - bplapply(beds,function(filename){ - nrow(suppressMessages(readr::read_tsv(file = filename, + out <- + do.call(c, + bplapply(beds,function(filename){ + nrow(suppressMessages(readr::read_tsv(file = filename, col_names = FALSE)))})) } else { - out <- do.call(c, - bplapply(beds, - function(filename) - nrow(read.delim(file = filename, - header = FALSE, - sep = "\t", + out <- do.call(c, + bplapply(beds, + function(filename) + nrow(read.delim(file = filename, + header = FALSE, + sep = "\t", stringsAsFactors = FALSE)))) } names(out) <- vapply(beds, basename,"") @@ -520,20 +602,17 @@ get_sample_depths_from_beds <- function(beds) { getSampleDepthsByRG <- function(bamfile, paired = TRUE) { if (paired) { - tags <- scanBam(bamfile, - param = ScanBamParam(flag = - scanBamFlag(isMinusStrand = FALSE, + tags <- scanBam(bamfile, + param = ScanBamParam(flag = + scanBamFlag(isMinusStrand = FALSE, isProperPair = TRUE), tag = "RG"))[[1]]$tag$RG } else { tags <- scanBam(bamfile, param = ScanBamParam(tag = "RG"))[[1]]$tag$RG } - + RG_tags <- mxsort(unique(tags)) out <- tabulate(factor(tags, levels = RG_tags, ordered = TRUE)) names(out) <- RG_tags return(out) } - - - From dedee52a9cc63ffc967a85a090a5739c9dd77f60 Mon Sep 17 00:00:00 2001 From: Anna Date: Wed, 13 Feb 2019 12:51:09 -0500 Subject: [PATCH 4/9] added a test --- tests/testthat/test_get_counts.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test_get_counts.R b/tests/testthat/test_get_counts.R index 8590322..d88cb5d 100644 --- a/tests/testthat/test_get_counts.R +++ b/tests/testthat/test_get_counts.R @@ -17,6 +17,8 @@ test_that("can read in 10x", { counts <- getCounts(test_bed_10x, test_peaks, x10 = TRUE, format = "bed", paired = TRUE, colData = DataFrame(cell_name = c("Test"))) expect_is(counts, "RangedSummarizedExperiment") + # 861 unique barcodes = 861 cells + expect_equal(ncol(assays(counts)$counts),861) }) # Test fragment counts with RG ___________-------------------------------------- From 9645e8798bce0cefada24447609f5e7637514ba3 Mon Sep 17 00:00:00 2001 From: Anna Yeaton Date: Fri, 1 Mar 2019 16:36:50 -0500 Subject: [PATCH 5/9] changed x10 to is_10x and added error message --- R/get_inputs.R | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/R/get_inputs.R b/R/get_inputs.R index cbfb977..c097be3 100644 --- a/R/get_inputs.R +++ b/R/get_inputs.R @@ -196,14 +196,18 @@ setReplaceMethod("counts", signature(object = "SummarizedExperiment", #' paired = FALSE, #' format = "bed") getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, - format = c("bam", "bed"), colData = NULL, x10 = FALSE) { + format = c("bam", "bed"), colData = NULL, is_10x = FALSE) { format <- match.arg(format) if (format == "bam") { return(get_counts_from_bams(alignment_files, peaks, paired, by_rg, colData)) + if(is_10 == TRUE){ + message("Error: Cannot input bam files for 10x data. To read in 10x data, + please input a bed file") + } } else if (format == "bed"){ - if (x10 == TRUE) { - return(get_counts_from_x10_beds(alignment_files, peaks, paired, colData, x10)) + if (is_10x == TRUE) { + return(get_counts_from_10x_beds(alignment_files, peaks, paired, colData, is_10x)) } else { return(get_counts_from_beds(alignment_files, peaks, paired, colData)) } @@ -252,7 +256,7 @@ get_counts_from_bams <- function(bams, peaks, paired, by_rg = FALSE, return(out) } -get_counts_from_x10_beds <- function(beds, peaks, paired, colData = NULL, x10 = FALSE) { +get_counts_from_10x_beds <- function(beds, peaks, paired, colData = NULL) { if(length(beds == NULL)){ beds <- list(beds) } @@ -263,11 +267,11 @@ get_counts_from_x10_beds <- function(beds, peaks, paired, colData = NULL, x10 = # read in alignments from bed file. If 10x, readAlignmentFromBed will add # barcode information in a metadata field - fragment <- readAlignmentFromBed(i, paired = paired, x10 = TRUE) + fragment <- readAlignmentFromBed(i, paired = paired, is_10x = TRUE) if (paired) { left <- resize(fragment, width = 1, fix = "start", ignore.strand = TRUE) right <- resize(fragment, width = 1, fix = "end", ignore.strand = TRUE) - fragments <- left_right_to_grglist(left, right) + fragments <- left_right_to_grglist(left, right, is_10x = TRUE) } else { fragments <- resize(fragment, width = 1, ignore.strand = FALSE) } @@ -363,7 +367,7 @@ get_counts_from_beds <- function(beds, peaks, paired, colData = NULL) { # Helper functions for reading in counts from bam ------------------------------ -readAlignmentFromBed <- function(filename, paired, x10 = FALSE) { +readAlignmentFromBed <- function(filename, paired, is_10x = FALSE) { if (is.installed("readr")) { tmp <- suppressMessages(readr::read_tsv(file = filename, col_names = FALSE)) } else { @@ -372,7 +376,7 @@ readAlignmentFromBed <- function(filename, paired, x10 = FALSE) { } strand_col <- which(apply(tmp[seq_len(min(100, nrow(tmp))), ], 2, function(x) all(x %in% c("+", "-", "*")))) - if (x10) { + if is_10x) { colnames(tmp) <- c("chr", "start", "end", "barcodes", "num_pcr") tmp[, "start"] <- tmp[, "start"] + 1 tmp_tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end)) @@ -395,7 +399,7 @@ readAlignmentFromBed <- function(filename, paired, x10 = FALSE) { #' @importFrom IRanges PartitioningByEnd #' @importFrom BiocGenerics relist -left_right_to_grglist <- function(left, right, x10 = FALSE) { +left_right_to_grglist <- function(left, right, is_10x = FALSE) { stopifnot(length(left) == length(right)) if (length(left) == 0) { return(GenomicRangesList()) @@ -405,7 +409,7 @@ left_right_to_grglist <- function(left, right, x10 = FALSE) { p <- PartitioningByEnd(cumsum(rep(2, length(x)/2))) out <- relist(x, p) # if 10x, get every other barcode - if(x10) { + if(is_10x) { codes <- x$barcodes[c(TRUE, FALSE)] mcols(out)$barcodes <- codes } From 997bdeeb7e73dc25fec2857109ca43f73ecd7be1 Mon Sep 17 00:00:00 2001 From: Anna Yeaton Date: Fri, 1 Mar 2019 17:06:04 -0500 Subject: [PATCH 6/9] fix switch from x10 to is_10x --- R/get_inputs.R | 5 ++--- tests/testthat.R | 2 +- tests/testthat/test_get_counts.R | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/R/get_inputs.R b/R/get_inputs.R index c097be3..25b6f75 100644 --- a/R/get_inputs.R +++ b/R/get_inputs.R @@ -207,14 +207,13 @@ getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, } } else if (format == "bed"){ if (is_10x == TRUE) { - return(get_counts_from_10x_beds(alignment_files, peaks, paired, colData, is_10x)) + return(get_counts_from_10x_beds(alignment_files, peaks, paired, colData)) } else { return(get_counts_from_beds(alignment_files, peaks, paired, colData)) } } } - get_counts_from_bams <- function(bams, peaks, paired, by_rg = FALSE, sample_annotation = NULL) { @@ -376,7 +375,7 @@ readAlignmentFromBed <- function(filename, paired, is_10x = FALSE) { } strand_col <- which(apply(tmp[seq_len(min(100, nrow(tmp))), ], 2, function(x) all(x %in% c("+", "-", "*")))) - if is_10x) { + if (is_10x) { colnames(tmp) <- c("chr", "start", "end", "barcodes", "num_pcr") tmp[, "start"] <- tmp[, "start"] + 1 tmp_tmp <- GRanges(tmp$chr, ranges = IRanges(tmp$start, tmp$end)) diff --git a/tests/testthat.R b/tests/testthat.R index e22dcc4..857db25 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,4 @@ library(testthat) library(chromVAR) -BiocParallel::register(BiocParallel::SerialParam()) + test_check("chromVAR") diff --git a/tests/testthat/test_get_counts.R b/tests/testthat/test_get_counts.R index d88cb5d..af9d056 100644 --- a/tests/testthat/test_get_counts.R +++ b/tests/testthat/test_get_counts.R @@ -14,7 +14,7 @@ test_bed_10x <- system.file("extdata", "test_x10_bed.tsv", package = "chromVAR") # Test 10x bed file ------------------------------------------------------------ test_that("can read in 10x", { - counts <- getCounts(test_bed_10x, test_peaks, x10 = TRUE, format = "bed", + counts <- getCounts(test_bed_10x, test_peaks, is_10x = TRUE, format = "bed", paired = TRUE, colData = DataFrame(cell_name = c("Test"))) expect_is(counts, "RangedSummarizedExperiment") # 861 unique barcodes = 861 cells From ec4c1aaab352943cc1fbf0c086be9e22b7286974 Mon Sep 17 00:00:00 2001 From: AliciaSchep Date: Sat, 2 Mar 2019 12:04:10 -0800 Subject: [PATCH 7/9] add new arg to docs --- DESCRIPTION | 2 +- R/get_inputs.R | 3 ++- man/annotationMatches.Rd | 1 - man/computeDeviations.Rd | 20 +++++++++++-------- man/computeExpectations.Rd | 4 ++-- man/computeVariability.Rd | 5 +++-- man/deviationScores.Rd | 1 - man/deviations.Rd | 1 - man/deviationsTsne.Rd | 6 +++--- man/differentialDeviations.Rd | 4 ++-- man/getAnnotationCorrelation.Rd | 14 +++++++------- man/getAnnotationSynergy.Rd | 20 +++++++++++-------- man/getAnnotations.Rd | 4 ++-- man/getBackgroundPeaks.Rd | 7 ++++--- man/getCisGroups.Rd | 3 ++- man/getCounts.Rd | 34 +++++++++++++++++---------------- man/getPeaks.Rd | 4 ++-- man/getSampleDepths.Rd | 12 ++++++------ man/makePermutedSets.Rd | 10 ++++++---- man/matchKmers.Rd | 10 ++++++---- man/readNarrowpeaks.Rd | 4 ++-- 21 files changed, 92 insertions(+), 77 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cb590de..dd34ca1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,4 +56,4 @@ LazyData: TRUE LinkingTo: Rcpp, RcppArmadillo SystemRequirements: C++11 VignetteBuilder: knitr -RoxygenNote: 6.0.1 +RoxygenNote: 6.1.0 diff --git a/R/get_inputs.R b/R/get_inputs.R index 25b6f75..0a51d3f 100644 --- a/R/get_inputs.R +++ b/R/get_inputs.R @@ -162,6 +162,7 @@ setReplaceMethod("counts", signature(object = "SummarizedExperiment", #' @param paired paired end data? #' @param format bam or bed? default is bam #' @param colData sample annotation DataFrame +#' @param is_10x is this 10x format? if so, format must also be set to 'bed' #' @return \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}} #' object #' @seealso \code{\link{getSampleDepths}}, \code{\link{getPeaks}}, @@ -201,7 +202,7 @@ getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, format <- match.arg(format) if (format == "bam") { return(get_counts_from_bams(alignment_files, peaks, paired, by_rg, colData)) - if(is_10 == TRUE){ + if(is_10x == TRUE){ message("Error: Cannot input bam files for 10x data. To read in 10x data, please input a bed file") } diff --git a/man/annotationMatches.Rd b/man/annotationMatches.Rd index 470cfb1..47f9e79 100644 --- a/man/annotationMatches.Rd +++ b/man/annotationMatches.Rd @@ -6,7 +6,6 @@ \alias{annotationMatches,SummarizedExperiment-method} \alias{annoation_matches<-,SummarizedExperiment-method} \alias{annotationMatches<-} -\alias{annotationMatches,SummarizedExperiment-method} \alias{annotationMatches<-,SummarizedExperiment-method} \title{annotationMatches} \usage{ diff --git a/man/computeDeviations.Rd b/man/computeDeviations.Rd index f45c3c1..20b9c68 100644 --- a/man/computeDeviations.Rd +++ b/man/computeDeviations.Rd @@ -15,7 +15,8 @@ \usage{ computeDeviations(object, annotations, ...) -\S4method{computeDeviations}{SummarizedExperiment,SummarizedExperiment}(object, + + \S4method{computeDeviations}{SummarizedExperiment,SummarizedExperiment}(object, annotations, background_peaks = getBackgroundPeaks(object), expectation = computeExpectations(object)) @@ -23,8 +24,8 @@ computeDeviations(object, annotations, ...) annotations, background_peaks = getBackgroundPeaks(object), expectation = computeExpectations(object)) -\S4method{computeDeviations}{SummarizedExperiment,list}(object, annotations, - background_peaks = getBackgroundPeaks(object), +\S4method{computeDeviations}{SummarizedExperiment,list}(object, + annotations, background_peaks = getBackgroundPeaks(object), expectation = computeExpectations(object)) \S4method{computeDeviations}{SummarizedExperiment,missingOrNULL}(object, @@ -32,16 +33,19 @@ computeDeviations(object, annotations, ...) expectation = computeExpectations(object)) \S4method{computeDeviations}{MatrixOrmatrix,SummarizedExperiment}(object, - annotations, background_peaks, expectation = computeExpectations(object)) + annotations, background_peaks, + expectation = computeExpectations(object)) -\S4method{computeDeviations}{MatrixOrmatrix,MatrixOrmatrix}(object, annotations, - background_peaks, expectation = computeExpectations(object)) +\S4method{computeDeviations}{MatrixOrmatrix,MatrixOrmatrix}(object, + annotations, background_peaks, + expectation = computeExpectations(object)) \S4method{computeDeviations}{MatrixOrmatrix,list}(object, annotations, background_peaks, expectation = computeExpectations(object)) -\S4method{computeDeviations}{MatrixOrmatrix,missingOrNULL}(object, annotations, - background_peaks, expectation = computeExpectations(object)) +\S4method{computeDeviations}{MatrixOrmatrix,missingOrNULL}(object, + annotations, background_peaks, + expectation = computeExpectations(object)) } \arguments{ \item{object}{chromVARCounts object} diff --git a/man/computeExpectations.Rd b/man/computeExpectations.Rd index d98e4e8..d9d48ff 100644 --- a/man/computeExpectations.Rd +++ b/man/computeExpectations.Rd @@ -12,8 +12,8 @@ computeExpectations(object, ...) \S4method{computeExpectations}{MatrixOrmatrix}(object, norm = FALSE, group = NULL) -\S4method{computeExpectations}{SummarizedExperiment}(object, norm = FALSE, - group = NULL) +\S4method{computeExpectations}{SummarizedExperiment}(object, + norm = FALSE, group = NULL) } \arguments{ \item{object}{SummarizedExperiment} diff --git a/man/computeVariability.Rd b/man/computeVariability.Rd index 853cb1c..4cc907b 100644 --- a/man/computeVariability.Rd +++ b/man/computeVariability.Rd @@ -4,8 +4,9 @@ \alias{computeVariability} \title{computeVariability} \usage{ -computeVariability(object, bootstrap_error = TRUE, bootstrap_samples = 1000, - bootstrap_quantiles = c(0.025, 0.975), na.rm = TRUE) +computeVariability(object, bootstrap_error = TRUE, + bootstrap_samples = 1000, bootstrap_quantiles = c(0.025, 0.975), + na.rm = TRUE) } \arguments{ \item{object}{output from \code{\link{computeDeviations}}} diff --git a/man/deviationScores.Rd b/man/deviationScores.Rd index fa2fdf6..67ef05b 100644 --- a/man/deviationScores.Rd +++ b/man/deviationScores.Rd @@ -4,7 +4,6 @@ \name{deviationScores} \alias{deviationScores} \alias{deviationScores,chromVARDeviations-method} -\alias{deviationScores,chromVARDeviations-method} \title{deviationScores} \usage{ deviationScores(object) diff --git a/man/deviations.Rd b/man/deviations.Rd index 32ad3d3..9a43a14 100644 --- a/man/deviations.Rd +++ b/man/deviations.Rd @@ -4,7 +4,6 @@ \name{deviations} \alias{deviations} \alias{deviations,chromVARDeviations-method} -\alias{deviations,chromVARDeviations-method} \title{deviations} \usage{ deviations(object) diff --git a/man/deviationsTsne.Rd b/man/deviationsTsne.Rd index 1c17710..7430ee6 100644 --- a/man/deviationsTsne.Rd +++ b/man/deviationsTsne.Rd @@ -4,9 +4,9 @@ \alias{deviationsTsne} \title{deviationsTsne} \usage{ -deviationsTsne(object, threshold = 1.5, perplexity = if (what == "samples") - 30 else 8, max_iter = 1000, theta = 0.5, what = c("samples", - "annotations"), shiny = FALSE) +deviationsTsne(object, threshold = 1.5, perplexity = if (what == + "samples") 30 else 8, max_iter = 1000, theta = 0.5, + what = c("samples", "annotations"), shiny = FALSE) } \arguments{ \item{object}{deviations result} diff --git a/man/differentialDeviations.Rd b/man/differentialDeviations.Rd index b2e6ccf..dff230c 100644 --- a/man/differentialDeviations.Rd +++ b/man/differentialDeviations.Rd @@ -4,8 +4,8 @@ \alias{differentialDeviations} \title{differentialDeviations} \usage{ -differentialDeviations(object, groups, alternative = c("two.sided", "less", - "greater"), parametric = TRUE) +differentialDeviations(object, groups, alternative = c("two.sided", + "less", "greater"), parametric = TRUE) } \arguments{ \item{object}{chromVARDeviations object} diff --git a/man/getAnnotationCorrelation.Rd b/man/getAnnotationCorrelation.Rd index 996eb53..93e5583 100644 --- a/man/getAnnotationCorrelation.Rd +++ b/man/getAnnotationCorrelation.Rd @@ -29,16 +29,16 @@ getAnnotationCorrelation(object, annotations, ...) \S4method{getAnnotationCorrelation}{MatrixOrmatrix,SummarizedExperiment}(object, - annotations, background_peaks, expectation = computeExpectations(object), - variabilities = NULL) + annotations, background_peaks, + expectation = computeExpectations(object), variabilities = NULL) \S4method{getAnnotationCorrelation}{MatrixOrmatrix,MatrixOrmatrix}(object, - annotations, background_peaks, expectation = computeExpectations(object), - variabilities = NULL) + annotations, background_peaks, + expectation = computeExpectations(object), variabilities = NULL) -\S4method{getAnnotationCorrelation}{MatrixOrmatrix,list}(object, annotations, - background_peaks, expectation = computeExpectations(object), - variabilities = NULL) +\S4method{getAnnotationCorrelation}{MatrixOrmatrix,list}(object, + annotations, background_peaks, + expectation = computeExpectations(object), variabilities = NULL) } \arguments{ \item{object}{result from computeDeviations} diff --git a/man/getAnnotationSynergy.Rd b/man/getAnnotationSynergy.Rd index 9e7d55e..da0b93d 100644 --- a/man/getAnnotationSynergy.Rd +++ b/man/getAnnotationSynergy.Rd @@ -19,23 +19,27 @@ getAnnotationSynergy(object, annotations, ...) expectation = computeExpectations(object), variabilities = NULL, nbg = 25) -\S4method{getAnnotationSynergy}{SummarizedExperiment,MatrixOrmatrix}(object, + + \S4method{getAnnotationSynergy}{SummarizedExperiment,MatrixOrmatrix}(object, annotations, background_peaks = getBackgroundPeaks(object), expectation = computeExpectations(object), variabilities = NULL, nbg = 25) -\S4method{getAnnotationSynergy}{SummarizedExperiment,list}(object, annotations, - background_peaks = getBackgroundPeaks(object), +\S4method{getAnnotationSynergy}{SummarizedExperiment,list}(object, + annotations, background_peaks = getBackgroundPeaks(object), expectation = computeExpectations(object), variabilities = NULL, nbg = 25) -\S4method{getAnnotationSynergy}{MatrixOrmatrix,SummarizedExperiment}(object, - annotations, background_peaks, expectation = computeExpectations(object), - variabilities = NULL, nbg = 25) + + \S4method{getAnnotationSynergy}{MatrixOrmatrix,SummarizedExperiment}(object, + annotations, background_peaks, + expectation = computeExpectations(object), variabilities = NULL, + nbg = 25) \S4method{getAnnotationSynergy}{MatrixOrmatrix,MatrixOrmatrix}(object, - annotations, background_peaks, expectation = computeExpectations(object), - variabilities = NULL, nbg = 25) + annotations, background_peaks, + expectation = computeExpectations(object), variabilities = NULL, + nbg = 25) \S4method{getAnnotationSynergy}{MatrixOrmatrix,list}(object, annotations, background_peaks, expectation = computeExpectations(object), diff --git a/man/getAnnotations.Rd b/man/getAnnotations.Rd index 6f7c223..40874a3 100644 --- a/man/getAnnotations.Rd +++ b/man/getAnnotations.Rd @@ -20,8 +20,8 @@ getAnnotations(annotations, ...) \S4method{getAnnotations}{list}(annotations, npeaks = NULL, ...) -\S4method{getAnnotations}{character}(annotations, rowRanges, column = NULL, - ...) +\S4method{getAnnotations}{character}(annotations, rowRanges, + column = NULL, ...) } \arguments{ \item{annotations}{matrix, Matrix, or data.frame of fragment counts, diff --git a/man/getBackgroundPeaks.Rd b/man/getBackgroundPeaks.Rd index 99ef258..7e55c7b 100644 --- a/man/getBackgroundPeaks.Rd +++ b/man/getBackgroundPeaks.Rd @@ -14,10 +14,11 @@ getBackgroundPeaks(object, ...) bias = rowData(object)$bias, niterations = 50, w = 0.1, bs = 50) \S4method{getBackgroundPeaks}{RangedSummarizedExperiment}(object, - bias = rowRanges(object)$bias, niterations = 50, w = 0.1, bs = 50) + bias = rowRanges(object)$bias, niterations = 50, w = 0.1, + bs = 50) -\S4method{getBackgroundPeaks}{MatrixOrmatrix}(object, bias, niterations = 50, - w = 0.1, bs = 50) +\S4method{getBackgroundPeaks}{MatrixOrmatrix}(object, bias, + niterations = 50, w = 0.1, bs = 50) } \arguments{ \item{object}{fragment counts as SummarizedExperiment, RangedSummarized, diff --git a/man/getCisGroups.Rd b/man/getCisGroups.Rd index 4808630..0fe37cc 100644 --- a/man/getCisGroups.Rd +++ b/man/getCisGroups.Rd @@ -12,7 +12,8 @@ getCisGroups(object, ...) \S4method{getCisGroups}{RangedSummarizedExperiment}(object, grpsize = 25, stepsize = 10) -\S4method{getCisGroups}{GenomicRanges}(object, grpsize = 25, stepsize = 10) +\S4method{getCisGroups}{GenomicRanges}(object, grpsize = 25, + stepsize = 10) } \arguments{ \item{object}{GenomicRanges or RangedSummarizedExperiment} diff --git a/man/getCounts.Rd b/man/getCounts.Rd index 0b183d1..8bf48ad 100644 --- a/man/getCounts.Rd +++ b/man/getCounts.Rd @@ -4,8 +4,8 @@ \alias{getCounts} \title{getCounts} \usage{ -getCounts(alignment_files, peaks, paired, by_rg = FALSE, format = c("bam", - "bed"), colData = NULL) +getCounts(alignment_files, peaks, paired, by_rg = FALSE, + format = c("bam", "bed"), colData = NULL, is_10x = FALSE) } \arguments{ \item{alignment_files}{filenames for bam or bed files with aligned reads} @@ -19,13 +19,15 @@ getCounts(alignment_files, peaks, paired, by_rg = FALSE, format = c("bam", \item{format}{bam or bed? default is bam} \item{colData}{sample annotation DataFrame} + +\item{is_10x}{is this 10x format? if so, format must also be set to 'bed'} } \value{ \code{\link[SummarizedExperiment]{RangedSummarizedExperiment-class}} object } \description{ -makes matrix of fragment counts in peaks using one or multiple bam or bed +makes matrix of fragment counts in peaks using one or multiple bam or bed files } \examples{ @@ -36,28 +38,28 @@ test_peaks <- getPeaks(peaks_file, sort = TRUE) # With single bam with RG tags (can also give multiple bams with RG) test_rg <- system.file("extdata", "test_RG.bam", package = "chromVAR") -test_counts <- getCounts(test_rg, peaks = test_peaks, by_rg = TRUE, - paired = TRUE, +test_counts <- getCounts(test_rg, peaks = test_peaks, by_rg = TRUE, + paired = TRUE, colData = S4Vectors::DataFrame(condition ="A")) - + # Multiple bams without RG tags test_bam1 <- system.file("extdata", "test_single1.bam", package = "chromVAR") test_bam2 <- system.file("extdata", "test_single2.bam", package = "chromVAR") test_bam3 <- system.file("extdata", "test_single3.bam", package = "chromVAR") -test_counts2 <- getCounts(c(test_bam1, test_bam2,test_bam3), - peaks = test_peaks, by_rg = FALSE, - paired = TRUE, - colData = S4Vectors::DataFrame(celltype = - c("A","B","C"))) - +test_counts2 <- getCounts(c(test_bam1, test_bam2,test_bam3), + peaks = test_peaks, by_rg = FALSE, + paired = TRUE, + colData = S4Vectors::DataFrame(celltype = + c("A","B","C"))) + # Bed file with reads (can give multiple bed files, here we will just read 1) test_bed <- system.file("extdata", "test_reads.bed", package = "chromVAR") -test_counts3 <- getCounts(test_bed, test_peaks, by_rg = FALSE, - paired = FALSE, - format = "bed") +test_counts3 <- getCounts(test_bed, test_peaks, by_rg = FALSE, + paired = FALSE, + format = "bed") } \seealso{ -\code{\link{getSampleDepths}}, \code{\link{getPeaks}}, +\code{\link{getSampleDepths}}, \code{\link{getPeaks}}, \code{\link{filterSamples}} } diff --git a/man/getPeaks.Rd b/man/getPeaks.Rd index f72a475..9301b7d 100644 --- a/man/getPeaks.Rd +++ b/man/getPeaks.Rd @@ -24,7 +24,7 @@ As in standard definition of bed file, first column is assumed to be chromosome, second is assumed to be start of peak (0-based), and third is assumed to be end of peak (1-based). Note that in output GenomicRanges output, start and end indices are both 1-based. Extra columns can be added - as metadata or strand information if provided, but the user must indicate + as metadata or strand information if provided, but the user must indicate column index and name using named vector for extra_cols. } \examples{ @@ -32,6 +32,6 @@ peaks_file <- system.file("extdata", "test_bed.txt", package = "chromVAR") peaks <- getPeaks(peaks_file, sort = TRUE) } \seealso{ -\code{\link{getCounts}}, \code{\link{filterPeaks}}, +\code{\link{getCounts}}, \code{\link{filterPeaks}}, \code{\link{readNarrowpeaks}} } diff --git a/man/getSampleDepths.Rd b/man/getSampleDepths.Rd index 910c005..d53fb62 100644 --- a/man/getSampleDepths.Rd +++ b/man/getSampleDepths.Rd @@ -26,18 +26,18 @@ makes vector of read depths in bam files or RG groups within bam files # With single bam with RG tags (can also give multiple bams with RG) test_rg <- system.file("extdata", "test_RG.bam", package = "chromVAR") -test_counts <- getSampleDepths(test_rg, by_rg = TRUE, +test_counts <- getSampleDepths(test_rg, by_rg = TRUE, paired = TRUE) - + # Multiple bams without RG tags test_bam1 <- system.file("extdata", "test_single1.bam", package = "chromVAR") test_bam2 <- system.file("extdata", "test_single2.bam", package = "chromVAR") test_bam3 <- system.file("extdata", "test_single3.bam", package = "chromVAR") -test_counts2 <- getSampleDepths(c(test_bam1, test_bam2,test_bam3), - by_rg = FALSE, - paired = TRUE) - +test_counts2 <- getSampleDepths(c(test_bam1, test_bam2,test_bam3), + by_rg = FALSE, + paired = TRUE) + } \seealso{ \code{\link{getCounts}}, \code{\link{filterSamples}} diff --git a/man/makePermutedSets.Rd b/man/makePermutedSets.Rd index 248de1b..45939ae 100644 --- a/man/makePermutedSets.Rd +++ b/man/makePermutedSets.Rd @@ -16,7 +16,8 @@ \usage{ makePermutedSets(object, annotations, ...) -\S4method{makePermutedSets}{SummarizedExperiment,SummarizedExperiment}(object, + + \S4method{makePermutedSets}{SummarizedExperiment,SummarizedExperiment}(object, annotations, bias = rowData(object)$bias, window = 10) @@ -29,11 +30,12 @@ makePermutedSets(object, annotations, ...) \S4method{makePermutedSets}{SummarizedExperiment,MatrixOrmatrix}(object, annotations, bias = rowData(object)$bias, window = 10) -\S4method{makePermutedSets}{RangedSummarizedExperiment,MatrixOrmatrix}(object, + + \S4method{makePermutedSets}{RangedSummarizedExperiment,MatrixOrmatrix}(object, annotations, bias = rowRanges(object)$bias, window = 10) -\S4method{makePermutedSets}{MatrixOrmatrix,MatrixOrmatrix}(object, annotations, - bias, window = 10) +\S4method{makePermutedSets}{MatrixOrmatrix,MatrixOrmatrix}(object, + annotations, bias, window = 10) \S4method{makePermutedSets}{SummarizedExperiment,list}(object, annotations, bias = rowData(object)$bias, window = 10) diff --git a/man/matchKmers.Rd b/man/matchKmers.Rd index 1357a3d..d6bae02 100644 --- a/man/matchKmers.Rd +++ b/man/matchKmers.Rd @@ -15,8 +15,8 @@ \usage{ matchKmers(k, subject, ...) -\S4method{matchKmers}{character,DNAStringSet}(k, subject, out = c("matches", - "positions"), ranges = NULL) +\S4method{matchKmers}{character,DNAStringSet}(k, subject, + out = c("matches", "positions"), ranges = NULL) \S4method{matchKmers}{character,character}(k, subject, out = c("matches", "positions"), ranges = NULL) @@ -25,9 +25,11 @@ matchKmers(k, subject, ...) "positions"), ranges = NULL) \S4method{matchKmers}{character,GenomicRanges}(k, subject, - genome = GenomeInfoDb::genome(subject), out = c("matches", "positions")) + genome = GenomeInfoDb::genome(subject), out = c("matches", + "positions")) -\S4method{matchKmers}{character,RangedSummarizedExperiment}(k, subject, ...) +\S4method{matchKmers}{character,RangedSummarizedExperiment}(k, subject, + ...) \S4method{matchKmers}{numeric,ANY}(k, subject, ...) diff --git a/man/readNarrowpeaks.Rd b/man/readNarrowpeaks.Rd index 539b8a7..4766d34 100644 --- a/man/readNarrowpeaks.Rd +++ b/man/readNarrowpeaks.Rd @@ -17,7 +17,7 @@ readNarrowpeaks(filename, width = 500, non_overlapping = TRUE) \code{\link[GenomicRanges]{GRanges-class}} } \description{ -Reads in peaks in narrowpeaks format, as output by macs2. Uses summit as -center of peak, and makes peak the given 'width'. By default removes +Reads in peaks in narrowpeaks format, as output by macs2. Uses summit as +center of peak, and makes peak the given 'width'. By default removes overlapping peaks to get set of peaks with no overlaps } From ac533b1f4ecb8870fd818eb81b2fe96918a4a412 Mon Sep 17 00:00:00 2001 From: Anna Yeaton Date: Sun, 3 Mar 2019 12:09:45 -0500 Subject: [PATCH 8/9] move step to convert to sparse matrix up --- R/get_inputs.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/get_inputs.R b/R/get_inputs.R index 25b6f75..3e9989e 100644 --- a/R/get_inputs.R +++ b/R/get_inputs.R @@ -202,7 +202,7 @@ getCounts <- function(alignment_files, peaks, paired, by_rg = FALSE, if (format == "bam") { return(get_counts_from_bams(alignment_files, peaks, paired, by_rg, colData)) if(is_10 == TRUE){ - message("Error: Cannot input bam files for 10x data. To read in 10x data, + message("Error: Cannot input bam files for 10x data. To read in 10x data, please input a bed file") } } else if (format == "bed"){ @@ -265,7 +265,6 @@ get_counts_from_10x_beds <- function(beds, peaks, paired, colData = NULL) { results <- lapply(beds, function(i) { # read in alignments from bed file. If 10x, readAlignmentFromBed will add # barcode information in a metadata field - fragment <- readAlignmentFromBed(i, paired = paired, is_10x = TRUE) if (paired) { left <- resize(fragment, width = 1, fix = "start", ignore.strand = TRUE) @@ -275,21 +274,27 @@ get_counts_from_10x_beds <- function(beds, peaks, paired, colData = NULL) { fragments <- resize(fragment, width = 1, ignore.strand = FALSE) } unique_barcodes <- unique(fragment$barcodes) + # iterate over the unique barcodes cell_list <- lapply(unique_barcodes, function(i) { - # subset data based on barcode + # subset data based on that unique barcodes current_barcode <- fragments[mcols(fragments)$barcodes == i] + + # make seqlevels for peaks and barcodes the same if (!isTRUE(all.equal(sort(seqlevels(current_barcode)), sort(seqlevels(peaks))))) { merged_seq <- unique(c(seqlevels(current_barcode), seqlevels(peaks))) seqlevels(current_barcode) <- merged_seq seqlevels(peaks) <- merged_seq } + + # count overlaps between peaks and current barcode return(list(counts = countOverlaps(peaks, current_barcode, type = "any", ignore.strand = TRUE), depth = length(current_barcode), barcodes = i)) }) }) - mat <- lapply(results, function(x) vapply(x, function(x) x[["counts"]], rep(0, length(peaks)))) + # lapply over the bed files and vapply over the cells + mat <- lapply(results, function(x) Matrix::Matrix(vapply(x, function(x) x[["counts"]], rep(0, length(peaks))))) depths <- lapply(results, function(x) vapply(x, function(x) x[["depth"]], 0)) # get barcodes here codes <- lapply(results, function(result) { barcodes <- unlist(lapply(result, function(x) x$barcodes))}) @@ -304,6 +309,7 @@ get_counts_from_10x_beds <- function(beds, peaks, paired, colData = NULL) { for(j in 1:length(colData[,1])){ save <- c(save, rep(colData[j,1], ncol(mat[[j]]))) } + # overwrite colData colData <- c() colData$cell_name <- save colData$depth <- unlist(depths) @@ -311,9 +317,7 @@ get_counts_from_10x_beds <- function(beds, peaks, paired, colData = NULL) { mat <- t(do.call(rbind, lapply(mat, function(x) if (length(x) == 1L && is.na(x)) NULL else t(x)))) - counts_mat <- Matrix::Matrix(mat) - - out <- SummarizedExperiment(assays = list(counts = counts_mat), + out <- SummarizedExperiment(assays = list(counts = mat), rowRanges = peaks, colData = colData) return(out) From 990f62f7ab60ca4ad8a6b8286843654fabf11dac Mon Sep 17 00:00:00 2001 From: Anna Yeaton Date: Sun, 3 Mar 2019 12:14:55 -0500 Subject: [PATCH 9/9] add test case for two 10x bed files with the same barcodes --- tests/testthat/test_get_counts.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test_get_counts.R b/tests/testthat/test_get_counts.R index af9d056..cb63afe 100644 --- a/tests/testthat/test_get_counts.R +++ b/tests/testthat/test_get_counts.R @@ -21,6 +21,16 @@ test_that("can read in 10x", { expect_equal(ncol(assays(counts)$counts),861) }) +# Test 10x multiple bed files -------------------------------------------------- + +test_that("can read in two 10x bed files", { + counts <- getCounts(c(test_bed_10x,test_bed_10x), test_peaks, is_10x = TRUE, + format = "bed",paired = TRUE, colData = DataFrame(cell_name = c("Test1", "Test2"))) + expect_is(counts, "RangedSummarizedExperiment") + # 861 unique barcodes = 861*2 cells + expect_equal(ncol(assays(counts)$counts),1722) +}) + # Test fragment counts with RG ___________-------------------------------------- test_that("can count fragments using RG tags", {