From f8da5cde2b4b5392c0535719469057086b2d71c8 Mon Sep 17 00:00:00 2001 From: Pedro Feijao Date: Wed, 2 Aug 2017 17:17:13 -0700 Subject: [PATCH] updated basic usage --- docs/Basic Usage.ipynb | 452 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 404 insertions(+), 48 deletions(-) diff --git a/docs/Basic Usage.ipynb b/docs/Basic Usage.ipynb index 7721f75..2b1df8c 100644 --- a/docs/Basic Usage.ipynb +++ b/docs/Basic Usage.ipynb @@ -145,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -169,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -178,9 +178,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "2017-08-02T12:46:31.172 - info: Downloading the MLST database xml file...\n", + "2017-08-02T15:50:45.58 - info: Downloading the MLST database xml file...\n", " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n" + " Dload Upload Total Spent Left Speed\n", + "100 110k 100 110k 0 0 12022 0 0:00:09 0:00:09 --:--:-- 12022\n", + "Campylobacter concisus/curvus ID:23\n", + "Campylobacter fetus ID:24\n", + "Campylobacter helveticus ID:25\n", + "Campylobacter hyointestinalis ID:26\n", + "Campylobacter insulaenigrae ID:27\n", + "Campylobacter jejuni ID:28\n", + "Campylobacter lanienae ID:29\n", + "Campylobacter lari ID:30\n", + "Campylobacter sputorum ID:31\n", + "Campylobacter upsaliensis ID:32\n", + "10 schema found.\n" ] } ], @@ -199,22 +211,78 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T15:50:59.684 - info: Searching for the scheme ... \n", + "2017-08-02T15:50:59.882 - info: Downloading scheme for Campylobacter jejuni ... \n", + "2017-08-02T15:50:59.883 - info: Downloading profile ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 298k 100 298k 0 0 35655 0 0:00:08 0:00:08 --:--:-- 35655\n", + "2017-08-02T15:51:08.575 - info: Downloading locus aspA ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 211k 100 211k 0 0 17411 0 0:00:12 0:00:12 --:--:-- 17412\n", + "2017-08-02T15:51:21.033 - info: Downloading locus glnA ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 297k 100 297k 0 0 31496 0 0:00:09 0:00:09 --:--:-- 31496\n", + "2017-08-02T15:51:30.709 - info: Downloading locus gltA ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 215k 100 215k 0 0 26040 0 0:00:08 0:00:08 --:--:-- 26043\n", + "2017-08-02T15:51:39.18 - info: Downloading locus glyA ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 353k 100 353k 0 0 23765 0 0:00:15 0:00:15 --:--:-- 106k\n", + "2017-08-02T15:51:54.419 - info: Downloading locus pgm ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 448k 100 448k 0 0 45147 0 0:00:10 0:00:10 --:--:-- 107k\n", + "2017-08-02T15:52:04.612 - info: Downloading locus tkt ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 320k 100 320k 0 0 35598 0 0:00:09 0:00:09 --:--:-- 35599\n", + "2017-08-02T15:52:13.846 - info: Downloading locus uncA ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 258k 100 258k 0 0 24998 0 0:00:10 0:00:10 --:--:-- 24998\n", + "2017-08-02T15:52:24.44 - info: Finished downloading.\n", + "2017-08-02T15:52:24.45 - info: Building the k-mer database ...\n", + "2017-08-02T15:52:28.982 - info: Opening FASTA files ... \n", + "2017-08-02T15:52:30.302 - info: Combining results for each locus ...\n", + "2017-08-02T15:52:30.942 - info: Saving DB ...\n", + "2017-08-02T15:52:32.937 - info: Done!\n" + ] + } + ], "source": [ "MentaLiST.jl download_pubmlst -k 31 -o Campy -s 28 --db Campy/mlst_31.db " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aspA.tfa glnA.tfa glyA.tfa mlst_31.db.profile tkt.tfa\n", + "campylobacter.txt gltA.tfa mlst_31.db pgm.tfa uncA.tfa\n" + ] + } + ], "source": [ "# The output folder has all the FASTA files and profile for the scheme, and also the kmer database file,\n", "# mlst_31.db on this example.\n", @@ -233,11 +301,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T15:52:37.262 - info: Downloading the cgmlist HTML to find schema...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 4221 0 4221 0 0 551 0 --:--:-- 0:00:07 --:--:-- 1015\n", + "Clostridioides difficile - ID:3560802\n", + "Enterococcus faecium - ID:991893\n", + "Francisella tularensis - ID:260204\n", + "Legionella pneumophila - ID:1025099\n", + "Listeria monocytogenes - ID:690488\n", + "Mycobacterium tuberculosis - ID:741110\n", + "Staphylococcus aureus - ID:141106\n", + "7 schema found.\n" + ] + } + ], "source": [ "MentaLiST.jl list_cgmlst" ] @@ -251,22 +338,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "usage: MentaLiST.jl download_cgmlst -o OUTPUT -s SCHEME -k K --db DB\n", + " [-h]\n", + "\n", + "optional arguments:\n", + " -o, --output OUTPUT Output folder for the schema files.\n", + " -s, --scheme SCHEME Species name or ID of the scheme\n", + " -k K K-mer size (type: Int8)\n", + " --db DB Output file for the kmer database.\n", + " -h, --help show this help message and exit\n", + "\n" + ] + } + ], "source": [ "MentaLiST.jl download_cgmlst -h" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T15:52:54.016 - info: Downloading cgMLST scheme ...\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 2340k 0 2340k 0 0 291k 0 --:--:-- 0:00:08 --:--:-- 480k\n", + "2017-08-02T15:53:02.33 - info: Unzipping cgMLST scheme into individual FASTA files for each loci ...\n", + "........\n", + "2017-08-02T15:53:10.07 - info: 1521 loci found.\n", + "2017-08-02T15:53:10.07 - info: Building the k-mer database ...\n", + "2017-08-02T15:53:13.733 - info: Opening FASTA files ... \n", + "2017-08-02T15:53:36.065 - info: Combining results for each locus ...\n", + "2017-08-02T15:54:16.393 - info: Saving DB ...\n", + "2017-08-02T15:54:20.905 - info: Done!\n" + ] + } + ], "source": [ "MentaLiST.jl download_cgmlst -o cgmlst/legionella -s 1025099 -k 31 --db cgmlst/legionella/db_31" ] @@ -283,11 +406,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Campy/aspA.tfa Campy/gltA.tfa Campy/pgm.tfa Campy/uncA.tfa\n", + "Campy/glnA.tfa Campy/glyA.tfa Campy/tkt.tfa\n" + ] + } + ], "source": [ "# Each file is a different locus:\n", "ls Campy/*.tfa" @@ -295,11 +427,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">glnA_1\n", + "GATCCTTTTACGGCTGATCCTACTATCATAGTATTTTGTGATGTGTATGATATTTACAAA\n", + "GGACAAATGTATGAAAAATGTCCAAGAAGCATAGCAAAAAAAGCAATAGAACACCTTAAA\n", + "AATAGTGGCATAGCTGATACTGCTTACTTTGGACCAGAAAATGAATTCTTTGTTTTTGAT\n", + "AGTGTAAAAATAGTTGATACTACTCATTGTTCTAAGTATGAAGTTGATACCGAAGAAGGA\n", + "GAGTGGAATGATGATAGAGAATTTACCGATAGCTACAATACTGGACACAGGCCAAGAAAC\n", + "AAAGGTGGATATTTTCCAGTTCAGCCAATTGATTCTTTAGTAGATATTCGTTCTGAAATG\n", + "GTTCAAACCCTTGAAAAAGTAGGTCTTAAAACTTTTGTTCATCATCATGAAGTTGCACAA\n", + "GGACAAGCTGAAATAGGAGTAAATTTTGGCACGCTTGTAGAAGCAGCTGACAATGTT\n", + ">glnA_2\n", + "GATCCTTTTACGGCTGATCCTACTATCATAGTATTTTGTGATGTGTATGATATTTACAAA\n", + "GGACAAATGTATGAAAAATGTCCAAGAAGCATAGCAAAAAAAGCAATGGAACACCTTAAA\n", + "AATAGTGGCATAGCTGATACTGCTTACTTTGGACCAGAAAATGAATTCTTTGTTTTTGAT\n", + "AGTGTAAAAATAGTTGATACTACTCATTGTTCTAAGTATGAAGTTGATACCGAAGAAGGA\n", + "GAGTGGAATGATGATAGAGAATTTACCGATAGCTACAATACTGGACACAGGCCAAGAAAC\n", + "AAAGGTGGATATTTTCCAGTTCAGCCAATTGATTCTTTAGTAGATATTCGTTCTGAAATG\n", + "GTTCAAACCCTTGAAAAAGTAGGTCTTAAAACTTTTGTTCATCATCATGAAGTTGCACAA\n", + "GGACAAGCTGAAATAGGAGTAAATTTTGGCACGCTTGTAGAAGCAGCTGACAATGTT\n", + ">glnA_3\n", + "GATCCTTTTACAGCTGATCCTACTATCATAGTGTTTTGTGATGTGTATGATATTTACAAA\n" + ] + } + ], "source": [ "# For each locus file, a different ID and sequence for each allele:\n", "head -n 20 Campy/glnA.tfa" @@ -307,11 +466,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T15:54:29.65 - info: Opening FASTA files ... \n", + "2017-08-02T15:54:31.13 - info: Combining results for each locus ...\n", + "2017-08-02T15:54:31.763 - info: Saving DB ...\n", + "2017-08-02T15:54:33.771 - info: Done!\n" + ] + } + ], "source": [ "# Install the Campylobacter jejuni scheme directly from the FASTA files:\n", "MentaLiST.jl build_db -k 25 --db Campy/mlst_25.db -p Campy/campylobacter.txt -f Campy/*.tfa" @@ -328,11 +498,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "usage: MentaLiST.jl call -o O -s S --db DB [-t T] [-q] [-e] [-j J]\n", + " [-h] files...\n", + "\n", + "positional arguments:\n", + " files FastQ input files\n", + "\n", + "optional arguments:\n", + " -o O Output file with MLST call\n", + " -s S Sample name\n", + " --db DB Kmer database\n", + " -t T A read of length L is discarded if it has at less than\n", + " (L - k) * t hits to the same locus in the kmer database,\n", + " where k is the kmer length. 0 <= t <= 1 (type: Float64,\n", + " default: 0.2)\n", + " -q Quick filter (MentaLiST FAST); if middle kmer of a read\n", + " is not in the kmer DB, the read is discarded. Disabled\n", + " by default.\n", + " -e Use external kmc kmer counter. Disabled by default.\n", + " -j J Skip length between consecutive k-mers. Defaults to 1.\n", + " (type: Int64, default: 1)\n", + " -h, --help show this help message and exit\n", + "\n" + ] + } + ], "source": [ "# Help:\n", "MentaLiST.jl call -h" @@ -347,11 +546,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2017-08-02 15:54:38-- https://github.com/WGS-TB/MentaLiST/raw/master/data/SRR5824107_small.fastq.gz\n", + "Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113\n", + "Connecting to github.com (github.com)|192.30.253.112|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/WGS-TB/MentaLiST/master/data/SRR5824107_small.fastq.gz [following]\n", + "--2017-08-02 15:54:39-- https://raw.githubusercontent.com/WGS-TB/MentaLiST/master/data/SRR5824107_small.fastq.gz\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.52.133\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.52.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 2992916 (2.9M) [application/octet-stream]\n", + "Saving to: ‘SRR5824107_small.fastq.gz’\n", + "\n", + "100%[======================================>] 2,992,916 --.-K/s in 0.1s \n", + "\n", + "2017-08-02 15:54:39 (20.1 MB/s) - ‘SRR5824107_small.fastq.gz’ saved [2992916/2992916]\n", + "\n" + ] + } + ], "source": [ "wget https://github.com/WGS-TB/MentaLiST/raw/master/data/SRR5824107_small.fastq.gz" ] @@ -365,11 +587,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T15:54:46.904 - info: Opening kmer database ... \n", + "2017-08-02T15:54:50.733 - info: Opening fastq file(s) ... \n", + "2017-08-02T15:54:52.316 - info: Writing output ...\n", + "2017-08-02T15:54:52.997 - info: Done.\n" + ] + } + ], "source": [ "MentaLiST.jl call -o campy_call.txt -s SRR5824107 --db Campy/mlst_31.db SRR5824107_small.fastq.gz " ] @@ -383,11 +616,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "campy_call.txt campy_call.txt.ties.txt campy_call.txt.votes.txt\n" + ] + } + ], "source": [ "# results:\n", "ls campy_call.*" @@ -395,11 +636,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample aspA glnA gltA glyA pgm tkt uncA ST clonal_complex\n", + "SRR5824107 2 17 2 3 2 1 5 883 ST-21 complex\n" + ] + } + ], "source": [ "# Allele calls and ST are on the campy_call.txt file:\n", "column -ts $'\\t' campy_call.txt" @@ -407,11 +657,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Locus\tAllele(votes),...\n", + "aspA\t2(1105), 43(1095), 308(1045), 150(1031), 31(1010), 398(965), 36(951), 149(919), 197(919), 144(919)\n", + "glnA\t17(842), 520(806), 607(800), 234(788), 526(788), 549(779), 289(727), 475(719), 254(719), 76(718)\n", + "gltA\t2(782), 307(761), 149(752), 89(746), 16(746), 250(743), 156(743), 206(714), 255(698), 267(698)\n", + "glyA\t3(1646), 9(1638), 10(1637), 121(1629), 389(1624), 506(1604), 658(1601), 449(1578), 73(1574), 280(1572)\n", + "pgm\t2(1318), 258(1307), 865(1304), 20(1298), 815(1298), 447(1297), 291(1294), 642(1287), 497(1287), 38(1287)\n", + "tkt\t1(2116), 298(2086), 474(2066), 343(2061), 255(2056), 454(2053), 312(2053), 90(2049), 62(2045), 617(2043)\n", + "uncA\t5(627), 25(621), 291(619), 246(612), 103(611), 195(610), 63(607), 282(607), 429(599), 225(596)\n" + ] + } + ], "source": [ "# Detailed vote count for each allele:\n", "cat campy_call.txt.votes.txt" @@ -426,60 +691,151 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2017-08-02 15:54:53-- https://github.com/WGS-TB/MentaLiST/raw/master/data/ERR2009175_small.fastq.gz\n", + "Resolving github.com (github.com)... 192.30.253.113, 192.30.253.112\n", + "Connecting to github.com (github.com)|192.30.253.113|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/WGS-TB/MentaLiST/master/data/ERR2009175_small.fastq.gz [following]\n", + "--2017-08-02 15:54:54-- https://raw.githubusercontent.com/WGS-TB/MentaLiST/master/data/ERR2009175_small.fastq.gz\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.52.133\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.52.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 26835758 (26M) [application/octet-stream]\n", + "Saving to: ‘ERR2009175_small.fastq.gz’\n", + "\n", + "100%[======================================>] 26,835,758 41.8MB/s in 0.6s \n", + "\n", + "2017-08-02 15:54:54 (41.8 MB/s) - ‘ERR2009175_small.fastq.gz’ saved [26835758/26835758]\n", + "\n" + ] + } + ], "source": [ "wget https://github.com/WGS-TB/MentaLiST/raw/master/data/ERR2009175_small.fastq.gz" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017-08-02T16:05:19.326 - info: Opening kmer database ... \n", + "2017-08-02T16:05:42.477 - info: Opening fastq file(s) ... \n", + "2017-08-02T16:06:15.855 - info: Writing output ...\n", + "2017-08-02T16:06:16.784 - info: Done.\n" + ] + } + ], "source": [ - "## Legionela, small sample:\n", - "MentaLiST.jl call -o legionela2.txt -s ERR2009175 --db cgmlst/legionella/db_31 ERR2009175_small2.fastq.gz " + "## Call alleles for the sample:\n", + "MentaLiST.jl call -o legionela.txt -s ERR2009175 --db cgmlst/legionella/db_31 ERR2009175_small.fastq.gz " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample lpg0004 lpg0006 lpg0007 lpg0008 lpg0009 lpg0010 lpg0011 lpg0012 lpg0014\n", + "ERR2009175 4 4 4 4 1 4 4 4 4\n" + ] + } + ], "source": [ - "# Check the first 10 calls:\n", - "cut -f1-10 legionela2.txt | column -ts $'\\t' " + "# Quick check of the first 10 calls:\n", + "cut -f1-10 legionela.txt | column -ts $'\\t' " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Locus\tAllele(votes),...\n", + "lpg0004\t4(5455), 10(1058), 1(978), 13(962), 22(917), 7(862), 9(852), 33(731), 23(685), 31(427)\n", + "lpg0006\t4(10963), 1(4251), 8(4243), 22(3774), 31(3305), 6(3165), 21(2998), 12(2840), 32(2313), 7(1362)\n", + "lpg0007\t4(2267), 20(519), 14(383), 1(353), 19(245), 3(220), 9(199), 2(113), 8(113), 5(113)\n", + "lpg0008\t4(3817), 35(3662), 1(2433), 7(2274), 33(960), 15(906), 34(860), 31(802), 2(799), 13(746)\n", + "lpg0009\t1(87), 2(30), 5(-20), 6(-24), 3(-33), 7(-305), 4(-386)\n", + "lpg0010\t4(3960), 1(1567), 8(1418), 27(1405), 14(1352), 6(1165), 18(1025), 15(937), 22(665), 26(586)\n", + "lpg0011\t4(368), 6(244), 12(244), 7(232), 14(182), 17(155), 1(96), 16(62), 2(37), 5(13)\n", + "lpg0012\t4(6589), 7(1913), 2(1678), 9(1659), 17(1632), 6(1629), 16(1605), 24(1468), 21(1332), 27(1276)\n", + "lpg0014\t4(1875), 21(1021), 27(575), 1(421), 20(368), 8(347), 9(291), 2(237), 5(197), 3(193)\n" + ] + } + ], "source": [ "# votes:\n", - "head legionela2.txt.votes.txt" + "head legionela.txt.votes.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since this reduced sample has very low coverage, in some loci there are alleles with the same number of votes, as seen on the ties file: " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "lpg0073\t2, 1\n", + "lpg0350\t4, 2, 3, 1\n", + "lpg0441\t6, 1\n", + "lpg0859\t4, 2, 3, 5, 1\n", + "lpg0878\t7, 3\n", + "lpg0892\t22, 3\n", + "lpg1293\t9, 19, 4\n", + "lpg1458\t6, 3, 5, 15, 1\n", + "lpg1768\t3, 20\n", + "lpg1943\t2, 11, 7, 9, 10, 8, 6, 4, 3, 5, 1\n", + "lpg2005\t21, 19, 3, 20\n", + "lpg2280\t7, 4\n", + "lpg2323\t9, 4\n", + "lpg2517\t8, 6, 4, 12\n", + "lpg2724\t9, 4, 13\n", + "lpg2825\t4, 2, 3, 5, 6, 1\n" + ] + } + ], "source": [ - "cat legionela2.txt.ties.txt" + "# ties:\n", + "cat legionela.txt.ties.txt" ] }, {