From 3557d77a7d995c1cacca5ddbdcb5bd473ea50d3e Mon Sep 17 00:00:00 2001 From: Jason Piper Date: Thu, 5 Dec 2013 20:17:04 +0000 Subject: [PATCH 1/4] dev branch version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 89c09ff..e6799c8 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name='pyDNase', - version="0.1.1", + version="0.1.1dev", description='DNase-seq analysis library', long_description=open('README.rst',"rt").read(), author='Jason Piper', From 264c6ecbb92aa4341b57b1a849ce849fa2cb1c5a Mon Sep 17 00:00:00 2001 From: Jason Piper Date: Sat, 7 Dec 2013 10:33:10 +0000 Subject: [PATCH 2/4] Fix BED parsing --- pyDNase/__init__.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/pyDNase/__init__.py b/pyDNase/__init__.py index 8927297..5f0924e 100644 --- a/pyDNase/__init__.py +++ b/pyDNase/__init__.py @@ -246,12 +246,14 @@ def loadBEDFile(self,filename): #This is done so that if a malformed BED record is detected, no intervals are loaded. records = [] - + intervalCount = max(enumerate(open(filename)))[0] + 1 for _ in progress.bar(range(intervalCount)): line = BEDfile.readline() - #NOTE! Assume that lines not starting with c are comments or track descriptions. 
- if line[0] == "c": + #Skip lines in the bed files which are UCSC track metadata or comments + if self.__isBEDHeader(line): + continue + else: records.append(self.__parseBEDString(line)) for i in records: @@ -270,6 +272,22 @@ def __malformedBEDline(self,BEDString): exceptionString = "Malformed BED line: {0}".format(BEDString) raise Exception(exceptionString) + def __isBEDHeader(string): + """ + Returns True/False whether a line in a bed file should be ignored according to + http://genome.ucsc.edu/goldenPath/help/customTrack.html#TRACK + """ + if string[0] == "#": + return True + + headers = ["name","description","type","visibility","color","itemRgb","useScore","group", + "priority","db","offset","maxItems","url","htmlUrl","bigDataUrl","track","browser"] + + for each in headers: + if string.startswith(each): + return True + return False + def __parseBEDString(self,BEDString): """ Parses the following BED formats From 04dab5f6e476f67297dc6915687a2abdec16b12f Mon Sep 17 00:00:00 2001 From: Jason Piper Date: Sat, 7 Dec 2013 10:41:03 +0000 Subject: [PATCH 3/4] minor code cleanup --- pyDNase/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyDNase/__init__.py b/pyDNase/__init__.py index 5f0924e..78a156f 100644 --- a/pyDNase/__init__.py +++ b/pyDNase/__init__.py @@ -251,9 +251,7 @@ def loadBEDFile(self,filename): for _ in progress.bar(range(intervalCount)): line = BEDfile.readline() #Skip lines in the bed files which are UCSC track metadata or comments - if self.__isBEDHeader(line): - continue - else: + if not self.__isBEDHeader(line): records.append(self.__parseBEDString(line)) for i in records: From cb869ba076045246b6636e5b644a09038dc871bb Mon Sep 17 00:00:00 2001 From: Jason Piper Date: Mon, 9 Dec 2013 11:40:10 +0000 Subject: [PATCH 4/4] Fix BED parsing issue, remove custom clint dependency --- CHANGES | 8 +++++++- examples/example.bed | 10 ++++++++-- pyDNase/__init__.py | 14 +++++++++----- setup.py | 7 ++----- 4 files changed, 26 
insertions(+), 13 deletions(-) diff --git a/CHANGES b/CHANGES index 8f6f804..8bdaa6c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,7 +1,13 @@ +0.1.2 - 2013-12-09 +================== +Fix issue where BED intervals with chromosome names not starting with "c" were silently being ignored (reported by Aaron Hardin) +Fix clint dependency issue (no longer requires custom version of clint) +Fix spelling error in CHANGES + 0.1.1 - 2013-12-05 ================== Misc. small bug fixes -Fixed Python 2.6 Compatability +Fixed Python 2.6 Compatibility Added JSON export script 0.1.0 - 2013-09-01 diff --git a/examples/example.bed b/examples/example.bed index 0bd6ac6..68b4752 100644 --- a/examples/example.bed +++ b/examples/example.bed @@ -1,2 +1,8 @@ -track name=exampleFile description="This is an example" useScore=1 -chr6 170863142 170863532 0 0 + \ No newline at end of file +#This is a comment line and should be ignored. +#The following lines should also be ignored as they are valid BED Headers +browser position chr6:170863142-170863532 +browser hide all +track name="ItemRGBDemo" description="Item RGB demonstration" visibility=2 +itemRgb="On" +visibility=2 colorByStrand="255,0,0 0,0,255" +chr6 170863142 170863532 0 0 + diff --git a/pyDNase/__init__.py b/pyDNase/__init__.py index 78a156f..6c9731c 100644 --- a/pyDNase/__init__.py +++ b/pyDNase/__init__.py @@ -12,7 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.
-__version__ = "0.1.1" +__version__ = "0.1.2" import os import numpy as np @@ -270,7 +270,7 @@ def __malformedBEDline(self,BEDString): exceptionString = "Malformed BED line: {0}".format(BEDString) raise Exception(exceptionString) - def __isBEDHeader(string): + def __isBEDHeader(self,string): """ Returns True/False whether a line in a bed file should be ignored according to http://genome.ucsc.edu/goldenPath/help/customTrack.html#TRACK @@ -308,9 +308,13 @@ def __parseBEDString(self,BEDString): self.__malformedBEDline(BEDString) #Default if only Chrom Start End is detected - chrom = BEDSplit[0] - startbp = int(BEDSplit[1]) - endbp = int(BEDSplit[2]) + try: + chrom = BEDSplit[0] + startbp = int(BEDSplit[1]) + endbp = int(BEDSplit[2]) + except: + self.__malformedBEDline(BEDString) + label = 0 score = 0 strand = "+" diff --git a/setup.py b/setup.py index e6799c8..84f60a5 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( name='pyDNase', - version="0.1.1dev", + version="0.1.2", description='DNase-seq analysis library', long_description=open('README.rst',"rt").read(), author='Jason Piper', @@ -29,15 +29,12 @@ 'pyDNase.footprinting', ], - #Uses a custom version of clint that has a time estimator on the progress bar - dependency_links = ["http://github.com/jpiper/clint/tarball/develop#egg=clint-0.3.0p"], - install_requires=[ "numpy", "scipy", "matplotlib", "pysam", - "clint==0.3.0p" + "clint" ], package_data = {'pyDNase':["data/*"]},