Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow space in IntervalList species header value #2356

Merged
merged 1 commit into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ private[ds] class IntervalListParser extends FeatureParser {
def parseHeader(line: String,
stringency: ValidationStringency): Option[SequenceRecord] = {

- val fields = line.split("[ \t]+")
+ val fields = line.split("[\t]+")

if (fields(0).startsWith("@SQ")) {
val (name, length, url, md5) = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
@HD VN:1.5 SO:coordinate
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens
@SQ SN:chr2 LN:242193529 M5:f98db672eb0993dcfdabafe2a882905c AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens
@PG ID:1 CL:picard.util.IntervalListTools INPUT=[HG38excludeNs.interval_list, genome.interval_list] OUTPUT=wgs_calling_regions.v3.interval_list SORT=true ACTION=INTERSECT PADDING=0 UNIQUE=false SCATTER_COUNT=1 INCLUDE_FILTERED=false BREAK_BANDS_AT_MULTIPLES_OF=0 SUBDIVISION_MODE=INTERVAL_SUBDIVISION INVERT=false VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json PN:IntervalListTools
chr1 10001 207666 + . intersection ACGTmer
chr1 257667 297968 + . intersection ACGTmer
chr1 347969 535988 + . intersection ACGTmer
chr1 585989 2702781 + . intersection ACGTmer
chr1 2746291 12954384 + . intersection ACGTmer
chr1 13004385 16799163 + . intersection ACGTmer
chr1 16849164 29552233 + . intersection ACGTmer
chr1 29553836 121976459 + . intersection ACGTmer
chr1 122026460 124977944 + . intersection ACGTmer
chr1 124978327 125130246 + . intersection ACGTmer
chr1 125131848 125171347 + . intersection ACGTmer
chr1 125173584 125184587 + . intersection ACGTmer
chr1 143184588 223558935 + . intersection ACGTmer
chr1 223608936 228558364 + . intersection ACGTmer
chr1 228608365 248946422 + . intersection ACGTmer
chr2 10001 16145119 + . intersection ACGTmer
chr2 16146120 32867130 + . intersection ACGTmer
chr2 32868131 32916625 + . intersection ACGTmer
chr2 32917626 89330679 + . intersection ACGTmer
chr2 89530680 89685992 + . intersection ACGTmer
chr2 89753993 90402511 + . intersection ACGTmer
chr2 91402512 92138145 + . intersection ACGTmer
chr2 92188146 94090557 + . intersection ACGTmer
chr2 94140558 94293015 + . intersection ACGTmer
chr2 94496016 97439618 + . intersection ACGTmer
chr2 97489619 238903659 + . intersection ACGTmer
chr2 238904048 242183529 + . intersection ACGTmer
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,12 @@ class FeatureDatasetSuite extends ADAMFunSuite {
})
}

// Regression test: loading an IntervalList whose header carries a
// space-containing SP (species) value must succeed and yield every
// interval record in the fixture (27 are expected).
sparkTest("allow space in IntervalList SP header value") {
val intervalListPath = testFile("wgs_calling_regions.hg38.interval_list")
val recordCount = sc.loadIntervalList(intervalListPath).rdd.count()
assert(recordCount == 27)
}

sparkTest("save NarrowPeak as GTF format") {
val inputPath = testFile("wgEncodeOpenChromDnaseGm19238Pk.trunc10.narrowPeak")
val features = sc.loadNarrowPeak(inputPath)
Expand Down