diff --git a/.gitignore b/.gitignore index 2dc7a6e..ba6e5c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ .idea/ *.iml -*.o -Release_sophia/sophia -Release_sophiaAnnotate/sophiaAnnotate +build/*.o include/strtk.hpp +include/rapidcsv.h +sophia +sophiaAnnotate +sophiaMref +testRunner +boost/ diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000..df81802 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,5 @@ +Individual contributions: + +* Umut Toprak: Original implementation for hg37 until SOPHIA 35. +* Naga Paramasivam: Extensive testing for hg38. +* Philip R. Kensche: Development after SOPHIA 35, including documentation, code refactoring and generalization for hg38. diff --git a/CodingConventions.md b/CodingConventions.md new file mode 100644 index 0000000..2c57290 --- /dev/null +++ b/CodingConventions.md @@ -0,0 +1,20 @@ +# Coding Conventions + +This is a list of some things that contributions should adhere to. +The code still has severe legacy problems with these issues. + +1. Keep constructors free of side effects. Prefer using static factory functions. +2. If there are many parameters, use a builder pattern. +3. Never deliberately use `nullptr` values. Prefer `std::optional` instead. +4. Don't use `using namespace std` +5. Use the type system to your advantage. +6. Separate parsing code. In general, learn something about how to separate concerns in programming, learn the SOLID principles, **and** apply them. +7. Use the standard library, including the Standard Template Library. Prefer searching in the C++ standard library over reinventing the wheel. +8. Use the boost library. It is already a dependency. Prefer search in boost over reinventing the wheel. +9. Always try to leave the code in a better (more readable, understandable, maintainable, safer) state than you found it. +10. C++ is hard to read, so don't make it harder than necessary. Code readability is **not optional**. 
+ * Use descriptive but concise names for variables, functions, classes, etc. + * Keep lines short. + * Prefer vertical lists (e.g. of function arguments) over horizontal lists). + * Avoid "what" and "how" comments. Prefer "why" comments. +11. If you figure out something really hard and unintuitive, add a comment instead of letting the next programmer figure it out again. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1840023 --- /dev/null +++ b/Makefile @@ -0,0 +1,182 @@ +# Compiler +CXX = x86_64-conda_cos6-linux-gnu-g++ + +INCLUDE_DIR = ./include +BUILD_DIR = ./build +SRC_DIR = ./src +TESTS_DIR = ./tests + +# Compiler flags +LIBRARY_FLAGS := -lz -lm -lrt -lboost_system -lboost_iostreams -lboost_program_options -ldl -lbacktrace -lboost_stacktrace_backtrace -DBOOST_STACKTRACE_USE_BACKTRACE +LDFLAGS := $(LDFLAGS) -flto=auto -rdynamic -no-pie +# Turned on -Wsign-conversion to get warnings for conversions between signed and unsigned types. This is a cheap workaround to implementing ChrIndex and CompressedMrefIndex. +CXXFLAGS := -I$(INCLUDE_DIR) $(CXXFLAGS) -std=c++20 -flto=auto -Wall -Wextra -Wsign-conversion -Werror -c -fmessage-length=0 -Wno-attributes -lbacktrace -lboost_stacktrace_backtrace -DBOOST_STACKTRACE_USE_BACKTRACE + +ifeq ($(static),true) + LD_BEGIN_FLAGS := -L$(boost_lib_dir) + LD_END_FLAGS := $(LDFLAGS) -static -static-libgcc -static-libstdc++ $(LIBRARY_FLAGS) +else + LD_BEGIN_FLAGS := + LD_END_FLAGS := $(LDFLAGS) $(LIBRARY_FLAGS) +endif + +ifeq ($(develop),true) + # NOTE: Generally, it is a good idea to compile with -O0 during development, because it seems + # that thus the compiler actually catches some binary dependencies during linking that + # will otherwise be missed. 
+ CXXFLAGS := $(CXXFLAGS) -O0 -ggdb3 -DDEBUG -fno-inline -fno-elide-constructors -fno-omit-frame-pointer -fno-optimize-sibling-calls + LD_END_FLAGS := $(LD_END_FLAGS) -Wl,-O0 -ggdb3 -DDEBUG -fno-inline +else + # Ignore some leftover unused variables from SvEvent::assessBreakpointClonalityStatus. + CXXFLAGS := $(CXXFLAGS) -O3 -DNDEBUG +endif + +# Source files +SOURCES = $(wildcard $(SRC_DIR)/*.cpp) + +# Object files should have .o instead of .cpp. +# Note, we put the objects for production and tests both into the build directory. +OBJECTS = $(SOURCES:$(SRC_DIR)/%.cpp=$(BUILD_DIR)/%.o) + +# Binaries +BINARIES = sophiaMref sophia sophiaAnnotate + +OBJECTS_WITH_MAIN = $(BUILD_DIR)/sophia.o $(BUILD_DIR)/sophiaMref.o $(BUILD_DIR)/sophiaAnnotate.o + +# Default rule +all: test $(BINARIES) + +# Define search paths for different file suffices +vpath %.h $(INCLUDE_DIR) +vpath %.hpp $(INCLUDE_DIR) +vpath %.cpp $(SRC_DIR) +vpath %_test.cpp $(TESTS_DIR) + +# Ensure the build/ directory exists. +$(BUILD_DIR): + mkdir -p $@ + +# Retrieve StrTK +$(INCLUDE_DIR)/strtk.hpp: + wget -c https://raw.githubusercontent.com/ArashPartow/strtk/d2b446bf1f7854e8b08f5295ec6f6852cae066a2/strtk.hpp -O $(INCLUDE_DIR)/strtk.hpp + +# Retrieve rapidcsv +$(INCLUDE_DIR)/rapidcsv.h: + wget -c https://github.com/d99kris/rapidcsv/raw/v8.80/src/rapidcsv.h -O $(INCLUDE_DIR)/rapidcsv.h + +# General compilation rule for object files that have matching .h files. +$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(INCLUDE_DIR)/%.h $(INCLUDE_DIR)/strtk.hpp $(INCLUDE_DIR)/rapidcsv.h Makefile | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# Test source files with the suffix _test.cpp +TEST_SOURCES = $(wildcard $(TESTS_DIR)/*_test.cpp) + +# ... and the corresponding object files, all with the suffix _test.o. +TEST_OBJECTS = $(TEST_SOURCES:$(TESTS_DIR)/%_test.cpp=$(BUILD_DIR)/%_test.o) + +# There are usually no .h files for test files, so we need a separate rule for test files. 
+$(BUILD_DIR)/%_test.o: $(TESTS_DIR)/%_test.cpp $(TESTS_DIR)/Fixtures.h Makefile | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# Link the testRunner. +testRunner: $(TEST_OBJECTS) $(filter-out $(OBJECTS_WITH_MAIN),$(OBJECTS)) + $(CXX) $(LD_BEGIN_FLAGS) -o testRunner $^ $(LDFLAGS) $(LIBRARY_FLAGS) -Wl,-Bdynamic -lgtest -lgtest_main -pthread + +# Rule for running the tests +test: testRunner + ./testRunner + +# Rules for sophia +$(BUILD_DIR)/sophia.o: $(SRC_DIR)/sophia.cpp Makefile | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -c $< -o $@ +sophia: $(BUILD_DIR)/global.o \ + $(BUILD_DIR)/ChrCategory.o \ + $(BUILD_DIR)/ChrInfo.o \ + $(BUILD_DIR)/ChrInfoTable.o \ + $(BUILD_DIR)/Alignment.o \ + $(BUILD_DIR)/Breakpoint.o \ + $(BUILD_DIR)/ChosenBp.o \ + $(BUILD_DIR)/ChrConverter.o \ + $(BUILD_DIR)/IndexRange.o \ + $(BUILD_DIR)/Hg37ChrConverter.o \ + $(BUILD_DIR)/GenericChrConverter.o \ + $(BUILD_DIR)/MateInfo.o \ + $(BUILD_DIR)/SamSegmentMapper.o \ + $(BUILD_DIR)/Sdust.o \ + $(BUILD_DIR)/SuppAlignment.o \ + $(BUILD_DIR)/HelperFunctions.o \ + $(BUILD_DIR)/GlobalAppConfig.o \ + $(BUILD_DIR)/sophia.o + $(CXX) $(LD_BEGIN_FLAGS) -o $@ $^ $(LD_END_FLAGS) + +# Rules for sophiaAnnotate +$(BUILD_DIR)/sophiaAnnotate.o: $(SRC_DIR)/sophiaAnnotate.cpp Makefile | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -c $< -o $@ +sophiaAnnotate: $(BUILD_DIR)/global.o \ + $(BUILD_DIR)/ChrCategory.o \ + $(BUILD_DIR)/ChrInfo.o \ + $(BUILD_DIR)/ChrInfoTable.o \ + $(BUILD_DIR)/MateInfo.o \ + $(BUILD_DIR)/Alignment.o \ + $(BUILD_DIR)/AnnotationProcessor.o \ + $(BUILD_DIR)/Breakpoint.o \ + $(BUILD_DIR)/BreakpointReduced.o \ + $(BUILD_DIR)/ChrConverter.o \ + $(BUILD_DIR)/IndexRange.o \ + $(BUILD_DIR)/Hg37ChrConverter.o \ + $(BUILD_DIR)/GenericChrConverter.o \ + $(BUILD_DIR)/DeFuzzier.o \ + $(BUILD_DIR)/GermlineMatch.o \ + $(BUILD_DIR)/MrefEntry.o \ + $(BUILD_DIR)/MrefEntryAnno.o \ + $(BUILD_DIR)/MrefMatch.o \ + $(BUILD_DIR)/SuppAlignment.o \ + $(BUILD_DIR)/SuppAlignmentAnno.o \ + $(BUILD_DIR)/Sdust.o \ + 
$(BUILD_DIR)/ChosenBp.o \ + $(BUILD_DIR)/SvEvent.o \ + $(BUILD_DIR)/HelperFunctions.o \ + $(BUILD_DIR)/GlobalAppConfig.o \ + $(BUILD_DIR)/sophiaAnnotate.o + $(CXX) $(LD_BEGIN_FLAGS) -o $@ $^ $(LD_END_FLAGS) + +# Rules for sophiaMref +$(BUILD_DIR)/sophiaMref.o: $(SRC_DIR)/sophiaMref.cpp Makefile | $(BUILD_DIR) + $(CXX) $(CXXFLAGS) -c $< -o $@ +sophiaMref: $(BUILD_DIR)/global.o \ + $(BUILD_DIR)/ChrCategory.o \ + $(BUILD_DIR)/ChrInfo.o \ + $(BUILD_DIR)/ChrInfoTable.o \ + $(BUILD_DIR)/MateInfo.o \ + $(BUILD_DIR)/Alignment.o \ + $(BUILD_DIR)/GlobalAppConfig.o \ + $(BUILD_DIR)/ChrConverter.o \ + $(BUILD_DIR)/IndexRange.o \ + $(BUILD_DIR)/Hg37ChrConverter.o \ + $(BUILD_DIR)/GenericChrConverter.o \ + $(BUILD_DIR)/HelperFunctions.o \ + $(BUILD_DIR)/SuppAlignment.o \ + $(BUILD_DIR)/SuppAlignmentAnno.o \ + $(BUILD_DIR)/Sdust.o \ + $(BUILD_DIR)/ChosenBp.o \ + $(BUILD_DIR)/MrefEntry.o \ + $(BUILD_DIR)/MrefEntryAnno.o \ + $(BUILD_DIR)/MrefMatch.o \ + $(BUILD_DIR)/MasterRefProcessor.o \ + $(BUILD_DIR)/Breakpoint.o \ + $(BUILD_DIR)/BreakpointReduced.o \ + $(BUILD_DIR)/GermlineMatch.o \ + $(BUILD_DIR)/DeFuzzier.o \ + $(BUILD_DIR)/sophiaMref.o + $(CXX) $(LD_BEGIN_FLAGS) -o $@ $^ $(LD_END_FLAGS) + +binaries: $(BINARIES) + + +# Rule for clean +.PHONY: clean clean-all +clean: + rm -f $(BUILD_DIR)/*.o $(BINARIES) + +clean-all: clean + rm -f $(INCLUDE_DIR)/strtk.hpp diff --git a/README.md b/README.md index 2e632ee..acb6dfc 100644 --- a/README.md +++ b/README.md @@ -1,54 +1,363 @@ # SOPHIA Tool for Structural Variation Calling -## Runtime Dependencies +SOPHIA is a Structural Variant(SV) detection algorithm based on the supplementary alignment(SA) concept of the aligner BWA-MEM, combined with filters based on expert-knowledge to increase specificity. -The only dependency is Boost 1.70.0 (currently). E.g. you can do +Currently, SOPHIA only is optimized for the hg37 assembly of the human genome. + +> **NOTE**: We are preparing hg38 support. See the [Changes](#changes) section for details. 
+ +It uses a large panel of normals for filtering artifacts (most often due to mapping difficulties) and common SVs in the germline. +The parameters for filtering results are hand-tuned against the clinical gold standard FISH of V(D)J rearrangements. +Results from the hand-tuned parameter set were tested against hallmark findings from disease datasets where hallmark SVs were known (CDKN2A in various TCGA datasets, EGFR in TCGA-GBM, GFI1B, MYCN and PRDM6 in ICGC-PEDBRAIN-MB etc.) + +For a detailed description of the algorithm, please refer to [Umut Toprak's dissertation](https://doi.org/10.11588/heidok.00027429), in particular chapter 2. Section 2.2.1 describes the method in more detail. + +SOPHIA is a very fast and resource-light algorithm. +It uses 2GB RAM, 2 CPU cores and runs in ~3.5 hours for 50x coverage WGS, and can detect variants with a single pass of the input BAM files. No local assembly is done. + +Sophia is included in the [SophiaWorkflow](https://github.com/DKFZ-ODCF/SophiaWorkflow) that uses the [Roddy Workflow Management System](https://github.com/TheRoddyWMS/Roddy). + + +### Citing + +You can cite the original version (35) of SOPHIA as follows: + + Integrative Analysis of Omics Datasets. + Doctoral dissertation, German Cancer Research Center (DKFZ), Heidelberg. + Umut Toprak (2019). + DOI 10.11588/heidok.00027429 + +The code for the original version 35 can be found in the old [SOPHIA repository](https://bitbucket.org/utoprak/sophia/src/master/) on Bitbucket. + +The code here is a fork of that repository. If you use the newer versions here, please also include a reference to this repository in your citation -- in particular if you use SOPHIA for any other reference genome than the [1000 genomes reference](ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz +). The original code was cleaned up and tested for the `hg38` assembly. See [Contributors](CONTRIBUTORS.md). 
+ +### Tools + +For instructions and short explanations of on commandline parameters, invoke each tool with `--help`. + +#### `sophia` Tool + +The main tool for SV calling. +`sophia` reads in a position-sorted BAM file and outputs a list of SVs breakpoints in a tab-separated BED format. + +> NOTE: We have only tested BAM created by BWA-MEM. + +A call to `sophia` may look like this: ```bash -conda create -n sophia boost=1.70.0 +samtools view -F 0x600 -f 0x001 /yourPositionSorted.bam \ + | sophia --assemblyname hg38 \ + --medianisizes 323.0 \ + --stdisizepercentage 21.0 \ + --properpairpercentage 94.32 \ + --defaultreadlength 101 \ + --clipsize 10 \ + --basequality 23 \ + --basequalitylow 12 \ + --lowqualclipsize 5 \ + --isizesigma 5 \ + --bpsupport 3 \ + | gzip --best > out.breakpoints.mref.gz ``` -## Building +A run on a typical Illumina X10 sample with 30x coverage takes about 5 GB of memory, 2 cores. +In extreme cases (like with chromothripsis) the runtime can jump to 120 hours, but usually is much shorter. -### Build-time Dependencies +| Parameter | Description | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `--assemblyname` | The name of the assembly. See [Reference Genomes / Assemblies](#reference-genomes--assemblies) for details. | +| `--mergedisizes` | A file with just the median insert size in line 1 and just the standard deviation in line 3. See [Insert Size Distribution](#insert-size-distribution). | +| `--medianisize` | The median insert size of the library. See [Insert Size Distribution](#insert-size-distribution). | +| `--stdisizepercentage` | The standard deviation of the insert size of the library **in percent**. See [Insert Size Distribution](#insert-size-distribution). | +| `--isizesigma` | The sigma value for the insert size distribution. 
See [Insert Size Distribution](#insert-size-distribution). | +| `--properpairpercentage` | Proper pair ratio as a percentage. | +| `--defaultreadlength` | Default read length for the sequencing technology used (e.g. 101, 151). | +| `--clipsize` | Minimum length of soft/hard clips in the alignment. | +| `--basequality` | The minimum base quality for a base to be considered high quality. | +| `--basequalitylow` | If 5 consecutive bases in a split read overhang have lower quality than this strict threshold, it will be low-quality clipped. | +| `--lowqualclipsize` | Maximum length of a low quality split read overhang for discarding. | +| `--bpsupport` | Minimum number of reads supporting a discordant contig. | -* g++ >= 7 -* Boost 1.70.0 +##### Insert Size Distribution -### Dynamic Build +The median insert size and its standard deviation are used for calculating the maximum insert size considered by the algorithm. + +For `--mergedisizes`, the actual value is calculated by $`min(4000.0, mean + isizeSigmaLevel * sd)`$. + +For `--medianisize`/`--stdisizepercentage`, the actual value is calculated by $`min(4000.0, median + isizeSigmaLevel * median * isizeStdPercentage * 0.01)`$. + +If `--mergedisizes` is provided, `--medianisize` and `--stdisizepercentage` are ignored. + +If none of these options are provided, a maximum insert size of 2000 is assumed instead. + +##### Outputs + +The output is a BED file, which means the start and end positions are 0-based, and left-inclusive, right-exclusive. 
The 8 columns are: -With Conda you can do +| Column | Description | Format | +|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------| +| 1 | chromosome identifier | string w/o \{:space:, :tab:, :newline, :carriage_return:\} | +| 2 | 0-based start (inclusive) | `\d+` | +| 3 | 0-based end (exclusive). This is just start position + 1. | `\d+` | +| 4 | Various counts of different types of coverage, specifically (1) pairedBreaksSoft, (2) pairedBreaksHard, (3) mateSupport, (4) unpairedBreaksSoft, (5) unpairedBreaksHard, (6) breaksShortIndel, (7) normalSpans, (8) lowQualSpansSoft, (9) lowQualSpansHard, (10) lowQualBreaksSoft, (11) lowQualBreaksHard, (12) repetitiveOverhangBreaks. | `\d+(,\d+){11}` | +| 5 | Left and right coverage | `\d+,\d+` | +| 6 | suppAlignmentsDoubleSupport(primary,secondary,mate) | `#`: missing BP information; `.`: no supplementary alignments; `$support(;$support)+` | +| 7 | suppAlignments(primary,0,mate) | ibd. | +| 8 | Sequences of significant overhangs, if present | `#`: missing BP information; `.`: if the overhang is empty; `${overhangSpec}+` the overhang information (see below) | + +Lines starting with `#` are comments and can be ignored. + +For columns 6 and 7 the `$support` has a complex format. See `Breakpoint::collapseSuppRange` and `SuppAlignment::print` for details. 
+ +For column 8, the overhang information is of the following format + +* Each overhang specification has the form `>$id:$left$sequence$right\(\d+\);` where + * `$id` consisting of the BP index an underscore `_` and a counter + * `$left` is either empty or a pipe symbol `|` + * `$sequence` is the sequence of the overhang + * `$right` is either a pipe symbol `|` or empty (either `$left` or `$right` are a pipe symbol) + * `\(\d+\)` is the number of times the overhang was observed + > Developer note: See `Breakpoints::finalizeOverhangs` and `Alignment::printOverhang` for details. + +* Columns 6, 7, and 8 are set to `#` if there is missing breakpoint information. + +> Developer Note: See `Breakpoint::printBreakpointReport` for details. + +#### `sophiaAnnotate` Tool + +`sophiaAnnotate` is used for annotating SVs with gene information. +It reads in an output of `sophiaMref` and annotates the SVs in the input file with gene information. ```bash -conda create -n sophia gxx_linux-64=8 boost=1.70.0 +sophiaAnnotate \ + --tumorresults $tumorSampleFile \ + --controlresults $controlSampleFile \ + --mref $mRef \ + --PIDS_IN_MREF $pidsInMref \ + --bpfreq $bpFreq \ + --artifactlofreq $artifactLoFreq \ + --artifacthifreq $artifactHiFreq \ + --clonalitystrictlofreq $clonalityStrictLoFreq \ + --clonalitylofreq $clonalityLoFreq \ + --clonalityhifreq $clonalityHiFreq \ + --germlineoffset $germlineFuzziness \ + --defaultreadlengthtumor $tumordefaultreadlength \ + --defaultreadlengthcontrol $controldefaulreadlength \ + --germlinedblimit $germlinedblimit \ + > $outputFile ``` -to create an environment to build the `sophia` and `sophiaAnnotate` binaries. -To build you need to do + +#### `sophiaMref` Tool + +The `sophiaMref` tool is used to create a reference file that can be used by `sophiaAnnotate` for annotating SVs with gene information. +Usually, you will only need to run `sophiaMref`, if you adapt SOPHIA to a new genome assembly. 
+ +`sophiaMref` processes a list of gzipped BED files that were generated with the `sophia` tool. +From these it generates a reference that can be used by `sophiaAnnotate` for annotating structural variants with gene information. + +The file produced by `sophiaMref` is a BED file suffixed with with the following columns (see `MrefEntry::printBpInfo` for details): + +| Column | Description | Format | +|--------|----------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------| +| 1 | chromosome identifier | string w/o \{:space:, :tab:, :newline, :carriage_return:\} | +| 2 | 0-based start (inclusive) | `\d+` | +| 3 | 0-based end (exclusive). This is just start position + 1. | `\d+` | +| 4 | number of fileIndices given in column 10 | `\d+` | +| 5 | number of fileIndicesWithArtifactsRatio | `\d+` | +| 6 | fileIndices.size() / NUM_PIDS | `\d\.\d+` | +| 7 | fileIndicesWithArtifactsRatio / NUM_PIDS | `\d\.\d+` | +| 8 | average artifacts ratio | `NA` if there are no artifacts ratios; otherwise `\d\.\d+` | +| 9 | suppAlignments | `.` if there are no supplementary alignments; otherwise the same format as columns 6 and 7 of the `sophia` output | +| 10 | fileIndices | Comma-separated list of file indices (`\d+`). Maybe empty string. | + +The "fileIndices" are the 0-based index into the list of gzipped control-BED input files given to `sophiaMref`. + +Currently, the artifacts ratios are tracked, but not printed. +Files get an artifacts ratio only if a number of conditions are met (undocumented; see `MrefEntry::addEntry` for details). + +Note that `sophiaMref` uses a lot of memory (e.g. 400 GB is a safe choice for human assembly), but usually will be only used for generating the reference files for a new genome assembly (which, currently, are hardcoded anyway). + +`sophiaMref` expects that the input BED files match the filename pattern `.*/$realPidName.{1}$version.+`. 
+The `$version` is the value provided by the `--version` parameter that is only used to delimit the right end of the PID name. +For instance `/path/to/somePid1_35.1_bps.tsv.gz` would be a valid filename for the version `35.1` and the extracted PID will be `somePid`. + + +## Dependencies + +If you built SOPHIA with dynamic libraries, then some libraries are runtime requirements, namely: + + * Boost 1.82.0 + * libbacktrace 20220708 + * gtest 1.14.0 + * gmock 1.14.0 + * rapidcsv 8.80 + * strtk 0.6.0 + +These may be runtime dependencies, if you choose a dynamic build. +The static build creates self-contained binaries that do not have any runtime dependencies. + +You can install all dependencies for the dynamic build with [Conda](https://docs.conda.io/): ```bash -source activate sophia -cd Release_sophia -build-sophia.sh +conda create -n sophia gxx_linux-64=13 boost=1.82.0 gtest=1.14.0 backtrace=20220708 +``` + +## Building + +> Note that `make` will download [StrTk](https://github.com/ArashPartow/strtk) for string processing and [rapidcsv](https://github.com/d99kris/rapidcsv) for TSV file parsing. If you want to delete an already downloaded file and download it again, run `make clean-all` before the compilation. See the `Makefile` for details. + +### Dynamic Build + +For compilation you additionally need the g++ compiler. So extend the Conda environment a bit: -cd ../Release_sophiaAnnotate -build-sophiaAnnotate.sh +```bash +conda create -n sophia gxx_linux-64=13 boost=1.82.0 gtest=1.14.0 backtrace=20220708 ``` -Note that the build-scripts are for when you manage your dependencies with Conda. +Then you can do + +```bash +source activate sophia +make -j 4 all +``` +This will also run all unit tests. +The binaries will be located in the top-level directory. 
### Static Build -If you want to compile statically you need to install glibc and boost static libraries (not possible with Conda, in the moment) and do +Static building produces 100% self-contained binaries that you can copy to any compatible OS independent of which libraries are installed there. + +For static building all dependencies need to be available as static libraries (`.a` files). +Specifically, libz, libm, glibc, and libstdc++ need to be available as static libraries. +Fortunately, static versions of these libraries are installed by Conda. +You can use the same build environment as for the dynamic build. + +The only complications are the gtest library and the Boost libraries, neither of which is available as a static build from Conda. + +The gtest library is only used for testing, and the `testRunner` binary is never built statically. + +To install boost statically (here without installing it system-wide) you need to do the following: + +```bash +# Download boost +wget https://boostorg.jfrog.io/artifactory/main/release/1.82.0/source/boost_1_82_0.tar.bz2 +tar -xjf boost_1_82_0.tar.bz2 + +# Build b2 +cd boost_1_82_0 +./bootstrap.sh --prefix=build/ + +# Build boost +./b2 --prefix=build/ link=static runtime-link=static cxxflags="-std=c++11 -fPIC" +boost_lib_dir=$PWD/stage/lib +``` + +After that you can do: ```bash source activate sophia +cd "$repoRoot" +make -j 4 static=true boost_lib_dir=$boost_lib_dir all +``` + +### Development build + +> NOTE: Please adhere to the [Coding Conventions](CodingConventions.md) when developing in SOPHIA. -cd Release_sophia -STATIC=true build-sophia.sh +The development build produces non-optimized binaries (`-O0`) with debug symbols: -cd ../Release_sophiaAnnotate -STATIC=true build-sophiaAnnotate.sh +```bash +source activate sophia +cd "$repoRoot" +make -j 4 static=true boost_lib_dir=$boost_lib_dir develop=true all +``` + +### Running the tests + +Currently, there are only very few tests that were added to the legacy code. 
To run them do + +```bash +make test develop=true boost_lib_dir=$boost_lib_dir ``` + +This will create a `./testRunner` binary in the root directory and execute it. +Note that the `testRunner` is never linked statically. +It uses the `libgtest_main.so` library that supports a number of command line options. +See `testRunner --help` for details. + + +## Reference Genomes / Assemblies + +### "classic_hg37" + +The 35 version of SOPHIA was extensively tested on the [1000 genomes reference](ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz) with a [phix](https://www.ncbi.nlm.nih.gov/nuccore/NC_001422.1) sequence added. + +The original code logic for the "classic_hg37" is still available with the option `--assemblyname classic_hg37`. +"classic_hg37" is also the default if the `--assemblyname` option is omitted. + +Please have a look at the [resources/hg37.tsv](resources/hg37.tsv) file for a compilation of chromosome names and parameters used for that assembly. +Note however, that this specific file will not be used if you use the `--assemblyname classic_hg37` option, because the "classic_hg37" has all these information hardcoded. + +### "hg38" and others + +Since version 35.1.0 SOPHIA supports other assemblies than "classic_hg37". +Remember that because SOPHIA needs to map chromosome names, the exact names of the chromosomes are part of the notion of reference/assembly. +With any other `$assemblyName` value for the `--assemblyname` parameter than "classic_hg37", the value will be composed to a filename `resources/$assemblyName.tsv`. + +> **WARNING**: This is a development feature only. Currently, the `resources/` directory has to reside in the execution directory! 
+ +The `$assemblyName.tsv` file must be a tab-separated (TSV) file with 4 columns and a header line with the following fields (names must match exactly): + * `chromosome`: The chromosome name, which must **not** contain the following characters, because these characters are used as separators: + * `\t`, `\n`, `\r`, ` ` (whitespace) + * `,` (comma) + * `;` (semicolon) + * `|` (pipe) + * `(`, `)` (parentheses) + + > **NOTE**: The `:` (colon) symbol is allowed. + + * `size`: The length of the chromosome FASTA sequence in base pairs. + * `compressedMref`: Whether the chromosome should be considered part of the compressed master-ref set of chromosomes. For instance, for "classic_hg37" these are all chromosomes including autosomes, gonosomes, decoys, unassigned (unplaced/random), EBV, but excluding the mitochondrion (extrachromosomal) and phix (technical). The string is converted to lower-case and matched against the following strings: + * `true`, `yes`, `t`, `y`, or `1`: The chromosome is part of the compressed master-ref set of chromosomes. + * `false`, `no`, `f`, `n`, or `0`: The chromosome is not part of the compressed master-ref set of chromosomes. + * `category`: The following categories are allowed. See `GenericChrConverter::Category` for details. Categories are converted to lower-case and matched against the following strings: + * `autosomal`: e.g. chr1, chr2, ... + * `gonosomal`: e.g. chrX, chrY + * `virus`: e.g. chrEBV + * `decoy`: e.g. all chromosomes with a _decoy suffix or hs37d5 + * `unassigned`: sequences that belong to the normal nuclear genome, but could not be positioned exactly, such as "unplaced", "random", "unlocalized" chromosomes in human assemblies, e.g. chrUn_gl000220 + * `extrachromosomal`: e.g. chrM + * `technical`: Used for technical reasons, e.g. for calibrating the sequence, such as chrPhiX, lambda + * `alt`: ALT contigs + * `hla`: HLA contigs + + > **NOTE**: The fact that these "categories" exist does not mean that they are used in the code. 
+If you want to know more, the only documentation we can currently offer for SOPHIA is the source code itself. In the future, these categories may also be removed or combined. + + + +## Changes + +* 35.1.0 (upcoming) + * Minor: Generic assembly support + * Added `--assemblyname` option, defaulting to SOPHIA 35.0.0's chromosome converter implementation "classic_hg37" when omitted. + > WARNING: hg38 support was not extensively tested. In particular, parameters that are still hardcoded may have to be adjusted. Furthermore, the runtime will be longer than for classic_hg37, and the classic_hg37 runtime has also increased slightly (due to class polymorphism). + * Minor: Build system + * Use `make` as build system + * `Release_*` directories with old build-scripts removed + * Allow static building with `make static=true boost_lib_dir=/path/to/boost/lib` + * Allow development build with `make develop=true` + * Build `sophiaMref` (no build documentation before) + * Build `testRunner` for running unit tests + * Patch: A `README.md` file that is worth its name and contains first documentation about the usage of the SOPHIA binaries and input and output files. + * Patch: Added unit tests. + * Patch: Code readability improvements, documentation, `.editorconfig` file, and `clang-format` configuration + * Patch: Major refactorings for code clarity (and understanding of the convoluted code) and to improve usage of C++ type system for compiler-based checks of changes. + * Patch: For `sophiaAnnotate` the default value for clonalitylofreq was advertised in the usage information as 10, but the actual value was 5. Now, the correct value (5) is advertised as default. 
+ +* 35 (9e3b6ed) + * Forked from [bitbucket](https://bitbucket.org/compbio_charite/sophia/src/master/) \ No newline at end of file diff --git a/Release_sophia/build-sophia.sh b/Release_sophia/build-sophia.sh deleted file mode 100755 index 286238e..0000000 --- a/Release_sophia/build-sophia.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -uex -trap 'echo "Compilation failed with an error" >> /dev/stderr' ERR - -CONDA_PREFIX="${CONDA_PREFIX:?No CONDA_PREFIX -- no active Conda environment}" - -install_strtk() { - wget -c https://github.com/ArashPartow/strtk/raw/master/strtk.hpp -O ../include/strtk.hpp -} - -install_strtk - -CPP=x86_64-conda_cos6-linux-gnu-g++ -INCLUDES="-I../include -I$CONDA_PREFIX/include" - -CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++1z $INCLUDES -O3 -Wall -Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes" - -if [[ "${STATIC:-false}" == "true" ]]; then - CPP_OPTS="-static -static-libgcc -static-libstdc++ $CPP_OPTS" -fi - -$CPP $CPP_OPTS -o "Alignment.o" "../src/Alignment.cpp" -$CPP $CPP_OPTS -o "Breakpoint.o" "../src/Breakpoint.cpp" -$CPP $CPP_OPTS -o "ChosenBp.o" "../src/ChosenBp.cpp" -$CPP $CPP_OPTS -o "ChrConverter.o" "../src/ChrConverter.cpp" -$CPP $CPP_OPTS -o "SamSegmentMapper.o" "../src/SamSegmentMapper.cpp" -$CPP $CPP_OPTS -o "Sdust.o" "../src/Sdust.cpp" -$CPP $CPP_OPTS -o "SuppAlignment.o" "../src/SuppAlignment.cpp" -$CPP $CPP_OPTS -o "HelperFunctions.o" "../src/HelperFunctions.cpp" -$CPP $CPP_OPTS -o "sophia.o" "../sophia.cpp" - -$CPP -L$CONDA_PREFIX/lib -flto -o "sophia" Alignment.o Breakpoint.o ChosenBp.o ChrConverter.o SamSegmentMapper.o Sdust.o SuppAlignment.o HelperFunctions.o sophia.o -lboost_program_options diff --git a/Release_sophia/makefile b/Release_sophia/makefile deleted file mode 100644 index 34790dc..0000000 --- a/Release_sophia/makefile +++ /dev/null @@ -1,58 +0,0 @@ -################################################################################ -# Automatically-generated file. 
Do not edit! -################################################################################ - --include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include src/subdir.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -endif - --include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: sophia - -# Tool invocations -sophia: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - g++ -static -static-libgcc -static-libstdc++ -flto -o sophia $(OBJS) $(USER_OBJS) $(LIBS) - @echo 'Finished building target: $@' - @echo ' ' - -# Other Targets -clean: - -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CPP_DEPS)$(C_DEPS) sophia - -@echo ' ' - -.PHONY: all clean dependents - --include ../makefile.targets diff --git a/Release_sophiaAnnotate/build-sophiaAnnotate.sh b/Release_sophiaAnnotate/build-sophiaAnnotate.sh deleted file mode 100755 index 205fbb4..0000000 --- a/Release_sophiaAnnotate/build-sophiaAnnotate.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -set -uex -trap 'echo "Compilation failed with an error" >> /dev/stderr' ERR - -CONDA_PREFIX="${CONDA_PREFIX:?No CONDA_PREFIX -- no active Conda environment}" - -install_strtk() { - wget -c https://github.com/ArashPartow/strtk/raw/master/strtk.hpp -O ../include/strtk.hpp -} - -install_strtk - -CPP=x86_64-conda_cos6-linux-gnu-g++ -INCLUDES="-I../include -I$CONDA_PREFIX/include" -CPP_OPTS="-L$CONDA_PREFIX/lib -std=c++1z $INCLUDES -O3 -Wall 
-Wextra -static -static-libgcc -static-libstdc++ -flto -c -fmessage-length=0 -Wno-attributes" - -if [[ "${STATIC:-false}" == "true" ]]; then - CPP_OPTS="-static -static-libgcc -static-libstdc++ $CPP_OPTS" -fi - -$CPP $CPP_OPTS -o "AnnotationProcessor.o" "../src/AnnotationProcessor.cpp" -$CPP $CPP_OPTS -o "Breakpoint.o" "../src/Breakpoint.cpp" -$CPP $CPP_OPTS -o "BreakpointReduced.o" "../src/BreakpointReduced.cpp" -$CPP $CPP_OPTS -o "ChrConverter.o" "../src/ChrConverter.cpp" -$CPP $CPP_OPTS -o "DeFuzzier.o" "../src/DeFuzzier.cpp" -$CPP $CPP_OPTS -o "GermlineMatch.o" "../src/GermlineMatch.cpp" -$CPP $CPP_OPTS -o "MrefEntry.o" "../src/MrefEntry.cpp" -$CPP $CPP_OPTS -o "MrefEntryAnno.o" "../src/MrefEntryAnno.cpp" -$CPP $CPP_OPTS -o "MrefMatch.o" "../src/MrefMatch.cpp" -$CPP $CPP_OPTS -o "SuppAlignment.o" "../src/SuppAlignment.cpp" -$CPP $CPP_OPTS -o "SuppAlignmentAnno.o" "../src/SuppAlignmentAnno.cpp" -$CPP $CPP_OPTS -o "SvEvent.o" "../src/SvEvent.cpp" -$CPP $CPP_OPTS -o "HelperFunctions.o" "../src/HelperFunctions.cpp" -$CPP $CPP_OPTS -o "sophiaAnnotate.o" "../sophiaAnnotate.cpp" - -$CPP -L$CONDA_PREFIX/lib -flto -o "sophiaAnnotate" AnnotationProcessor.o Breakpoint.o BreakpointReduced.o ChrConverter.o DeFuzzier.o GermlineMatch.o MrefEntry.o MrefEntryAnno.o MrefMatch.o SuppAlignment.o SuppAlignmentAnno.o SvEvent.o HelperFunctions.o sophiaAnnotate.o -lz -lboost_system -lboost_iostreams diff --git a/Release_sophiaAnnotate/makefile b/Release_sophiaAnnotate/makefile deleted file mode 100644 index b7f07aa..0000000 --- a/Release_sophiaAnnotate/makefile +++ /dev/null @@ -1,58 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! 
-################################################################################ - --include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include src/subdir.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -endif - --include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: sophiaAnnotate - -# Tool invocations -sophiaAnnotate: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - g++ -static -static-libgcc -static-libstdc++ -flto -o "sophiaAnnotate" $(OBJS) $(USER_OBJS) $(LIBS) - @echo 'Finished building target: $@' - @echo ' ' - -# Other Targets -clean: - -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CPP_DEPS)$(C_DEPS) sophiaAnnotate - -@echo ' ' - -.PHONY: all clean dependents - --include ../makefile.targets diff --git a/hs37d5_Phix.sizes b/hs37d5_Phix.sizes new file mode 100644 index 0000000..4614706 --- /dev/null +++ b/hs37d5_Phix.sizes @@ -0,0 +1,87 @@ +1 249250621 +2 243199373 +3 198022430 +4 191154276 +5 180915260 +6 171115067 +7 159138663 +8 146364022 +9 141213431 +10 135534747 +11 135006516 +12 133851895 +13 115169878 +14 107349540 +15 102531392 +16 90354753 +17 81195210 +18 78077248 +19 59128983 +20 63025520 +21 48129895 +22 51304566 +X 155270560 +Y 59373566 +MT 16569 +GL000207.1 4262 +GL000226.1 15008 +GL000229.1 19913 +GL000231.1 27386 +GL000210.1 27682 +GL000239.1 33824 +GL000235.1 34474 +GL000201.1 36148 +GL000247.1 36422 +GL000245.1 36651 +GL000197.1 37175 
+GL000203.1 37498 +GL000246.1 38154 +GL000249.1 38502 +GL000196.1 38914 +GL000248.1 39786 +GL000244.1 39929 +GL000238.1 39939 +GL000202.1 40103 +GL000234.1 40531 +GL000232.1 40652 +GL000206.1 41001 +GL000240.1 41933 +GL000236.1 41934 +GL000241.1 42152 +GL000243.1 43341 +GL000242.1 43523 +GL000230.1 43691 +GL000237.1 45867 +GL000233.1 45941 +GL000204.1 81310 +GL000198.1 90085 +GL000208.1 92689 +GL000191.1 106433 +GL000227.1 128374 +GL000228.1 129120 +GL000214.1 137718 +GL000221.1 155397 +GL000209.1 159169 +GL000218.1 161147 +GL000220.1 161802 +GL000213.1 164239 +GL000211.1 166566 +GL000199.1 169874 +GL000217.1 172149 +GL000216.1 172294 +GL000215.1 172545 +GL000205.1 174588 +GL000219.1 179198 +GL000224.1 179693 +GL000223.1 180455 +GL000195.1 182896 +GL000212.1 186858 +GL000222.1 186861 +GL000200.1 187035 +GL000193.1 189789 +GL000194.1 191469 +GL000225.1 211173 +GL000192.1 547496 +NC_007605 171823 +hs37d5 35477943 +phiX174 5386 diff --git a/include/Alignment.h b/include/Alignment.h index efe5459..f4e45b2 100644 --- a/include/Alignment.h +++ b/include/Alignment.h @@ -28,6 +28,7 @@ #include "CigarChunk.h" #include "CoverageAtBase.h" #include "SuppAlignment.h" +#include "global.h" #include #include #include @@ -37,139 +38,182 @@ namespace sophia { -using namespace std; - -class Alignment { - - public: - Alignment(); - void continueConstruction(); - static int LOWQUALCLIPTHRESHOLD, BASEQUALITYTHRESHOLD, - BASEQUALITYTHRESHOLDLOW, CLIPPEDNUCLEOTIDECOUNTTHRESHOLD, - INDELNUCLEOTIDECOUNTTHRESHOLD; - static double ISIZEMAX; - int getStartPos() const { return startPos; } - int getEndPos() const { return endPos; } - int getReadType() const { return readType; } - const vector &getReadBreakpoints() const { return readBreakpoints; } - bool isValidLine() const { return validLine; } - const string &getSamLine() const { return samLine; } - const vector &getSamChunkPositions() const { - return samChunkPositions; - } - bool assessOutlierMateDistance(); - int getMateChrIndex() const { 
return mateChrIndex; } - int getMatePos() const { return matePos; } - const vector &getReadBreakpointTypes() const { - return readBreakpointTypes; - } - void setChosenBp(int chosenBpLoc, int alignmentIndex); - bool isOverhangEncounteredM() const { return chosenBp->bpEncounteredM; } - int getOverhangLength() const { return chosenBp->overhangLength; } - int getOverhangStartIndex() const { return chosenBp->overhangStartIndex; } - vector generateSuppAlignments(int bpChrIndex, int bpPos); - const vector &getSupplementaryAlignments() const { - return chosenBp->supplementaryAlignments; - } - int getChrIndex() const { return chrIndex; } - const vector &getReadBreakpointsSizes() const { - return readBreakpointSizes; - } - bool isLowMapq() const { return lowMapq; } - bool isNullMapq() const { return nullMapq; } - bool isSupplementary() const { return supplementary; } - void addChildNode(int indexIn) { chosenBp->addChildNode(indexIn); } - void - addSupplementaryAlignments(const vector &suppAlignments) { - chosenBp->addSupplementaryAlignments(suppAlignments); - } - const vector &getChildrenNodes() const { - return chosenBp->childrenNodes; - } - int getOriginIndex() const { return chosenBp->selfNodeIndex; } - string printOverhang() const; - double overhangComplexityMaskRatio() const; - - bool isInvertedMate() const { return invertedMate; } - bool isDistantMate() const { return distantMate == 1; } - - private: - void mappingQualityCheck(); - bool isEventCandidate() const; - void createCigarChunks(); - void assignBreakpointsAndOverhangs(); - void qualityCheckCascade(); - bool clipCountCheck(); - bool uniqueSuppCheck(); - double overhangMedianQuality(const CigarChunk &cigarChunk) const; - template - void fullMedianQuality(Iterator qualBegin, Iterator qualEnd, - vector &overhangPerBaseQuality) const; - template - double getMedian(Iterator begin, Iterator end) const; - void assessReadType(); - bool lowMapq; - bool nullMapq; - int distantMate; - unique_ptr chosenBp; - int chrIndex; - 
int readType; - int startPos, endPos; - int mateChrIndex, matePos; - string samLine; - bool validLine; - vector samChunkPositions; - string::const_iterator saCbegin, saCend; - bool hasSa; - bool supplementary; - bool fwdStrand; - bool invertedMate; - bool qualChecked; - vector cigarChunks; - vector readBreakpoints; - vector readBreakpointTypes; - vector readBreakpointSizes; - vector readBreakpointComplexityMaskRatios; - deque readBreakpointsEncounteredM; - vector readOverhangCoords; -}; - -template -void -Alignment::fullMedianQuality(Iterator qualBegin, Iterator qualEnd, - vector &overhangPerBaseQuality) const { - overhangPerBaseQuality.reserve(distance(qualBegin, qualEnd)); - auto consecutiveLowQuals = 0; - for (auto cit = qualBegin; cit != qualEnd; ++cit) { - if (*cit < BASEQUALITYTHRESHOLDLOW) { // 33 + phred 11 - if (consecutiveLowQuals == 5) { - overhangPerBaseQuality.clear(); - return; - } - ++consecutiveLowQuals; - } else { - consecutiveLowQuals = 0; + class Alignment { + + public: + Alignment(); + + void continueConstruction(); + + static ChrSize LOW_QUAL_CLIP_THRESHOLD; + + static int + BASE_QUALITY_THRESHOLD, + BASE_QUALITY_THRESHOLD_LOW; + + static ChrSize CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD, + INDEL_NUCLEOTIDE_COUNT_THRESHOLD; + + static double ISIZEMAX; + + ChrSize getStartPos() const { return startPos; } + + ChrSize getEndPos() const { return endPos; } + + int getReadType() const { return readType; } + + const std::vector &getReadBreakpoints() const { return readBreakpoints; } + + bool isValidLine() const { return validLine; } + + const std::string &getSamLine() const { return samLine; } + + const std::vector &getSamChunkPositions() const { + return samTabPositions; + } + + bool assessOutlierMateDistance(); + + ChrIndex getMateChrIndex() const { return mateChrIndex; } + + ChrSize getMatePos() const { return matePos; } + + const std::vector &getReadBreakpointTypes() const { + return readBreakpointTypes; + } + + void setChosenBp(ChrSize chosenBpLoc, int 
alignmentIndex); + + bool isOverhangEncounteredM() const { return chosenBp->bpEncounteredM; } + + ChrSize getOverhangLength() const { return ChrSize(chosenBp->overhangLength); } + + ChrSize getOverhangStartIndex() const { return ChrSize(chosenBp->overhangStartIndex); } + + std::vector generateSuppAlignments(ChrIndex bpChrIndex, int bpPos); + + const std::vector &getSupplementaryAlignments() const { + return chosenBp->supplementaryAlignments; + } + + ChrIndex getChrIndex() const { return chrIndex; } + + /** This returns a signed integer, because break-point sizes can be negative. **/ + const std::vector &getReadBreakpointsSizes() const { + return readBreakpointSizes; + } + + /** true if mapq < 13 */ + bool isLowMapq() const { return lowMapq; } + + /** mapq 0 is treated as a special case, where number of SAs and + * base qualities will be the sole determinants of read quality */ + bool isNullMapq() const { return nullMapq; } + + bool isSupplementary() const { return supplementary; } + + void addChildNode(int indexIn) { chosenBp->addChildNode(indexIn); } + + void addSupplementaryAlignments(const std::vector &suppAlignments) { + chosenBp->addSupplementaryAlignments(suppAlignments); + } + + const std::vector &getChildrenNodes() const { + return chosenBp->childrenNodes; } - overhangPerBaseQuality.push_back(*cit); - } -} - -// Median Code taken from http://rosettacode.org/wiki/Averages/Median#C.2B.2B -template -double -Alignment::getMedian(Iterator begin, Iterator end) const { - // this is middle for odd-length, and "upper-middle" for even length - Iterator middle = begin + (end - begin) / 2; - // This function runs in O(n) on average, according to the standard - nth_element(begin, middle, end); - if ((end - begin) % 2 != 0) { // odd length - return *middle; - } else { // even length - // the "lower middle" is the max of the lower half - Iterator lower_middle = max_element(begin, middle); - return (*middle + *lower_middle) / 2.0; - } -} + + int getOriginIndex() const { 
return chosenBp->selfNodeIndex; } + + std::string printOverhang() const; + + double overhangComplexityMaskRatio() const; + + bool isInvertedMate() const { return invertedMate; } + + bool isDistantMate() const { return distantMate == 1; } + + + private: + + void mappingQualityCheck(); + + /** The `Alignment::isEventCandidate` is true, if the last CIGAR code indicates a match, + * or if the CIGAR indicates a soft-clip, hard-clip, insertion, or deletion. + */ + bool isEventCandidate() const; + + void createCigarChunks(); + + void assignBreakpointsAndOverhangs(); + + void qualityCheckCascade(); + + bool clipCountCheck(); + + bool uniqueSuppCheck(); + + double overhangMedianQuality(const CigarChunk &cigarChunk) const; + + template + void fullMedianQuality(Iterator qualBegin, Iterator qualEnd, + std::vector &overhangPerBaseQuality) const; + + template + double getMedian(Iterator begin, Iterator end) const; + + void assessReadType(); + + bool lowMapq; + + bool nullMapq; + + int distantMate; + + std::unique_ptr chosenBp; + + ChrIndex chrIndex; + + int readType; + + ChrSize startPos, endPos; + + ChrIndex mateChrIndex; + + ChrSize matePos; + + std::string samLine; + + bool validLine; + + std::vector samTabPositions; + + std::string::const_iterator saCbegin, saCend; + + bool hasSa; + + bool supplementary; + + bool fwdStrand; + + bool invertedMate; + + bool qualChecked; + + std::vector cigarChunks; + + std::vector readBreakpoints; + + std::vector readBreakpointTypes; + + std::vector readBreakpointSizes; + + std::vector readBreakpointComplexityMaskRatios; + + std::deque readBreakpointsEncounteredM; + + std::vector readOverhangCoords; + + }; } /* namespace sophia */ diff --git a/include/AnnotationProcessor.h b/include/AnnotationProcessor.h index 4b462af..e9ed5d2 100644 --- a/include/AnnotationProcessor.h +++ b/include/AnnotationProcessor.h @@ -24,7 +24,7 @@ #ifndef ANNOTATIONPROCESSOR_H_ #define ANNOTATIONPROCESSOR_H_ -#include "ChrConverter.h" +#include "global.h" #include 
"GermlineMatch.h" #include "MrefMatch.h" #include "SuppAlignmentAnno.h" @@ -37,87 +37,109 @@ #include #include #include -// -// struct VectorHash { -// size_t operator()(const vector& v) const { -// hash hasher; -// size_t seed = 0; -// for (int i : v) { -// seed ^= hasher(i) + 0x9e3779b9 + (seed << 6) + (seed >> -// 2); -// } -// return seed; -// } -//}; namespace sophia { -using namespace std; - -class AnnotationProcessor { - public: - static bool ABRIDGEDOUTPUT; - AnnotationProcessor(const string &tumorResultsIn, - vector> &mref, - int defaultReadLengthTumorIn, bool controlCheckModeIn, - int germlineDbLimit); - AnnotationProcessor(const string &tumorResultsIn, - vector> &mref, - const string &controlResultsIn, - int defaultReadLengthTumorIn, - int defaultReadLengthControlIn, int germlineDbLimit, - int lowQualControlIn, bool pathogenInControlIn); - void printFilteredResults(bool contaminationInControl, - int controlPrefilteringLevel) const; - int getMassiveInvFilteringLevel() const { return massiveInvFilteringLevel; } - - bool isContaminationObserved() const { return contaminationObserved; } - - private: - void searchMatches(vector> &mref); - void createDoubleMatchSv(BreakpointReduced &sourceBp, - BreakpointReduced &targetBp, - const SuppAlignmentAnno &sa, - const SuppAlignmentAnno &saMatch, bool checkOrder, - vector> &mref); - bool createDoubleMatchSvPreCheck(const SuppAlignmentAnno &saMatch); - void createUnmatchedSaSv(BreakpointReduced &sourceBp, - BreakpointReduced &targetBp, - const SuppAlignmentAnno &sa, - vector> &mref); - void createUnknownMatchSv(BreakpointReduced &sourceBp, - const SuppAlignmentAnno &sa, - vector> &mref, - bool doubleSupportSa); - bool createUnknownMatchSvPreCheck(const SuppAlignmentAnno &sa, - bool doubleSupportSa); - void checkSvQuality(); - MrefMatch searchMrefHitsNew(const BreakpointReduced &bpIn, - int distanceThreshold, - int conservativeDistanceThreshold, - vector> &mref); - GermlineMatch searchGermlineHitsNew(const 
BreakpointReduced &bpIn, - int distanceThreshold, - int conservativeDistanceThreshold); - - void searchSa(int chrIndex, int dbIndex, const SuppAlignmentAnno &sa, - bool doubleSupportSa, vector> &mref); - bool applyMassiveInversionFiltering(bool stricterMode, - bool controlCheckMode); - bool applyPathogenContaminationFiltering(); - void printUnresolvedRareOverhangs(vector> &mref); - const bool NOCONTROLMODE; - const int GERMLINEDBLIMIT; - bool contaminationObserved; - int massiveInvFilteringLevel; - // unordered_set, VectorHash> filteredResultKeys; - unordered_set filteredResultKeys; - vector filteredResults; - vector> tumorResults; - vector> controlResults; - vector> overhangs; - vector visitedLineIndices; -}; + class AnnotationProcessor { + public: + + static bool ABRIDGED_OUTPUT; + + AnnotationProcessor(const std::string &tumorResultsIn, + std::vector> &mref, + ChrSize defaultReadLengthTumorIn, + bool controlCheckModeIn, + int GERMLINE_DB_LIMIT); + + AnnotationProcessor(const std::string &tumorResultsIn, + std::vector> &mref, + const std::string &controlResultsIn, + ChrSize defaultReadLengthTumorIn, + ChrSize DEFAULT_READ_LENGTHControlIn, + int GERMLINE_DB_LIMIT, + int lowQualControlIn, + bool pathogenInControlIn); + + void printFilteredResults(bool contaminationInControl, + int controlPrefilteringLevel) const; + + int getMassiveInvFilteringLevel() const { return massiveInvFilteringLevel; } + + bool isContaminationObserved() const { return contaminationObserved; } + + private: + + void searchMatches(std::vector> &mref); + + void createDoubleMatchSv(BreakpointReduced &sourceBp, + BreakpointReduced &targetBp, + const SuppAlignmentAnno &sa, + const SuppAlignmentAnno &saMatch, + bool checkOrder, + std::vector> &mref); + + bool createDoubleMatchSvPreCheck(const SuppAlignmentAnno &saMatch); + + void createUnmatchedSaSv(BreakpointReduced &sourceBp, + BreakpointReduced &targetBp, + const SuppAlignmentAnno &sa, + std::vector> &mref); + + void 
createUnknownMatchSv(BreakpointReduced &sourceBp, + const SuppAlignmentAnno &sa, + std::vector> &mref, + bool doubleSupportSa); + + bool createUnknownMatchSvPreCheck(const SuppAlignmentAnno &sa, + bool doubleSupportSa); + + void checkSvQuality(); + + MrefMatch searchMrefHitsNew(const BreakpointReduced &bpIn, + int distanceThreshold, + int conservativeDistanceThreshold, + std::vector> &mref); + + GermlineMatch searchGermlineHitsNew(const BreakpointReduced &bpIn, + int distanceThreshold, + int conservativeDistanceThreshold); + + void searchSa(CompressedMrefIndex chrIndex, + size_t dbIndex, + const SuppAlignmentAnno &sa, + bool doubleSupportSa, + std::vector> &mref); + + bool applyMassiveInversionFiltering(bool stricterMode, + bool controlCheckMode); + + bool applyPathogenContaminationFiltering(); + + void printUnresolvedRareOverhangs(std::vector> &mref); + + const bool NO_CONTROL_MODE; + + const int GERMLINE_DB_LIMIT; + + bool contaminationObserved; + + int massiveInvFilteringLevel; + + // unordered_set, VectorHash> filteredResultKeys; + + std::unordered_set filteredResultKeys; + + std::vector filteredResults; + + std::vector> tumorResults; + + std::vector> controlResults; + + std::vector> overhangs; + + std::vector visitedLineIndices; + + }; } /* namespace sophia */ diff --git a/include/Breakpoint.h b/include/Breakpoint.h index 3196193..f06eff9 100644 --- a/include/Breakpoint.h +++ b/include/Breakpoint.h @@ -28,227 +28,318 @@ #include "MateInfo.h" #include "SuppAlignment.h" #include "SuppAlignmentAnno.h" +#include "global.h" #include #include #include #include +#include + namespace sophia { -using namespace std; - -class Breakpoint { - public: - Breakpoint(int chrIndexIn, int posIn); - Breakpoint(const string &bpIn, bool ignoreOverhang); - ~Breakpoint() = default; - static const int PERMISSIBLEMISMATCHES = 2; - static const int MAXPERMISSIBLESOFTCLIPS = 2000; - static const int MAXPERMISSIBLEHARDCLIPS = 2000; - static const int MAXPERMISSIBLELOWMAPQHARDCLIPS = 50; - 
static int BPSUPPORTTHRESHOLD; - static int DEFAULTREADLENGTH; - static int DISCORDANTLOWQUALLEFTRANGE; - static int DISCORDANTLOWQUALRIGHTRANGE; - static double IMPROPERPAIRRATIO; - static bool PROPERPAIRCOMPENSATIONMODE; - static int bpindex; - static const string COLUMNSSTR; - void addSoftAlignment(shared_ptr alignmentIn); - void addHardAlignment(shared_ptr alignmentIn); - bool finalizeBreakpoint( - const deque &discordantAlignmentsPool, - const deque &discordantLowQualAlignmentsPool, - const deque &discordantAlignmentCandidatesPool); - void setLeftCoverage(int leftCoverageIn) { leftCoverage = leftCoverageIn; } - void setRightCoverage(int rightCoverageIn) { - rightCoverage = rightCoverageIn; - } - void setLowQualBreaksSoft(int lowQualBreaksSoftIn) { - lowQualBreaksSoft = lowQualBreaksSoftIn; - } - void setLowQualBreaksHard(int lowQualBreaksHardIn) { - lowQualBreaksHard = lowQualBreaksHardIn; - } - void setLowQualSpansSoft(int lowQualSpansSoftIn) { - lowQualSpansSoft = lowQualSpansSoftIn; - } - void setLowQualSpansHard(int lowQualSpansHardIn) { - lowQualSpansHard = lowQualSpansHardIn; - } - void setNormalSpans(int normalSpansIn) { normalSpans = normalSpansIn; } - void setUnpairedBreaksSoft(int unpairedBreaksSoftIn) { - unpairedBreaksSoft = unpairedBreaksSoftIn; - } - void setUnpairedBreaksHard(int unpairedBreaksHardIn) { - unpairedBreaksHard = unpairedBreaksHardIn; - } - void setBreaksShortIndel(int breaksShortIndelIn) { - breaksShortIndel = breaksShortIndelIn; - } - bool isCovFinalized() const { return covFinalized; } - void setCovFinalized(bool covFinalizedIn) { covFinalized = covFinalizedIn; } - template bool operator<(const T &rhs) const { - if (chrIndex < rhs.getChrIndex()) - return true; - if (chrIndex > rhs.getChrIndex()) - return false; - return (pos < rhs.getPos()); - } - bool closeToSupp(const SuppAlignment &compIn, int fuzziness) const { - if (chrIndex == compIn.getChrIndex()) { - if (compIn.isFuzzy()) { - fuzziness = 2.5 * DEFAULTREADLENGTH; - return 
(pos - fuzziness) <= - (compIn.getExtendedPos() + fuzziness) && - (compIn.getPos() - fuzziness) <= (pos + fuzziness); + class Breakpoint { + public: + + Breakpoint(ChrIndex chrIndexIn, + ChrSize posIn); + + static Breakpoint parse(const std::string &bpIn, + bool ignoreOverhang); + + ~Breakpoint() = default; + + static const int PERMISSIBLE_MISMATCHES = 2; + + static const int MAX_PERMISSIBLE_SOFTCLIPS = 2000; + + static const int MAX_PERMISSIBLE_HARDCLIPS = 2000; + + static const int MAX_PERMISSIBLE_LOW_MAPQ_HARDCLIPS = 50; + + static int BP_SUPPORT_THRESHOLD; + + static ChrSize DEFAULT_READ_LENGTH; + + static ChrSize DISCORDANT_LOW_QUAL_LEFT_RANGE; + + static ChrSize DISCORDANT_LOW_QUAL_RIGHT_RANGE; + + static double IMPROPER_PAIR_RATIO; + + static bool PROPER_PAIR_COMPENSATION_MODE; + + static int bpindex; + + static const std::string COLUMN_STR; + + void addSoftAlignment(std::shared_ptr alignmentIn); + + void addHardAlignment(std::shared_ptr alignmentIn); + + bool finalizeBreakpoint( + const std::deque &discordantAlignmentsPool, + const std::deque &discordantLowQualAlignmentsPool, + const std::deque &discordantAlignmentCandidatesPool); + + void setLeftCoverage(int leftCoverageIn) { leftCoverage = leftCoverageIn; } + + void setRightCoverage(int rightCoverageIn) { + rightCoverage = rightCoverageIn; + } + + void setLowQualBreaksSoft(int lowQualBreaksSoftIn) { + lowQualBreaksSoft = lowQualBreaksSoftIn; + } + + void setLowQualBreaksHard(int lowQualBreaksHardIn) { + lowQualBreaksHard = lowQualBreaksHardIn; + } + + void setLowQualSpansSoft(int lowQualSpansSoftIn) { + lowQualSpansSoft = lowQualSpansSoftIn; + } + + void setLowQualSpansHard(int lowQualSpansHardIn) { + lowQualSpansHard = lowQualSpansHardIn; + } + + void setNormalSpans(int normalSpansIn) { normalSpans = normalSpansIn; } + + void setUnpairedBreaksSoft(int unpairedBreaksSoftIn) { + unpairedBreaksSoft = unpairedBreaksSoftIn; + } + + void setUnpairedBreaksHard(int unpairedBreaksHardIn) { + unpairedBreaksHard = 
unpairedBreaksHardIn; + } + + void setBreaksShortIndel(int breaksShortIndelIn) { + breaksShortIndel = breaksShortIndelIn; + } + + bool isCovFinalized() const { return covFinalized; } + + void setCovFinalized(bool covFinalizedIn) { covFinalized = covFinalizedIn; } + + template bool operator<(const T &rhs) const { + if (chrIndex < rhs.getChrIndex()) + return true; + if (chrIndex > rhs.getChrIndex()) + return false; + return (pos < rhs.getPos()); + } + + bool closeToSupp(const SuppAlignment &compIn, ChrDistance fuzziness) const { + if (chrIndex == compIn.getChrIndex()) { + if (compIn.isFuzzy()) { + fuzziness = ChrDistance(trunc(2.5 * static_cast(DEFAULT_READ_LENGTH))); + return (ChrDistance(static_cast(pos)) - fuzziness) <= + (ChrDistance(static_cast(compIn.getExtendedPos())) + fuzziness) && + (ChrDistance(static_cast(compIn.getPos())) - fuzziness) <= + (ChrDistance(static_cast(pos)) + fuzziness); + } else { + return ChrDistance(abs(static_cast(pos) - static_cast(compIn.getPos()))) <= fuzziness; + } } else { - return abs(pos - compIn.getPos()) <= fuzziness; + return false; } - } else { - return false; - } - } - int distanceToSupp(const SuppAlignmentAnno &compIn) const { - if (chrIndex == compIn.getChrIndex()) { - if (compIn.isFuzzy()) { - if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) { - return 0; - } else { - if (pos < compIn.getPos()) { - return compIn.getPos() - pos; + } + + ChrDistance distanceToSupp(const SuppAlignmentAnno &compIn) const { + ChrDistance result; + if (chrIndex == compIn.getChrIndex()) { + if (compIn.isFuzzy()) { + if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) { + result = ChrDistance(0); } else { - return pos - compIn.getExtendedPos(); + if (pos < compIn.getPos()) { + result = ChrDistance(compIn.getPos() - pos); + } else { + result = ChrDistance(pos - compIn.getExtendedPos()); + } } + } else { + result = ChrDistance(abs(static_cast(pos) - static_cast(compIn.getPos()))); } } else { + result = 1000000; + } + return 
result; + } + + template int distanceToBp(const T &compIn) const { + if (chrIndex == compIn.getChrIndex()) { return abs(pos - compIn.getPos()); + } else { + return -1; + } + } + + bool operator==(const Breakpoint &rhs) const { + return chrIndex == rhs.getChrIndex() && pos == rhs.getPos(); + } + + ChrIndex getChrIndex() const { return chrIndex; } + + ChrSize getPos() const { return pos; } + + bool isMissingInfoBp() const { return missingInfoBp; } + + const std::vector &getDoubleSidedMatches() const { + return doubleSidedMatches; + } + + std::vector getDoubleSidedMatchesPtr() { + std::vector res{}; + for (auto &sa : doubleSidedMatches) { + res.push_back(&sa); } - } else { - return 1000000; - } - } - template int distanceToBp(const T &compIn) const { - if (chrIndex == compIn.getChrIndex()) { - return abs(pos - compIn.getPos()); - } else { - return -1; - } - } - bool operator==(const Breakpoint &rhs) const { - return chrIndex == rhs.getChrIndex() && pos == rhs.getPos(); - } - int getChrIndex() const { return chrIndex; } - int getPos() const { return pos; } - bool isMissingInfoBp() const { return missingInfoBp; } - const vector &getDoubleSidedMatches() const { - return doubleSidedMatches; - } - vector getDoubleSidedMatchesPtr() { - vector res{}; - for (auto &sa : doubleSidedMatches) { - res.push_back(&sa); - } - return res; - } - const vector &getSupplementsPrimary() const { - return supplementsPrimary; - } - vector getSupplementsPrimaryPtr() { - vector res{}; - for (auto &sa : supplementsPrimary) { - res.push_back(&sa); - } - return res; - } - bool isGermline() const { return germline; } - int getHitsInMref() const { return hitsInMref; } - int getLeftCoverage() const { return leftCoverage; } - int getRightCoverage() const { return rightCoverage; } - int getBreaksShortIndel() const { return breaksShortIndel; } - const vector &getConsensusOverhangs() const { - return consensusOverhangs; - } - int getLowQualBreaksSoft() const { return lowQualBreaksSoft; } - int 
getLowQualBreaksHard() const { return lowQualBreaksHard; } - int getRepetitiveOverhangBreaks() const { return repetitiveOverhangBreaks; } - int getLowQualSpansSoft() const { return lowQualSpansSoft; } - int getLowQualSpansHard() const { return lowQualSpansHard; } - int getMateSupport() const { return mateSupport; } - int getNormalSpans() const { return normalSpans; } - int getPairedBreaksSoft() const { return pairedBreaksSoft; } - int getPairedBreaksHard() const { return pairedBreaksHard; } - int getUnpairedBreaksHard() const { return unpairedBreaksHard; } - int getUnpairedBreaksSoft() const { return unpairedBreaksSoft; } - void removeMarkedFuzzies() { - cleanUpVector(doubleSidedMatches); - cleanUpVector(supplementsPrimary); - } - SuppAlignment *searchFuzzySa(const SuppAlignment &fuzzySa); - - void setGermline(bool germlineIn) { this->germline = germlineIn; } - - void setHitsInMref(int hitsInMref) { this->hitsInMref = hitsInMref; } - - private: - string finalizeOverhangs(); - void printBreakpointReport(const string &overhangStr); - bool matchDetector(const shared_ptr &longAlignment, - const shared_ptr &shortAlignment) const; - void detectDoubleSupportSupps(); - void collapseSuppRange(string &res, const vector &vec) const; - template void cleanUpVector(vector &objectPool); - void fillMatePool(const deque &discordantAlignmentsPool, - const deque &discordantLowQualAlignmentsPool, - const deque &discordantAlignmentCandidatesPool); - void collectMateSupport(); - void compressMatePool(vector &discordantAlignmentsPool); - void - collectMateSupportHelper(SuppAlignment &sa, - vector &discordantAlignmentsPool, - vector &discordantLowQualAlignmentsPool); - void saHomologyClashSolver(); - bool covFinalized; - bool missingInfoBp; - int chrIndex; - int pos; - int normalSpans, lowQualSpansSoft, lowQualSpansHard, unpairedBreaksSoft, - unpairedBreaksHard, breaksShortIndel, lowQualBreaksSoft, - lowQualBreaksHard, repetitiveOverhangBreaks; - int pairedBreaksSoft, pairedBreaksHard; - 
int leftSideDiscordantCandidates, rightSideDiscordantCandidates; - int mateSupport; - int leftCoverage, rightCoverage; - int totalLowMapqHardClips; - int hitsInMref; - bool germline; - vector> supportingSoftAlignments; - vector> supportingHardAlignments; - vector> supportingHardLowMapqAlignments; - vector supplementsPrimary; - vector doubleSidedMatches; - vector consensusOverhangs; - vector poolLeft, poolRight, poolLowQualLeft, poolLowQualRight; - vector supplementsSecondary; -}; - -template -inline void -Breakpoint::cleanUpVector(vector &objectPool) { - // cerr << "cleaning up" << endl; - while (!objectPool.empty() && objectPool.back().isToRemove()) { - objectPool.pop_back(); - } - for (auto saIt = objectPool.begin(); saIt != objectPool.end(); ++saIt) { - if (saIt->isToRemove()) { - swap(*saIt, objectPool.back()); - } - while (!objectPool.empty() && objectPool.back().isToRemove()) { - objectPool.pop_back(); - } - } - // cerr << "done" << endl; -} + return res; + } + + const std::vector &getSupplementsPrimary() const { + return supplementsPrimary; + } + + std::vector getSupplementsPrimaryPtr() { + std::vector res{}; + for (auto &sa : supplementsPrimary) { + res.push_back(&sa); + } + return res; + } + + bool isGermline() const { return germline; } + + int getHitsInMref() const { return hitsInMref; } + + int getLeftCoverage() const { return leftCoverage; } + + int getRightCoverage() const { return rightCoverage; } + + int getBreaksShortIndel() const { return breaksShortIndel; } + + const std::vector &getConsensusOverhangs() const { + return consensusOverhangs; + } + + int getLowQualBreaksSoft() const { return lowQualBreaksSoft; } + + int getLowQualBreaksHard() const { return lowQualBreaksHard; } + + int getRepetitiveOverhangBreaks() const { return repetitiveOverhangBreaks; } + + int getLowQualSpansSoft() const { return lowQualSpansSoft; } + + int getLowQualSpansHard() const { return lowQualSpansHard; } + + int getMateSupport() const { return mateSupport; } + + int 
getNormalSpans() const { return normalSpans; } + + int getPairedBreaksSoft() const { return pairedBreaksSoft; } + + int getPairedBreaksHard() const { return pairedBreaksHard; } + + int getUnpairedBreaksHard() const { return unpairedBreaksHard; } + + int getUnpairedBreaksSoft() const { return unpairedBreaksSoft; } + + void removeMarkedFuzzies() { + cleanUpVector(doubleSidedMatches); + cleanUpVector(supplementsPrimary); + } + + SuppAlignment *searchFuzzySa(const SuppAlignment &fuzzySa); + + void setGermline(bool germlineIn) { this->germline = germlineIn; } + + void setHitsInMref(int hitsInMref) { this->hitsInMref = hitsInMref; } + + private: + + // Compose the string that will be printed as column 8 into the breakpoint BED. + std::string finalizeOverhangs(); + + // Actually prints to stdout. + void printBreakpointReport(const std::string &overhangStr); + + bool matchDetector(const std::shared_ptr &longAlignment, + const std::shared_ptr &shortAlignment) const; + + void detectDoubleSupportSupps(); + + void collapseSuppRange(std::string &res, const std::vector &vec) const; + + template void cleanUpVector(std::vector &objectPool); + + void fillMatePool(const std::deque &discordantAlignmentsPool, + const std::deque &discordantLowQualAlignmentsPool, + const std::deque &discordantAlignmentCandidatesPool); + + void collectMateSupport(); + + void compressMatePool(std::vector &discordantAlignmentsPool); + + void + collectMateSupportHelper(SuppAlignment &sa, + std::vector &discordantAlignmentsPool, + std::vector &discordantLowQualAlignmentsPool); + + void saHomologyClashSolver(); + + bool covFinalized; + + bool missingInfoBp; + + ChrIndex chrIndex; + + ChrSize pos; + + int normalSpans, + lowQualSpansSoft, + lowQualSpansHard, + unpairedBreaksSoft, + unpairedBreaksHard, + breaksShortIndel, + lowQualBreaksSoft, + lowQualBreaksHard, + repetitiveOverhangBreaks; + + int pairedBreaksSoft, + pairedBreaksHard; + + int leftSideDiscordantCandidates, + rightSideDiscordantCandidates; + + 
int mateSupport; + + int leftCoverage, + rightCoverage; + + int totalLowMapqHardClips; + + int hitsInMref; + + bool germline; + + std::vector> supportingSoftAlignments; + + std::vector> supportingHardAlignments; + + std::vector> supportingHardLowMapqAlignments; + + std::vector supplementsPrimary; + + std::vector doubleSidedMatches; + + std::vector consensusOverhangs; + + std::vector poolLeft, poolRight, poolLowQualLeft, poolLowQualRight; + + std::vector supplementsSecondary; + }; } /* namespace sophia */ diff --git a/include/BreakpointReduced.h b/include/BreakpointReduced.h index 8db13c7..a3150b1 100644 --- a/include/BreakpointReduced.h +++ b/include/BreakpointReduced.h @@ -24,7 +24,7 @@ #ifndef BREAKPOINTREDUCED_H_ #define BREAKPOINTREDUCED_H_ - +#include "global.h" #include "Breakpoint.h" #include "GermlineMatch.h" #include "MrefMatch.h" @@ -35,157 +35,207 @@ namespace sophia { -using namespace std; - -class BreakpointReduced { - public: - static int DEFAULTREADLENGTH; - static double CLONALITYSTRICTLOWTHRESHOLD; - static double ARTIFACTFREQHIGHTHRESHOLD; - static string PIDSINMREFSTR; - static boost::format doubleFormatter; - BreakpointReduced(const Breakpoint &tmpBp, int lineIndexIn, - bool hasOverhangIn); - BreakpointReduced(const SuppAlignmentAnno &sa, - const BreakpointReduced &emittingBp, bool fuzzySecondary); - - template bool operator<(const T &rhs) const { - return pos < rhs.getPos(); - } - bool fullSmaller(const BreakpointReduced &rhs) const { - if (chrIndex < rhs.getChrIndex()) { - return true; + class BreakpointReduced { + + public: + + static ChrSize DEFAULT_READ_LENGTH; + + static double CLONALITY_STRICT_LOW_THRESHOLD; + + static double ARTIFACT_FREQ_HIGH_THRESHOLD; + + static std::string PIDS_IN_MREF_STR; + + static boost::format doubleFormatter; + + BreakpointReduced(const Breakpoint &tmpBp, int lineIndexIn, + bool hasOverhangIn); + + BreakpointReduced(const SuppAlignmentAnno &sa, + const BreakpointReduced &emittingBp, bool fuzzySecondary); + + 
template bool operator<(const T &rhs) const { + return pos < rhs.getPos(); } - if (chrIndex > rhs.getChrIndex()) { - return false; + + /** This is used for sorting breakpoints. No biological meaning. */ + bool fullSmaller(const BreakpointReduced &rhs) const { + if (chrIndex < rhs.getChrIndex()) { + return true; + } + if (chrIndex > rhs.getChrIndex()) { + return false; + } + return pos < rhs.getPos(); } - return pos < rhs.getPos(); - } - template int distanceTo(const T &rhs) const { - if (chrIndex != rhs.getChrIndex()) { - return 1000000; - } else { - return abs(pos - rhs.getPos()); + + template int distanceTo(const T &rhs) const { + if (chrIndex != rhs.getChrIndex()) { + // Just any big value? + return 1000000; + } else { + // Effectively always >= 0. + return abs(pos - rhs.getPos()); + } } - } - template int distanceToBp(const T &compIn) const { - if (chrIndex == compIn.getChrIndex()) { - return abs(pos - compIn.getPos()); - } else { - return -1; + + template int distanceToBp(const T &compIn) const { + if (chrIndex == compIn.getChrIndex()) { + return abs(static_cast(pos) - static_cast(compIn.getPos())); + } else { + // This seems to be a special value. It is not explicitly used in comparisons. + // Check usages, before refactoring this. 
+ return -1; + } } - } - int getChrIndex() const { return chrIndex; } - int getPos() const { return pos; } + ChrIndex getChrIndex() const { return chrIndex; } + + ChrSize getPos() const { return pos; } + + + bool isToRemove() const { return toRemove; } + + void setToRemove(bool toRemove) { this->toRemove = toRemove; } + + void removeMarkedFuzzies(); + + SuppAlignmentAnno *searchFuzzySa(const SuppAlignmentAnno &fuzzySa); + + int getBreaksShortIndel() const { return breaksShortIndel; } - bool isToRemove() const { return toRemove; } - void setToRemove(bool toRemove) { this->toRemove = toRemove; } - void removeMarkedFuzzies(); - SuppAlignmentAnno *searchFuzzySa(const SuppAlignmentAnno &fuzzySa); + int getLeftCoverage() const { return leftCoverage; } - int getBreaksShortIndel() const { return breaksShortIndel; } + int getLowQualBreaksHard() const { return lowQualBreaksHard; } - int getLeftCoverage() const { return leftCoverage; } + int getLowQualBreaksSoft() const { return lowQualBreaksSoft; } - int getLowQualBreaksHard() const { return lowQualBreaksHard; } + int getLowQualSpansHard() const { return lowQualSpansHard; } - int getLowQualBreaksSoft() const { return lowQualBreaksSoft; } + int getLowQualSpansSoft() const { return lowQualSpansSoft; } - int getLowQualSpansHard() const { return lowQualSpansHard; } + int getMateSupport() const { return mateSupport; } - int getLowQualSpansSoft() const { return lowQualSpansSoft; } + int getNormalSpans() const { return normalSpans; } - int getMateSupport() const { return mateSupport; } - int getNormalSpans() const { return normalSpans; } - int getPairedBreaksHard() const { return pairedBreaksHard; } - int getPairedBreaksSoft() const { return pairedBreaksSoft; } - int getRepetitiveOverhangBreaks() const { return repetitiveOverhangBreaks; } - int getRightCoverage() const { return rightCoverage; } - const vector &getSuppAlignments() const { - return suppAlignments; - } + int getPairedBreaksHard() const { return pairedBreaksHard; } - 
int getUnpairedBreaksHard() const { return unpairedBreaksHard; } - int getUnpairedBreaksSoft() const { return unpairedBreaksSoft; } - int getLineIndex() const { return lineIndex; } - vector getSupplementsPtr() { - vector res{}; - for (auto &sa : suppAlignments) { - res.push_back(&sa); + int getPairedBreaksSoft() const { return pairedBreaksSoft; } + + int getRepetitiveOverhangBreaks() const { return repetitiveOverhangBreaks; } + + int getRightCoverage() const { return rightCoverage; } + + const std::vector &getSuppAlignments() const { + return suppAlignments; } - return res; - } - bool closeToSupp(const SuppAlignmentAnno &compIn, int fuzziness) const { - if (chrIndex == compIn.getChrIndex()) { - if (compIn.isFuzzy()) { - fuzziness = 2.5 * DEFAULTREADLENGTH; - return (pos - fuzziness) <= - (compIn.getExtendedPos() + fuzziness) && - (compIn.getPos() - fuzziness) <= (pos + fuzziness); - } else { - return abs(pos - compIn.getPos()) <= fuzziness; + + + int getUnpairedBreaksHard() const { return unpairedBreaksHard; } + + int getUnpairedBreaksSoft() const { return unpairedBreaksSoft; } + + int getLineIndex() const { return lineIndex; } + + std::vector getSupplementsPtr() { + std::vector res{}; + for (auto &sa : suppAlignments) { + res.push_back(&sa); } - } else { - return false; + return res; } - } - int distanceToSupp(const SuppAlignmentAnno &compIn) const { - if (chrIndex == compIn.getChrIndex()) { - if (compIn.isFuzzy()) { - if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) { - return 0; + + bool closeToSupp(const SuppAlignmentAnno &compIn, ChrDistance fuzziness) const { + if (chrIndex == compIn.getChrIndex()) { + if (compIn.isFuzzy()) { + fuzziness = ChrDistance(2.5 * DEFAULT_READ_LENGTH); // truncate + return (ChrDistance(pos) - fuzziness) <= (ChrDistance(compIn.getExtendedPos()) + fuzziness) && + (ChrDistance(compIn.getPos()) - fuzziness) <= (ChrDistance(pos) + fuzziness); } else { - if (pos < compIn.getPos()) { - return compIn.getPos() - pos; + return 
abs(ChrDistance(pos) - ChrDistance(compIn.getPos())) <= fuzziness; + } + } else { + return false; + } + } + + ChrDistance distanceToSupp(const SuppAlignmentAnno &compIn) const { + ChrDistance result; + if (chrIndex == compIn.getChrIndex()) { + if (compIn.isFuzzy()) { + if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) { + result = 0; } else { - return pos - compIn.getExtendedPos(); + if (pos < compIn.getPos()) { + result = ChrDistance(compIn.getPos() - pos); + } else { + // TODO Why here getExtendenPos(), but getPos() above? + result = ChrDistance(pos - compIn.getExtendedPos()); + } } + } else { + result = ChrDistance(abs(static_cast(pos) - static_cast(compIn.getPos()))); } } else { - return abs(pos - compIn.getPos()); + result = 1000000; } - } else { - return 1000000; + return result; } - } - const MrefMatch &getMrefHits() const { return mrefHits; } - void setMrefHits(MrefMatch mrefHits) { this->mrefHits = mrefHits; } - void setGermlineInfo(GermlineMatch germlineInfo) { - this->germlineInfo = germlineInfo; - } - - bool testOverhangBasedCandidacy() const; - string printOverhang(double germlineClonality, int numHits, - const string &overhang) const; - - const GermlineMatch &getGermlineInfo() const { return germlineInfo; } - void addFileIndex(int fileIndex) { - for (auto &sa : suppAlignments) { - sa.addFileIndex(fileIndex); + + const MrefMatch &getMrefHits() const { return mrefHits; } + + void setMrefHits(MrefMatch mrefHits) { this->mrefHits = mrefHits; } + + void setGermlineInfo(GermlineMatch germlineInfo) { + this->germlineInfo = germlineInfo; } - } - void complexRearrangementMateRatioRescue(bool encounteredM); - bool hasOverhang; - void addDummySa(const SuppAlignmentAnno &sa, - const BreakpointReduced &emittingBp); - const SuppAlignmentAnno &getDummySa(); - - private: - bool toRemove; - int lineIndex; - int chrIndex; - int pos; - int normalSpans, lowQualSpansSoft, lowQualSpansHard, unpairedBreaksSoft, - unpairedBreaksHard, breaksShortIndel, 
lowQualBreaksSoft, - lowQualBreaksHard, repetitiveOverhangBreaks; - int pairedBreaksSoft, pairedBreaksHard; - int mateSupport; - int leftCoverage, rightCoverage; - MrefMatch mrefHits; - GermlineMatch germlineInfo; - vector suppAlignments; -}; + + bool testOverhangBasedCandidacy() const; + + std::string printOverhang(double germlineClonality, int numHits, + const std::string &overhang) const; + + const GermlineMatch &getGermlineInfo() const { return germlineInfo; } + + void addFileIndex(int fileIndex) { + for (auto &sa : suppAlignments) { + sa.addFileIndex(fileIndex); + } + } + + void complexRearrangementMateRatioRescue(bool encounteredM); + + bool hasOverhang; + + void addDummySa(const SuppAlignmentAnno &sa, + const BreakpointReduced &emittingBp); + + const SuppAlignmentAnno &getDummySa(); + + private: + bool toRemove; + int lineIndex; + ChrIndex chrIndex; + ChrSize pos; + int normalSpans, + lowQualSpansSoft, + lowQualSpansHard, + unpairedBreaksSoft, + unpairedBreaksHard, + breaksShortIndel, + lowQualBreaksSoft, + lowQualBreaksHard, + repetitiveOverhangBreaks; + int pairedBreaksSoft, + pairedBreaksHard; + int mateSupport; + int leftCoverage, + rightCoverage; + MrefMatch mrefHits; + GermlineMatch germlineInfo; + std::vector suppAlignments; + }; } /* namespace sophia */ diff --git a/include/ChosenBp.h b/include/ChosenBp.h index 23f7700..0f8e5a5 100644 --- a/include/ChosenBp.h +++ b/include/ChosenBp.h @@ -30,34 +30,55 @@ namespace sophia { -using namespace std; - -class ChosenBp { - friend class Alignment; - - public: - ChosenBp(char bpTypeIn, int bpSizeIn, bool bpEncounteredMIn, - int overhangStartIndexIn, int overhangLengthIn, - int selfNodeIndexIn) - : bpType{bpTypeIn}, bpSize{bpSizeIn}, bpEncounteredM{bpEncounteredMIn}, - overhangStartIndex{overhangStartIndexIn}, - overhangLength{overhangLengthIn}, supplementaryAlignments{}, - childrenNodes{{selfNodeIndexIn}}, selfNodeIndex{selfNodeIndexIn} {} - ~ChosenBp() = default; - static int BPSUPPORTTHRESHOLD; - - private: 
- char bpType; - int bpSize; - bool bpEncounteredM; - int overhangStartIndex, overhangLength; - vector supplementaryAlignments; - vector childrenNodes; - int selfNodeIndex; - void addChildNode(int indexIn); - void - addSupplementaryAlignments(const vector &suppAlignments); -}; + class ChosenBp { + friend class Alignment; + + public: + + /** + * + * @param selfNodeIndexIn Index into an array of supporting alignments. + * See alignment::setChosenBp. + */ + ChosenBp(char bpTypeIn, + int bpSizeIn, + bool bpEncounteredMIn, + ChrSize overhangStartIndexIn, + ChrSize overhangLengthIn, + int selfNodeIndexIn) + : bpType{bpTypeIn}, + bpSize{bpSizeIn}, + bpEncounteredM{bpEncounteredMIn}, + overhangStartIndex{overhangStartIndexIn}, + overhangLength{overhangLengthIn}, + supplementaryAlignments{}, + childrenNodes{{selfNodeIndexIn}}, + selfNodeIndex{selfNodeIndexIn} {} + + ~ChosenBp() = default; + + static int BP_SUPPORT_THRESHOLD; + + private: + + char bpType; + + int bpSize; + + bool bpEncounteredM; + + ChrSize overhangStartIndex, overhangLength; + + std::vector supplementaryAlignments; + + std::vector childrenNodes; + + int selfNodeIndex; + + void addChildNode(int indexIn); + + void addSupplementaryAlignments(const std::vector &suppAlignments); + }; } // namespace sophia diff --git a/include/ChrCategory.h b/include/ChrCategory.h new file mode 100644 index 0000000..2bf8703 --- /dev/null +++ b/include/ChrCategory.h @@ -0,0 +1,119 @@ +#ifndef CHRCATEGORY_H_ +#define CHRCATEGORY_H_ + + +#include +#include +#include + +namespace sophia { + + /** C++ enums suck (also enum class). This is just a manually implemented rich enum. */ + class ChrCategory { + + public: + + using size_type = + boost::unordered::unordered_map::size_type; + + private: + + std::string category_name; + + // This is mostly for comparison with `operator<`. + std::size_t category_index; + + // Used for initialization. 
+ static const boost::unordered::unordered_map categories; + + static const std::vector sorted_categories; + + public: + // Only used to define categories. + ChrCategory(const std::string &s, std::size_t index); + + // Predefined instances. + + + /** Autosomal contigs, e.g. chr1, chr2, ..., chr22 */ + static const ChrCategory& AUTOSOME; + /** X chromosome */ + static const ChrCategory& X; + /** Y chromosome */ + static const ChrCategory& Y; + /** extrachromosomalContigs Extrachromosomal contigs, e.g. chrM, chrMT */ + static const ChrCategory& EXTRACHROMOSOMAL; + /** Joined category for unlocalized, unplaced, or random placed contigs + * Compare https://www.ncbi.nlm.nih.gov/grc/help/definitions + */ + static const ChrCategory& UNASSIGNED; + /** _alt contigs */ + static const ChrCategory& ALT; + /** HLA contigs */ + static const ChrCategory& HLA; + /** Virus contigs, e.g. NC_007605, EBV. + * This is for viruses that may insert into the nuclear genome. */ + static const ChrCategory& VIRUS; + /** Decoy contigs. */ + static const ChrCategory& DECOY; + /** Technical contigs, e.g. phiX or lambda */ + static const ChrCategory& TECHNICAL; + + // Parser + static const ChrCategory& from_string(const std::string &s); + + ~ChrCategory(); + + static size_type numCategories(); + + static const std::vector& getCategories(); + + std::string getName() const; + + bool operator==(const ChrCategory &other) const; + + bool operator!=(const ChrCategory &other) const; + + bool operator<(const ChrCategory &other) const; + + bool operator>(const ChrCategory &other) const; + + }; + + +} // namespace sophia + +/** The following defines hash and equal_to functions such that ChrCategory can be used as a key in + * unordered containers without explicitly setting these two functions. Thus we can continue to + * use `boost::unordered::unordered_set`. 
*/ + +namespace boost { + + template<> + struct hash { + std::size_t operator()(const sophia::ChrCategory& chrCategory) const { + return std::hash()(chrCategory.getName()); + } + }; + +} // namespace boost + +namespace std { + + template<> + struct equal_to { + bool operator()(const sophia::ChrCategory& lhs, const sophia::ChrCategory& rhs) const { + return lhs.operator==(rhs); + } + }; + + template<> + struct less { + bool operator()(const sophia::ChrCategory& lhs, const sophia::ChrCategory& rhs) const { + return lhs.operator<(rhs); + } + }; + +} // namespace std + +#endif /* CHRCATEGORY_H_ */ diff --git a/include/ChrConverter.h b/include/ChrConverter.h index 21afddc..f5aeb50 100644 --- a/include/ChrConverter.h +++ b/include/ChrConverter.h @@ -1,11 +1,5 @@ /* - * ChrConverter.h - * - * Created on: 28 Dec 2017 - * Author: Umut H. Toprak, DKFZ Heidelberg (Divisions of Theoretical - * Bioinformatics, Bioinformatics and Omics Data Analytics and currently - * Neuroblastoma Genomics) Copyright (C) 2018 Umut H. Toprak, Matthias - * Schlesner, Roland Eils and DKFZ Heidelberg + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,65 +13,146 @@ * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
- * LICENSE: GPL + * LICENSE: GPL */ #ifndef CHRCONVERTER_H_ #define CHRCONVERTER_H_ -#include + +#include "global.h" +#include #include +#include -#include namespace sophia { -using namespace std; - -class ChrConverter { - public: - static inline int readChromosomeIndex(string::const_iterator startIt, - char stopChar) { - int chrIndex{0}; - if (isdigit(*startIt)) { - for (auto chr_cit = startIt; *chr_cit != stopChar; ++chr_cit) { - chrIndex = chrIndex * 10 + (*chr_cit - '0'); - } - return chrIndex; - } else { - switch (*startIt) { - case 'h': - return 999; - case 'X': - return 40; - case 'G': - for (auto cit = next(startIt, 2); *cit != '.'; ++cit) { - chrIndex = 10 * chrIndex + *cit - '0'; - } - return chrIndex; - case 'Y': - return 41; - case 'M': - ++startIt; - if (*startIt == 'T') { - return 1001; - } else { - return 1003; - } - case 'N': - return 1000; - case 'p': - return 1002; - default: - return 1003; - } - } - return 0; - } - static const array indexToChr; - static const array indexConverter; - static const array indexToChrCompressedMref; -}; - -} /* namespace sophia */ - -#endif /* CHRCONVERTER_H_ */ + /** ChrConverter manages information on chromosomes names, sizes, and index positions in + * data arrays. + * + * The concept of compressed master ref (mref) chromosomes is used to separate out a + * subset of particularly important chromosomes for which SOPHIA will calculate its + * statistics. Note that sophiaMref will allocate a huge vector of the size of the sum + * of the lengths of the compressed mref chromosomes (see `MasterRefProcessor` constructor. + * + * This class also is managing the mapping between all chromosome indices and indices of + * the compressed master ref chromosomes (called "all index space" and "compressed master-ref + * index space"). 
+ **/ + class ChrConverter { + + private: + + const std::string assemblyName; + + public: + + ChrConverter(const std::string &assemblyNameIn); + + virtual ~ChrConverter(); + + std::string getAssemblyName() const; + + /** Number of chromosomes. */ + virtual ChrIndex nChromosomes() const = 0; + + /** Map an index position to a chromosome name. Throws illegal_argument error, if the + the index is not valid. */ + virtual ChrName indexToChrName(ChrIndex index) const = 0; + + /** Map a chromosome name to an index position. */ + virtual ChrIndex chrNameToIndex(ChrName chrName) const = 0; + + /** chr1-chr22 */ + virtual bool isAutosome(ChrIndex index) const = 0; + + /** chrX, Y, ...*/ + virtual bool isGonosome(ChrIndex index) const = 0; + + /** chrX */ + virtual bool isX(ChrIndex index) const = 0; + + /** chrY */ + virtual bool isY(ChrIndex index) const = 0; + + /** phix index. */ + virtual bool isTechnical(ChrIndex index) const = 0; + + /** NC_007605, EBV. */ + virtual bool isVirus(ChrIndex index) const = 0; + + /** Mitochondrial chromosome index. */ + virtual bool isExtrachromosomal(ChrIndex index) const = 0; + + /** Decoy sequence index. */ + virtual bool isDecoy(ChrIndex index) const = 0; + + /** Chromosomes that are not assigned to a specific position in a chromosome. This + * includes unplaced, unlocalized, and random contigs, such as GL000192.1. */ + virtual bool isUnassigned(ChrIndex index) const = 0; + + /** HLA contigs */ + virtual bool isHLA(ChrIndex index) const = 0; + + /** ALT contigs */ + virtual bool isALT(ChrIndex index) const = 0; + + // Methods for working with the subset of compressed master-ref chromosomes. + + /** Number of compressed mref chromosomes. */ + virtual CompressedMrefIndex nChromosomesCompressedMref() const = 0; + + /** Map an index position to a chromosome name for compressed mref files. 
*/ + virtual ChrName compressedMrefIndexToChrName(CompressedMrefIndex index) const = 0; + + /** Map an index from the global index-space to the compressed mref index-space. */ + virtual CompressedMrefIndex indexToCompressedMrefIndex(ChrIndex index) const = 0; + + /** Whether the chromosome index is that of a compressed mref chromosome. */ + virtual bool isCompressedMref(ChrIndex index) const = 0; + + /** Map from compressed mref index space to all chromosome index space. */ + virtual ChrIndex compressedMrefIndexToIndex(CompressedMrefIndex index) const = 0; + + /** Map compressed mref index to chromosome size. */ + virtual ChrSize chrSizeCompressedMref(CompressedMrefIndex index) const = 0; + + /** Returns true, if the region of the read is aligned to is blocked. */ + virtual bool isInBlockedRegion(ChrIndex chrIndex, ChrSize position) const; + + /** Parse chromosome index. + * + * 1. Input is a plain chromosome string separated from the following string by '\t' when + * parsing a BED file (the chromosome identifier in the first column). + * 2. ... + * + * If the `stopCharExt` parameter is an empty string, then it takes a position in a + * character stream, and translates the following character(s) into index positions + * (using ChrConverter::indexToChrName). If the name cannot be parsed, throws a domain_error + * exception. + * + * IMPORTANT: Implementations may or may not use the `stopCharExt` parameter. Therefore, + * the following behavior is optional. An implementation may not even actually + * validate that the stopChar or string-end terminates the parsed identifier! + * + * If the `stopCharExt` parameter is *not* empty, the method first parses up to the first + * occurrence of the `stopCharExt`. Then within the identified start and end range, parses + * up to the last occurrence of `stopChar`. 
This allows to parse a chromosome name + * "HLA-DRB1*13:01:01" from a string "HLA-DRB1*13:01:01:2914|(4,0,0?/0)" by first + * separating out the `|` separator (stopCharExt), and then finding the last `:` + * separator (stopChar). + * + * If no chromosome name can be parsed, throws a std::domain_error enriched with + * boost::exception information. + **/ + virtual ChrIndex + parseChrAndReturnIndex(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharExt = "") const = 0; + + }; + +} + +#endif /* CHRCONVERTER_H_ */ \ No newline at end of file diff --git a/include/ChrInfo.h b/include/ChrInfo.h new file mode 100644 index 0000000..15376d3 --- /dev/null +++ b/include/ChrInfo.h @@ -0,0 +1,51 @@ +#ifndef CHRINFO_H_ +#define CHRINFO_H_ + +#include "global.h" +#include "ChrCategory.h" +#include +#include +#include +#include + +namespace sophia { + + class ChrInfo { + + private: + ChrName name; + ChrSize size; + bool compressedMref; + ChrCategory category; + + public: + ChrInfo(ChrName _name, + ChrSize _size, + bool _compressedMref, + ChrCategory _category); + + ChrName getName() const; + ChrSize getSize() const; + bool isCompressedMref() const; + ChrCategory getCategory() const; + + }; + + + + /** Read the chromosome information from a TSV table with a header (chromosome, size, class), + where class is defined for each chromosome ID as "primary", "extrachromosomal", "random", + "unplaced", "alt", "hla", "decoy", "technical". All classes but "primary" are allowed to + be empty. */ + bool to_boolean(const std::string& str); + std::vector read_chr_info(std::istream &in); + std::vector read_chr_info(const std::string &filename); + + /** Convert a sequence of ChrInfo objects into a map from category to ChrInfo. 
*/ + boost::unordered::unordered_map> + to_chr_info_map(const std::vector &chr_info); + + +} // namespace sophia + +#endif /* CHRINFO_H_ */ diff --git a/include/ChrInfoTable.h b/include/ChrInfoTable.h new file mode 100644 index 0000000..f496da4 --- /dev/null +++ b/include/ChrInfoTable.h @@ -0,0 +1,74 @@ +#ifndef CHRINFOTABLE_H_ +#define CHRINFOTABLE_H_ + +#include "global.h" +#include "ChrInfo.h" +#include "ChrInfoTable.h" +#include + +namespace sophia { + + /** Provide access to chromosome information. + * Note that the order of the input vector is preserved. This means, if the vector is not + * sorted by the category, then also the results will not be sorted. + */ + class ChrInfoTable { + + public: + using ChrNames = std::vector; + using ChrSizes = std::vector; + + private: + const std::vector chrInfos; + + const + boost::unordered::unordered_map> + chrInfosByCategory; + + /** Helper for the constructor. */ + static + boost::unordered::unordered_map> + buildChrInfosByCategory(const std::vector &chr_info); + + const boost::unordered::unordered_map chrInfosByName; + + /** Helper for the constructor. */ + static + boost::unordered::unordered_map + buildChrInfosByName(const std::vector &chr_info); + + public: + + ChrInfoTable(const std::vector &chr_info); + + ChrIndex nChromosomes() const; + + /** Return the `ChrInfo` in the same order they were provided to the constructor. */ + const std::vector &getChrInfos() const; + const std::vector &getChrInfos(ChrCategory category) const; + + ChrNames getNames() const; + + /** Get the names of all chromosomes of the given category. The order of values is exactly + * the same as they were (maybe interrupted by other chromosomes) in the vector provided + * to the constructor. + */ + ChrNames getNames(ChrCategory category) const; + + /** The the chromosome sizes, again in the same order as provided to the constructor. */ + ChrSizes getSizes() const; + + /** Get the lengths of all chromosomes of the given category. 
The order of values is exactly + * the same as they were (maybe interrupted by other chromosomes) in the vector provided + * to the constructor. + */ + ChrSizes getSizes(ChrCategory category) const; + + ChrInfo getChrInfo(ChrName name) const; + + }; + + +} /* namespace sophia */ + +#endif /* CHRINFOTABLE_H_ */ diff --git a/include/CigarChunk.h b/include/CigarChunk.h index 7a59cea..5bd54a1 100644 --- a/include/CigarChunk.h +++ b/include/CigarChunk.h @@ -25,25 +25,44 @@ #ifndef CIGARCHUNK_H_ #define CIGARCHUNK_H_ +#include "global.h" + namespace sophia { -struct CigarChunk { - char chunkType; - bool encounteredM; - int startPosOnRead; - int length; - int indelAdjustment; - CigarChunk(char chunkTypeIn, bool encounteredMIn, int startPosOnReadIn, - int lengthIn) - : chunkType{chunkTypeIn}, encounteredM{encounteredMIn}, - startPosOnRead{startPosOnReadIn}, length{lengthIn}, indelAdjustment{ - 0} {} - CigarChunk(char chunkTypeIn, bool encounteredMIn, int startPosOnReadIn, - int lengthIn, int indelAdjustmentIn) - : chunkType{chunkTypeIn}, encounteredM{encounteredMIn}, - startPosOnRead{startPosOnReadIn}, length{lengthIn}, - indelAdjustment{indelAdjustmentIn} {} - ~CigarChunk() = default; -}; + struct CigarChunk { + + char chunkType; + + bool encounteredM; + + ChrSize startPosOnRead; + + ChrSize length; + + int indelAdjustment; + + CigarChunk(char chunkTypeIn, + bool encounteredMIn, + ChrSize startPosOnReadIn, + ChrSize lengthIn) + : chunkType{chunkTypeIn}, + encounteredM{encounteredMIn}, + startPosOnRead{startPosOnReadIn}, + length{lengthIn}, + indelAdjustment{0} {} + + CigarChunk(char chunkTypeIn, + bool encounteredMIn, + ChrSize startPosOnReadIn, + ChrSize lengthIn, + int indelAdjustmentIn) + : chunkType{chunkTypeIn}, + encounteredM{encounteredMIn}, + startPosOnRead{startPosOnReadIn}, + length{lengthIn}, + indelAdjustment{indelAdjustmentIn} {} + ~CigarChunk() = default; + }; + } // namespace sophia #endif /* CIGARCHUNK_H_ */ diff --git a/include/DeFuzzier.h 
b/include/DeFuzzier.h index 19863e9..84eba41 100644 --- a/include/DeFuzzier.h +++ b/include/DeFuzzier.h @@ -36,39 +36,48 @@ namespace sophia { -using namespace std; - -class DeFuzzier { - public: - DeFuzzier(int maxDistanceIn, bool mrefModeIn); - void deFuzzyDb(vector &bps) const; - void deFuzzyDb(vector &bps) const; - - private: - void processFuzzySa(vector &bps, - vector::iterator startingIt, - SuppAlignmentAnno *startingSa) const; - void dbSweep(vector &bps, - vector::iterator startingIt, int increment, - SuppAlignmentAnno *consensusSa, - vector &processedSas) const; - void selectBestSa(vector &processedSas, - SuppAlignmentAnno *consensusSa) const; - - void processFuzzySa(vector &bps, - vector::iterator startingIt, - SuppAlignmentAnno *startingSa) const; - void dbSweep(vector &bps, vector::iterator startingIt, - unordered_set &fileIndices, int increment, - SuppAlignmentAnno *consensusSa, - vector &processedSas) const; - void selectBestSa(vector &processedSas, - SuppAlignmentAnno *consensusSa, - const unordered_set &fileIndices) const; - - const int MAXDISTANCE; - const bool MREFMODE; -}; + class DeFuzzier { + public: + + DeFuzzier(ChrSize maxDistanceIn, + bool mrefModeIn); + + void deFuzzyDb(std::vector &bps) const; + + void deFuzzyDb(std::vector &bps) const; + + private: + void processFuzzySa(std::vector &bps, + std::vector::iterator startingIt, + SuppAlignmentAnno *startingSa) const; + + void dbSweep(std::vector &bps, + std::vector::iterator startingIt, + int increment, + SuppAlignmentAnno *consensusSa, + std::vector &processedSas) const; + + void selectBestSa(std::vector &processedSas, + SuppAlignmentAnno *consensusSa) const; + + void processFuzzySa(std::vector &bps, + std::vector::iterator startingIt, + SuppAlignmentAnno *startingSa) const; + + void dbSweep(std::vector &bps, std::vector::iterator startingIt, + std::unordered_set &fileIndices, + int increment, + SuppAlignmentAnno *consensusSa, + std::vector &processedSas) const; + + void 
selectBestSa(std::vector &processedSas, + SuppAlignmentAnno *consensusSa, + const std::unordered_set &fileIndices) const; + + const ChrSize MAX_DISTANCE; + + const bool MREF_MODE; + }; } // namespace sophia diff --git a/include/GenericChrConverter.h b/include/GenericChrConverter.h new file mode 100644 index 0000000..4a1d2b8 --- /dev/null +++ b/include/GenericChrConverter.h @@ -0,0 +1,196 @@ +/* + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#ifndef GenericChrConverter_H_ +#define GenericChrConverter_H_ + +#include "ChrConverter.h" +#include "global.h" +#include "ChrCategory.h" +#include "ChrInfo.h" +#include "ChrInfoTable.h" +#include +#include +#include +#include + + +namespace sophia { + + /** This converter provides fast by-index access to the ChrInfoTable. + * + * Manage all and compressed mref chromosomes. + * Allow mapping of all and compressed mref chromosomes. + * Freely mix (order) compressed mref chromosomes into all chromosemes. + * + * This implementation still leaks the compressed mref chromosome detail at a very low + * level into the code. More can probably only be improved, if the infamous parse/business + * logic mash up in the client code is resolved. 
+ **/ + class GenericChrConverter: public ChrConverter { + + public: + + using ChrToIndexMap = + boost::unordered::unordered_map; + using CompressedMrefChrToIndexMap = + boost::unordered::unordered_map; + + protected: + + /** The ChrInfoTable provides some access to chromosomes and chromosome categories and + * guarantees a consistent order of chromosomes. Thus, we use the ChrInfoTable as the + * to access chromosome names and sizes using their global index. */ + const ChrInfoTable chrInfoTable; + + /** ChrInfoTable, is *not* actually for the index-based access. Therefore, as we need + * a ChrName -> ChrIndex mapping, we manage this mapping here. */ + const ChrToIndexMap allChromosomeLookup; + static ChrToIndexMap + buildAllChromosomeLookup(const ChrInfoTable::ChrNames &chr_info); + + /** A mapping table to convert the compressed mref indices into the global index space */ + const std::vector compressedToAllMapping; + static std::vector + buildCompressedMrefToAllMapping(ChrInfoTable chrInfoIn); + + /** A mapping table to convert the global indices into the compressed mref index space. + * This is just a vector indexable by ChrIndex, that contains a non-null optional value + * with the CompressedMrefIndex. */ + const std::vector> allToCompressedMapping; + static std::vector> + buildAllToCompressedMrefMapping(ChrInfoTable chrInfoIn); + + // Helper functions + + // ... for parsing + static + ChrName + parseChrBreakPoint(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharExt); + + static + ChrName + parseChrSimple(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar); + + public: + + /** Initialize the hg38 chromosome converter with different types of contig/chromosome + * names and the sizes of the corresponding chromosomes. 
+ **/ + GenericChrConverter(std::string assemblyName, + ChrInfoTable chrInfo); + + /** This default constructor only makes sense, as long as the hg38 chromosome names are + hard-coded. */ + GenericChrConverter(); + + /** Number of chromosomes. */ + ChrIndex nChromosomes() const; + + /** Number of compressed mref chromosomes. */ + CompressedMrefIndex nChromosomesCompressedMref() const; + + /** Map an index position to a chromosome name. */ + ChrName indexToChrName(ChrIndex index) const; + + /** Map an index position to a chromosome name for compressed mref files. */ + ChrName compressedMrefIndexToChrName(CompressedMrefIndex index) const; + + // The following methods could also be implemented as isCategory(ChrIndex, ChrCategory), + // but, for performance reason we provide them as separate methods. + + /** chr1-chr22 */ + bool isAutosome(ChrIndex index) const; + + /** chrX */ + bool isX(ChrIndex index) const; + + /** chrY */ + bool isY(ChrIndex index) const; + + /** chrX, chrY */ + bool isGonosome(ChrIndex index) const; + + /** phix index. */ + bool isTechnical(ChrIndex index) const; + + /** NC_007605, EBV. */ + bool isVirus(ChrIndex index) const; + + /** Mitochondrial chromosome index. */ + bool isExtrachromosomal(ChrIndex index) const; + + /** Decoy sequence index. */ + bool isDecoy(ChrIndex index) const; + + /** HLA chromosome index. */ + bool isHLA(ChrIndex index) const; + + /** ALT chromosome index. */ + bool isALT(ChrIndex index) const; + + /** Unplaced chromosome index. */ + bool isUnassigned(ChrIndex index) const; + + /** Whether the chromosome index is that of a compressed mref chromosome. */ + bool isCompressedMref(ChrIndex index) const; + + /** Map the compressed mref index to the uncompressed mref index. */ + ChrIndex compressedMrefIndexToIndex(CompressedMrefIndex index) const; + + /** Map an index from the global index-space to the compressed mref index-space. 
*/ + CompressedMrefIndex indexToCompressedMrefIndex(ChrIndex index) const; + + /** Map compressed mref index to chromosome size. */ + ChrSize chrSizeCompressedMref(CompressedMrefIndex index) const; + + /** Map a chromosome name to an index position. */ + ChrIndex chrNameToIndex(ChrName chrName) const; + + /** Parse chromosome index. It takes a position in a character stream, and translates the + * following character(s) into index positions (using ChrConverter::indexToChrName). + * If the name cannot be parsed, throws a domain_error exception. + * + * This method parses up to the first occurrence of the `stopCharExt`. Then within the + * identified start and end range, parses up to the last occurrence of `stopChar`. This + * allows to parse a chromosome name "HLA-DRB1*13:01:01" from a string + * "HLA-DRB1*13:01:01:2914|(4,0,0?/0)" by first separating out the `|` separator + * (stopCharExt), and then finding the last `:` separator (stopChar). + **/ + static + ChrName parseChr(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharExt = ""); + + // The same as `parseChr`, but returns the index instead of the name. 
+ ChrIndex parseChrAndReturnIndex(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharExt = "") const; + + }; + +} + +#endif /* GenericChrConverter_H_ */ \ No newline at end of file diff --git a/include/GermlineMatch.h b/include/GermlineMatch.h index e4e1008..4119130 100644 --- a/include/GermlineMatch.h +++ b/include/GermlineMatch.h @@ -27,34 +27,31 @@ namespace sophia { - using namespace std; - - -class GermlineMatch { -public: - GermlineMatch(double clonalityIn, double conservativeClonalityIn, const vector>& suppMatchesIn); - const vector& getSuppMatches() const { - return suppMatches; - } - - const vector& getClonalities() const { - return clonalities; - } - - double getClonality() const { - return clonality; - } - - double getConservativeClonality() const { - return conservativeClonality; - } - -private: - double clonality; - double conservativeClonality; - vector suppMatches; - vector clonalities; -}; + class GermlineMatch { + public: + GermlineMatch(double clonalityIn, double conservativeClonalityIn, const std::vector>& suppMatchesIn); + const std::vector& getSuppMatches() const { + return suppMatches; + } + + const std::vector& getClonalities() const { + return clonalities; + } + + double getClonality() const { + return clonality; + } + + double getConservativeClonality() const { + return conservativeClonality; + } + + private: + double clonality; + double conservativeClonality; + std::vector suppMatches; + std::vector clonalities; + }; } /* namespace sophia */ diff --git a/include/GlobalAppConfig.h b/include/GlobalAppConfig.h new file mode 100644 index 0000000..82fbd6e --- /dev/null +++ b/include/GlobalAppConfig.h @@ -0,0 +1,68 @@ +/* + * GlobalAppConfig.h + * + * Author: Philip R.
Kensche Copyright (C) 2023 DKFZ Heidelberg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#ifndef GLOBALAPPCONFIG_H +#define GLOBALAPPCONFIG_H + +#include +#include +#include "ChrConverter.h" + + +namespace sophia { + + /** Keep global application config in this singleton. This is mostly to avoid having to hand + around configurations. */ + class GlobalAppConfig { + + private: + static GlobalAppConfig *instance_; + static std::mutex mutex_; + + GlobalAppConfig(std::unique_ptr chrConverter); + + protected: + + /** The chromosome converter. */ + const std::unique_ptr chrConverter; + + public: + + ~GlobalAppConfig(); + + const ChrConverter &getChrConverter() const; + + /** Prevent copying. */ + GlobalAppConfig(GlobalAppConfig &other) = delete; + + /** Prevent assignment. */ + void operator=(const GlobalAppConfig &) = delete; + + /** Factory method. */ + static GlobalAppConfig &init(std::unique_ptr chrConverter); + + /** Getter. 
*/ + static const GlobalAppConfig &getInstance(); + + }; + +} /* namespace sophia */ + +#endif /* GLOBALAPPCONFIG_H */ \ No newline at end of file diff --git a/include/HelperFunctions.h b/include/HelperFunctions.h index d6ba72f..c2e1409 100644 --- a/include/HelperFunctions.h +++ b/include/HelperFunctions.h @@ -26,13 +26,12 @@ #include #include -namespace sophia { -using namespace std; +namespace sophia { -const int EXITCODE_IOERROR = 1; + const int EXITCODE_IOERROR = 1; -istream &error_terminating_getline(istream &is, string &str); + std::istream &error_terminating_getline(std::istream &is, std::string &str); } /* namespace sophia */ diff --git a/include/Hg37ChrConverter.h b/include/Hg37ChrConverter.h new file mode 100644 index 0000000..50c0c0c --- /dev/null +++ b/include/Hg37ChrConverter.h @@ -0,0 +1,211 @@ +/* + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#ifndef HG37CHRCONVERTER_H_ +#define HG37CHRCONVERTER_H_ + +#include "ChrConverter.h" +#include "global.h" +#include +#include + + +namespace sophia { + + /** Hard-coded chromosome converter for hg37. This tries to encapsulate the implementation + details of the original version. 
*/ + class Hg37ChrConverter: public ChrConverter { + protected: + + /** The constructor does additional checks of the dimensions of the input vectors. */ + Hg37ChrConverter(const std::vector& indexToChrName, + const std::vector& indexToChrCompressedMref, + const std::vector& chrSizesCompressedMref, + const std::vector& indexToCompressedMrefIndex); + + /** Mapping indices to chromosome names. */ + const std::vector _indexToChrName; + + /** Mapping indices to chromosome names for compressed mref indices. */ + const std::vector _compressedMrefIndexToChrName; + + /** Chromosome sizes in base pairs, only for compressed mref chromosomes. */ + const std::vector _chrSizesCompressedMref; + + /** Mapping indices to compressed mref indices. */ + const std::vector _indexToCompressedMrefIndex; + + /* Mapping of compressed mref indices to indices. */ + const std::vector _compressedMrefIndexToIndex; + + static bool isValid(ChrIndex index); + + static void assertValid(ChrIndex index); + + static bool isValid(CompressedMrefIndex index); + + static void assertValid(CompressedMrefIndex index); + + // The following static methods are used for checks during construction, but also + // to implement the public interface. + + /** chr1-chr22, GL00+ */ + inline static bool _isAutosome(ChrIndex index); + + /** chrX */ + inline static bool _isX(ChrIndex index); + + /** chrY */ + inline static bool _isY(ChrIndex index); + + /** chrX, chrY */ + inline static bool _isGonosome(ChrIndex index); + + /** phix index. */ + inline static bool _isTechnical(ChrIndex index); + + /** NC_007605. */ + inline static bool _isVirus(ChrIndex index); + + /** Mitochondrial chromosome index. */ + inline static bool _isExtrachromosomal(ChrIndex index); + + /** Decoy sequence index.
*/ + inline static bool _isDecoy(ChrIndex index); + + /** GL00.+ */ + inline static bool _isUnassigned(ChrIndex index); + + /** none */ + inline static bool _isALT(ChrIndex index); + + /** none */ + inline static bool _isHLA(ChrIndex index); + + public: + + static const std::string assemblyName; + + static std::vector _buildCompressedMrefIndexToIndex( + CompressedMrefIndex nChromosomes, + const std::vector &indexToCompressedMrefIndex); + + Hg37ChrConverter(); + + /** Return the number of chromosomes. */ + ChrIndex nChromosomes() const; + + /** Number of compressed mref chromosomes. */ + CompressedMrefIndex nChromosomesCompressedMref() const; + + /** Map an index position to a chromosome name. */ + std::string indexToChrName(ChrIndex index) const; + + /** Map an index position to a chromosome name for compressed mref files. */ + std::string compressedMrefIndexToChrName(CompressedMrefIndex index) const; + + /** chr1-chr22, GL00+ */ + bool isAutosome(ChrIndex index) const; + + /** chrX */ + bool isX(ChrIndex index) const; + + /** chrY */ + bool isY(ChrIndex index) const; + + /** chrX, Y, ... */ + bool isGonosome(ChrIndex index) const; + + /** phix index. */ + bool isTechnical(ChrIndex index) const; + + /** NC_007605. */ + bool isVirus(ChrIndex index) const; + + /** Mitochondrial chromosome index. */ + bool isExtrachromosomal(ChrIndex index) const; + + /** Decoy sequence index. */ + bool isDecoy(ChrIndex index) const; + + /** GL00.+ */ + bool isUnassigned(ChrIndex index) const; + + /** none */ + bool isALT(ChrIndex index) const; + + /** none */ + bool isHLA(ChrIndex index) const; + + /** Whether the chromosome index is that of a compressed mref chromosome. */ + bool isCompressedMref(ChrIndex index) const; + + /** Map the compressed mref index to the uncompressed mref index. */ + ChrIndex compressedMrefIndexToIndex(CompressedMrefIndex index) const; + + /** Map an index from the global index-space to the compressed mref index-space. 
*/ + CompressedMrefIndex indexToCompressedMrefIndex(ChrIndex index) const; + + /** Map compressed mref index to chromosome size. */ + ChrSize chrSizeCompressedMref(CompressedMrefIndex index) const; + + /** Map a chromosome name to an index position for compressed mref files. */ + CompressedMrefIndex chrNameToIndexCompressedMref(std::string chrName) const; + + /** Map a chromosome name to an index position. */ + ChrIndex chrNameToIndex(std::string chrName) const; + + bool isInBlockedRegion(ChrIndex chrIndex, ChrSize position) const; + + /* This is parsing code. It takes a position in a character stream, and translates the + following character(s) into index positions (see ChrConverter::indexToChrName). It is + slightly modified from the original implementation by Umut Toprak. + + If the first position is a digit, read up to the next stopChar. + + * (\d+)$ -> $1 + + If the first position is *not* a digit return indices according to the following rules: + + * h -> 999 + * X -> 40 + * Y -> 41 + * MT -> 1001 + * G?(\d+)\. -> $1 + * N -> 1000 + * p -> 1002 + + NOTE: Most of the matches are eager matches, which means the algorithm does not check + whether the end iterator or the stopChar is actually reached! The actual stopChar is + not actually checked in these cases. + + All identifiers not matching any of these rules will throw an exception (domain_error).
+ + IMPORTANT: The hg37 parser, here, ignores the `stopCharExt`, but instead keeps the + legacy behavior only using the `stopChar` + */ + ChrIndex parseChrAndReturnIndex(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharFirst = "") const; + + }; + +} + +#endif /* HG37CHRCONVERTER_H_ */ \ No newline at end of file diff --git a/include/IndexRange.h b/include/IndexRange.h new file mode 100644 index 0000000..7a8b1c0 --- /dev/null +++ b/include/IndexRange.h @@ -0,0 +1,41 @@ +#ifndef INDEXRANGE_H_ +#define INDEXRANGE_H_ + +#include "global.h" + +namespace sophia { + + /** A simple class to represent a 0-based range of indices. + * Chromosome ranges need to be 0-based and right-exclusive. For instance, in the range + * [0, 10), the first contained index is 0 and the last contained index is 9. + * + * The condition start <= end has to hold. + * + * If you set start == end, then you obtain a range of `width()` == 0. + * you can use this to define a range that does not contain any values, i.e. where + * `contains()` always returns false. 
+ **/ + class IndexRange { + + private: + + ChrIndex start_; + ChrIndex end_; + + public: + + /* Throws invalid_argument exception, if start > end */ + IndexRange(ChrIndex start, ChrIndex end); + ~IndexRange() {} + + ChrIndex start() const; + ChrIndex end() const; + + ChrSize width() const; + + bool contains(const ChrIndex &index) const; + }; + +} + +#endif // INDEXRANGE_H_ \ No newline at end of file diff --git a/include/MasterRefProcessor.h b/include/MasterRefProcessor.h index deb370d..1230ffc 100644 --- a/include/MasterRefProcessor.h +++ b/include/MasterRefProcessor.h @@ -26,6 +26,7 @@ #define MASTERREFPROCESSOR_H_ #include "SuppAlignment.h" +#include "global.h" #include #include #include @@ -40,25 +41,32 @@ namespace sophia { -using namespace std; + class MasterRefProcessor { + public: + MasterRefProcessor(const std::vector &filesIn, + const std::string &outputRootName, + const std::string &version, + const ChrSize defaultReadLengthIn); -class MasterRefProcessor { - public: - MasterRefProcessor(const vector &filesIn, - const string &outputRootName, const string &version, - const int defaultReadLengthIn); - ~MasterRefProcessor() = default; + ~MasterRefProcessor() = default; - private: - unsigned long long processFile(const string &gzPath, short fileIndex); - bool processBp(BreakpointReduced &bp, int chrIndex, short fileIndex); - const int NUMPIDS; - const int DEFAULTREADLENGTH; - unique_ptr mergedBpsOutput; - vector> mrefDb; -}; + private: + // Note that all methods and fields are private. + // The MasterRefProcessor does all the work during construction time. + + unsigned long long processFile(const std::string &gzPath, short fileIndex); + bool processBp(BreakpointReduced &bp, ChrIndex chrIndex, short fileIndex); + + const int NUM_PIDS; + const ChrSize DEFAULT_READ_LENGTH; + std::unique_ptr mergedBpsOutput; + + /** This will be a huge data structure, that contains one MrefEntry per position in the + * master reference chromosomes. 
+ **/ + std::vector> mrefDb; + }; } // namespace sophia -/* namespace sophiaMref */ #endif /* MASTERREFPROCESSOR_H_ */ diff --git a/include/MateInfo.h b/include/MateInfo.h index 35e3657..4755f62 100644 --- a/include/MateInfo.h +++ b/include/MateInfo.h @@ -25,68 +25,50 @@ #ifndef MATEINFO_H_ #define MATEINFO_H_ #include "SuppAlignment.h" +#include "global.h" #include namespace sophia { -struct MateInfo { - int readStartPos; - int readEndPos; - int mateChrIndex; - int mateStartPos; - int mateEndPos; - bool inverted; - int source; - int evidenceLevel; - int matePower; - int inversionSupport; - int straightSupport; - std::vector bpLocs; - bool saSupporter; - bool toRemove; - bool operator<(const MateInfo &rhs) const { - if (mateChrIndex < rhs.mateChrIndex) - return true; - if (mateChrIndex > rhs.mateChrIndex) - return false; - if (mateStartPos < rhs.mateStartPos) - return true; - return false; - } - bool suppAlignmentFuzzyMatch(const SuppAlignment &sa) const { - if (mateChrIndex != sa.getChrIndex()) { - return false; - } else { - if (!sa.isFuzzy()) { - return sa.getPos() >= (mateStartPos - sa.getMatchFuzziness()) && - sa.getPos() <= (mateEndPos + sa.getMatchFuzziness()); - } else { - return (mateStartPos - sa.getMatchFuzziness()) <= - sa.getExtendedPos() && - sa.getPos() <= (mateEndPos + sa.getMatchFuzziness()); - } - } - } - MateInfo(int readStartPosIn, int readEndPosIn, int mateChrIndexIn, - int mateStartPosIn, int sourceType, bool invertedIn) - : readStartPos{readStartPosIn}, readEndPos{readEndPosIn}, - mateChrIndex{mateChrIndexIn}, mateStartPos{mateStartPosIn}, - mateEndPos{mateStartPosIn}, inverted{invertedIn}, source{sourceType}, - evidenceLevel{sourceType == 2 ? 
3 : 1}, matePower{1}, - inversionSupport{invertedIn}, straightSupport{!invertedIn}, bpLocs{}, - saSupporter{false}, toRemove{false} {} - MateInfo(int readStartPosIn, int readEndPosIn, int mateChrIndexIn, - int mateStartPosIn, int sourceType, bool invertedIn, - const std::vector &bpLocsIn) - : readStartPos{readStartPosIn}, readEndPos{readEndPosIn}, - mateChrIndex{mateChrIndexIn}, mateStartPos{mateStartPosIn}, - mateEndPos{mateStartPosIn}, inverted{invertedIn}, source{sourceType}, - evidenceLevel{sourceType == 2 ? 3 : 1}, matePower{1}, - inversionSupport{invertedIn}, straightSupport{!invertedIn}, - bpLocs{bpLocsIn}, saSupporter{false}, toRemove{false} {} + struct MateInfo { - bool isToRemove() const { return toRemove; } -}; + ChrSize readStartPos; + ChrSize readEndPos; + ChrIndex mateChrIndex; + ChrSize mateStartPos; + ChrSize mateEndPos; + bool inverted; + int source; + int evidenceLevel; + int matePower; + int inversionSupport; + int straightSupport; + std::vector bpLocs; + bool saSupporter; + bool toRemove; + + bool operator<(const MateInfo &rhs) const; + + bool suppAlignmentFuzzyMatch(const SuppAlignment &sa) const; + + MateInfo(ChrSize readStartPosIn, + ChrSize readEndPosIn, + ChrIndex mateChrIndexIn, + ChrSize mateStartPosIn, + int sourceType, + bool invertedIn); + + MateInfo(ChrSize readStartPosIn, + ChrSize readEndPosIn, + ChrIndex mateChrIndexIn, + ChrSize mateStartPosIn, + int sourceType, + bool invertedIn, + const std::vector &bpLocsIn); + + bool isToRemove() const; + + }; } // namespace sophia #endif /* MATEINFO_H_ */ diff --git a/include/MrefEntry.h b/include/MrefEntry.h index 0d32a2d..bce121b 100644 --- a/include/MrefEntry.h +++ b/include/MrefEntry.h @@ -32,65 +32,102 @@ namespace sophia { -using namespace std; - -class MrefEntry { - public: - static int NUMPIDS; - static int DEFAULTREADLENGTH; - static boost::format doubleFormatter; - MrefEntry(); - void addEntry(Breakpoint &tmpBreakpoint, int fileIndex); - void addEntry(BreakpointReduced 
&tmpBreakpoint, int fileIndex); - void mergeMrefEntries(MrefEntry &entry2); - - int getPos() const { return pos; } - - const vector &getArtifactRatios() const { return artifactRatios; } - - const vector &getFileIndices() const { return fileIndices; } - - short getValidityScore() const { return validity; } - void removeMarkedFuzzies() { - suppAlignments.erase(remove_if(suppAlignments.begin(), - suppAlignments.end(), - [](const SuppAlignmentAnno &sa) { - return sa.isToRemove(); - }), - suppAlignments.end()); - } - string printBpInfo(const string &chromosome); - string printArtifactRatios(const string &chromosome); - SuppAlignmentAnno *searchFuzzySa(const SuppAlignmentAnno &fuzzySa); - vector getSupplementsPtr() { - vector res{}; - for (auto &sa : suppAlignments) { - res.push_back(&sa); + /** + * @brief MrefEntry class is a container for the mref entries + * It contains the position, the file indices, the artifact ratios and the support alignments. + * This class should be memory optimized, as it will be instantiated in the billions, once + * for every chromosome position by the MasterRefProcessor. 
+ */ + class MrefEntry { + public: + + using ValidityScore = signed char; + + static unsigned int NUM_PIDS; + + static ChrSize DEFAULT_READ_LENGTH; + + static boost::format doubleFormatter; + + MrefEntry(); + + void addEntry(Breakpoint &tmpBreakpoint, int fileIndex); + + void addEntry(BreakpointReduced &tmpBreakpoint, int fileIndex); + + void mergeMrefEntries(MrefEntry &entry2); + + ChrSize getPos() const { + if (!isValid()) { + throw_with_trace(std::logic_error("MrefEntry is invalid")); + } + return pos; + } + + const std::vector &getArtifactRatios() const { return artifactRatios; } + + const std::vector &getFileIndices() const { return fileIndices; } + + ValidityScore getValidityScore() const { return validity; } + + void removeMarkedFuzzies() { + suppAlignments.erase(remove_if(suppAlignments.begin(), + suppAlignments.end(), + [](const SuppAlignmentAnno &sa) { + return sa.isToRemove(); + }), + suppAlignments.end()); + } + + std::string printBpInfo(const std::string &chromosome); + + std::string printArtifactRatios(const std::string &chromosome); + + SuppAlignmentAnno *searchFuzzySa(const SuppAlignmentAnno &fuzzySa); + + std::vector getSupplementsPtr() { + std::vector res{}; + for (auto &sa : suppAlignments) { + res.push_back(&sa); + } + return res; + } + + const std::vector &getFileIndicesWithArtifactRatios() const { + return fileIndicesWithArtifactRatios; + } + + const std::vector &getSuppAlignments() const { + return suppAlignments; + } + + void setAsInvalid() { + pos = std::numeric_limits::max(); + validity = -1; + } + + bool isValid() const { + return pos != std::numeric_limits::max(); } - return res; - } - const vector &getFileIndicesWithArtifactRatios() const { - return fileIndicesWithArtifactRatios; - } - const vector &getSuppAlignments() const { - return suppAlignments; - } - - void setAsInvalid() { - pos = -1; - validity = -1; - } - - private: - bool saMatcher(SuppAlignmentAnno *saPtr); - void finalizeFileIndices(); - short validity; //-1 nothing, 0 only 
sa, 1 sa and support - int pos; - vector fileIndices; - vector fileIndicesWithArtifactRatios; - vector artifactRatios; - vector suppAlignments; -}; + + private: + + bool saMatcher(SuppAlignmentAnno *saPtr); + + void finalizeFileIndices(); + + ValidityScore validity; // -1 nothing, 0 only sa, 1 sa and support + + ChrSize pos; + + std::vector fileIndices; + + std::vector fileIndicesWithArtifactRatios; + + std::vector artifactRatios; + + std::vector suppAlignments; + }; } /* namespace sophia */ diff --git a/include/MrefEntryAnno.h b/include/MrefEntryAnno.h index c37a1b0..8e60950 100644 --- a/include/MrefEntryAnno.h +++ b/include/MrefEntryAnno.h @@ -32,98 +32,109 @@ namespace sophia { -using namespace std; - -class MrefEntryAnno { - - public: - static int PIDSINMREF; - static int DEFAULTREADLENGTH; - static boost::format doubleFormatter; - MrefEntryAnno(const string &mrefEntryIn); - template bool operator<(const T &rhs) const { - return pos < rhs.getPos(); - } - template int distanceTo(const T &rhs) const { - return abs(pos - rhs.getPos()); - } - template int distanceToBp(const T &compIn) const { - return abs(pos - compIn.getPos()); - } - bool operator==(const MrefEntryAnno &rhs) const { - return pos == rhs.getPos(); - } - - int getPos() const { return pos; } - vector getSuppAlignmentsPtr() { - vector res{}; - for (auto &sa : suppAlignments) { - res.push_back(&sa); + class MrefEntryAnno { + + public: + + static int PIDS_IN_MREF; + + static ChrSize DEFAULT_READ_LENGTH; + + static boost::format doubleFormatter; + + MrefEntryAnno(const std::string &mrefEntryIn); + + template bool operator<(const T &rhs) const { + return static_cast(pos) < static_cast(rhs.getPos()); } - return res; - } - void removeMarkedFuzzies() { - while (!suppAlignments.empty() && suppAlignments.back().isToRemove()) { - suppAlignments.pop_back(); + template int distanceTo(const T &rhs) const { + return abs(static_cast(pos) - static_cast(rhs.getPos())); + } + template int distanceToBp(const T &compIn) 
const { + return abs(static_cast(pos) - static_cast(compIn.getPos())); } - for (auto saIt = suppAlignments.begin(); saIt != suppAlignments.end(); - ++saIt) { - if (saIt->isToRemove()) { - swap(*saIt, suppAlignments.back()); + + bool operator==(const MrefEntryAnno &rhs) const { + return pos == rhs.getPos(); + } + + ChrSize getPos() const { return pos; } + + std::vector getSuppAlignmentsPtr() { + std::vector res{}; + for (auto &sa : suppAlignments) { + res.push_back(&sa); } - while (!suppAlignments.empty() && - suppAlignments.back().isToRemove()) { + return res; + } + + void removeMarkedFuzzies() { + while (!suppAlignments.empty() && suppAlignments.back().isToRemove()) { suppAlignments.pop_back(); } + for (auto saIt = suppAlignments.begin(); saIt != suppAlignments.end(); + ++saIt) { + if (saIt->isToRemove()) { + std::swap(*saIt, suppAlignments.back()); + } + while (!suppAlignments.empty() && + suppAlignments.back().isToRemove()) { + suppAlignments.pop_back(); + } + } } - } - // SuppAlignmentAnno* searchFuzzySa(const SuppAlignmentAnno& fuzzySa); + // SuppAlignmentAnno* searchFuzzySa(const SuppAlignmentAnno& fuzzySa); - const vector &getSuppAlignments() const { - return suppAlignments; - } - - vector getSupplementsPtr() { - vector res{}; - for (auto &sa : suppAlignments) { - res.push_back(&sa); + const std::vector &getSuppAlignments() const { + return suppAlignments; } - return res; - } - bool closeToSupp(const SuppAlignmentAnno &compIn, int fuzziness) const { - if (compIn.isFuzzy()) { - fuzziness = 2.5 * DEFAULTREADLENGTH; - return (pos - fuzziness) <= (compIn.getExtendedPos() + fuzziness) && - (compIn.getPos() - fuzziness) <= (pos + fuzziness); - } else { - return abs(pos - compIn.getPos()) <= fuzziness; + + std::vector getSupplementsPtr() { + std::vector res{}; + for (auto &sa : suppAlignments) { + res.push_back(&sa); + } + return res; } - } - int distanceToSupp(const SuppAlignmentAnno &compIn) const { - if (compIn.isFuzzy()) { - if (compIn.getPos() <= pos && pos <= 
compIn.getExtendedPos()) { - return 0; + + bool closeToSupp(const SuppAlignmentAnno &compIn, ChrDistance fuzziness) const { + if (compIn.isFuzzy()) { + fuzziness = int(2.5 * DEFAULT_READ_LENGTH); /* truncate */ + return (static_cast(pos) - fuzziness) <= (static_cast(compIn.getExtendedPos()) + fuzziness) && + (static_cast(compIn.getPos()) - fuzziness) <= (static_cast(pos) + fuzziness); } else { - if (pos < compIn.getPos()) { - return compIn.getPos() - pos; + return ChrDistance(abs(static_cast(pos) - static_cast(compIn.getPos()))) <= fuzziness; + } + } + + ChrDistance distanceToSupp(const SuppAlignmentAnno &compIn) const { + ChrDistance result; + if (compIn.isFuzzy()) { + if (compIn.getPos() <= pos && pos <= compIn.getExtendedPos()) { + result = 0; } else { - return pos - compIn.getExtendedPos(); + if (pos < compIn.getPos()) { + result = ChrDistance(compIn.getPos() - pos); + } else { + result = ChrDistance(pos - compIn.getExtendedPos()); + } } + } else { + result = ChrDistance(abs(static_cast(pos) - static_cast(compIn.getPos()))); } - } else { - return abs(pos - compIn.getPos()); + return result; } - } - short getNumHits() const { return numHits; } - void setNumHits(short numHits) { this->numHits = numHits; } + short getNumHits() const { return numHits; } + + void setNumHits(short numHits) { this->numHits = numHits; } - private: - int pos; - short numHits; - vector suppAlignments; -}; + private: + ChrSize pos; + short numHits; + std::vector suppAlignments; + }; } /* namespace sophia */ diff --git a/include/MrefMatch.h b/include/MrefMatch.h index a798949..6cf8ff7 100644 --- a/include/MrefMatch.h +++ b/include/MrefMatch.h @@ -30,25 +30,29 @@ namespace sophia { -using namespace std; - -class MrefMatch { - public: - MrefMatch(short numHitsIn, short numConsevativeHitsIn, int offsetDistanceIn, - const vector &suppMatchesIn); - short getNumConsevativeHits() const { return numConsevativeHits; } - short getNumHits() const { return numHits; } - int getOffsetDistance() const { 
return offsetDistance; } - const vector &getSuppMatches() const { - return suppMatches; - } - - private: - short numHits; - short numConsevativeHits; - int offsetDistance; - vector suppMatches; -}; + class MrefMatch { + public: + MrefMatch(short numHitsIn, + short numConsevativeHitsIn, + int offsetDistanceIn, + const std::vector &suppMatchesIn); + + short getNumConsevativeHits() const { return numConsevativeHits; } + + short getNumHits() const { return numHits; } + + int getOffsetDistance() const { return offsetDistance; } + + const std::vector &getSuppMatches() const { + return suppMatches; + } + + private: + short numHits; + short numConsevativeHits; + int offsetDistance; + std::vector suppMatches; + }; } /* namespace sophia */ diff --git a/include/OverhangRange.h b/include/OverhangRange.h index 5971bfe..a1e762f 100644 --- a/include/OverhangRange.h +++ b/include/OverhangRange.h @@ -25,24 +25,35 @@ #ifndef OVERHANGRANGE_H_ #define OVERHANGRANGE_H_ +#include "global.h" + namespace sophia { -class OverhangRange { - friend class Alignment; - - public: - OverhangRange(bool encounteredMIn, int bpPosIn, int startPosOnReadIn, - int lengthIn) - : encounteredM{encounteredMIn}, bpPos{bpPosIn}, - startPosOnRead{startPosOnReadIn}, length{lengthIn} {} - ~OverhangRange() = default; - - private: - bool encounteredM; - int bpPos; - int startPosOnRead; - int length; -}; + class OverhangRange { + friend class Alignment; + + public: + OverhangRange(bool encounteredMIn, + ChrSize bpPosIn, + ChrSize startPosOnReadIn, + ChrSize lengthIn) + : encounteredM{encounteredMIn}, + bpPos{bpPosIn}, + startPosOnRead{startPosOnReadIn}, + length{lengthIn} {} + + ~OverhangRange() = default; + + private: + + bool encounteredM; + + ChrSize bpPos; + + ChrSize startPosOnRead; + + ChrSize length; + }; } // namespace sophia diff --git a/include/SamSegmentMapper.h b/include/SamSegmentMapper.h index 17af0bc..efd03de 100644 --- a/include/SamSegmentMapper.h +++ b/include/SamSegmentMapper.h @@ -24,6 +24,7 @@ 
#ifndef SAMSEGMENTMAPPER_H_ #define SAMSEGMENTMAPPER_H_ +#include "global.h" #include "Breakpoint.h" #include "CoverageAtBase.h" #include "MateInfo.h" @@ -36,32 +37,52 @@ namespace sophia { -using namespace std; - -class SamSegmentMapper { - public: - SamSegmentMapper(int defaultReadLengthIn); - ~SamSegmentMapper() = default; - void parseSamStream(); - - private: - void printBps(int alignmentStart); - void switchChromosome(const Alignment &alignment); - void incrementCoverages(const Alignment &alignment); - void assignBps(shared_ptr &alignment); - const time_t STARTTIME; - const bool PROPERPARIRCOMPENSATIONMODE; - const int DISCORDANTLEFTRANGE; - const int DISCORDANTRIGHTRANGE; - unsigned int printedBps; - int chrIndexCurrent; - int minPos, maxPos; - map breakpointsCurrent; - deque coverageProfiles; - deque discordantAlignmentsPool; - deque discordantAlignmentCandidatesPool; - deque discordantLowQualAlignmentsPool; -}; + class SamSegmentMapper { + public: + + SamSegmentMapper(ChrSize defaultReadLengthIn); + + ~SamSegmentMapper() = default; + + void parseSamStream(); + + private: + + // Does not print anything by itself, but lets print via another call to + // Breakpoint::finalizeBreakpoint. Then Breakpoint::printBreakpointReport prints + // to stdout. 
+ void printBps(ChrSize alignmentStart); + + void switchChromosome(const Alignment &alignment); + + void incrementCoverages(const Alignment &alignment); + + void assignBps(std::shared_ptr &alignment); + + const time_t STARTTIME; + + const bool PROPER_PAIR_COMPENSATION_MODE; + + const ChrSize DISCORDANT_LEFT_RANGE; + + const ChrSize DISCORDANT_RIGHT_RANGE; + + unsigned int printedBps; + + ChrIndex chrIndexCurrent; + + ChrSize minPos, maxPos; + + std::map breakpointsCurrent; + + std::deque coverageProfiles; + + std::deque discordantAlignmentsPool; + + std::deque discordantAlignmentCandidatesPool; + + std::deque discordantLowQualAlignmentsPool; + }; } /* namespace sophia */ diff --git a/include/Sdust.h b/include/Sdust.h index 2cc1151..ee0ed7a 100644 --- a/include/Sdust.h +++ b/include/Sdust.h @@ -24,55 +24,85 @@ #ifndef SDUST_H_ #define SDUST_H_ +#include "global.h" #include #include #include #include + namespace sophia { -using namespace std; - -struct PerfectInterval; -class Sdust { - public: - Sdust(const vector &overhangIn); - ~Sdust() = default; - const vector &getRes() const { return res; } - - private: - static const int SCORETHRESHOLD = 20; - static const int WINDOWSIZE = 64; - vector res; - set P; - deque w; - int L; - int rW; - int rV; - vector cW; - vector cV; - void saveMaskedRegions(int wStart); - int triplet(const vector &overhangIn, int indexPos); - void shiftWindow(int t); - void addTripletInfo(int &r, vector &c, int t); - void removeTripletInfo(int &r, vector &c, int t); - void findPerfectRegions(int wStart, int r, vector c); -}; -struct PerfectInterval { - int startIndex; - int endIndex; - double score; - bool operator<(const PerfectInterval &rhs) const { - if (startIndex > rhs.startIndex) - return true; - if (startIndex < rhs.startIndex) - return false; - if (endIndex > rhs.endIndex) + struct PerfectInterval; + + class Sdust { + + public: + + Sdust(const std::vector &overhangIn); + + ~Sdust() = default; + + const std::vector &getRes() const { return 
res; } + + private: + + static const int SCORE_THRESHOLD = 20; + + static const int WINDOW_SIZE = 64; + + std::vector res; + + std::set P; + + std::deque w; + + int L; + + int rW; + + int rV; + + std::vector cW; + + std::vector cV; + + void saveMaskedRegions(int wStart); + + int triplet(const std::vector &overhangIn, int indexPos); + + void shiftWindow(int t); + + void addTripletInfo(int &r, std::vector &c, int t); + + + void removeTripletInfo(int &r, std::vector &c, int t); + + void findPerfectRegions(int wStart, int r, std::vector c); + + }; + + struct PerfectInterval { + + int startIndex; + + int endIndex; + + double score; + + bool operator<(const PerfectInterval &rhs) const { + if (startIndex > rhs.startIndex) + return true; + if (startIndex < rhs.startIndex) + return false; + if (endIndex > rhs.endIndex) + return false; + if (endIndex < rhs.endIndex) + return true; return false; - if (endIndex < rhs.endIndex) - return true; - return false; - } -}; + } + + }; + } /* namespace sophia */ #endif /* SDUST_H_ */ diff --git a/include/SuppAlignment.h b/include/SuppAlignment.h index 6e5f469..697310c 100644 --- a/include/SuppAlignment.h +++ b/include/SuppAlignment.h @@ -25,200 +25,273 @@ #ifndef SUPPALIGNMENT_H_ #define SUPPALIGNMENT_H_ #include "CigarChunk.h" +#include "global.h" +#include "ChrConverter.h" +#include "GlobalAppConfig.h" #include #include #include #include #include + namespace sophia { -using namespace std; - -class SuppAlignment { - public: - SuppAlignment(string::const_iterator saCbegin, - string::const_iterator saCend, bool primaryIn, - bool lowMapqSourceIn, bool nullMapqSourceIn, - bool alignmentOnForwardStrand, bool bpEncounteredM, - int originIndexIn, int bpChrIndex, int bpPos); - SuppAlignment(int chrIndexIn, int posIn, int mateSupportIn, - int expectedDiscordantsIn, bool encounteredMIn, - bool invertedIn, int extendedPosIn, bool primaryIn, - bool lowMapqSourceIn, bool nullMapqSourceIn, - int originIndexIn); - SuppAlignment(const string &saIn); - 
~SuppAlignment() = default; - static double ISIZEMAX; - static int DEFAULTREADLENGTH; - string print() const; - void extendSuppAlignment(int minPos, int maxPos) { - pos = min(pos, minPos); - extendedPos = max(extendedPos, maxPos); - } - bool saCloseness(const SuppAlignment &rhs, int fuzziness) const; - bool saDistHomologyRescueCloseness(const SuppAlignment &rhs, - int fuzziness) const; - void padMateSupportHomologyRescue() { expectedDiscordants = mateSupport; } - void removeFuzziness(const SuppAlignment &sa) { - pos = sa.getPos(); - extendedPos = pos; - fuzzy = false; - if (!distant && sa.isDistant()) { - distant = true; + class SuppAlignment { + private: + + SuppAlignment(); + + public: + + /** + * @param originIndexIn Index into an array of supporting alignments + **/ + static SuppAlignment create( + ChrIndex chrIndexIn, + ChrSize posIn, + int mateSupportIn, + int expectedDiscordantsIn, + bool encounteredMIn, + bool invertedIn, + ChrSize extendedPosIn, + bool primaryIn, + bool lowMapqSourceIn, + bool nullMapqSourceIn, + int originIndexIn); + + /** Parse the supplementary alignment information as displayed in columns 6 and 7 of the + * breakpoints BED format and generated by SuppAlignment::print. */ + static SuppAlignment parseSaSupport(const std::string &saIn); + + /** Parse the supplementary alignment information from SAM format SA:Z: tags. + * + * @param originIndexIn Index into an array of supporting alignments + */ + static SuppAlignment parseSamSaTag( + std::string::const_iterator saCbegin, + std::string::const_iterator saCend, + bool primaryIn, + bool lowMapqSourceIn, + bool nullMapqSourceIn, + bool alignmentOnForwardStrand, + bool bpEncounteredM, + int originIndexIn, + ChrIndex bpChrIndex, + int bpPos); + + ~SuppAlignment() = default; + + static double ISIZEMAX; + + static ChrSize DEFAULT_READ_LENGTH; + + /** Print the supplementary alignment information as displayed in columns 6 and 7 of the + * breakpoints BED format. 
This will be parsed by + * `SuppAlignmentAnno::SuppAlignmentAnno(const std::string&)`. */ + std::string print() const; + + void extendSuppAlignment(ChrSize minPos, ChrSize maxPos) { + pos = std::min(pos, minPos); + extendedPos = std::max(extendedPos, maxPos); } - } - int getChrIndex() const { return chrIndex; } - bool isEncounteredM() const { return encounteredM; } - bool isInverted() const { return inverted; } - int getMateSupport() const { return mateSupport; } - void incrementDistinctReads() { ++distinctReads; } - void incrementMateSupport(int incrementIn) { mateSupport += incrementIn; } - void setMateSupport(int mateSupportIn) { mateSupport = mateSupportIn; } - int getPos() const { return pos; } - bool isPrimary() const { return primary; } - int getSupport() const { return support; } - void addSupportingIndices(const vector &supportingIndicesIn) { - supportingIndices.insert(supportingIndices.end(), - supportingIndicesIn.cbegin(), - supportingIndicesIn.cend()); - } - void addSecondarySupportIndices(int supportingIndicesSecondaryIn) { - supportingIndicesSecondary.push_back(supportingIndicesSecondaryIn); - } - void addSecondarySupportIndices( - const vector &supportingIndicesSecondaryIn) { - supportingIndicesSecondary.insert(supportingIndicesSecondary.end(), - supportingIndicesSecondaryIn.cbegin(), - supportingIndicesSecondaryIn.cend()); - } - void finalizeSupportingIndices(); - int getSecondarySupport() const { return secondarySupport; } - bool isToRemove() const { return toRemove; } - void setToRemove(bool toRemove) { this->toRemove = toRemove; } - int getMapq() const { return mapq; } - void setMapq(int mapq) { this->mapq = mapq; } - bool isSuspicious() const { return suspicious; } - void setSuspicious(bool suspicious) { this->suspicious = suspicious; } - bool isDistant() const { return distant; } - const vector &getSupportingIndices() const { - return supportingIndices; - } - const vector &getSupportingIndicesSecondary() const { - return 
supportingIndicesSecondary; - } - - void setExpectedDiscordants(int expectedDiscordants) { - this->expectedDiscordants = expectedDiscordants; - } - - int getExpectedDiscordants() const { return expectedDiscordants; } - - int getDistinctReads() const { return distinctReads; } - - int getMatchFuzziness() const { return matchFuzziness; } - - bool isFuzzy() const { return fuzzy; } - bool isStrictFuzzy() const { return strictFuzzy; } - int getExtendedPos() const { return extendedPos; } - - bool isLowMapqSource() const { return lowMapqSource; } - void mrefSaTransform(int fileIndex) { - support = 0; - secondarySupport = 0; - supportingIndices.clear(); - supportingIndices.push_back(fileIndex); - } - void mrefSaConsensus(const unordered_set &fileIndices) { - supportingIndices.clear(); - for (const auto &index : fileIndices) { - supportingIndices.push_back(index); + + bool saCloseness(const SuppAlignment &rhs, int fuzziness) const; + + bool saDistHomologyRescueCloseness(const SuppAlignment &rhs, + int fuzziness) const; + + void padMateSupportHomologyRescue() { expectedDiscordants = mateSupport; } + + void removeFuzziness(const SuppAlignment &sa) { + pos = sa.getPos(); + extendedPos = pos; + fuzzy = false; + if (!distant && sa.isDistant()) { + distant = true; + } } - } - void mergeSa(const SuppAlignment &rhs) { - support = max(support, rhs.getSupport()); - secondarySupport = max(secondarySupport, rhs.getSecondarySupport()); - if (rhs.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { - if ((0.0 + rhs.getMateSupport()) / rhs.getExpectedDiscordants() > - (0.0 + mateSupport) / expectedDiscordants) { + + ChrIndex getChrIndex() const { return chrIndex; } + + bool isEncounteredM() const { return encounteredM; } + + bool isInverted() const { return inverted; } + + int getMateSupport() const { return mateSupport; } + + void incrementDistinctReads() { ++distinctReads; } + + void incrementMateSupport(int incrementIn) { mateSupport += incrementIn; } + + void setMateSupport(int 
mateSupportIn) { mateSupport = mateSupportIn; } + + ChrPosition getPos() const { return pos; } + + bool isPrimary() const { return primary; } + + int getSupport() const { return support; } + + void addSupportingIndices(const std::vector &supportingIndicesIn) { + supportingIndices.insert(supportingIndices.end(), + supportingIndicesIn.cbegin(), + supportingIndicesIn.cend()); + } + + void addSecondarySupportIndices(int supportingIndicesSecondaryIn) { + supportingIndicesSecondary.push_back(supportingIndicesSecondaryIn); + } + + void addSecondarySupportIndices( + const std::vector &supportingIndicesSecondaryIn) { + supportingIndicesSecondary.insert(supportingIndicesSecondary.end(), + supportingIndicesSecondaryIn.cbegin(), + supportingIndicesSecondaryIn.cend()); + } + + void finalizeSupportingIndices(); + + int getSecondarySupport() const { return secondarySupport; } + + bool isToRemove() const { return toRemove; } + + void setToRemove(bool toRemove) { this->toRemove = toRemove; } + + int getMapq() const { return mapq; } + + void setMapq(int mapq) { this->mapq = mapq; } + + bool isSuspicious() const { return suspicious; } + + void setSuspicious(bool suspicious) { this->suspicious = suspicious; } + + bool isDistant() const { return distant; } + + const std::vector &getSupportingIndices() const { + return supportingIndices; + } + + const std::vector &getSupportingIndicesSecondary() const { + return supportingIndicesSecondary; + } + + void setExpectedDiscordants(int expectedDiscordants) { + this->expectedDiscordants = expectedDiscordants; + } + + int getExpectedDiscordants() const { return expectedDiscordants; } + + int getDistinctReads() const { return distinctReads; } + + ChrPositionDifference getMatchFuzziness() const { return matchFuzziness; } + + bool isFuzzy() const { return fuzzy; } + + bool isStrictFuzzy() const { return strictFuzzy; } + + ChrPosition getExtendedPos() const { return extendedPos; } + + bool isLowMapqSource() const { return lowMapqSource; } + + void 
mrefSaTransform(int fileIndex) { + support = 0; + secondarySupport = 0; + supportingIndices.clear(); + supportingIndices.push_back(fileIndex); + } + + void mrefSaConsensus(const std::unordered_set &fileIndices) { + supportingIndices.clear(); + for (const auto &index : fileIndices) { + supportingIndices.push_back(index); + } + } + + void mergeSa(const SuppAlignment &rhs) { + support = std::max(support, rhs.getSupport()); + secondarySupport = std::max(secondarySupport, rhs.getSecondarySupport()); + if (rhs.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { + if ((0.0 + rhs.getMateSupport()) / rhs.getExpectedDiscordants() > + (0.0 + mateSupport) / expectedDiscordants) { + mateSupport = rhs.getMateSupport(); + expectedDiscordants = rhs.getExpectedDiscordants(); + } + } else if (rhs.getExpectedDiscordants() > 0) { mateSupport = rhs.getMateSupport(); expectedDiscordants = rhs.getExpectedDiscordants(); } - } else if (rhs.getExpectedDiscordants() > 0) { - mateSupport = rhs.getMateSupport(); - expectedDiscordants = rhs.getExpectedDiscordants(); } - } - void mergeMrefSa(const SuppAlignment &mrefSa) { - for (auto index : mrefSa.getSupportingIndices()) { - supportingIndices.push_back(index); - } - sort(supportingIndices.begin(), supportingIndices.end()); - sort(supportingIndicesSecondary.begin(), - supportingIndicesSecondary.end()); - supportingIndices.erase( - unique(supportingIndices.begin(), supportingIndices.end()), - supportingIndices.end()); - if (mrefSa.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { - if ((0.0 + mrefSa.getMateSupport()) / - mrefSa.getExpectedDiscordants() > - (0.0 + mateSupport) / expectedDiscordants) { + + void mergeMrefSa(const SuppAlignment &mrefSa) { + for (auto index : mrefSa.getSupportingIndices()) { + supportingIndices.push_back(index); + } + sort(supportingIndices.begin(), supportingIndices.end()); + sort(supportingIndicesSecondary.begin(), + supportingIndicesSecondary.end()); + supportingIndices.erase( + 
unique(supportingIndices.begin(), supportingIndices.end()), + supportingIndices.end()); + if (mrefSa.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { + if ((0.0 + mrefSa.getMateSupport()) / + mrefSa.getExpectedDiscordants() > + (0.0 + mateSupport) / expectedDiscordants) { + mateSupport = mrefSa.getMateSupport(); + expectedDiscordants = mrefSa.getExpectedDiscordants(); + } + } else if (mrefSa.getExpectedDiscordants() > 0) { mateSupport = mrefSa.getMateSupport(); expectedDiscordants = mrefSa.getExpectedDiscordants(); } - } else if (mrefSa.getExpectedDiscordants() > 0) { - mateSupport = mrefSa.getMateSupport(); - expectedDiscordants = mrefSa.getExpectedDiscordants(); + if (!mrefSa.isSemiSuspicious() && semiSuspicious) { + semiSuspicious = false; + } } - if (!mrefSa.isSemiSuspicious() && semiSuspicious) { - semiSuspicious = false; + + bool isSemiSuspicious() const { return semiSuspicious; } + + void setSemiSuspicious(bool semiSuspicious) { + this->semiSuspicious = semiSuspicious; + } + + bool isNullMapqSource() const { return nullMapqSource; } + + void setNullMapqSource(bool nullMapqSource) { + this->nullMapqSource = nullMapqSource; } - } - - bool isSemiSuspicious() const { return semiSuspicious; } - - void setSemiSuspicious(bool semiSuspicious) { - this->semiSuspicious = semiSuspicious; - } - - bool isNullMapqSource() const { return nullMapqSource; } - - void setNullMapqSource(bool nullMapqSource) { - this->nullMapqSource = nullMapqSource; - } - - bool isProperPairErrorProne() const { return properPairErrorProne; } - - void setProperPairErrorProne(bool properPairErrorProne) { - this->properPairErrorProne = properPairErrorProne; - } - - private: - int matchFuzziness; - int chrIndex; - int pos; - int extendedPos; - int mapq; - vector supportingIndices; - vector supportingIndicesSecondary; - int distinctReads; - int support; - int secondarySupport; - int mateSupport; - int expectedDiscordants; - bool encounteredM; - bool toRemove; - bool inverted; - bool fuzzy; 
- bool strictFuzzy; - bool distant; - bool lowMapqSource; - bool nullMapqSource; - bool suspicious; - bool semiSuspicious; - bool properPairErrorProne; - bool primary; -}; + + bool isProperPairErrorProne() const { return properPairErrorProne; } + + void setProperPairErrorProne(bool properPairErrorProne) { + this->properPairErrorProne = properPairErrorProne; + } + + private: + ChrPositionDifference matchFuzziness; + ChrIndex chrIndex; + ChrPosition pos; + ChrPosition extendedPos; + int mapq; + std::vector supportingIndices; + std::vector supportingIndicesSecondary; + int distinctReads; + int support; + int secondarySupport; + int mateSupport; + int expectedDiscordants; + bool encounteredM; + bool toRemove; + bool inverted; + bool fuzzy; + bool strictFuzzy; + bool distant; + bool lowMapqSource; + bool nullMapqSource; + bool suspicious; + bool semiSuspicious; + bool properPairErrorProne; + bool primary; + }; } /* namespace sophia */ #endif /* SUPPALIGNMENT_H_ */ diff --git a/include/SuppAlignmentAnno.h b/include/SuppAlignmentAnno.h index 4710bdd..841a9e2 100644 --- a/include/SuppAlignmentAnno.h +++ b/include/SuppAlignmentAnno.h @@ -26,6 +26,7 @@ #define SUPPALIGNMENTANNO_H_ #include "CigarChunk.h" #include "SuppAlignment.h" +#include "global.h" #include #include #include @@ -34,112 +35,164 @@ namespace sophia { -using namespace std; - -class SuppAlignmentAnno { - public: - SuppAlignmentAnno(const string &saStrIn); - SuppAlignmentAnno(const SuppAlignment &saIn); - SuppAlignmentAnno(const SuppAlignmentAnno &saAnnoIn); - SuppAlignmentAnno(int emittingBpChrIndex, int emittingBpPos, - const SuppAlignmentAnno &saAnnoIn); - ~SuppAlignmentAnno() = default; - static double ISIZEMAX; - static int DEFAULTREADLENGTH; - string print() const; - void extendSuppAlignment(int minPos, int maxPos) { - pos = min(pos, minPos); - extendedPos = max(extendedPos, maxPos); - } - bool saCloseness(const SuppAlignmentAnno &rhs, int fuzziness) const; - bool saClosenessDirectional(const 
SuppAlignmentAnno &rhs, - int fuzziness) const; - void removeFuzziness(const SuppAlignmentAnno &sa) { - pos = sa.getPos(); - extendedPos = pos; - fuzzy = false; - if (!distant && sa.isDistant()) { - distant = true; + /** + * @brief The SuppAlignmentAnno class + * This is similar to SuppAlignment. + * + * Note that this class is under size constraints, as it will be instantiated via MrefEntry + * once for each genome position in MasterRefEntry. + **/ + class SuppAlignmentAnno { + + public: + + SuppAlignmentAnno(const std::string &saStrIn); + + SuppAlignmentAnno(const SuppAlignment &saIn); + + SuppAlignmentAnno(const SuppAlignmentAnno &saAnnoIn); + + SuppAlignmentAnno(ChrIndex emittingBpChrIndex, + ChrSize emittingBpPos, + const SuppAlignmentAnno &saAnnoIn); + + ~SuppAlignmentAnno() = default; + + static double ISIZEMAX; + + static ChrSize DEFAULT_READ_LENGTH; + + std::string print() const; + + void extendSuppAlignment(ChrSize minPos, ChrSize maxPos) { + pos = std::min(pos, minPos); + extendedPos = std::max(extendedPos, maxPos); + } + + bool saCloseness(const SuppAlignmentAnno &rhs, int fuzziness) const; + + bool saClosenessDirectional(const SuppAlignmentAnno &rhs, + int fuzziness) const; + + void removeFuzziness(const SuppAlignmentAnno &sa) { + pos = sa.getPos(); + extendedPos = pos; + fuzzy = false; + if (!distant && sa.isDistant()) { + distant = true; + } + } + + ChrIndex getChrIndex() const { return chrIndex; } + + bool isEncounteredM() const { return encounteredM; } + + bool isInverted() const { return inverted; } + + int getMateSupport() const { return mateSupport; } + + void incrementMateSupport() { ++mateSupport; } + + void setMateSupport(int mateSupportIn) { mateSupport = mateSupportIn; } + + ChrSize getPos() const { return pos; } + + int getSupport() const { return support; } + + int getSecondarySupport() const { return secondarySupport; } + + bool isToRemove() const { return toRemove; } + + void setToRemove(bool toRemove) { this->toRemove = toRemove; } + + 
bool isSuspicious() const { return suspicious; } + + void setSuspicious(bool suspicious) { this->suspicious = suspicious; } + + bool isDistant() const { return distant; } + + void setExpectedDiscordants(int expectedDiscordants) { + this->expectedDiscordants = expectedDiscordants; + } + + int getExpectedDiscordants() const { return expectedDiscordants; } + + bool isFuzzy() const { return fuzzy; } + + bool isStrictFuzzy() const { return strictFuzzy; } + + ChrSize getExtendedPos() const { return extendedPos; } + + bool isSemiSuspicious() const { return semiSuspicious; } + + void setSemiSuspicious(bool semiSuspicious) { + this->semiSuspicious = semiSuspicious; + } + + void setFuzzy(bool fuzzy) { this->fuzzy = fuzzy; } + + bool isProperPairErrorProne() const { return properPairErrorProne; } + + bool isStrictFuzzyCandidate() const { return strictFuzzyCandidate; } + + void addSupportingIndices(const std::vector &supportingIndicesIn) { + supportingIndices.insert(supportingIndices.end(), + supportingIndicesIn.cbegin(), + supportingIndicesIn.cend()); } - } - int getChrIndex() const { return chrIndex; } - bool isEncounteredM() const { return encounteredM; } - bool isInverted() const { return inverted; } - int getMateSupport() const { return mateSupport; } - void incrementMateSupport() { ++mateSupport; } - void setMateSupport(int mateSupportIn) { mateSupport = mateSupportIn; } - int getPos() const { return pos; } - int getSupport() const { return support; } - int getSecondarySupport() const { return secondarySupport; } - bool isToRemove() const { return toRemove; } - void setToRemove(bool toRemove) { this->toRemove = toRemove; } - bool isSuspicious() const { return suspicious; } - void setSuspicious(bool suspicious) { this->suspicious = suspicious; } - bool isDistant() const { return distant; } - void setExpectedDiscordants(int expectedDiscordants) { - this->expectedDiscordants = expectedDiscordants; - } - int getExpectedDiscordants() const { return expectedDiscordants; } - 
bool isFuzzy() const { return fuzzy; } - bool isStrictFuzzy() const { return strictFuzzy; } - int getExtendedPos() const { return extendedPos; } - bool isSemiSuspicious() const { return semiSuspicious; } - void setSemiSuspicious(bool semiSuspicious) { - this->semiSuspicious = semiSuspicious; - } - void setFuzzy(bool fuzzy) { this->fuzzy = fuzzy; } - - bool isProperPairErrorProne() const { return properPairErrorProne; } - - bool isStrictFuzzyCandidate() const { return strictFuzzyCandidate; } - void addSupportingIndices(const vector &supportingIndicesIn) { - supportingIndices.insert(supportingIndices.end(), - supportingIndicesIn.cbegin(), - supportingIndicesIn.cend()); - } - const vector &getSupportingIndices() const { - return supportingIndices; - } - void mergeMrefSa(const SuppAlignmentAnno &mrefSa); - void finalizeSupportingIndices(); - void mrefSaTransform(int fileIndex) { - supportingIndices.clear(); - supportingIndices.push_back(fileIndex); - } - void mrefSaConsensus(const unordered_set &fileIndices) { - supportingIndices.clear(); - for (const auto &index : fileIndices) { - supportingIndices.push_back(index); + + const std::vector &getSupportingIndices() const { + return supportingIndices; + } + + void mergeMrefSa(const SuppAlignmentAnno &mrefSa); + + void finalizeSupportingIndices(); + + void mrefSaTransform(int fileIndex) { + supportingIndices.clear(); + supportingIndices.push_back(fileIndex); + } + + void mrefSaConsensus(const std::unordered_set &fileIndices) { + supportingIndices.clear(); + for (const auto &index : fileIndices) { + supportingIndices.push_back(index); + } } - } - void addFileIndex(int fileIndex) { supportingIndices.push_back(fileIndex); } - - void setSecondarySupport(int secondarySupport) { - this->secondarySupport = secondarySupport; - } - - void setSupport(int support) { this->support = support; } - - private: - int chrIndex; - int pos; - int extendedPos; - int support; - int secondarySupport; - int mateSupport; - int expectedDiscordants; 
- bool encounteredM; - bool toRemove; - bool inverted; - bool fuzzy; - bool strictFuzzy; - bool strictFuzzyCandidate; - bool distant; - bool suspicious; - bool semiSuspicious; - bool properPairErrorProne; - vector supportingIndices; -}; + + void addFileIndex(int fileIndex) { supportingIndices.push_back(fileIndex); } + + void setSecondarySupport(int secondarySupport) { + this->secondarySupport = secondarySupport; + } + + void setSupport(int support) { this->support = support; } + + private: + ChrIndex chrIndex; + ChrSize pos; + ChrSize extendedPos; + int support; + int secondarySupport; + int mateSupport; + int expectedDiscordants; + bool encounteredM; + bool toRemove; + bool inverted; + bool fuzzy; + bool strictFuzzy; + bool strictFuzzyCandidate; + bool distant; + bool suspicious; + bool semiSuspicious; + bool properPairErrorProne; + std::vector supportingIndices; + + static const std::string STOP_CHARS; + inline bool isStopChar(char c); + + }; } /* namespace sophia */ #endif /* SUPPALIGNMENTANNO_H_ */ diff --git a/include/SvEvent.h b/include/SvEvent.h index 20f5dda..03d65a4 100644 --- a/include/SvEvent.h +++ b/include/SvEvent.h @@ -25,6 +25,7 @@ #ifndef SVEVENT_H_ #define SVEVENT_H_ +#include "global.h" #include "Breakpoint.h" #include "GermlineMatch.h" #include "MrefMatch.h" @@ -39,193 +40,278 @@ namespace sophia { -using namespace std; + enum ArtifactStatus { ARTIFACT, BORDERLINE, CLEAN, AS_UNKNOWN }; -enum ArtifactStatus { ARTIFACT, BORDERLINE, CLEAN, UNKNOWN_a }; + enum ClonalityStatus { HOMO, HETERO, SUBCLONAL, EXTREME_SUBCLONAL, CS_UNKNOWN }; -enum ClonalityStatus { HOMO, HETERO, SUBCLONAL, EXTREME_SUBCLONAL, UNKNOWN_c }; + class SvEvent { + public: + static boost::format doubleFormatter; -class SvEvent { - public: - static boost::format doubleFormatter; - static int GERMLINEOFFSETTHRESHOLD; - static double RELAXEDBPFREQTHRESHOLD; - static double BPFREQTHRESHOLD; - static double ARTIFACTFREQLOWTHRESHOLD; - static double ARTIFACTFREQHIGHTHRESHOLD; - static 
double CLONALITYLOWTHRESHOLD; - static double CLONALITYSTRICTLOWTHRESHOLD; - static double CLONALITYHIGHTHRESHOLD; - static string PIDSINMREFSTR; - static int HALFDEFAULTREADLENGTH; - static int GERMLINEDBLIMIT; - static bool ABRIDGEDOUTPUT; - static bool NOCONTROLMODE; - static bool DEBUGMODE; - const static vector EVENTTYPES; - SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In, - const SuppAlignmentAnno &sa1In, const SuppAlignmentAnno &sa2In, - const vector> &overhangDb); - SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In, - const SuppAlignmentAnno &sa1In, - const vector> &overhangDb, - const SuppAlignmentAnno &dummySaIn); - SvEvent(const BreakpointReduced &bp1In, const SuppAlignmentAnno &sa1In, - GermlineMatch germlineInfo2, MrefMatch hitsInMref2In, - const vector> &overhangDb, - const SuppAlignmentAnno &dummySaIn); + static int GERMLINE_OFFSET_THRESHOLD; - // vector getKey() const; - string getKey() const; + static double RELAXED_BP_FREQ_THRESHOLD; - bool isGermline() const { return germline; } + static double BP_FREQ_THRESHOLD; - int getEventSize() const { return eventSize; } + static double ARTIFACT_FREQ_LOW_THRESHOLD; - bool isInverted() const { return inverted; } + static double ARTIFACT_FREQ_HIGH_THRESHOLD; - int getTotalEvidence1() const { return totalEvidence1; } + static double CLONALITY_LOW_THRESHOLD; - int getTotalEvidence2() const { return totalEvidence2; } + static double CLONALITY_STRICT_LOW_THRESHOLD; - int getEventScore() const { return eventScore; } + static double CLONALITY_HIGH_THRESHOLD; - int getSuspicious() const { return suspicious; } + static std::string PIDS_IN_MREF_STR; - double getMateRatio1() const { return mateRatio1; } + static int HALF_DEFAULT_READ_LENGTH; - double getMateRatio2() const { return mateRatio2; } + static int GERMLINE_DB_LIMIT; - short getEvidenceLevel1() const { return evidenceLevel1; } + static bool ABRIDGED_OUTPUT; - short getEvidenceLevel2() const { return evidenceLevel2; 
} + static bool NO_CONTROL_MODE; - bool isSemiSuspicious() const { return semiSuspicious; } + static bool DEBUG_MODE; - bool isDistant() const { return distant; } + const static std::vector EVENTTYPES; - const SuppAlignmentAnno &getSelectedSa1() const { return selectedSa1; } + SvEvent(const BreakpointReduced &bp1In, + const BreakpointReduced &bp2In, + const SuppAlignmentAnno &sa1In, + const SuppAlignmentAnno &sa2In, + const std::vector> &overhangDb); - const SuppAlignmentAnno &getSelectedSa2() const { return selectedSa2; } - string printMatch(const vector> &overhangDb) const; + SvEvent(const BreakpointReduced &bp1In, + const BreakpointReduced &bp2In, + const SuppAlignmentAnno &sa1In, + const std::vector> &overhangDb, + const SuppAlignmentAnno &dummySaIn); - bool isToRemove() const { return toRemove; } + SvEvent(const BreakpointReduced &bp1In, + const SuppAlignmentAnno &sa1In, + GermlineMatch germlineInfo2, + MrefMatch hitsInMref2In, + const std::vector> &overhangDb, + const SuppAlignmentAnno &dummySaIn); - void setToRemove(bool toRemove) { this->toRemove = toRemove; } + // vector getKey() const; + std::string getKey() const; - int getContaminationCandidate() const { return contaminationCandidate; } + bool isGermline() const { return germline; } - void setEventScore(int eventScore) { this->eventScore = eventScore; } + int getEventSize() const { return eventSize; } - void setEventType(int eventType) { this->eventType = eventType; } + bool isInverted() const { return inverted; } - bool isOverhang1Compensation() const { return overhang1Compensation; } + int getTotalEvidence1() const { return totalEvidence1; } - double getOverhang1lengthRatio() const { return overhang1lengthRatio; } - double getOverhang2lengthRatio() const { return overhang2lengthRatio; } + int getTotalEvidence2() const { return totalEvidence2; } - private: - pair mateQualityConditions(const SuppAlignmentAnno &sa); - pair assessOverhangQualityCompensation( - int lineIndex, const vector> &overhangDb) 
const; - pair processMrefHits(const MrefMatch &hitsInMref, - const SuppAlignmentAnno &sa, - int evidenceLevelIn) const; - double determineGermlineClonalityBp(const BreakpointReduced &bp1, - const SuppAlignmentAnno &sa, - double clonalityInit) const; + int getEventScore() const { return eventScore; } - void determineEventTypeAndSize(int posDifferential, bool matchEncounteredM); + int getSuspicious() const { return suspicious; } - int filterMatch(const BreakpointReduced &bp1, const BreakpointReduced &bp2); - int filterMatchSingle(const BreakpointReduced &bp1, - const BreakpointReduced &bp2); - int filterMatchUnknown(const BreakpointReduced &bp1); - - pair assessSvClonality(const BreakpointReduced &bp, - int eventSupportTotal) const; - - ClonalityStatus - assessBreakpointClonalityStatus(double clonalityRatioIn, - const BreakpointReduced &bp1, - const BreakpointReduced &bp2) const; - ClonalityStatus - assessBreakpointClonalityStatusSingle(double clonalityRatioIn, - const BreakpointReduced &bp1, - const BreakpointReduced &bp2) const; - ClonalityStatus - assessBreakpointClonalityStatusUnknown(double clonalityRatioIn, - const BreakpointReduced &bp1) const; + double getMateRatio1() const { return mateRatio1; } - void assessSvArtifactStatus(const BreakpointReduced &bp1, - const BreakpointReduced &bp2); - void assessSvArtifactStatusUnknown(); + double getMateRatio2() const { return mateRatio2; } - int assessEventScore(bool hardClipSuspiciousCall, int inputScoreCategory); - void assessContamination(const vector> &overhangDb); - pair - assessContaminationSingleBp(int overhangIndex, - const vector> &overhangDb, - const SuppAlignmentAnno &selectedSa); - string collapseRange(const vector &vec, - const string &delimiter) const { - if (vec.empty()) { - return "_"; - } else { - return boost::join(vec, delimiter); + short getEvidenceLevel1() const { return evidenceLevel1; } + + short getEvidenceLevel2() const { return evidenceLevel2; } + + bool isSemiSuspicious() const { return 
semiSuspicious; } + + bool isDistant() const { return distant; } + + const SuppAlignmentAnno &getSelectedSa1() const { return selectedSa1; } + + const SuppAlignmentAnno &getSelectedSa2() const { return selectedSa2; } + + std::string printMatch(const std::vector> &overhangDb) const; + + bool isToRemove() const { return toRemove; } + + void setToRemove(bool toRemove) { this->toRemove = toRemove; } + + int getContaminationCandidate() const { return contaminationCandidate; } + + void setEventScore(int eventScore) { this->eventScore = eventScore; } + + void setEventType(unsigned int eventType) { this->eventType = eventType; } + + bool isOverhang1Compensation() const { return overhang1Compensation; } + + double getOverhang1lengthRatio() const { return overhang1lengthRatio; } + double getOverhang2lengthRatio() const { return overhang2lengthRatio; } + + private: + + std::pair mateQualityConditions(const SuppAlignmentAnno &sa); + + std::pair assessOverhangQualityCompensation( + int lineIndex, + const std::vector> &overhangDb) const; + + std::pair processMrefHits(const MrefMatch &hitsInMref, + const SuppAlignmentAnno &sa, + int evidenceLevelIn) const; + + double determineGermlineClonalityBp(const BreakpointReduced &bp1, + const SuppAlignmentAnno &sa, + double clonalityInit) const; + + void determineEventTypeAndSize(ChrPosition pos1, + ChrPosition pos2, + bool matchEncounteredM); + + int filterMatch(const BreakpointReduced &bp1, + const BreakpointReduced &bp2); + + int filterMatchSingle(const BreakpointReduced &bp1, + const BreakpointReduced &bp2); + + int filterMatchUnknown(const BreakpointReduced &bp1); + + std::pair assessSvClonality(const BreakpointReduced &bp, + int eventSupportTotal) const; + + ClonalityStatus + assessBreakpointClonalityStatus(double clonalityRatioIn, + const BreakpointReduced &bp1, + const BreakpointReduced &bp2) const; + + ClonalityStatus + assessBreakpointClonalityStatusSingle(double clonalityRatioIn, + const BreakpointReduced &bp1, + const 
BreakpointReduced &bp2) const; + + ClonalityStatus + assessBreakpointClonalityStatusUnknown(double clonalityRatioIn, + const BreakpointReduced &bp1) const; + + void assessSvArtifactStatus(const BreakpointReduced &bp1, + const BreakpointReduced &bp2); + + void assessSvArtifactStatusUnknown(); + + int assessEventScore(bool hardClipSuspiciousCall, int inputScoreCategory); + + void assessContamination(const std::vector> &overhangDb); + + std::pair + assessContaminationSingleBp(int overhangIndex, + const std::vector> &overhangDb, + const SuppAlignmentAnno &selectedSa); + + std::string collapseRange(const std::vector &vec, + const std::string &delimiter) const { + if (vec.empty()) { + return "_"; + } else { + return boost::join(vec, delimiter); + } } - } - - bool toRemove; - int contaminationCandidate; - int chrIndex1; - int pos1; - int chrIndex2; - int pos2; - int lineIndex1; - int lineIndex2; - int eventType; - int eventSize; - bool inverted; - bool doubleSupport; - bool distant; - bool overhang1Compensation; - bool overhang2Compensation; - int overhang1Index; - int overhang2Index; - double overhang1lengthRatio; - double overhang2lengthRatio; - int inputScore; - int eventScore; - int totalEvidence1; - int span1; - int totalEvidence2; - int span2; - short evidenceLevel1; - short evidenceLevel2; - short mrefHits1; - bool mrefHits1Conservative; - short mrefHits2; - bool mrefHits2Conservative; - bool germline; - double germlineClonality1; - bool germlineStatus1; - double germlineClonality2; - bool germlineStatus2; - SuppAlignmentAnno selectedSa1; - SuppAlignmentAnno selectedSa2; - double mateRatio1; - double mateRatio2; - int suspicious; - bool semiSuspicious; - double artifactRatio1; - double clonalityRatio1; - ClonalityStatus clonalityStatus1; - double artifactRatio2; - double clonalityRatio2; - ClonalityStatus clonalityStatus2; - ArtifactStatus artifactStatus; -}; + + bool toRemove; + + + int contaminationCandidate; + + ChrIndex chrIndex1; + + ChrPosition pos1; + + 
ChrIndex chrIndex2; + + ChrPosition pos2; + + int lineIndex1; + + int lineIndex2; + + // Could probably be an enum. + unsigned int eventType; + + int eventSize; + + bool inverted; + + bool doubleSupport; + + bool distant; + + bool overhang1Compensation; + + bool overhang2Compensation; + + // Can have value -1 + int overhang1Index; + int overhang2Index; + + double overhang1lengthRatio; + + double overhang2lengthRatio; + + int inputScore; + + int eventScore; + + int totalEvidence1; + + int span1; + + int totalEvidence2; + + int span2; + + short evidenceLevel1; + short evidenceLevel2; + + short mrefHits1; + + bool mrefHits1Conservative; + short mrefHits2; + + bool mrefHits2Conservative; + + bool germline; + + double germlineClonality1; + + bool germlineStatus1; + double germlineClonality2; + + bool germlineStatus2; + + SuppAlignmentAnno selectedSa1; + + SuppAlignmentAnno selectedSa2; + + double mateRatio1; + + double mateRatio2; + + int suspicious; + + bool semiSuspicious; + + double artifactRatio1; + + double clonalityRatio1; + + ClonalityStatus clonalityStatus1; + + double artifactRatio2; + + double clonalityRatio2; + + ClonalityStatus clonalityStatus2; + + ArtifactStatus artifactStatus; + }; } /* namespace sophia */ diff --git a/include/cxxopts.hpp b/include/cxxopts.hpp deleted file mode 100644 index 087f04a..0000000 --- a/include/cxxopts.hpp +++ /dev/null @@ -1,1248 +0,0 @@ -/* - - Copyright (c) 2014, 2015, 2016, 2017 Jarryd Beck - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or 
substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - - */ - -#ifndef CXX_OPTS_HPP -#define CXX_OPTS_HPP - -#if defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// when we ask cxxopts to use Unicode, help strings are processed using ICU, -// which results in the correct lengths being computed for strings when they -// are formatted for the help output -// it is necessary to make sure that can be found by the -// compiler, and that icu-uc is linked in to the binary. 
- -#ifdef CXXOPTS_USE_UNICODE -#include - -namespace cxxopts { - -using namespace std; - -typedef icu::UnicodeString String; - -inline String -toLocalString(string s) { - return icu::UnicodeString::fromUTF8(move(s)); -} - -class UnicodeStringIterator : public iterator { - public: - UnicodeStringIterator(const icu::UnicodeString *string, int32_t pos) - : s(string), i(pos) {} - - value_type operator*() const { return s->char32At(i); } - - bool operator==(const UnicodeStringIterator &rhs) const { - return s == rhs.s && i == rhs.i; - } - - bool operator!=(const UnicodeStringIterator &rhs) const { - return !(*this == rhs); - } - - UnicodeStringIterator &operator++() { - ++i; - return *this; - } - - UnicodeStringIterator operator+(int32_t v) { - return UnicodeStringIterator(s, i + v); - } - - private: - const icu::UnicodeString *s; - int32_t i; -}; - -inline String & -stringAppend(String &s, String a) { - return s.append(move(a)); -} - -inline String & -stringAppend(String &s, int n, UChar32 c) { - for (int i = 0; i != n; ++i) { - s.append(c); - } - - return s; -} - -template -String & -stringAppend(String &s, Iterator begin, Iterator end) { - while (begin != end) { - s.append(*begin); - ++begin; - } - - return s; -} - -inline size_t -stringLength(const String &s) { - return s.length(); -} - -inline string -toUTF8String(const String &s) { - string result; - s.toUTF8String(result); - - return result; -} - -inline bool -empty(const String &s) { - return s.isEmpty(); -} -} // namespace cxxopts - -namespace std { -cxxopts::UnicodeStringIterator -begin(const icu::UnicodeString &s) { - return cxxopts::UnicodeStringIterator(&s, 0); -} - -cxxopts::UnicodeStringIterator -end(const icu::UnicodeString &s) { - return cxxopts::UnicodeStringIterator(&s, s.length()); -} -} // namespace std - -// ifdef CXXOPTS_USE_UNICODE -#else - -namespace cxxopts { - -using namespace std; - -typedef string String; - -template -T -toLocalString(T &&t) { - return t; -} - -inline size_t 
-stringLength(const String &s) { - return s.length(); -} - -inline String & -stringAppend(String &s, String a) { - return s.append(move(a)); -} - -inline String & -stringAppend(String &s, size_t n, char c) { - return s.append(n, c); -} - -template -String & -stringAppend(String &s, Iterator begin, Iterator end) { - return s.append(begin, end); -} - -template -string -toUTF8String(T &&t) { - return forward(t); -} - -inline bool -empty(const string &s) { - return s.empty(); -} -} // namespace cxxopts - -// ifdef CXXOPTS_USE_UNICODE -#endif - -namespace cxxopts { - -using namespace std; - -namespace { -#ifdef _WIN32 -const string LQUOTE("\'"); -const string RQUOTE("\'"); -#else -const string LQUOTE("‘"); -const string RQUOTE("’"); -#endif -} // namespace - -class Value : public enable_shared_from_this { - public: - virtual void parse(const string &text) const = 0; - - virtual void parse() const = 0; - - virtual bool has_arg() const = 0; - - virtual bool has_default() const = 0; - - virtual bool is_container() const = 0; - - virtual bool has_implicit() const = 0; - - virtual string get_default_value() const = 0; - - virtual string get_implicit_value() const = 0; - - virtual shared_ptr default_value(const string &value) = 0; - - virtual shared_ptr implicit_value(const string &value) = 0; -}; - -class OptionException : public exception { - public: - OptionException(const string &message) : m_message(message) {} - - virtual const char *what() const noexcept { return m_message.c_str(); } - - private: - string m_message; -}; - -class OptionSpecException : public OptionException { - public: - OptionSpecException(const string &message) : OptionException(message) {} -}; - -class OptionParseException : public OptionException { - public: - OptionParseException(const string &message) : OptionException(message) {} -}; - -class option_exists_error : public OptionSpecException { - public: - option_exists_error(const string &option) - : OptionSpecException(u8"Option " + LQUOTE + 
option + RQUOTE + - u8" already exists") {} -}; - -class invalid_option_format_error : public OptionSpecException { - public: - invalid_option_format_error(const string &format) - : OptionSpecException(u8"Invalid option format " + LQUOTE + format + - RQUOTE) {} -}; - -class option_not_exists_exception : public OptionParseException { - public: - option_not_exists_exception(const string &option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" does not exist") {} -}; - -class missing_argument_exception : public OptionParseException { - public: - missing_argument_exception(const string &option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" is missing an argument") {} -}; - -class option_requires_argument_exception : public OptionParseException { - public: - option_requires_argument_exception(const string &option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" requires an argument") {} -}; - -class option_not_has_argument_exception : public OptionParseException { - public: - option_not_has_argument_exception(const string &option, const string &arg) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" does not take an argument, but argument" + - LQUOTE + arg + RQUOTE + " given") {} -}; - -class option_not_present_exception : public OptionParseException { - public: - option_not_present_exception(const string &option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" not present") {} -}; - -class argument_incorrect_type : public OptionParseException { - public: - argument_incorrect_type(const string &arg) - : OptionParseException(u8"Argument " + LQUOTE + arg + RQUOTE + - u8" failed to parse") {} -}; - -class option_required_exception : public OptionParseException { - public: - option_required_exception(const string &option) - : OptionParseException(u8"Option " + LQUOTE + option + RQUOTE + - u8" is required but not present") {} -}; - -namespace values { 
-namespace { -basic_regex - integer_pattern("(-)?(0x)?([1-9a-zA-Z][0-9a-zA-Z]*)|((0x)?0)"); -} - -namespace detail { - -template struct SignedCheck; - -template struct SignedCheck { - template - void operator()(bool negative, U u, const string &text) { - if (negative) { - if (u > static_cast(-numeric_limits::min())) { - throw argument_incorrect_type(text); - } - } else { - if (u > static_cast(numeric_limits::max())) { - throw argument_incorrect_type(text); - } - } - } -}; - -template struct SignedCheck { - template void operator()(bool, U, const string &) {} -}; - -template -void -check_signed_range(bool negative, U value, const string &text) { - SignedCheck::is_signed>()(negative, value, text); -} -} // namespace detail - -template -R -checked_negate(T &&t, const string &, true_type) { - // if we got to here, then `t` is a positive number that fits into - // `R`. So to avoid MSVC C4146, we first cast it to `R`. - // See https://github.com/jarro2783/cxxopts/issues/62 for more details. - return -static_cast(t); -} - -template -T -checked_negate(T &&, const string &text, false_type) { - throw argument_incorrect_type(text); -} - -template -void -integer_parser(const string &text, T &value) { - smatch match; - regex_match(text, match, integer_pattern); - - if (match.length() == 0) { - throw argument_incorrect_type(text); - } - - if (match.length(4) > 0) { - value = 0; - return; - } - - using US = typename make_unsigned::type; - - constexpr auto umax = numeric_limits::max(); - constexpr bool is_signed = numeric_limits::is_signed; - const bool negative = match.length(1) > 0; - const auto base = match.length(2) > 0 ? 
16 : 10; - - auto value_match = match[3]; - - US result = 0; - - for (auto iter = value_match.first; iter != value_match.second; ++iter) { - int digit = 0; - - if (*iter >= '0' && *iter <= '9') { - digit = *iter - '0'; - } else if (base == 16 && *iter >= 'a' && *iter <= 'f') { - digit = *iter - 'a' + 10; - } else if (base == 16 && *iter >= 'A' && *iter <= 'F') { - digit = *iter - 'A' + 10; - } else { - throw argument_incorrect_type(text); - } - - if (umax - digit < result * base) { - throw argument_incorrect_type(text); - } - - result = result * base + digit; - } - - detail::check_signed_range(negative, result, text); - - if (negative) { - value = checked_negate(result, text, - integral_constant()); - // if (!is_signed) - //{ - // throw argument_incorrect_type(text); - // } - // value = -result; - } else { - value = result; - } -} - -template -void -stringstream_parser(const string &text, T &value) { - stringstream in(text); - in >> value; - if (!in) { - throw argument_incorrect_type(text); - } -} - -inline void -parse_value(const string &text, uint8_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, int8_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, uint16_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, int16_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, uint32_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, int32_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, uint64_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string &text, int64_t &value) { - integer_parser(text, value); -} - -inline void -parse_value(const string & /*text*/, bool &value) { - // TODO recognise on, off, yes, no, enable, disable - // so that we can write --long=yes explicitly - value = 
true; -} - -inline void -parse_value(const string &text, string &value) { - value = text; -} - -// The fallback parser. It uses the stringstream parser to parse all types -// that have not been overloaded explicitly. It has to be placed in the -// source code before all other more specialized templates. -template -void -parse_value(const string &text, T &value) { - stringstream_parser(text, value); -} - -template -void -parse_value(const string &text, vector &value) { - T v; - parse_value(text, v); - value.push_back(v); -} - -template struct value_has_arg { - static constexpr bool value = true; -}; - -template <> struct value_has_arg { - static constexpr bool value = false; -}; - -template struct type_is_container { - static constexpr bool value = false; -}; - -template struct type_is_container> { - static constexpr bool value = true; -}; - -template class standard_value final : public Value { - public: - standard_value() : m_result(make_shared()), m_store(m_result.get()) {} - - standard_value(T *t) : m_store(t) {} - - void parse(const string &text) const { parse_value(text, *m_store); } - - bool is_container() const { return type_is_container::value; } - - void parse() const { parse_value(m_default_value, *m_store); } - - bool has_arg() const { return value_has_arg::value; } - - bool has_default() const { return m_default; } - - bool has_implicit() const { return m_implicit; } - - virtual shared_ptr default_value(const string &value) { - m_default = true; - m_default_value = value; - return shared_from_this(); - } - - virtual shared_ptr implicit_value(const string &value) { - m_implicit = true; - m_implicit_value = value; - return shared_from_this(); - } - - string get_default_value() const { return m_default_value; } - - string get_implicit_value() const { return m_implicit_value; } - - const T &get() const { - if (m_store == nullptr) { - return *m_result; - } else { - return *m_store; - } - } - - protected: - shared_ptr m_result; - T *m_store; - bool m_default = 
false; - string m_default_value; - bool m_implicit = false; - string m_implicit_value; -}; -} // namespace values - -template -shared_ptr -value() { - return make_shared>(); -} - -template -shared_ptr -value(T &t) { - return make_shared>(&t); -} - -class OptionAdder; - -class OptionDetails { - public: - OptionDetails(const String &desc, shared_ptr val) - : m_desc(desc), m_value(val), m_count(0) {} - - const String &description() const { return m_desc; } - - bool has_arg() const { return m_value->has_arg(); } - - void parse(const string &text) { - m_value->parse(text); - ++m_count; - } - - void parse_default() { m_value->parse(); } - - int count() const { return m_count; } - - const Value &value() const { return *m_value; } - - template const T &as() const { -#ifdef CXXOPTS_NO_RTTI - return static_cast &>(*m_value).get(); -#else - return dynamic_cast &>(*m_value).get(); -#endif - } - - private: - String m_desc; - shared_ptr m_value; - int m_count; -}; - -struct HelpOptionDetails { - string s; - string l; - String desc; - bool has_arg; - bool has_default; - string default_value; - bool has_implicit; - string implicit_value; - string arg_help; - bool is_container; -}; - -struct HelpGroupDetails { - string name; - string description; - vector options; -}; - -class Options { - public: - Options(string program, string help_string = "") - : m_program(move(program)), - m_help_string(toLocalString(move(help_string))), - m_positional_help("positional parameters"), - m_next_positional(m_positional.end()) {} - - inline Options &positional_help(string help_text) { - m_positional_help = move(help_text); - return *this; - } - - inline void parse(int &argc, char **&argv); - - inline OptionAdder add_options(string group = ""); - - inline void add_option(const string &group, const string &s, - const string &l, string desc, - shared_ptr value, string arg_help); - - int count(const string &o) const { - auto iter = m_options.find(o); - if (iter == m_options.end()) { - return 0; - } - - 
return iter->second->count(); - } - - const OptionDetails &operator[](const string &option) const { - auto iter = m_options.find(option); - - if (iter == m_options.end()) { - throw option_not_present_exception(option); - } - - return *iter->second; - } - - // parse positional arguments into the given option - inline void parse_positional(string option); - - inline void parse_positional(vector options); - - inline string help(const vector &groups = {""}) const; - - inline const vector groups() const; - - inline const HelpGroupDetails &group_help(const string &group) const; - - private: - inline void add_one_option(const string &option, - shared_ptr details); - - inline bool consume_positional(string a); - - inline void add_to_option(const string &option, const string &arg); - - inline void parse_option(shared_ptr value, - const string &name, const string &arg = ""); - - inline void checked_parse_arg(int argc, char *argv[], int ¤t, - shared_ptr value, - const string &name); - - inline String help_one_group(const string &group) const; - - inline void generate_group_help(String &result, - const vector &groups) const; - - inline void generate_all_groups_help(String &result) const; - - string m_program; - String m_help_string; - string m_positional_help; - - map> m_options; - vector m_positional; - vector::iterator m_next_positional; - unordered_set m_positional_set; - - // mapping from groups to help options - map m_help; -}; - -class OptionAdder { - public: - OptionAdder(Options &options, string group) - : m_options(options), m_group(move(group)) {} - - inline OptionAdder & - operator()(const string &opts, const string &desc, - shared_ptr value = ::cxxopts::value(), - string arg_help = ""); - - private: - Options &m_options; - string m_group; -}; - -// A helper function for setting required arguments -inline void -check_required(const Options &options, const vector &required) { - for (auto &r : required) { - if (options.count(r) == 0) { - throw 
option_required_exception(r); - } - } -} - -namespace { -constexpr int OPTION_LONGEST = 30; -constexpr int OPTION_DESC_GAP = 2; - -basic_regex - option_matcher("--([[:alnum:]][-_[:alnum:]]+)(=(.*))?|-([[:alnum:]]+)"); - -basic_regex - option_specifier("(([[:alnum:]]),)?[ ]*([[:alnum:]][-_[:alnum:]]*)?"); - -String -format_option(const HelpOptionDetails &o) { - auto &s = o.s; - auto &l = o.l; - - String result = " "; - - if (s.size() > 0) { - result += "-" + toLocalString(s) + ","; - } else { - result += " "; - } - - if (l.size() > 0) { - result += " --" + toLocalString(l); - } - - if (o.has_arg) { - auto arg = o.arg_help.size() > 0 ? toLocalString(o.arg_help) : "arg"; - - if (o.has_implicit) { - result += - " [=" + arg + "(=" + toLocalString(o.implicit_value) + ")]"; - } else { - result += " " + arg; - } - } - - return result; -} - -String -format_description(const HelpOptionDetails &o, size_t start, size_t width) { - auto desc = o.desc; - - if (o.has_default) { - desc += toLocalString(" (default: " + o.default_value + ")"); - } - - String result; - - auto current = begin(desc); - auto startLine = current; - auto lastSpace = current; - - auto size = size_t{}; - - while (current != end(desc)) { - if (*current == ' ') { - lastSpace = current; - } - - if (size > width) { - if (lastSpace == startLine) { - stringAppend(result, startLine, current + 1); - stringAppend(result, "\n"); - stringAppend(result, start, ' '); - startLine = current + 1; - lastSpace = startLine; - } else { - stringAppend(result, startLine, lastSpace); - stringAppend(result, "\n"); - stringAppend(result, start, ' '); - startLine = lastSpace + 1; - } - size = 0; - } else { - ++size; - } - - ++current; - } - - // append whatever is left - stringAppend(result, startLine, current); - - return result; -} -} // namespace - -OptionAdder -Options::add_options(string group) { - return OptionAdder(*this, move(group)); -} - -OptionAdder & -OptionAdder::operator()(const string &opts, const string &desc, - 
shared_ptr value, string arg_help) { - match_results result; - regex_match(opts.c_str(), result, option_specifier); - - if (result.empty()) { - throw invalid_option_format_error(opts); - } - - const auto &short_match = result[2]; - const auto &long_match = result[3]; - - if (!short_match.length() && !long_match.length()) { - throw invalid_option_format_error(opts); - } else if (long_match.length() == 1 && short_match.length()) { - throw invalid_option_format_error(opts); - } - - auto option_names = [](const sub_match &short_, - const sub_match &long_) { - if (long_.length() == 1) { - return make_tuple(long_.str(), short_.str()); - } else { - return make_tuple(short_.str(), long_.str()); - } - }(short_match, long_match); - - m_options.add_option(m_group, get<0>(option_names), get<1>(option_names), - desc, value, move(arg_help)); - - return *this; -} - -void -Options::parse_option(shared_ptr value, const string & /*name*/, - const string &arg) { - value->parse(arg); -} - -void -Options::checked_parse_arg(int argc, char *argv[], int ¤t, - shared_ptr value, - const string &name) { - if (current + 1 >= argc) { - if (value->value().has_implicit()) { - parse_option(value, name, value->value().get_implicit_value()); - } else { - throw missing_argument_exception(name); - } - } else { - if (argv[current + 1][0] == '-' && value->value().has_implicit()) { - parse_option(value, name, value->value().get_implicit_value()); - } else { - parse_option(value, name, argv[current + 1]); - ++current; - } - } -} - -void -Options::add_to_option(const string &option, const string &arg) { - auto iter = m_options.find(option); - - if (iter == m_options.end()) { - throw option_not_exists_exception(option); - } - - parse_option(iter->second, option, arg); -} - -bool -Options::consume_positional(string a) { - while (m_next_positional != m_positional.end()) { - auto iter = m_options.find(*m_next_positional); - if (iter != m_options.end()) { - if (!iter->second->value().is_container()) { - if 
(iter->second->count() == 0) { - add_to_option(*m_next_positional, a); - ++m_next_positional; - return true; - } else { - ++m_next_positional; - continue; - } - } else { - add_to_option(*m_next_positional, a); - return true; - } - } - ++m_next_positional; - } - - return false; -} - -void -Options::parse_positional(string option) { - parse_positional(vector{option}); -} - -void -Options::parse_positional(vector options) { - m_positional = move(options); - m_next_positional = m_positional.begin(); - - m_positional_set.insert(m_positional.begin(), m_positional.end()); -} - -void -Options::parse(int &argc, char **&argv) { - int current = 1; - int nextKeep = 1; - bool consume_remaining = false; - while (current != argc) { - if (strcmp(argv[current], "--") == 0) { - consume_remaining = true; - ++current; - break; - } - match_results result; - regex_match(argv[current], result, option_matcher); - if (result.empty()) { - // not a flag - // if true is returned here then it was consumed, otherwise it is - // ignored - if (consume_positional(argv[current])) { - } else { - argv[nextKeep] = argv[current]; - ++nextKeep; - } - // if we return from here then it was parsed successfully, so - // continue - } else { - // short or long option? 
- if (result[4].length() != 0) { - const string &s = result[4]; - - for (size_t i = 0; i != s.size(); ++i) { - string name(1, s[i]); - auto iter = m_options.find(name); - - if (iter == m_options.end()) { - throw option_not_exists_exception(name); - } - - auto value = iter->second; - - // if no argument then just add it - if (!value->has_arg()) { - parse_option(value, name); - } else { - // it must be the last argument - if (i + 1 == s.size()) { - checked_parse_arg(argc, argv, current, value, name); - } else if (value->value().has_implicit()) { - parse_option(value, name, - value->value().get_implicit_value()); - } else { - // error - throw option_requires_argument_exception(name); - } - } - } - } else if (result[1].length() != 0) { - const string &name = result[1]; - - auto iter = m_options.find(name); - - if (iter == m_options.end()) { - throw option_not_exists_exception(name); - } - - auto opt = iter->second; - - // equals provided for long option? - if (result[3].length() != 0) { - // parse the option given - - // but if it doesn't take an argument, this is an error - if (!opt->has_arg()) { - throw option_not_has_argument_exception(name, - result[3]); - } - - parse_option(opt, name, result[3]); - } else { - if (opt->has_arg()) { - // parse the next argument - checked_parse_arg(argc, argv, current, opt, name); - } else { - // parse with empty argument - parse_option(opt, name); - } - } - } - } - - ++current; - } - - for (auto &opt : m_options) { - auto &detail = opt.second; - auto &value = detail->value(); - - if (!detail->count() && value.has_default()) { - detail->parse_default(); - } - } - - if (consume_remaining) { - while (current < argc) { - if (!consume_positional(argv[current])) { - break; - } - ++current; - } - - // adjust argv for any that couldn't be swallowed - while (current != argc) { - argv[nextKeep] = argv[current]; - ++nextKeep; - ++current; - } - } - - argc = nextKeep; -} - -void -Options::add_option(const string &group, const string &s, const 
string &l, - string desc, shared_ptr value, - string arg_help) { - auto stringDesc = toLocalString(move(desc)); - auto option = make_shared(stringDesc, value); - - if (s.size() > 0) { - add_one_option(s, option); - } - - if (l.size() > 0) { - add_one_option(l, option); - } - - // add the help details - auto &options = m_help[group]; - - options.options.emplace_back(HelpOptionDetails{ - s, l, stringDesc, value->has_arg(), value->has_default(), - value->get_default_value(), value->has_implicit(), - value->get_implicit_value(), move(arg_help), value->is_container()}); -} - -void -Options::add_one_option(const string &option, - shared_ptr details) { - auto in = m_options.emplace(option, details); - - if (!in.second) { - throw option_exists_error(option); - } -} - -String -Options::help_one_group(const string &g) const { - typedef vector> OptionHelp; - - auto group = m_help.find(g); - if (group == m_help.end()) { - return ""; - } - - OptionHelp format; - - size_t longest = 0; - - String result; - - if (!g.empty()) { - result += toLocalString(" " + g + " options:\n"); - } - - for (const auto &o : group->second.options) { - if (o.is_container && - m_positional_set.find(o.l) != m_positional_set.end()) { - continue; - } - - auto s = format_option(o); - longest = max(longest, stringLength(s)); - format.push_back(make_pair(s, String())); - } - - longest = min(longest, static_cast(OPTION_LONGEST)); - - // widest allowed description - auto allowed = size_t{76} - longest - OPTION_DESC_GAP; - - auto fiter = format.begin(); - for (const auto &o : group->second.options) { - if (o.is_container && - m_positional_set.find(o.l) != m_positional_set.end()) { - continue; - } - - auto d = format_description(o, longest + OPTION_DESC_GAP, allowed); - - result += fiter->first; - if (stringLength(fiter->first) > longest) { - result += '\n'; - result += toLocalString(string(longest + OPTION_DESC_GAP, ' ')); - } else { - result += toLocalString(string( - longest + OPTION_DESC_GAP - 
stringLength(fiter->first), ' ')); - } - result += d; - result += '\n'; - - ++fiter; - } - - return result; -} - -void -Options::generate_group_help(String &result, - const vector &print_groups) const { - for (size_t i = 0; i != print_groups.size(); ++i) { - const String &group_help_text = help_one_group(print_groups[i]); - if (empty(group_help_text)) { - continue; - } - result += group_help_text; - if (i < print_groups.size() - 1) { - result += '\n'; - } - } -} - -void -Options::generate_all_groups_help(String &result) const { - vector all_groups; - all_groups.reserve(m_help.size()); - - for (auto &group : m_help) { - all_groups.push_back(group.first); - } - - generate_group_help(result, all_groups); -} - -string -Options::help(const vector &help_groups) const { - String result = m_help_string + "\nUsage:\n " + toLocalString(m_program) + - " [OPTION...]"; - - if (m_positional.size() > 0) { - result += " " + toLocalString(m_positional_help); - } - - result += "\n\n"; - - if (help_groups.size() == 0) { - generate_all_groups_help(result); - } else { - generate_group_help(result, help_groups); - } - - return toUTF8String(result); -} - -const vector -Options::groups() const { - vector g; - - transform(m_help.begin(), m_help.end(), back_inserter(g), - [](const map::value_type &pair) { - return pair.first; - }); - - return g; -} - -const HelpGroupDetails & -Options::group_help(const string &group) const { - return m_help.at(group); -} - -} // namespace cxxopts - -#if defined(__GNUC__) -#pragma GCC diagnostic pop -#endif - -#endif // CXX_OPTS_HPP diff --git a/include/global.h b/include/global.h new file mode 100644 index 0000000..c15d6ef --- /dev/null +++ b/include/global.h @@ -0,0 +1,83 @@ +/* + * GlobalAppConfig.h + * + * Author: Philip R. 
Kensche Copyright (C) 2023 DKFZ Heidelberg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#ifndef GLOBALAPPCONFIG_H_ +#define GLOBALAPPCONFIG_H_ + +#include +#include +#include +#include +#include +#include + + +namespace sophia { + + // Enrich boost::exceptions with additional std::string information + using error_info_string = boost::error_info; + + class DomainError : + public virtual boost::exception, + public virtual std::domain_error { + public: + DomainError(const std::string &msg) : std::domain_error(msg) {} + }; + + using ChrName = std::string; + + // IMPORTANT + // + // These two are only to make the code clearer, but are not type checked. There are no opaque + // or strongly type-checked type-"aliases" in C++. An ideal type-safe solution would use + // classes. + // + // By making them signed and unsigned, though, the compiler at least warns about conversions + // between the two, and therefore hints at incorrect conversions. + // + // When developing, you should occasionally switch which is signed or unsigned, to find all + // places, where this matters (e.g. vector indices). Vectors are also usually not specific + // for the global chromosome space or the compressed mref space, which bears the potential + // for bugs. + // + // TODO Make these classes! 
+ using ChrIndex = unsigned int; + using CompressedMrefIndex = signed int; + + using ChrSize = signed int; + using ChrPosition = signed int; + using ChrDistance = signed int; + using ChrPositionDifference = signed int; + + std::string get_trace(const boost::exception &e); + + typedef boost::error_info traced; + + template + void throw_with_trace(const E &e) { + throw boost::enable_error_info(e) << + traced(boost::stacktrace::stacktrace()); + } + + void setApplicationConfig(std::optional assemblyname); +} + + +#endif /* GLOBALAPPCONFIG_H_ */ \ No newline at end of file diff --git a/include/strtk-wrap.h b/include/strtk-wrap.h new file mode 100644 index 0000000..65c6c0d --- /dev/null +++ b/include/strtk-wrap.h @@ -0,0 +1,28 @@ +/* + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + + +#ifndef _STRTK_WRAP_H +#define _STRTK_WRAP_H + +// StrTk was written for C++11. To avoid excessive warnings when using C++17, we disable them +// explicitly for strtk.hpp, here. 
+#pragma GCC system_header +#include "strtk.hpp" + +#endif \ No newline at end of file diff --git a/resources/hg38.tsv b/resources/hg38.tsv new file mode 100644 index 0000000..0ac87a4 --- /dev/null +++ b/resources/hg38.tsv @@ -0,0 +1,3368 @@ +chromosome size compressedMref category +chr1 248956422 TRUE autosome +chr2 242193529 TRUE autosome +chr3 198295559 TRUE autosome +chr4 190214555 TRUE autosome +chr5 181538259 TRUE autosome +chr6 170805979 TRUE autosome +chr7 159345973 TRUE autosome +chr8 145138636 TRUE autosome +chr9 138394717 TRUE autosome +chr10 133797422 TRUE autosome +chr11 135086622 TRUE autosome +chr12 133275309 TRUE autosome +chr13 114364328 TRUE autosome +chr14 107043718 TRUE autosome +chr15 101991189 TRUE autosome +chr16 90338345 TRUE autosome +chr17 83257441 TRUE autosome +chr18 80373285 TRUE autosome +chr19 58617616 TRUE autosome +chr20 64444167 TRUE autosome +chr21 46709983 TRUE autosome +chr22 50818468 TRUE autosome +chrX 156040895 TRUE x +chrY 57227415 TRUE y +chrM 16569 FALSE extrachromosomal +chr1_KI270706v1_random 175055 TRUE unassigned +chr1_KI270707v1_random 32032 TRUE unassigned +chr1_KI270708v1_random 127682 TRUE unassigned +chr1_KI270709v1_random 66860 TRUE unassigned +chr1_KI270710v1_random 40176 TRUE unassigned +chr1_KI270711v1_random 42210 TRUE unassigned +chr1_KI270712v1_random 176043 TRUE unassigned +chr1_KI270713v1_random 40745 TRUE unassigned +chr1_KI270714v1_random 41717 TRUE unassigned +chr2_KI270715v1_random 161471 TRUE unassigned +chr2_KI270716v1_random 153799 TRUE unassigned +chr3_GL000221v1_random 155397 TRUE unassigned +chr4_GL000008v2_random 209709 TRUE unassigned +chr5_GL000208v1_random 92689 TRUE unassigned +chr9_KI270717v1_random 40062 TRUE unassigned +chr9_KI270718v1_random 38054 TRUE unassigned +chr9_KI270719v1_random 176845 TRUE unassigned +chr9_KI270720v1_random 39050 TRUE unassigned +chr11_KI270721v1_random 100316 TRUE unassigned +chr14_GL000009v2_random 201709 TRUE unassigned +chr14_GL000225v1_random 211173 TRUE 
unassigned +chr14_KI270722v1_random 194050 TRUE unassigned +chr14_GL000194v1_random 191469 TRUE unassigned +chr14_KI270723v1_random 38115 TRUE unassigned +chr14_KI270724v1_random 39555 TRUE unassigned +chr14_KI270725v1_random 172810 TRUE unassigned +chr14_KI270726v1_random 43739 TRUE unassigned +chr15_KI270727v1_random 448248 TRUE unassigned +chr16_KI270728v1_random 1872759 TRUE unassigned +chr17_GL000205v2_random 185591 TRUE unassigned +chr17_KI270729v1_random 280839 TRUE unassigned +chr17_KI270730v1_random 112551 TRUE unassigned +chr22_KI270731v1_random 150754 TRUE unassigned +chr22_KI270732v1_random 41543 TRUE unassigned +chr22_KI270733v1_random 179772 TRUE unassigned +chr22_KI270734v1_random 165050 TRUE unassigned +chr22_KI270735v1_random 42811 TRUE unassigned +chr22_KI270736v1_random 181920 TRUE unassigned +chr22_KI270737v1_random 103838 TRUE unassigned +chr22_KI270738v1_random 99375 TRUE unassigned +chr22_KI270739v1_random 73985 TRUE unassigned +chrY_KI270740v1_random 37240 TRUE unassigned +chrUn_KI270302v1 2274 TRUE unassigned +chrUn_KI270304v1 2165 TRUE unassigned +chrUn_KI270303v1 1942 TRUE unassigned +chrUn_KI270305v1 1472 TRUE unassigned +chrUn_KI270322v1 21476 TRUE unassigned +chrUn_KI270320v1 4416 TRUE unassigned +chrUn_KI270310v1 1201 TRUE unassigned +chrUn_KI270316v1 1444 TRUE unassigned +chrUn_KI270315v1 2276 TRUE unassigned +chrUn_KI270312v1 998 TRUE unassigned +chrUn_KI270311v1 12399 TRUE unassigned +chrUn_KI270317v1 37690 TRUE unassigned +chrUn_KI270412v1 1179 TRUE unassigned +chrUn_KI270411v1 2646 TRUE unassigned +chrUn_KI270414v1 2489 TRUE unassigned +chrUn_KI270419v1 1029 TRUE unassigned +chrUn_KI270418v1 2145 TRUE unassigned +chrUn_KI270420v1 2321 TRUE unassigned +chrUn_KI270424v1 2140 TRUE unassigned +chrUn_KI270417v1 2043 TRUE unassigned +chrUn_KI270422v1 1445 TRUE unassigned +chrUn_KI270423v1 981 TRUE unassigned +chrUn_KI270425v1 1884 TRUE unassigned +chrUn_KI270429v1 1361 TRUE unassigned +chrUn_KI270442v1 392061 TRUE unassigned 
+chrUn_KI270466v1 1233 TRUE unassigned +chrUn_KI270465v1 1774 TRUE unassigned +chrUn_KI270467v1 3920 TRUE unassigned +chrUn_KI270435v1 92983 TRUE unassigned +chrUn_KI270438v1 112505 TRUE unassigned +chrUn_KI270468v1 4055 TRUE unassigned +chrUn_KI270510v1 2415 TRUE unassigned +chrUn_KI270509v1 2318 TRUE unassigned +chrUn_KI270518v1 2186 TRUE unassigned +chrUn_KI270508v1 1951 TRUE unassigned +chrUn_KI270516v1 1300 TRUE unassigned +chrUn_KI270512v1 22689 TRUE unassigned +chrUn_KI270519v1 138126 TRUE unassigned +chrUn_KI270522v1 5674 TRUE unassigned +chrUn_KI270511v1 8127 TRUE unassigned +chrUn_KI270515v1 6361 TRUE unassigned +chrUn_KI270507v1 5353 TRUE unassigned +chrUn_KI270517v1 3253 TRUE unassigned +chrUn_KI270529v1 1899 TRUE unassigned +chrUn_KI270528v1 2983 TRUE unassigned +chrUn_KI270530v1 2168 TRUE unassigned +chrUn_KI270539v1 993 TRUE unassigned +chrUn_KI270538v1 91309 TRUE unassigned +chrUn_KI270544v1 1202 TRUE unassigned +chrUn_KI270548v1 1599 TRUE unassigned +chrUn_KI270583v1 1400 TRUE unassigned +chrUn_KI270587v1 2969 TRUE unassigned +chrUn_KI270580v1 1553 TRUE unassigned +chrUn_KI270581v1 7046 TRUE unassigned +chrUn_KI270579v1 31033 TRUE unassigned +chrUn_KI270589v1 44474 TRUE unassigned +chrUn_KI270590v1 4685 TRUE unassigned +chrUn_KI270584v1 4513 TRUE unassigned +chrUn_KI270582v1 6504 TRUE unassigned +chrUn_KI270588v1 6158 TRUE unassigned +chrUn_KI270593v1 3041 TRUE unassigned +chrUn_KI270591v1 5796 TRUE unassigned +chrUn_KI270330v1 1652 TRUE unassigned +chrUn_KI270329v1 1040 TRUE unassigned +chrUn_KI270334v1 1368 TRUE unassigned +chrUn_KI270333v1 2699 TRUE unassigned +chrUn_KI270335v1 1048 TRUE unassigned +chrUn_KI270338v1 1428 TRUE unassigned +chrUn_KI270340v1 1428 TRUE unassigned +chrUn_KI270336v1 1026 TRUE unassigned +chrUn_KI270337v1 1121 TRUE unassigned +chrUn_KI270363v1 1803 TRUE unassigned +chrUn_KI270364v1 2855 TRUE unassigned +chrUn_KI270362v1 3530 TRUE unassigned +chrUn_KI270366v1 8320 TRUE unassigned +chrUn_KI270378v1 1048 TRUE unassigned 
+chrUn_KI270379v1 1045 TRUE unassigned +chrUn_KI270389v1 1298 TRUE unassigned +chrUn_KI270390v1 2387 TRUE unassigned +chrUn_KI270387v1 1537 TRUE unassigned +chrUn_KI270395v1 1143 TRUE unassigned +chrUn_KI270396v1 1880 TRUE unassigned +chrUn_KI270388v1 1216 TRUE unassigned +chrUn_KI270394v1 970 TRUE unassigned +chrUn_KI270386v1 1788 TRUE unassigned +chrUn_KI270391v1 1484 TRUE unassigned +chrUn_KI270383v1 1750 TRUE unassigned +chrUn_KI270393v1 1308 TRUE unassigned +chrUn_KI270384v1 1658 TRUE unassigned +chrUn_KI270392v1 971 TRUE unassigned +chrUn_KI270381v1 1930 TRUE unassigned +chrUn_KI270385v1 990 TRUE unassigned +chrUn_KI270382v1 4215 TRUE unassigned +chrUn_KI270376v1 1136 TRUE unassigned +chrUn_KI270374v1 2656 TRUE unassigned +chrUn_KI270372v1 1650 TRUE unassigned +chrUn_KI270373v1 1451 TRUE unassigned +chrUn_KI270375v1 2378 TRUE unassigned +chrUn_KI270371v1 2805 TRUE unassigned +chrUn_KI270448v1 7992 TRUE unassigned +chrUn_KI270521v1 7642 TRUE unassigned +chrUn_GL000195v1 182896 TRUE unassigned +chrUn_GL000219v1 179198 TRUE unassigned +chrUn_GL000220v1 161802 TRUE unassigned +chrUn_GL000224v1 179693 TRUE unassigned +chrUn_KI270741v1 157432 TRUE unassigned +chrUn_GL000226v1 15008 TRUE unassigned +chrUn_GL000213v1 164239 TRUE unassigned +chrUn_KI270743v1 210658 TRUE unassigned +chrUn_KI270744v1 168472 TRUE unassigned +chrUn_KI270745v1 41891 TRUE unassigned +chrUn_KI270746v1 66486 TRUE unassigned +chrUn_KI270747v1 198735 TRUE unassigned +chrUn_KI270748v1 93321 TRUE unassigned +chrUn_KI270749v1 158759 TRUE unassigned +chrUn_KI270750v1 148850 TRUE unassigned +chrUn_KI270751v1 150742 TRUE unassigned +chrUn_KI270752v1 27745 TRUE unassigned +chrUn_KI270753v1 62944 TRUE unassigned +chrUn_KI270754v1 40191 TRUE unassigned +chrUn_KI270755v1 36723 TRUE unassigned +chrUn_KI270756v1 79590 TRUE unassigned +chrUn_KI270757v1 71251 TRUE unassigned +chrUn_GL000214v1 137718 TRUE unassigned +chrUn_KI270742v1 186739 TRUE unassigned +chrUn_GL000216v2 176608 TRUE unassigned 
+chrUn_GL000218v1 161147 TRUE unassigned +chr1_KI270762v1_alt 354444 TRUE alt +chr1_KI270766v1_alt 256271 TRUE alt +chr1_KI270760v1_alt 109528 TRUE alt +chr1_KI270765v1_alt 185285 TRUE alt +chr1_GL383518v1_alt 182439 TRUE alt +chr1_GL383519v1_alt 110268 TRUE alt +chr1_GL383520v2_alt 366580 TRUE alt +chr1_KI270764v1_alt 50258 TRUE alt +chr1_KI270763v1_alt 911658 TRUE alt +chr1_KI270759v1_alt 425601 TRUE alt +chr1_KI270761v1_alt 165834 TRUE alt +chr2_KI270770v1_alt 136240 TRUE alt +chr2_KI270773v1_alt 70887 TRUE alt +chr2_KI270774v1_alt 223625 TRUE alt +chr2_KI270769v1_alt 120616 TRUE alt +chr2_GL383521v1_alt 143390 TRUE alt +chr2_KI270772v1_alt 133041 TRUE alt +chr2_KI270775v1_alt 138019 TRUE alt +chr2_KI270771v1_alt 110395 TRUE alt +chr2_KI270768v1_alt 110099 TRUE alt +chr2_GL582966v2_alt 96131 TRUE alt +chr2_GL383522v1_alt 123821 TRUE alt +chr2_KI270776v1_alt 174166 TRUE alt +chr2_KI270767v1_alt 161578 TRUE alt +chr3_JH636055v2_alt 173151 TRUE alt +chr3_KI270783v1_alt 109187 TRUE alt +chr3_KI270780v1_alt 224108 TRUE alt +chr3_GL383526v1_alt 180671 TRUE alt +chr3_KI270777v1_alt 173649 TRUE alt +chr3_KI270778v1_alt 248252 TRUE alt +chr3_KI270781v1_alt 113034 TRUE alt +chr3_KI270779v1_alt 205312 TRUE alt +chr3_KI270782v1_alt 162429 TRUE alt +chr3_KI270784v1_alt 184404 TRUE alt +chr4_KI270790v1_alt 220246 TRUE alt +chr4_GL383528v1_alt 376187 TRUE alt +chr4_KI270787v1_alt 111943 TRUE alt +chr4_GL000257v2_alt 586476 TRUE alt +chr4_KI270788v1_alt 158965 TRUE alt +chr4_GL383527v1_alt 164536 TRUE alt +chr4_KI270785v1_alt 119912 TRUE alt +chr4_KI270789v1_alt 205944 TRUE alt +chr4_KI270786v1_alt 244096 TRUE alt +chr5_KI270793v1_alt 126136 TRUE alt +chr5_KI270792v1_alt 179043 TRUE alt +chr5_KI270791v1_alt 195710 TRUE alt +chr5_GL383532v1_alt 82728 TRUE alt +chr5_GL949742v1_alt 226852 TRUE alt +chr5_KI270794v1_alt 164558 TRUE alt +chr5_GL339449v2_alt 1612928 TRUE alt +chr5_GL383530v1_alt 101241 TRUE alt +chr5_KI270796v1_alt 172708 TRUE alt +chr5_GL383531v1_alt 173459 TRUE alt 
+chr5_KI270795v1_alt 131892 TRUE alt +chr6_GL000250v2_alt 4672374 TRUE alt +chr6_KI270800v1_alt 175808 TRUE alt +chr6_KI270799v1_alt 152148 TRUE alt +chr6_GL383533v1_alt 124736 TRUE alt +chr6_KI270801v1_alt 870480 TRUE alt +chr6_KI270802v1_alt 75005 TRUE alt +chr6_KB021644v2_alt 185823 TRUE alt +chr6_KI270797v1_alt 197536 TRUE alt +chr6_KI270798v1_alt 271782 TRUE alt +chr7_KI270804v1_alt 157952 TRUE alt +chr7_KI270809v1_alt 209586 TRUE alt +chr7_KI270806v1_alt 158166 TRUE alt +chr7_GL383534v2_alt 119183 TRUE alt +chr7_KI270803v1_alt 1111570 TRUE alt +chr7_KI270808v1_alt 271455 TRUE alt +chr7_KI270807v1_alt 126434 TRUE alt +chr7_KI270805v1_alt 209988 TRUE alt +chr8_KI270818v1_alt 145606 TRUE alt +chr8_KI270812v1_alt 282736 TRUE alt +chr8_KI270811v1_alt 292436 TRUE alt +chr8_KI270821v1_alt 985506 TRUE alt +chr8_KI270813v1_alt 300230 TRUE alt +chr8_KI270822v1_alt 624492 TRUE alt +chr8_KI270814v1_alt 141812 TRUE alt +chr8_KI270810v1_alt 374415 TRUE alt +chr8_KI270819v1_alt 133535 TRUE alt +chr8_KI270820v1_alt 36640 TRUE alt +chr8_KI270817v1_alt 158983 TRUE alt +chr8_KI270816v1_alt 305841 TRUE alt +chr8_KI270815v1_alt 132244 TRUE alt +chr9_GL383539v1_alt 162988 TRUE alt +chr9_GL383540v1_alt 71551 TRUE alt +chr9_GL383541v1_alt 171286 TRUE alt +chr9_GL383542v1_alt 60032 TRUE alt +chr9_KI270823v1_alt 439082 TRUE alt +chr10_GL383545v1_alt 179254 TRUE alt +chr10_KI270824v1_alt 181496 TRUE alt +chr10_GL383546v1_alt 309802 TRUE alt +chr10_KI270825v1_alt 188315 TRUE alt +chr11_KI270832v1_alt 210133 TRUE alt +chr11_KI270830v1_alt 177092 TRUE alt +chr11_KI270831v1_alt 296895 TRUE alt +chr11_KI270829v1_alt 204059 TRUE alt +chr11_GL383547v1_alt 154407 TRUE alt +chr11_JH159136v1_alt 200998 TRUE alt +chr11_JH159137v1_alt 191409 TRUE alt +chr11_KI270827v1_alt 67707 TRUE alt +chr11_KI270826v1_alt 186169 TRUE alt +chr12_GL877875v1_alt 167313 TRUE alt +chr12_GL877876v1_alt 408271 TRUE alt +chr12_KI270837v1_alt 40090 TRUE alt +chr12_GL383549v1_alt 120804 TRUE alt +chr12_KI270835v1_alt 
238139 TRUE alt +chr12_GL383550v2_alt 169178 TRUE alt +chr12_GL383552v1_alt 138655 TRUE alt +chr12_GL383553v2_alt 152874 TRUE alt +chr12_KI270834v1_alt 119498 TRUE alt +chr12_GL383551v1_alt 184319 TRUE alt +chr12_KI270833v1_alt 76061 TRUE alt +chr12_KI270836v1_alt 56134 TRUE alt +chr13_KI270840v1_alt 191684 TRUE alt +chr13_KI270839v1_alt 180306 TRUE alt +chr13_KI270843v1_alt 103832 TRUE alt +chr13_KI270841v1_alt 169134 TRUE alt +chr13_KI270838v1_alt 306913 TRUE alt +chr13_KI270842v1_alt 37287 TRUE alt +chr14_KI270844v1_alt 322166 TRUE alt +chr14_KI270847v1_alt 1511111 TRUE alt +chr14_KI270845v1_alt 180703 TRUE alt +chr14_KI270846v1_alt 1351393 TRUE alt +chr15_KI270852v1_alt 478999 TRUE alt +chr15_KI270851v1_alt 263054 TRUE alt +chr15_KI270848v1_alt 327382 TRUE alt +chr15_GL383554v1_alt 296527 TRUE alt +chr15_KI270849v1_alt 244917 TRUE alt +chr15_GL383555v2_alt 388773 TRUE alt +chr15_KI270850v1_alt 430880 TRUE alt +chr16_KI270854v1_alt 134193 TRUE alt +chr16_KI270856v1_alt 63982 TRUE alt +chr16_KI270855v1_alt 232857 TRUE alt +chr16_KI270853v1_alt 2659700 TRUE alt +chr16_GL383556v1_alt 192462 TRUE alt +chr16_GL383557v1_alt 89672 TRUE alt +chr17_GL383563v3_alt 375691 TRUE alt +chr17_KI270862v1_alt 391357 TRUE alt +chr17_KI270861v1_alt 196688 TRUE alt +chr17_KI270857v1_alt 2877074 TRUE alt +chr17_JH159146v1_alt 278131 TRUE alt +chr17_JH159147v1_alt 70345 TRUE alt +chr17_GL383564v2_alt 133151 TRUE alt +chr17_GL000258v2_alt 1821992 TRUE alt +chr17_GL383565v1_alt 223995 TRUE alt +chr17_KI270858v1_alt 235827 TRUE alt +chr17_KI270859v1_alt 108763 TRUE alt +chr17_GL383566v1_alt 90219 TRUE alt +chr17_KI270860v1_alt 178921 TRUE alt +chr18_KI270864v1_alt 111737 TRUE alt +chr18_GL383567v1_alt 289831 TRUE alt +chr18_GL383570v1_alt 164789 TRUE alt +chr18_GL383571v1_alt 198278 TRUE alt +chr18_GL383568v1_alt 104552 TRUE alt +chr18_GL383569v1_alt 167950 TRUE alt +chr18_GL383572v1_alt 159547 TRUE alt +chr18_KI270863v1_alt 167999 TRUE alt +chr19_KI270868v1_alt 61734 TRUE alt 
+chr19_KI270865v1_alt 52969 TRUE alt +chr19_GL383573v1_alt 385657 TRUE alt +chr19_GL383575v2_alt 170222 TRUE alt +chr19_GL383576v1_alt 188024 TRUE alt +chr19_GL383574v1_alt 155864 TRUE alt +chr19_KI270866v1_alt 43156 TRUE alt +chr19_KI270867v1_alt 233762 TRUE alt +chr19_GL949746v1_alt 987716 TRUE alt +chr20_GL383577v2_alt 128386 TRUE alt +chr20_KI270869v1_alt 118774 TRUE alt +chr20_KI270871v1_alt 58661 TRUE alt +chr20_KI270870v1_alt 183433 TRUE alt +chr21_GL383578v2_alt 63917 TRUE alt +chr21_KI270874v1_alt 166743 TRUE alt +chr21_KI270873v1_alt 143900 TRUE alt +chr21_GL383579v2_alt 201197 TRUE alt +chr21_GL383580v2_alt 74653 TRUE alt +chr21_GL383581v2_alt 116689 TRUE alt +chr21_KI270872v1_alt 82692 TRUE alt +chr22_KI270875v1_alt 259914 TRUE alt +chr22_KI270878v1_alt 186262 TRUE alt +chr22_KI270879v1_alt 304135 TRUE alt +chr22_KI270876v1_alt 263666 TRUE alt +chr22_KI270877v1_alt 101331 TRUE alt +chr22_GL383583v2_alt 96924 TRUE alt +chr22_GL383582v2_alt 162811 TRUE alt +chrX_KI270880v1_alt 284869 TRUE alt +chrX_KI270881v1_alt 144206 TRUE alt +chr19_KI270882v1_alt 248807 TRUE alt +chr19_KI270883v1_alt 170399 TRUE alt +chr19_KI270884v1_alt 157053 TRUE alt +chr19_KI270885v1_alt 171027 TRUE alt +chr19_KI270886v1_alt 204239 TRUE alt +chr19_KI270887v1_alt 209512 TRUE alt +chr19_KI270888v1_alt 155532 TRUE alt +chr19_KI270889v1_alt 170698 TRUE alt +chr19_KI270890v1_alt 184499 TRUE alt +chr19_KI270891v1_alt 170680 TRUE alt +chr1_KI270892v1_alt 162212 TRUE alt +chr2_KI270894v1_alt 214158 TRUE alt +chr2_KI270893v1_alt 161218 TRUE alt +chr3_KI270895v1_alt 162896 TRUE alt +chr4_KI270896v1_alt 378547 TRUE alt +chr5_KI270897v1_alt 1144418 TRUE alt +chr5_KI270898v1_alt 130957 TRUE alt +chr6_GL000251v2_alt 4795265 TRUE alt +chr7_KI270899v1_alt 190869 TRUE alt +chr8_KI270901v1_alt 136959 TRUE alt +chr8_KI270900v1_alt 318687 TRUE alt +chr11_KI270902v1_alt 106711 TRUE alt +chr11_KI270903v1_alt 214625 TRUE alt +chr12_KI270904v1_alt 572349 TRUE alt +chr15_KI270906v1_alt 196384 TRUE alt 
+chr15_KI270905v1_alt 5161414 TRUE alt +chr17_KI270907v1_alt 137721 TRUE alt +chr17_KI270910v1_alt 157099 TRUE alt +chr17_KI270909v1_alt 325800 TRUE alt +chr17_JH159148v1_alt 88070 TRUE alt +chr17_KI270908v1_alt 1423190 TRUE alt +chr18_KI270912v1_alt 174061 TRUE alt +chr18_KI270911v1_alt 157710 TRUE alt +chr19_GL949747v2_alt 729520 TRUE alt +chr22_KB663609v1_alt 74013 TRUE alt +chrX_KI270913v1_alt 274009 TRUE alt +chr19_KI270914v1_alt 205194 TRUE alt +chr19_KI270915v1_alt 170665 TRUE alt +chr19_KI270916v1_alt 184516 TRUE alt +chr19_KI270917v1_alt 190932 TRUE alt +chr19_KI270918v1_alt 123111 TRUE alt +chr19_KI270919v1_alt 170701 TRUE alt +chr19_KI270920v1_alt 198005 TRUE alt +chr19_KI270921v1_alt 282224 TRUE alt +chr19_KI270922v1_alt 187935 TRUE alt +chr19_KI270923v1_alt 189352 TRUE alt +chr3_KI270924v1_alt 166540 TRUE alt +chr4_KI270925v1_alt 555799 TRUE alt +chr6_GL000252v2_alt 4604811 TRUE alt +chr8_KI270926v1_alt 229282 TRUE alt +chr11_KI270927v1_alt 218612 TRUE alt +chr19_GL949748v2_alt 1064304 TRUE alt +chr22_KI270928v1_alt 176103 TRUE alt +chr19_KI270929v1_alt 186203 TRUE alt +chr19_KI270930v1_alt 200773 TRUE alt +chr19_KI270931v1_alt 170148 TRUE alt +chr19_KI270932v1_alt 215732 TRUE alt +chr19_KI270933v1_alt 170537 TRUE alt +chr19_GL000209v2_alt 177381 TRUE alt +chr3_KI270934v1_alt 163458 TRUE alt +chr6_GL000253v2_alt 4677643 TRUE alt +chr19_GL949749v2_alt 1091841 TRUE alt +chr3_KI270935v1_alt 197351 TRUE alt +chr6_GL000254v2_alt 4827813 TRUE alt +chr19_GL949750v2_alt 1066390 TRUE alt +chr3_KI270936v1_alt 164170 TRUE alt +chr6_GL000255v2_alt 4606388 TRUE alt +chr19_GL949751v2_alt 1002683 TRUE alt +chr3_KI270937v1_alt 165607 TRUE alt +chr6_GL000256v2_alt 4929269 TRUE alt +chr19_GL949752v1_alt 987100 TRUE alt +chr6_KI270758v1_alt 76752 TRUE alt +chr19_GL949753v2_alt 796479 TRUE alt +chr19_KI270938v1_alt 1066800 TRUE alt +chrEBV 171823 TRUE virus +chrUn_KN707606v1_decoy 2200 TRUE decoy +chrUn_KN707607v1_decoy 3033 TRUE decoy +chrUn_KN707608v1_decoy 3112 TRUE 
decoy +chrUn_KN707609v1_decoy 1642 TRUE decoy +chrUn_KN707610v1_decoy 1393 TRUE decoy +chrUn_KN707611v1_decoy 1103 TRUE decoy +chrUn_KN707612v1_decoy 1039 TRUE decoy +chrUn_KN707613v1_decoy 1619 TRUE decoy +chrUn_KN707614v1_decoy 3122 TRUE decoy +chrUn_KN707615v1_decoy 1934 TRUE decoy +chrUn_KN707616v1_decoy 3111 TRUE decoy +chrUn_KN707617v1_decoy 2545 TRUE decoy +chrUn_KN707618v1_decoy 2295 TRUE decoy +chrUn_KN707619v1_decoy 1551 TRUE decoy +chrUn_KN707620v1_decoy 2046 TRUE decoy +chrUn_KN707621v1_decoy 1222 TRUE decoy +chrUn_KN707622v1_decoy 1535 TRUE decoy +chrUn_KN707623v1_decoy 3784 TRUE decoy +chrUn_KN707624v1_decoy 1329 TRUE decoy +chrUn_KN707625v1_decoy 1238 TRUE decoy +chrUn_KN707626v1_decoy 5623 TRUE decoy +chrUn_KN707627v1_decoy 5821 TRUE decoy +chrUn_KN707628v1_decoy 2960 TRUE decoy +chrUn_KN707629v1_decoy 1848 TRUE decoy +chrUn_KN707630v1_decoy 2315 TRUE decoy +chrUn_KN707631v1_decoy 1945 TRUE decoy +chrUn_KN707632v1_decoy 1424 TRUE decoy +chrUn_KN707633v1_decoy 1274 TRUE decoy +chrUn_KN707634v1_decoy 1007 TRUE decoy +chrUn_KN707635v1_decoy 1414 TRUE decoy +chrUn_KN707636v1_decoy 1725 TRUE decoy +chrUn_KN707637v1_decoy 5354 TRUE decoy +chrUn_KN707638v1_decoy 2189 TRUE decoy +chrUn_KN707639v1_decoy 1294 TRUE decoy +chrUn_KN707640v1_decoy 1831 TRUE decoy +chrUn_KN707641v1_decoy 1647 TRUE decoy +chrUn_KN707642v1_decoy 2943 TRUE decoy +chrUn_KN707643v1_decoy 2857 TRUE decoy +chrUn_KN707644v1_decoy 1030 TRUE decoy +chrUn_KN707645v1_decoy 1070 TRUE decoy +chrUn_KN707646v1_decoy 1735 TRUE decoy +chrUn_KN707647v1_decoy 1982 TRUE decoy +chrUn_KN707648v1_decoy 1564 TRUE decoy +chrUn_KN707649v1_decoy 1775 TRUE decoy +chrUn_KN707650v1_decoy 1540 TRUE decoy +chrUn_KN707651v1_decoy 2013 TRUE decoy +chrUn_KN707652v1_decoy 1176 TRUE decoy +chrUn_KN707653v1_decoy 1890 TRUE decoy +chrUn_KN707654v1_decoy 3644 TRUE decoy +chrUn_KN707655v1_decoy 2785 TRUE decoy +chrUn_KN707656v1_decoy 1017 TRUE decoy +chrUn_KN707657v1_decoy 1068 TRUE decoy +chrUn_KN707658v1_decoy 1007 TRUE 
decoy +chrUn_KN707659v1_decoy 2605 TRUE decoy +chrUn_KN707660v1_decoy 8410 TRUE decoy +chrUn_KN707661v1_decoy 5534 TRUE decoy +chrUn_KN707662v1_decoy 2173 TRUE decoy +chrUn_KN707663v1_decoy 1065 TRUE decoy +chrUn_KN707664v1_decoy 8683 TRUE decoy +chrUn_KN707665v1_decoy 2670 TRUE decoy +chrUn_KN707666v1_decoy 2420 TRUE decoy +chrUn_KN707667v1_decoy 2189 TRUE decoy +chrUn_KN707668v1_decoy 2093 TRUE decoy +chrUn_KN707669v1_decoy 1184 TRUE decoy +chrUn_KN707670v1_decoy 1205 TRUE decoy +chrUn_KN707671v1_decoy 2786 TRUE decoy +chrUn_KN707672v1_decoy 2794 TRUE decoy +chrUn_KN707673v1_decoy 19544 TRUE decoy +chrUn_KN707674v1_decoy 2848 TRUE decoy +chrUn_KN707675v1_decoy 10556 TRUE decoy +chrUn_KN707676v1_decoy 9066 TRUE decoy +chrUn_KN707677v1_decoy 7267 TRUE decoy +chrUn_KN707678v1_decoy 2462 TRUE decoy +chrUn_KN707679v1_decoy 1774 TRUE decoy +chrUn_KN707680v1_decoy 1297 TRUE decoy +chrUn_KN707681v1_decoy 4379 TRUE decoy +chrUn_KN707682v1_decoy 4208 TRUE decoy +chrUn_KN707683v1_decoy 4068 TRUE decoy +chrUn_KN707684v1_decoy 2940 TRUE decoy +chrUn_KN707685v1_decoy 3938 TRUE decoy +chrUn_KN707686v1_decoy 2072 TRUE decoy +chrUn_KN707687v1_decoy 1136 TRUE decoy +chrUn_KN707688v1_decoy 4248 TRUE decoy +chrUn_KN707689v1_decoy 5823 TRUE decoy +chrUn_KN707690v1_decoy 3715 TRUE decoy +chrUn_KN707691v1_decoy 4885 TRUE decoy +chrUn_KN707692v1_decoy 4813 TRUE decoy +chrUn_KN707693v1_decoy 2899 TRUE decoy +chrUn_KN707694v1_decoy 1228 TRUE decoy +chrUn_KN707695v1_decoy 3119 TRUE decoy +chrUn_KN707696v1_decoy 3828 TRUE decoy +chrUn_KN707697v1_decoy 1186 TRUE decoy +chrUn_KN707698v1_decoy 1908 TRUE decoy +chrUn_KN707699v1_decoy 2795 TRUE decoy +chrUn_KN707700v1_decoy 3703 TRUE decoy +chrUn_KN707701v1_decoy 6722 TRUE decoy +chrUn_KN707702v1_decoy 6466 TRUE decoy +chrUn_KN707703v1_decoy 2235 TRUE decoy +chrUn_KN707704v1_decoy 2871 TRUE decoy +chrUn_KN707705v1_decoy 4632 TRUE decoy +chrUn_KN707706v1_decoy 4225 TRUE decoy +chrUn_KN707707v1_decoy 4339 TRUE decoy +chrUn_KN707708v1_decoy 2305 
TRUE decoy +chrUn_KN707709v1_decoy 3273 TRUE decoy +chrUn_KN707710v1_decoy 5701 TRUE decoy +chrUn_KN707711v1_decoy 4154 TRUE decoy +chrUn_KN707712v1_decoy 1243 TRUE decoy +chrUn_KN707713v1_decoy 1308 TRUE decoy +chrUn_KN707714v1_decoy 2922 TRUE decoy +chrUn_KN707715v1_decoy 3044 TRUE decoy +chrUn_KN707716v1_decoy 2888 TRUE decoy +chrUn_KN707717v1_decoy 1742 TRUE decoy +chrUn_KN707718v1_decoy 4969 TRUE decoy +chrUn_KN707719v1_decoy 3270 TRUE decoy +chrUn_KN707720v1_decoy 6028 TRUE decoy +chrUn_KN707721v1_decoy 1105 TRUE decoy +chrUn_KN707722v1_decoy 2884 TRUE decoy +chrUn_KN707723v1_decoy 1124 TRUE decoy +chrUn_KN707724v1_decoy 1454 TRUE decoy +chrUn_KN707725v1_decoy 2565 TRUE decoy +chrUn_KN707726v1_decoy 2149 TRUE decoy +chrUn_KN707727v1_decoy 2630 TRUE decoy +chrUn_KN707728v1_decoy 14625 TRUE decoy +chrUn_KN707729v1_decoy 7431 TRUE decoy +chrUn_KN707730v1_decoy 5776 TRUE decoy +chrUn_KN707731v1_decoy 4820 TRUE decoy +chrUn_KN707732v1_decoy 1227 TRUE decoy +chrUn_KN707733v1_decoy 7503 TRUE decoy +chrUn_KN707734v1_decoy 9652 TRUE decoy +chrUn_KN707735v1_decoy 1091 TRUE decoy +chrUn_KN707736v1_decoy 2467 TRUE decoy +chrUn_KN707737v1_decoy 1270 TRUE decoy +chrUn_KN707738v1_decoy 4365 TRUE decoy +chrUn_KN707739v1_decoy 4284 TRUE decoy +chrUn_KN707740v1_decoy 10282 TRUE decoy +chrUn_KN707741v1_decoy 5601 TRUE decoy +chrUn_KN707742v1_decoy 4758 TRUE decoy +chrUn_KN707743v1_decoy 1624 TRUE decoy +chrUn_KN707744v1_decoy 4024 TRUE decoy +chrUn_KN707745v1_decoy 1276 TRUE decoy +chrUn_KN707746v1_decoy 5083 TRUE decoy +chrUn_KN707747v1_decoy 2075 TRUE decoy +chrUn_KN707748v1_decoy 3553 TRUE decoy +chrUn_KN707749v1_decoy 7010 TRUE decoy +chrUn_KN707750v1_decoy 4718 TRUE decoy +chrUn_KN707751v1_decoy 3546 TRUE decoy +chrUn_KN707752v1_decoy 2873 TRUE decoy +chrUn_KN707753v1_decoy 2144 TRUE decoy +chrUn_KN707754v1_decoy 2243 TRUE decoy +chrUn_KN707755v1_decoy 5343 TRUE decoy +chrUn_KN707756v1_decoy 4877 TRUE decoy +chrUn_KN707757v1_decoy 3034 TRUE decoy +chrUn_KN707758v1_decoy 
2826 TRUE decoy +chrUn_KN707759v1_decoy 1221 TRUE decoy +chrUn_KN707760v1_decoy 1169 TRUE decoy +chrUn_KN707761v1_decoy 2319 TRUE decoy +chrUn_KN707762v1_decoy 3450 TRUE decoy +chrUn_KN707763v1_decoy 2674 TRUE decoy +chrUn_KN707764v1_decoy 3912 TRUE decoy +chrUn_KN707765v1_decoy 6020 TRUE decoy +chrUn_KN707766v1_decoy 2303 TRUE decoy +chrUn_KN707767v1_decoy 2552 TRUE decoy +chrUn_KN707768v1_decoy 3656 TRUE decoy +chrUn_KN707769v1_decoy 1591 TRUE decoy +chrUn_KN707770v1_decoy 1209 TRUE decoy +chrUn_KN707771v1_decoy 3176 TRUE decoy +chrUn_KN707772v1_decoy 8915 TRUE decoy +chrUn_KN707773v1_decoy 4902 TRUE decoy +chrUn_KN707774v1_decoy 3324 TRUE decoy +chrUn_KN707775v1_decoy 5997 TRUE decoy +chrUn_KN707776v1_decoy 2618 TRUE decoy +chrUn_KN707777v1_decoy 10311 TRUE decoy +chrUn_KN707778v1_decoy 2440 TRUE decoy +chrUn_KN707779v1_decoy 12444 TRUE decoy +chrUn_KN707780v1_decoy 5691 TRUE decoy +chrUn_KN707781v1_decoy 2717 TRUE decoy +chrUn_KN707782v1_decoy 5277 TRUE decoy +chrUn_KN707783v1_decoy 4373 TRUE decoy +chrUn_KN707784v1_decoy 3224 TRUE decoy +chrUn_KN707785v1_decoy 2631 TRUE decoy +chrUn_KN707786v1_decoy 5385 TRUE decoy +chrUn_KN707787v1_decoy 3678 TRUE decoy +chrUn_KN707788v1_decoy 1412 TRUE decoy +chrUn_KN707789v1_decoy 1443 TRUE decoy +chrUn_KN707790v1_decoy 1098 TRUE decoy +chrUn_KN707791v1_decoy 3240 TRUE decoy +chrUn_KN707792v1_decoy 1915 TRUE decoy +chrUn_KN707793v1_decoy 4667 TRUE decoy +chrUn_KN707794v1_decoy 7219 TRUE decoy +chrUn_KN707795v1_decoy 3277 TRUE decoy +chrUn_KN707796v1_decoy 3473 TRUE decoy +chrUn_KN707797v1_decoy 4243 TRUE decoy +chrUn_KN707798v1_decoy 17599 TRUE decoy +chrUn_KN707799v1_decoy 5095 TRUE decoy +chrUn_KN707800v1_decoy 2237 TRUE decoy +chrUn_KN707801v1_decoy 2901 TRUE decoy +chrUn_KN707802v1_decoy 2666 TRUE decoy +chrUn_KN707803v1_decoy 5336 TRUE decoy +chrUn_KN707804v1_decoy 4383 TRUE decoy +chrUn_KN707805v1_decoy 5446 TRUE decoy +chrUn_KN707806v1_decoy 6252 TRUE decoy +chrUn_KN707807v1_decoy 4616 TRUE decoy 
+chrUn_KN707808v1_decoy 3021 TRUE decoy +chrUn_KN707809v1_decoy 3667 TRUE decoy +chrUn_KN707810v1_decoy 4563 TRUE decoy +chrUn_KN707811v1_decoy 1120 TRUE decoy +chrUn_KN707812v1_decoy 3845 TRUE decoy +chrUn_KN707813v1_decoy 2272 TRUE decoy +chrUn_KN707814v1_decoy 4764 TRUE decoy +chrUn_KN707815v1_decoy 5410 TRUE decoy +chrUn_KN707816v1_decoy 7150 TRUE decoy +chrUn_KN707817v1_decoy 1762 TRUE decoy +chrUn_KN707818v1_decoy 1207 TRUE decoy +chrUn_KN707819v1_decoy 1331 TRUE decoy +chrUn_KN707820v1_decoy 8307 TRUE decoy +chrUn_KN707821v1_decoy 2276 TRUE decoy +chrUn_KN707822v1_decoy 2575 TRUE decoy +chrUn_KN707823v1_decoy 3970 TRUE decoy +chrUn_KN707824v1_decoy 1352 TRUE decoy +chrUn_KN707825v1_decoy 3040 TRUE decoy +chrUn_KN707826v1_decoy 2070 TRUE decoy +chrUn_KN707827v1_decoy 2913 TRUE decoy +chrUn_KN707828v1_decoy 2389 TRUE decoy +chrUn_KN707829v1_decoy 1835 TRUE decoy +chrUn_KN707830v1_decoy 4807 TRUE decoy +chrUn_KN707831v1_decoy 2201 TRUE decoy +chrUn_KN707832v1_decoy 1265 TRUE decoy +chrUn_KN707833v1_decoy 1961 TRUE decoy +chrUn_KN707834v1_decoy 1064 TRUE decoy +chrUn_KN707835v1_decoy 1932 TRUE decoy +chrUn_KN707836v1_decoy 3213 TRUE decoy +chrUn_KN707837v1_decoy 1178 TRUE decoy +chrUn_KN707838v1_decoy 2926 TRUE decoy +chrUn_KN707839v1_decoy 1038 TRUE decoy +chrUn_KN707840v1_decoy 3298 TRUE decoy +chrUn_KN707841v1_decoy 8992 TRUE decoy +chrUn_KN707842v1_decoy 6698 TRUE decoy +chrUn_KN707843v1_decoy 4880 TRUE decoy +chrUn_KN707844v1_decoy 1766 TRUE decoy +chrUn_KN707845v1_decoy 3532 TRUE decoy +chrUn_KN707846v1_decoy 2297 TRUE decoy +chrUn_KN707847v1_decoy 1234 TRUE decoy +chrUn_KN707848v1_decoy 1205 TRUE decoy +chrUn_KN707849v1_decoy 2790 TRUE decoy +chrUn_KN707850v1_decoy 2006 TRUE decoy +chrUn_KN707851v1_decoy 4593 TRUE decoy +chrUn_KN707852v1_decoy 1579 TRUE decoy +chrUn_KN707853v1_decoy 9597 TRUE decoy +chrUn_KN707854v1_decoy 10451 TRUE decoy +chrUn_KN707855v1_decoy 3219 TRUE decoy +chrUn_KN707856v1_decoy 2300 TRUE decoy +chrUn_KN707857v1_decoy 5985 TRUE 
decoy +chrUn_KN707858v1_decoy 2959 TRUE decoy +chrUn_KN707859v1_decoy 1340 TRUE decoy +chrUn_KN707860v1_decoy 3148 TRUE decoy +chrUn_KN707861v1_decoy 2242 TRUE decoy +chrUn_KN707862v1_decoy 16513 TRUE decoy +chrUn_KN707863v1_decoy 7821 TRUE decoy +chrUn_KN707864v1_decoy 2159 TRUE decoy +chrUn_KN707865v1_decoy 2114 TRUE decoy +chrUn_KN707866v1_decoy 4109 TRUE decoy +chrUn_KN707867v1_decoy 1544 TRUE decoy +chrUn_KN707868v1_decoy 1005 TRUE decoy +chrUn_KN707869v1_decoy 8632 TRUE decoy +chrUn_KN707870v1_decoy 1012 TRUE decoy +chrUn_KN707871v1_decoy 4728 TRUE decoy +chrUn_KN707872v1_decoy 2165 TRUE decoy +chrUn_KN707873v1_decoy 7591 TRUE decoy +chrUn_KN707874v1_decoy 5202 TRUE decoy +chrUn_KN707875v1_decoy 4241 TRUE decoy +chrUn_KN707876v1_decoy 4131 TRUE decoy +chrUn_KN707877v1_decoy 2272 TRUE decoy +chrUn_KN707878v1_decoy 2085 TRUE decoy +chrUn_KN707879v1_decoy 4346 TRUE decoy +chrUn_KN707880v1_decoy 1208 TRUE decoy +chrUn_KN707881v1_decoy 4543 TRUE decoy +chrUn_KN707882v1_decoy 2772 TRUE decoy +chrUn_KN707883v1_decoy 2490 TRUE decoy +chrUn_KN707884v1_decoy 4568 TRUE decoy +chrUn_KN707885v1_decoy 1776 TRUE decoy +chrUn_KN707886v1_decoy 2699 TRUE decoy +chrUn_KN707887v1_decoy 3534 TRUE decoy +chrUn_KN707888v1_decoy 2424 TRUE decoy +chrUn_KN707889v1_decoy 1747 TRUE decoy +chrUn_KN707890v1_decoy 1088 TRUE decoy +chrUn_KN707891v1_decoy 1143 TRUE decoy +chrUn_KN707892v1_decoy 2530 TRUE decoy +chrUn_KN707893v1_decoy 8049 TRUE decoy +chrUn_KN707894v1_decoy 1366 TRUE decoy +chrUn_KN707895v1_decoy 4284 TRUE decoy +chrUn_KN707896v1_decoy 33125 TRUE decoy +chrUn_KN707897v1_decoy 2137 TRUE decoy +chrUn_KN707898v1_decoy 3840 TRUE decoy +chrUn_KN707899v1_decoy 3087 TRUE decoy +chrUn_KN707900v1_decoy 2041 TRUE decoy +chrUn_KN707901v1_decoy 3344 TRUE decoy +chrUn_KN707902v1_decoy 2921 TRUE decoy +chrUn_KN707903v1_decoy 6581 TRUE decoy +chrUn_KN707904v1_decoy 3968 TRUE decoy +chrUn_KN707905v1_decoy 2339 TRUE decoy +chrUn_KN707906v1_decoy 1243 TRUE decoy +chrUn_KN707907v1_decoy 7776 
TRUE decoy +chrUn_KN707908v1_decoy 19837 TRUE decoy +chrUn_KN707909v1_decoy 1737 TRUE decoy +chrUn_KN707910v1_decoy 1098 TRUE decoy +chrUn_KN707911v1_decoy 1893 TRUE decoy +chrUn_KN707912v1_decoy 1281 TRUE decoy +chrUn_KN707913v1_decoy 1527 TRUE decoy +chrUn_KN707914v1_decoy 2055 TRUE decoy +chrUn_KN707915v1_decoy 2527 TRUE decoy +chrUn_KN707916v1_decoy 3275 TRUE decoy +chrUn_KN707917v1_decoy 1265 TRUE decoy +chrUn_KN707918v1_decoy 2623 TRUE decoy +chrUn_KN707919v1_decoy 4850 TRUE decoy +chrUn_KN707920v1_decoy 3584 TRUE decoy +chrUn_KN707921v1_decoy 2561 TRUE decoy +chrUn_KN707922v1_decoy 3041 TRUE decoy +chrUn_KN707923v1_decoy 1409 TRUE decoy +chrUn_KN707924v1_decoy 4596 TRUE decoy +chrUn_KN707925v1_decoy 11555 TRUE decoy +chrUn_KN707926v1_decoy 1266 TRUE decoy +chrUn_KN707927v1_decoy 1079 TRUE decoy +chrUn_KN707928v1_decoy 1087 TRUE decoy +chrUn_KN707929v1_decoy 1226 TRUE decoy +chrUn_KN707930v1_decoy 1131 TRUE decoy +chrUn_KN707931v1_decoy 1199 TRUE decoy +chrUn_KN707932v1_decoy 1084 TRUE decoy +chrUn_KN707933v1_decoy 2038 TRUE decoy +chrUn_KN707934v1_decoy 1070 TRUE decoy +chrUn_KN707935v1_decoy 1312 TRUE decoy +chrUn_KN707936v1_decoy 4031 TRUE decoy +chrUn_KN707937v1_decoy 7445 TRUE decoy +chrUn_KN707938v1_decoy 1770 TRUE decoy +chrUn_KN707939v1_decoy 5600 TRUE decoy +chrUn_KN707940v1_decoy 1882 TRUE decoy +chrUn_KN707941v1_decoy 1170 TRUE decoy +chrUn_KN707942v1_decoy 1300 TRUE decoy +chrUn_KN707943v1_decoy 5325 TRUE decoy +chrUn_KN707944v1_decoy 2043 TRUE decoy +chrUn_KN707945v1_decoy 1072 TRUE decoy +chrUn_KN707946v1_decoy 2463 TRUE decoy +chrUn_KN707947v1_decoy 1010 TRUE decoy +chrUn_KN707948v1_decoy 1432 TRUE decoy +chrUn_KN707949v1_decoy 1162 TRUE decoy +chrUn_KN707950v1_decoy 1095 TRUE decoy +chrUn_KN707951v1_decoy 1118 TRUE decoy +chrUn_KN707952v1_decoy 1383 TRUE decoy +chrUn_KN707953v1_decoy 2289 TRUE decoy +chrUn_KN707954v1_decoy 1648 TRUE decoy +chrUn_KN707955v1_decoy 2203 TRUE decoy +chrUn_KN707956v1_decoy 3270 TRUE decoy +chrUn_KN707957v1_decoy 
11499 TRUE decoy +chrUn_KN707958v1_decoy 2474 TRUE decoy +chrUn_KN707959v1_decoy 2294 TRUE decoy +chrUn_KN707960v1_decoy 1238 TRUE decoy +chrUn_KN707961v1_decoy 3410 TRUE decoy +chrUn_KN707962v1_decoy 1523 TRUE decoy +chrUn_KN707963v1_decoy 62955 TRUE decoy +chrUn_KN707964v1_decoy 6282 TRUE decoy +chrUn_KN707965v1_decoy 3836 TRUE decoy +chrUn_KN707966v1_decoy 6486 TRUE decoy +chrUn_KN707967v1_decoy 15368 TRUE decoy +chrUn_KN707968v1_decoy 9572 TRUE decoy +chrUn_KN707969v1_decoy 6413 TRUE decoy +chrUn_KN707970v1_decoy 4104 TRUE decoy +chrUn_KN707971v1_decoy 12943 TRUE decoy +chrUn_KN707972v1_decoy 4650 TRUE decoy +chrUn_KN707973v1_decoy 3080 TRUE decoy +chrUn_KN707974v1_decoy 3134 TRUE decoy +chrUn_KN707975v1_decoy 6211 TRUE decoy +chrUn_KN707976v1_decoy 1126 TRUE decoy +chrUn_KN707977v1_decoy 1101 TRUE decoy +chrUn_KN707978v1_decoy 1101 TRUE decoy +chrUn_KN707979v1_decoy 2648 TRUE decoy +chrUn_KN707980v1_decoy 2973 TRUE decoy +chrUn_KN707981v1_decoy 2520 TRUE decoy +chrUn_KN707982v1_decoy 2318 TRUE decoy +chrUn_KN707983v1_decoy 2606 TRUE decoy +chrUn_KN707984v1_decoy 2205 TRUE decoy +chrUn_KN707985v1_decoy 2929 TRUE decoy +chrUn_KN707986v1_decoy 3869 TRUE decoy +chrUn_KN707987v1_decoy 1117 TRUE decoy +chrUn_KN707988v1_decoy 2960 TRUE decoy +chrUn_KN707989v1_decoy 1009 TRUE decoy +chrUn_KN707990v1_decoy 4048 TRUE decoy +chrUn_KN707991v1_decoy 2193 TRUE decoy +chrUn_KN707992v1_decoy 1830 TRUE decoy +chrUn_JTFH01000001v1_decoy 25139 TRUE decoy +chrUn_JTFH01000002v1_decoy 18532 TRUE decoy +chrUn_JTFH01000003v1_decoy 15240 TRUE decoy +chrUn_JTFH01000004v1_decoy 13739 TRUE decoy +chrUn_JTFH01000005v1_decoy 11297 TRUE decoy +chrUn_JTFH01000006v1_decoy 10074 TRUE decoy +chrUn_JTFH01000007v1_decoy 9891 TRUE decoy +chrUn_JTFH01000008v1_decoy 9774 TRUE decoy +chrUn_JTFH01000009v1_decoy 9727 TRUE decoy +chrUn_JTFH01000010v1_decoy 9358 TRUE decoy +chrUn_JTFH01000011v1_decoy 8920 TRUE decoy +chrUn_JTFH01000012v1_decoy 8479 TRUE decoy +chrUn_JTFH01000013v1_decoy 8312 TRUE decoy 
+chrUn_JTFH01000014v1_decoy 8261 TRUE decoy +chrUn_JTFH01000015v1_decoy 8131 TRUE decoy +chrUn_JTFH01000016v1_decoy 8051 TRUE decoy +chrUn_JTFH01000017v1_decoy 7832 TRUE decoy +chrUn_JTFH01000018v1_decoy 7710 TRUE decoy +chrUn_JTFH01000019v1_decoy 7702 TRUE decoy +chrUn_JTFH01000020v1_decoy 7479 TRUE decoy +chrUn_JTFH01000021v1_decoy 7368 TRUE decoy +chrUn_JTFH01000022v1_decoy 7162 TRUE decoy +chrUn_JTFH01000023v1_decoy 7065 TRUE decoy +chrUn_JTFH01000024v1_decoy 7019 TRUE decoy +chrUn_JTFH01000025v1_decoy 6997 TRUE decoy +chrUn_JTFH01000026v1_decoy 6994 TRUE decoy +chrUn_JTFH01000027v1_decoy 6979 TRUE decoy +chrUn_JTFH01000028v1_decoy 6797 TRUE decoy +chrUn_JTFH01000029v1_decoy 6525 TRUE decoy +chrUn_JTFH01000030v1_decoy 6246 TRUE decoy +chrUn_JTFH01000031v1_decoy 5926 TRUE decoy +chrUn_JTFH01000032v1_decoy 5914 TRUE decoy +chrUn_JTFH01000033v1_decoy 5898 TRUE decoy +chrUn_JTFH01000034v1_decoy 5879 TRUE decoy +chrUn_JTFH01000035v1_decoy 5834 TRUE decoy +chrUn_JTFH01000036v1_decoy 5743 TRUE decoy +chrUn_JTFH01000037v1_decoy 5577 TRUE decoy +chrUn_JTFH01000038v1_decoy 5413 TRUE decoy +chrUn_JTFH01000039v1_decoy 5250 TRUE decoy +chrUn_JTFH01000040v1_decoy 5246 TRUE decoy +chrUn_JTFH01000041v1_decoy 5118 TRUE decoy +chrUn_JTFH01000042v1_decoy 5058 TRUE decoy +chrUn_JTFH01000043v1_decoy 4959 TRUE decoy +chrUn_JTFH01000044v1_decoy 4853 TRUE decoy +chrUn_JTFH01000045v1_decoy 4828 TRUE decoy +chrUn_JTFH01000046v1_decoy 4819 TRUE decoy +chrUn_JTFH01000047v1_decoy 4809 TRUE decoy +chrUn_JTFH01000048v1_decoy 4710 TRUE decoy +chrUn_JTFH01000049v1_decoy 4680 TRUE decoy +chrUn_JTFH01000050v1_decoy 4645 TRUE decoy +chrUn_JTFH01000051v1_decoy 4514 TRUE decoy +chrUn_JTFH01000052v1_decoy 4439 TRUE decoy +chrUn_JTFH01000053v1_decoy 4416 TRUE decoy +chrUn_JTFH01000054v1_decoy 4409 TRUE decoy +chrUn_JTFH01000055v1_decoy 4392 TRUE decoy +chrUn_JTFH01000056v1_decoy 4359 TRUE decoy +chrUn_JTFH01000057v1_decoy 4319 TRUE decoy +chrUn_JTFH01000058v1_decoy 4290 TRUE decoy 
+chrUn_JTFH01000059v1_decoy 4242 TRUE decoy +chrUn_JTFH01000060v1_decoy 4228 TRUE decoy +chrUn_JTFH01000061v1_decoy 4222 TRUE decoy +chrUn_JTFH01000062v1_decoy 4216 TRUE decoy +chrUn_JTFH01000063v1_decoy 4210 TRUE decoy +chrUn_JTFH01000064v1_decoy 4206 TRUE decoy +chrUn_JTFH01000065v1_decoy 4102 TRUE decoy +chrUn_JTFH01000066v1_decoy 4101 TRUE decoy +chrUn_JTFH01000067v1_decoy 4083 TRUE decoy +chrUn_JTFH01000068v1_decoy 3967 TRUE decoy +chrUn_JTFH01000069v1_decoy 3955 TRUE decoy +chrUn_JTFH01000070v1_decoy 3945 TRUE decoy +chrUn_JTFH01000071v1_decoy 3930 TRUE decoy +chrUn_JTFH01000072v1_decoy 3929 TRUE decoy +chrUn_JTFH01000073v1_decoy 3924 TRUE decoy +chrUn_JTFH01000074v1_decoy 3919 TRUE decoy +chrUn_JTFH01000075v1_decoy 3908 TRUE decoy +chrUn_JTFH01000076v1_decoy 3892 TRUE decoy +chrUn_JTFH01000077v1_decoy 3890 TRUE decoy +chrUn_JTFH01000078v1_decoy 3859 TRUE decoy +chrUn_JTFH01000079v1_decoy 3846 TRUE decoy +chrUn_JTFH01000080v1_decoy 3835 TRUE decoy +chrUn_JTFH01000081v1_decoy 3830 TRUE decoy +chrUn_JTFH01000082v1_decoy 3828 TRUE decoy +chrUn_JTFH01000083v1_decoy 3825 TRUE decoy +chrUn_JTFH01000084v1_decoy 3821 TRUE decoy +chrUn_JTFH01000085v1_decoy 3809 TRUE decoy +chrUn_JTFH01000086v1_decoy 3801 TRUE decoy +chrUn_JTFH01000087v1_decoy 3799 TRUE decoy +chrUn_JTFH01000088v1_decoy 3737 TRUE decoy +chrUn_JTFH01000089v1_decoy 3701 TRUE decoy +chrUn_JTFH01000090v1_decoy 3698 TRUE decoy +chrUn_JTFH01000091v1_decoy 3692 TRUE decoy +chrUn_JTFH01000092v1_decoy 3686 TRUE decoy +chrUn_JTFH01000093v1_decoy 3677 TRUE decoy +chrUn_JTFH01000094v1_decoy 3664 TRUE decoy +chrUn_JTFH01000095v1_decoy 3613 TRUE decoy +chrUn_JTFH01000096v1_decoy 3611 TRUE decoy +chrUn_JTFH01000097v1_decoy 3606 TRUE decoy +chrUn_JTFH01000098v1_decoy 3584 TRUE decoy +chrUn_JTFH01000099v1_decoy 3581 TRUE decoy +chrUn_JTFH01000100v1_decoy 3543 TRUE decoy +chrUn_JTFH01000101v1_decoy 3528 TRUE decoy +chrUn_JTFH01000102v1_decoy 3527 TRUE decoy +chrUn_JTFH01000103v1_decoy 3496 TRUE decoy 
+chrUn_JTFH01000104v1_decoy 3493 TRUE decoy +chrUn_JTFH01000105v1_decoy 3484 TRUE decoy +chrUn_JTFH01000106v1_decoy 3435 TRUE decoy +chrUn_JTFH01000107v1_decoy 3391 TRUE decoy +chrUn_JTFH01000108v1_decoy 3374 TRUE decoy +chrUn_JTFH01000109v1_decoy 3371 TRUE decoy +chrUn_JTFH01000110v1_decoy 3361 TRUE decoy +chrUn_JTFH01000111v1_decoy 3351 TRUE decoy +chrUn_JTFH01000112v1_decoy 3340 TRUE decoy +chrUn_JTFH01000113v1_decoy 3320 TRUE decoy +chrUn_JTFH01000114v1_decoy 3282 TRUE decoy +chrUn_JTFH01000115v1_decoy 3278 TRUE decoy +chrUn_JTFH01000116v1_decoy 3260 TRUE decoy +chrUn_JTFH01000117v1_decoy 3258 TRUE decoy +chrUn_JTFH01000118v1_decoy 3253 TRUE decoy +chrUn_JTFH01000119v1_decoy 3247 TRUE decoy +chrUn_JTFH01000120v1_decoy 3230 TRUE decoy +chrUn_JTFH01000121v1_decoy 3224 TRUE decoy +chrUn_JTFH01000122v1_decoy 3216 TRUE decoy +chrUn_JTFH01000123v1_decoy 3212 TRUE decoy +chrUn_JTFH01000124v1_decoy 3194 TRUE decoy +chrUn_JTFH01000125v1_decoy 3189 TRUE decoy +chrUn_JTFH01000126v1_decoy 3177 TRUE decoy +chrUn_JTFH01000127v1_decoy 3176 TRUE decoy +chrUn_JTFH01000128v1_decoy 3173 TRUE decoy +chrUn_JTFH01000129v1_decoy 3170 TRUE decoy +chrUn_JTFH01000130v1_decoy 3166 TRUE decoy +chrUn_JTFH01000131v1_decoy 3163 TRUE decoy +chrUn_JTFH01000132v1_decoy 3143 TRUE decoy +chrUn_JTFH01000133v1_decoy 3137 TRUE decoy +chrUn_JTFH01000134v1_decoy 3116 TRUE decoy +chrUn_JTFH01000135v1_decoy 3106 TRUE decoy +chrUn_JTFH01000136v1_decoy 3093 TRUE decoy +chrUn_JTFH01000137v1_decoy 3079 TRUE decoy +chrUn_JTFH01000138v1_decoy 3053 TRUE decoy +chrUn_JTFH01000139v1_decoy 3051 TRUE decoy +chrUn_JTFH01000140v1_decoy 3015 TRUE decoy +chrUn_JTFH01000141v1_decoy 3012 TRUE decoy +chrUn_JTFH01000142v1_decoy 3009 TRUE decoy +chrUn_JTFH01000143v1_decoy 2997 TRUE decoy +chrUn_JTFH01000144v1_decoy 2997 TRUE decoy +chrUn_JTFH01000145v1_decoy 2983 TRUE decoy +chrUn_JTFH01000146v1_decoy 2979 TRUE decoy +chrUn_JTFH01000147v1_decoy 2967 TRUE decoy +chrUn_JTFH01000148v1_decoy 2967 TRUE decoy 
+chrUn_JTFH01000149v1_decoy 2966 TRUE decoy +chrUn_JTFH01000150v1_decoy 2954 TRUE decoy +chrUn_JTFH01000151v1_decoy 2952 TRUE decoy +chrUn_JTFH01000152v1_decoy 2934 TRUE decoy +chrUn_JTFH01000153v1_decoy 2918 TRUE decoy +chrUn_JTFH01000154v1_decoy 2895 TRUE decoy +chrUn_JTFH01000155v1_decoy 2887 TRUE decoy +chrUn_JTFH01000156v1_decoy 2879 TRUE decoy +chrUn_JTFH01000157v1_decoy 2878 TRUE decoy +chrUn_JTFH01000158v1_decoy 2872 TRUE decoy +chrUn_JTFH01000159v1_decoy 2868 TRUE decoy +chrUn_JTFH01000160v1_decoy 2866 TRUE decoy +chrUn_JTFH01000161v1_decoy 2865 TRUE decoy +chrUn_JTFH01000162v1_decoy 2864 TRUE decoy +chrUn_JTFH01000163v1_decoy 2859 TRUE decoy +chrUn_JTFH01000164v1_decoy 2854 TRUE decoy +chrUn_JTFH01000165v1_decoy 2830 TRUE decoy +chrUn_JTFH01000166v1_decoy 2828 TRUE decoy +chrUn_JTFH01000167v1_decoy 2824 TRUE decoy +chrUn_JTFH01000168v1_decoy 2819 TRUE decoy +chrUn_JTFH01000169v1_decoy 2813 TRUE decoy +chrUn_JTFH01000170v1_decoy 2809 TRUE decoy +chrUn_JTFH01000171v1_decoy 2802 TRUE decoy +chrUn_JTFH01000172v1_decoy 2791 TRUE decoy +chrUn_JTFH01000173v1_decoy 2783 TRUE decoy +chrUn_JTFH01000174v1_decoy 2778 TRUE decoy +chrUn_JTFH01000175v1_decoy 2777 TRUE decoy +chrUn_JTFH01000176v1_decoy 2770 TRUE decoy +chrUn_JTFH01000177v1_decoy 2769 TRUE decoy +chrUn_JTFH01000178v1_decoy 2767 TRUE decoy +chrUn_JTFH01000179v1_decoy 2763 TRUE decoy +chrUn_JTFH01000180v1_decoy 2745 TRUE decoy +chrUn_JTFH01000181v1_decoy 2742 TRUE decoy +chrUn_JTFH01000182v1_decoy 2736 TRUE decoy +chrUn_JTFH01000183v1_decoy 2729 TRUE decoy +chrUn_JTFH01000184v1_decoy 2726 TRUE decoy +chrUn_JTFH01000185v1_decoy 2719 TRUE decoy +chrUn_JTFH01000186v1_decoy 2715 TRUE decoy +chrUn_JTFH01000187v1_decoy 2708 TRUE decoy +chrUn_JTFH01000188v1_decoy 2704 TRUE decoy +chrUn_JTFH01000189v1_decoy 2692 TRUE decoy +chrUn_JTFH01000190v1_decoy 2691 TRUE decoy +chrUn_JTFH01000191v1_decoy 2690 TRUE decoy +chrUn_JTFH01000192v1_decoy 2687 TRUE decoy +chrUn_JTFH01000193v1_decoy 2677 TRUE decoy 
+chrUn_JTFH01000194v1_decoy 2668 TRUE decoy +chrUn_JTFH01000195v1_decoy 2668 TRUE decoy +chrUn_JTFH01000196v1_decoy 2663 TRUE decoy +chrUn_JTFH01000197v1_decoy 2655 TRUE decoy +chrUn_JTFH01000198v1_decoy 2644 TRUE decoy +chrUn_JTFH01000199v1_decoy 2642 TRUE decoy +chrUn_JTFH01000200v1_decoy 2632 TRUE decoy +chrUn_JTFH01000201v1_decoy 2632 TRUE decoy +chrUn_JTFH01000202v1_decoy 2628 TRUE decoy +chrUn_JTFH01000203v1_decoy 2623 TRUE decoy +chrUn_JTFH01000204v1_decoy 2622 TRUE decoy +chrUn_JTFH01000205v1_decoy 2619 TRUE decoy +chrUn_JTFH01000206v1_decoy 2605 TRUE decoy +chrUn_JTFH01000207v1_decoy 2603 TRUE decoy +chrUn_JTFH01000208v1_decoy 2601 TRUE decoy +chrUn_JTFH01000209v1_decoy 2598 TRUE decoy +chrUn_JTFH01000210v1_decoy 2597 TRUE decoy +chrUn_JTFH01000211v1_decoy 2596 TRUE decoy +chrUn_JTFH01000212v1_decoy 2594 TRUE decoy +chrUn_JTFH01000213v1_decoy 2586 TRUE decoy +chrUn_JTFH01000214v1_decoy 2585 TRUE decoy +chrUn_JTFH01000215v1_decoy 2583 TRUE decoy +chrUn_JTFH01000216v1_decoy 2578 TRUE decoy +chrUn_JTFH01000217v1_decoy 2569 TRUE decoy +chrUn_JTFH01000218v1_decoy 2569 TRUE decoy +chrUn_JTFH01000219v1_decoy 2551 TRUE decoy +chrUn_JTFH01000220v1_decoy 2548 TRUE decoy +chrUn_JTFH01000221v1_decoy 2548 TRUE decoy +chrUn_JTFH01000222v1_decoy 2546 TRUE decoy +chrUn_JTFH01000223v1_decoy 2545 TRUE decoy +chrUn_JTFH01000224v1_decoy 2534 TRUE decoy +chrUn_JTFH01000225v1_decoy 2533 TRUE decoy +chrUn_JTFH01000226v1_decoy 2522 TRUE decoy +chrUn_JTFH01000227v1_decoy 2522 TRUE decoy +chrUn_JTFH01000228v1_decoy 2515 TRUE decoy +chrUn_JTFH01000229v1_decoy 2513 TRUE decoy +chrUn_JTFH01000230v1_decoy 2507 TRUE decoy +chrUn_JTFH01000231v1_decoy 2504 TRUE decoy +chrUn_JTFH01000232v1_decoy 2497 TRUE decoy +chrUn_JTFH01000233v1_decoy 2471 TRUE decoy +chrUn_JTFH01000234v1_decoy 2465 TRUE decoy +chrUn_JTFH01000235v1_decoy 2464 TRUE decoy +chrUn_JTFH01000236v1_decoy 2459 TRUE decoy +chrUn_JTFH01000237v1_decoy 2457 TRUE decoy +chrUn_JTFH01000238v1_decoy 2450 TRUE decoy 
+chrUn_JTFH01000239v1_decoy 2435 TRUE decoy +chrUn_JTFH01000240v1_decoy 2434 TRUE decoy +chrUn_JTFH01000241v1_decoy 2432 TRUE decoy +chrUn_JTFH01000242v1_decoy 2427 TRUE decoy +chrUn_JTFH01000243v1_decoy 2421 TRUE decoy +chrUn_JTFH01000244v1_decoy 2420 TRUE decoy +chrUn_JTFH01000245v1_decoy 2414 TRUE decoy +chrUn_JTFH01000246v1_decoy 2404 TRUE decoy +chrUn_JTFH01000247v1_decoy 2403 TRUE decoy +chrUn_JTFH01000248v1_decoy 2402 TRUE decoy +chrUn_JTFH01000249v1_decoy 2397 TRUE decoy +chrUn_JTFH01000250v1_decoy 2395 TRUE decoy +chrUn_JTFH01000251v1_decoy 2394 TRUE decoy +chrUn_JTFH01000252v1_decoy 2388 TRUE decoy +chrUn_JTFH01000253v1_decoy 2382 TRUE decoy +chrUn_JTFH01000254v1_decoy 2381 TRUE decoy +chrUn_JTFH01000255v1_decoy 2380 TRUE decoy +chrUn_JTFH01000256v1_decoy 2368 TRUE decoy +chrUn_JTFH01000257v1_decoy 2364 TRUE decoy +chrUn_JTFH01000258v1_decoy 2363 TRUE decoy +chrUn_JTFH01000259v1_decoy 2348 TRUE decoy +chrUn_JTFH01000260v1_decoy 2339 TRUE decoy +chrUn_JTFH01000261v1_decoy 2335 TRUE decoy +chrUn_JTFH01000262v1_decoy 2332 TRUE decoy +chrUn_JTFH01000263v1_decoy 2331 TRUE decoy +chrUn_JTFH01000264v1_decoy 2330 TRUE decoy +chrUn_JTFH01000265v1_decoy 2323 TRUE decoy +chrUn_JTFH01000266v1_decoy 2319 TRUE decoy +chrUn_JTFH01000267v1_decoy 2314 TRUE decoy +chrUn_JTFH01000268v1_decoy 2308 TRUE decoy +chrUn_JTFH01000269v1_decoy 2306 TRUE decoy +chrUn_JTFH01000270v1_decoy 2296 TRUE decoy +chrUn_JTFH01000271v1_decoy 2287 TRUE decoy +chrUn_JTFH01000272v1_decoy 2279 TRUE decoy +chrUn_JTFH01000273v1_decoy 2276 TRUE decoy +chrUn_JTFH01000274v1_decoy 2273 TRUE decoy +chrUn_JTFH01000275v1_decoy 2262 TRUE decoy +chrUn_JTFH01000276v1_decoy 2254 TRUE decoy +chrUn_JTFH01000277v1_decoy 2252 TRUE decoy +chrUn_JTFH01000278v1_decoy 2245 TRUE decoy +chrUn_JTFH01000279v1_decoy 2239 TRUE decoy +chrUn_JTFH01000280v1_decoy 2223 TRUE decoy +chrUn_JTFH01000281v1_decoy 2220 TRUE decoy +chrUn_JTFH01000282v1_decoy 2218 TRUE decoy +chrUn_JTFH01000283v1_decoy 2215 TRUE decoy 
+chrUn_JTFH01000284v1_decoy 2213 TRUE decoy +chrUn_JTFH01000285v1_decoy 2203 TRUE decoy +chrUn_JTFH01000286v1_decoy 2200 TRUE decoy +chrUn_JTFH01000287v1_decoy 2197 TRUE decoy +chrUn_JTFH01000288v1_decoy 2194 TRUE decoy +chrUn_JTFH01000289v1_decoy 2183 TRUE decoy +chrUn_JTFH01000290v1_decoy 2179 TRUE decoy +chrUn_JTFH01000291v1_decoy 2177 TRUE decoy +chrUn_JTFH01000292v1_decoy 2177 TRUE decoy +chrUn_JTFH01000293v1_decoy 2177 TRUE decoy +chrUn_JTFH01000294v1_decoy 2168 TRUE decoy +chrUn_JTFH01000295v1_decoy 2160 TRUE decoy +chrUn_JTFH01000296v1_decoy 2155 TRUE decoy +chrUn_JTFH01000297v1_decoy 2144 TRUE decoy +chrUn_JTFH01000298v1_decoy 2143 TRUE decoy +chrUn_JTFH01000299v1_decoy 2136 TRUE decoy +chrUn_JTFH01000300v1_decoy 2134 TRUE decoy +chrUn_JTFH01000301v1_decoy 2129 TRUE decoy +chrUn_JTFH01000302v1_decoy 2128 TRUE decoy +chrUn_JTFH01000303v1_decoy 2125 TRUE decoy +chrUn_JTFH01000304v1_decoy 2125 TRUE decoy +chrUn_JTFH01000305v1_decoy 2122 TRUE decoy +chrUn_JTFH01000306v1_decoy 2111 TRUE decoy +chrUn_JTFH01000307v1_decoy 2106 TRUE decoy +chrUn_JTFH01000308v1_decoy 2094 TRUE decoy +chrUn_JTFH01000309v1_decoy 2093 TRUE decoy +chrUn_JTFH01000310v1_decoy 2088 TRUE decoy +chrUn_JTFH01000311v1_decoy 2086 TRUE decoy +chrUn_JTFH01000312v1_decoy 2086 TRUE decoy +chrUn_JTFH01000313v1_decoy 2084 TRUE decoy +chrUn_JTFH01000314v1_decoy 2080 TRUE decoy +chrUn_JTFH01000315v1_decoy 2079 TRUE decoy +chrUn_JTFH01000316v1_decoy 2076 TRUE decoy +chrUn_JTFH01000317v1_decoy 2071 TRUE decoy +chrUn_JTFH01000318v1_decoy 2066 TRUE decoy +chrUn_JTFH01000319v1_decoy 2061 TRUE decoy +chrUn_JTFH01000320v1_decoy 2055 TRUE decoy +chrUn_JTFH01000321v1_decoy 2053 TRUE decoy +chrUn_JTFH01000322v1_decoy 2040 TRUE decoy +chrUn_JTFH01000323v1_decoy 2036 TRUE decoy +chrUn_JTFH01000324v1_decoy 2035 TRUE decoy +chrUn_JTFH01000325v1_decoy 2034 TRUE decoy +chrUn_JTFH01000326v1_decoy 2032 TRUE decoy +chrUn_JTFH01000327v1_decoy 2029 TRUE decoy +chrUn_JTFH01000328v1_decoy 2025 TRUE decoy 
+chrUn_JTFH01000329v1_decoy 2021 TRUE decoy +chrUn_JTFH01000330v1_decoy 2018 TRUE decoy +chrUn_JTFH01000331v1_decoy 2015 TRUE decoy +chrUn_JTFH01000332v1_decoy 2009 TRUE decoy +chrUn_JTFH01000333v1_decoy 2007 TRUE decoy +chrUn_JTFH01000334v1_decoy 2005 TRUE decoy +chrUn_JTFH01000335v1_decoy 2003 TRUE decoy +chrUn_JTFH01000336v1_decoy 2001 TRUE decoy +chrUn_JTFH01000337v1_decoy 2001 TRUE decoy +chrUn_JTFH01000338v1_decoy 2000 TRUE decoy +chrUn_JTFH01000339v1_decoy 1996 TRUE decoy +chrUn_JTFH01000340v1_decoy 1992 TRUE decoy +chrUn_JTFH01000341v1_decoy 1985 TRUE decoy +chrUn_JTFH01000342v1_decoy 1981 TRUE decoy +chrUn_JTFH01000343v1_decoy 1977 TRUE decoy +chrUn_JTFH01000344v1_decoy 1971 TRUE decoy +chrUn_JTFH01000345v1_decoy 1968 TRUE decoy +chrUn_JTFH01000346v1_decoy 1962 TRUE decoy +chrUn_JTFH01000347v1_decoy 1961 TRUE decoy +chrUn_JTFH01000348v1_decoy 1960 TRUE decoy +chrUn_JTFH01000349v1_decoy 1960 TRUE decoy +chrUn_JTFH01000350v1_decoy 1954 TRUE decoy +chrUn_JTFH01000351v1_decoy 1952 TRUE decoy +chrUn_JTFH01000352v1_decoy 1947 TRUE decoy +chrUn_JTFH01000353v1_decoy 1944 TRUE decoy +chrUn_JTFH01000354v1_decoy 1943 TRUE decoy +chrUn_JTFH01000355v1_decoy 1941 TRUE decoy +chrUn_JTFH01000356v1_decoy 1937 TRUE decoy +chrUn_JTFH01000357v1_decoy 1934 TRUE decoy +chrUn_JTFH01000358v1_decoy 1929 TRUE decoy +chrUn_JTFH01000359v1_decoy 1924 TRUE decoy +chrUn_JTFH01000360v1_decoy 1924 TRUE decoy +chrUn_JTFH01000361v1_decoy 1923 TRUE decoy +chrUn_JTFH01000362v1_decoy 1921 TRUE decoy +chrUn_JTFH01000363v1_decoy 1918 TRUE decoy +chrUn_JTFH01000364v1_decoy 1915 TRUE decoy +chrUn_JTFH01000365v1_decoy 1915 TRUE decoy +chrUn_JTFH01000366v1_decoy 1914 TRUE decoy +chrUn_JTFH01000367v1_decoy 1912 TRUE decoy +chrUn_JTFH01000368v1_decoy 1910 TRUE decoy +chrUn_JTFH01000369v1_decoy 1907 TRUE decoy +chrUn_JTFH01000370v1_decoy 1904 TRUE decoy +chrUn_JTFH01000371v1_decoy 1897 TRUE decoy +chrUn_JTFH01000372v1_decoy 1891 TRUE decoy +chrUn_JTFH01000373v1_decoy 1890 TRUE decoy 
+chrUn_JTFH01000374v1_decoy 1888 TRUE decoy +chrUn_JTFH01000375v1_decoy 1888 TRUE decoy +chrUn_JTFH01000376v1_decoy 1885 TRUE decoy +chrUn_JTFH01000377v1_decoy 1881 TRUE decoy +chrUn_JTFH01000378v1_decoy 1879 TRUE decoy +chrUn_JTFH01000379v1_decoy 1877 TRUE decoy +chrUn_JTFH01000380v1_decoy 1876 TRUE decoy +chrUn_JTFH01000381v1_decoy 1876 TRUE decoy +chrUn_JTFH01000382v1_decoy 1874 TRUE decoy +chrUn_JTFH01000383v1_decoy 1872 TRUE decoy +chrUn_JTFH01000384v1_decoy 1869 TRUE decoy +chrUn_JTFH01000385v1_decoy 1866 TRUE decoy +chrUn_JTFH01000386v1_decoy 1865 TRUE decoy +chrUn_JTFH01000387v1_decoy 1865 TRUE decoy +chrUn_JTFH01000388v1_decoy 1865 TRUE decoy +chrUn_JTFH01000389v1_decoy 1862 TRUE decoy +chrUn_JTFH01000390v1_decoy 1862 TRUE decoy +chrUn_JTFH01000391v1_decoy 1859 TRUE decoy +chrUn_JTFH01000392v1_decoy 1856 TRUE decoy +chrUn_JTFH01000393v1_decoy 1856 TRUE decoy +chrUn_JTFH01000394v1_decoy 1854 TRUE decoy +chrUn_JTFH01000395v1_decoy 1850 TRUE decoy +chrUn_JTFH01000396v1_decoy 1849 TRUE decoy +chrUn_JTFH01000397v1_decoy 1849 TRUE decoy +chrUn_JTFH01000398v1_decoy 1847 TRUE decoy +chrUn_JTFH01000399v1_decoy 1839 TRUE decoy +chrUn_JTFH01000400v1_decoy 1834 TRUE decoy +chrUn_JTFH01000401v1_decoy 1821 TRUE decoy +chrUn_JTFH01000402v1_decoy 1815 TRUE decoy +chrUn_JTFH01000403v1_decoy 1811 TRUE decoy +chrUn_JTFH01000404v1_decoy 1808 TRUE decoy +chrUn_JTFH01000405v1_decoy 1808 TRUE decoy +chrUn_JTFH01000406v1_decoy 1807 TRUE decoy +chrUn_JTFH01000407v1_decoy 1807 TRUE decoy +chrUn_JTFH01000408v1_decoy 1802 TRUE decoy +chrUn_JTFH01000409v1_decoy 1801 TRUE decoy +chrUn_JTFH01000410v1_decoy 1800 TRUE decoy +chrUn_JTFH01000411v1_decoy 1795 TRUE decoy +chrUn_JTFH01000412v1_decoy 1794 TRUE decoy +chrUn_JTFH01000413v1_decoy 1792 TRUE decoy +chrUn_JTFH01000414v1_decoy 1788 TRUE decoy +chrUn_JTFH01000415v1_decoy 1786 TRUE decoy +chrUn_JTFH01000416v1_decoy 1782 TRUE decoy +chrUn_JTFH01000417v1_decoy 1782 TRUE decoy +chrUn_JTFH01000418v1_decoy 1781 TRUE decoy 
+chrUn_JTFH01000419v1_decoy 1781 TRUE decoy +chrUn_JTFH01000420v1_decoy 1779 TRUE decoy +chrUn_JTFH01000421v1_decoy 1777 TRUE decoy +chrUn_JTFH01000422v1_decoy 1764 TRUE decoy +chrUn_JTFH01000423v1_decoy 1762 TRUE decoy +chrUn_JTFH01000424v1_decoy 1755 TRUE decoy +chrUn_JTFH01000425v1_decoy 1749 TRUE decoy +chrUn_JTFH01000426v1_decoy 1747 TRUE decoy +chrUn_JTFH01000427v1_decoy 1746 TRUE decoy +chrUn_JTFH01000428v1_decoy 1745 TRUE decoy +chrUn_JTFH01000429v1_decoy 1744 TRUE decoy +chrUn_JTFH01000430v1_decoy 1742 TRUE decoy +chrUn_JTFH01000431v1_decoy 1740 TRUE decoy +chrUn_JTFH01000432v1_decoy 1740 TRUE decoy +chrUn_JTFH01000433v1_decoy 1736 TRUE decoy +chrUn_JTFH01000434v1_decoy 1735 TRUE decoy +chrUn_JTFH01000435v1_decoy 1732 TRUE decoy +chrUn_JTFH01000436v1_decoy 1732 TRUE decoy +chrUn_JTFH01000437v1_decoy 1730 TRUE decoy +chrUn_JTFH01000438v1_decoy 1727 TRUE decoy +chrUn_JTFH01000439v1_decoy 1722 TRUE decoy +chrUn_JTFH01000440v1_decoy 1718 TRUE decoy +chrUn_JTFH01000441v1_decoy 1716 TRUE decoy +chrUn_JTFH01000442v1_decoy 1710 TRUE decoy +chrUn_JTFH01000443v1_decoy 1708 TRUE decoy +chrUn_JTFH01000444v1_decoy 1707 TRUE decoy +chrUn_JTFH01000445v1_decoy 1706 TRUE decoy +chrUn_JTFH01000446v1_decoy 1705 TRUE decoy +chrUn_JTFH01000447v1_decoy 1704 TRUE decoy +chrUn_JTFH01000448v1_decoy 1699 TRUE decoy +chrUn_JTFH01000449v1_decoy 1698 TRUE decoy +chrUn_JTFH01000450v1_decoy 1697 TRUE decoy +chrUn_JTFH01000451v1_decoy 1697 TRUE decoy +chrUn_JTFH01000452v1_decoy 1695 TRUE decoy +chrUn_JTFH01000453v1_decoy 1695 TRUE decoy +chrUn_JTFH01000454v1_decoy 1693 TRUE decoy +chrUn_JTFH01000455v1_decoy 1687 TRUE decoy +chrUn_JTFH01000456v1_decoy 1686 TRUE decoy +chrUn_JTFH01000457v1_decoy 1680 TRUE decoy +chrUn_JTFH01000458v1_decoy 1679 TRUE decoy +chrUn_JTFH01000459v1_decoy 1679 TRUE decoy +chrUn_JTFH01000460v1_decoy 1678 TRUE decoy +chrUn_JTFH01000461v1_decoy 1674 TRUE decoy +chrUn_JTFH01000462v1_decoy 1674 TRUE decoy +chrUn_JTFH01000463v1_decoy 1671 TRUE decoy 
+chrUn_JTFH01000464v1_decoy 1669 TRUE decoy +chrUn_JTFH01000465v1_decoy 1665 TRUE decoy +chrUn_JTFH01000466v1_decoy 1663 TRUE decoy +chrUn_JTFH01000467v1_decoy 1657 TRUE decoy +chrUn_JTFH01000468v1_decoy 1653 TRUE decoy +chrUn_JTFH01000469v1_decoy 1652 TRUE decoy +chrUn_JTFH01000470v1_decoy 1650 TRUE decoy +chrUn_JTFH01000471v1_decoy 1649 TRUE decoy +chrUn_JTFH01000472v1_decoy 1649 TRUE decoy +chrUn_JTFH01000473v1_decoy 1640 TRUE decoy +chrUn_JTFH01000474v1_decoy 1638 TRUE decoy +chrUn_JTFH01000475v1_decoy 1636 TRUE decoy +chrUn_JTFH01000476v1_decoy 1632 TRUE decoy +chrUn_JTFH01000477v1_decoy 1631 TRUE decoy +chrUn_JTFH01000478v1_decoy 1630 TRUE decoy +chrUn_JTFH01000479v1_decoy 1627 TRUE decoy +chrUn_JTFH01000480v1_decoy 1624 TRUE decoy +chrUn_JTFH01000481v1_decoy 1617 TRUE decoy +chrUn_JTFH01000482v1_decoy 1616 TRUE decoy +chrUn_JTFH01000483v1_decoy 1615 TRUE decoy +chrUn_JTFH01000484v1_decoy 1611 TRUE decoy +chrUn_JTFH01000485v1_decoy 1611 TRUE decoy +chrUn_JTFH01000486v1_decoy 1606 TRUE decoy +chrUn_JTFH01000487v1_decoy 1605 TRUE decoy +chrUn_JTFH01000488v1_decoy 1605 TRUE decoy +chrUn_JTFH01000489v1_decoy 1600 TRUE decoy +chrUn_JTFH01000490v1_decoy 1598 TRUE decoy +chrUn_JTFH01000491v1_decoy 1598 TRUE decoy +chrUn_JTFH01000492v1_decoy 1597 TRUE decoy +chrUn_JTFH01000493v1_decoy 1596 TRUE decoy +chrUn_JTFH01000494v1_decoy 1595 TRUE decoy +chrUn_JTFH01000495v1_decoy 1592 TRUE decoy +chrUn_JTFH01000496v1_decoy 1589 TRUE decoy +chrUn_JTFH01000497v1_decoy 1585 TRUE decoy +chrUn_JTFH01000498v1_decoy 1579 TRUE decoy +chrUn_JTFH01000499v1_decoy 1578 TRUE decoy +chrUn_JTFH01000500v1_decoy 1577 TRUE decoy +chrUn_JTFH01000501v1_decoy 1577 TRUE decoy +chrUn_JTFH01000502v1_decoy 1577 TRUE decoy +chrUn_JTFH01000503v1_decoy 1576 TRUE decoy +chrUn_JTFH01000504v1_decoy 1575 TRUE decoy +chrUn_JTFH01000505v1_decoy 1574 TRUE decoy +chrUn_JTFH01000506v1_decoy 1572 TRUE decoy +chrUn_JTFH01000507v1_decoy 1571 TRUE decoy +chrUn_JTFH01000508v1_decoy 1563 TRUE decoy 
+chrUn_JTFH01000509v1_decoy 1561 TRUE decoy +chrUn_JTFH01000510v1_decoy 1561 TRUE decoy +chrUn_JTFH01000511v1_decoy 1560 TRUE decoy +chrUn_JTFH01000512v1_decoy 1560 TRUE decoy +chrUn_JTFH01000513v1_decoy 1554 TRUE decoy +chrUn_JTFH01000514v1_decoy 1552 TRUE decoy +chrUn_JTFH01000515v1_decoy 1548 TRUE decoy +chrUn_JTFH01000516v1_decoy 1546 TRUE decoy +chrUn_JTFH01000517v1_decoy 1541 TRUE decoy +chrUn_JTFH01000518v1_decoy 1536 TRUE decoy +chrUn_JTFH01000519v1_decoy 1533 TRUE decoy +chrUn_JTFH01000520v1_decoy 1532 TRUE decoy +chrUn_JTFH01000521v1_decoy 1532 TRUE decoy +chrUn_JTFH01000522v1_decoy 1530 TRUE decoy +chrUn_JTFH01000523v1_decoy 1527 TRUE decoy +chrUn_JTFH01000524v1_decoy 1526 TRUE decoy +chrUn_JTFH01000525v1_decoy 1524 TRUE decoy +chrUn_JTFH01000526v1_decoy 1523 TRUE decoy +chrUn_JTFH01000527v1_decoy 1523 TRUE decoy +chrUn_JTFH01000528v1_decoy 1522 TRUE decoy +chrUn_JTFH01000529v1_decoy 1522 TRUE decoy +chrUn_JTFH01000530v1_decoy 1519 TRUE decoy +chrUn_JTFH01000531v1_decoy 1513 TRUE decoy +chrUn_JTFH01000532v1_decoy 1508 TRUE decoy +chrUn_JTFH01000533v1_decoy 1508 TRUE decoy +chrUn_JTFH01000534v1_decoy 1505 TRUE decoy +chrUn_JTFH01000535v1_decoy 1503 TRUE decoy +chrUn_JTFH01000536v1_decoy 1496 TRUE decoy +chrUn_JTFH01000537v1_decoy 1491 TRUE decoy +chrUn_JTFH01000538v1_decoy 1490 TRUE decoy +chrUn_JTFH01000539v1_decoy 1490 TRUE decoy +chrUn_JTFH01000540v1_decoy 1487 TRUE decoy +chrUn_JTFH01000541v1_decoy 1486 TRUE decoy +chrUn_JTFH01000542v1_decoy 1485 TRUE decoy +chrUn_JTFH01000543v1_decoy 1484 TRUE decoy +chrUn_JTFH01000544v1_decoy 1483 TRUE decoy +chrUn_JTFH01000545v1_decoy 1479 TRUE decoy +chrUn_JTFH01000546v1_decoy 1479 TRUE decoy +chrUn_JTFH01000547v1_decoy 1476 TRUE decoy +chrUn_JTFH01000548v1_decoy 1475 TRUE decoy +chrUn_JTFH01000549v1_decoy 1472 TRUE decoy +chrUn_JTFH01000550v1_decoy 1469 TRUE decoy +chrUn_JTFH01000551v1_decoy 1468 TRUE decoy +chrUn_JTFH01000552v1_decoy 1467 TRUE decoy +chrUn_JTFH01000553v1_decoy 1465 TRUE decoy 
+chrUn_JTFH01000554v1_decoy 1464 TRUE decoy +chrUn_JTFH01000555v1_decoy 1463 TRUE decoy +chrUn_JTFH01000556v1_decoy 1463 TRUE decoy +chrUn_JTFH01000557v1_decoy 1459 TRUE decoy +chrUn_JTFH01000558v1_decoy 1459 TRUE decoy +chrUn_JTFH01000559v1_decoy 1458 TRUE decoy +chrUn_JTFH01000560v1_decoy 1458 TRUE decoy +chrUn_JTFH01000561v1_decoy 1454 TRUE decoy +chrUn_JTFH01000562v1_decoy 1449 TRUE decoy +chrUn_JTFH01000563v1_decoy 1449 TRUE decoy +chrUn_JTFH01000564v1_decoy 1448 TRUE decoy +chrUn_JTFH01000565v1_decoy 1446 TRUE decoy +chrUn_JTFH01000566v1_decoy 1442 TRUE decoy +chrUn_JTFH01000567v1_decoy 1441 TRUE decoy +chrUn_JTFH01000568v1_decoy 1440 TRUE decoy +chrUn_JTFH01000569v1_decoy 1439 TRUE decoy +chrUn_JTFH01000570v1_decoy 1437 TRUE decoy +chrUn_JTFH01000571v1_decoy 1436 TRUE decoy +chrUn_JTFH01000572v1_decoy 1429 TRUE decoy +chrUn_JTFH01000573v1_decoy 1429 TRUE decoy +chrUn_JTFH01000574v1_decoy 1427 TRUE decoy +chrUn_JTFH01000575v1_decoy 1426 TRUE decoy +chrUn_JTFH01000576v1_decoy 1425 TRUE decoy +chrUn_JTFH01000577v1_decoy 1424 TRUE decoy +chrUn_JTFH01000578v1_decoy 1424 TRUE decoy +chrUn_JTFH01000579v1_decoy 1423 TRUE decoy +chrUn_JTFH01000580v1_decoy 1423 TRUE decoy +chrUn_JTFH01000581v1_decoy 1423 TRUE decoy +chrUn_JTFH01000582v1_decoy 1414 TRUE decoy +chrUn_JTFH01000583v1_decoy 1414 TRUE decoy +chrUn_JTFH01000584v1_decoy 1413 TRUE decoy +chrUn_JTFH01000585v1_decoy 1413 TRUE decoy +chrUn_JTFH01000586v1_decoy 1410 TRUE decoy +chrUn_JTFH01000587v1_decoy 1409 TRUE decoy +chrUn_JTFH01000588v1_decoy 1409 TRUE decoy +chrUn_JTFH01000589v1_decoy 1406 TRUE decoy +chrUn_JTFH01000590v1_decoy 1405 TRUE decoy +chrUn_JTFH01000591v1_decoy 1405 TRUE decoy +chrUn_JTFH01000592v1_decoy 1404 TRUE decoy +chrUn_JTFH01000593v1_decoy 1404 TRUE decoy +chrUn_JTFH01000594v1_decoy 1402 TRUE decoy +chrUn_JTFH01000595v1_decoy 1402 TRUE decoy +chrUn_JTFH01000596v1_decoy 1402 TRUE decoy +chrUn_JTFH01000597v1_decoy 1402 TRUE decoy +chrUn_JTFH01000598v1_decoy 1400 TRUE decoy 
+chrUn_JTFH01000599v1_decoy 1398 TRUE decoy +chrUn_JTFH01000600v1_decoy 1396 TRUE decoy +chrUn_JTFH01000601v1_decoy 1395 TRUE decoy +chrUn_JTFH01000602v1_decoy 1394 TRUE decoy +chrUn_JTFH01000603v1_decoy 1393 TRUE decoy +chrUn_JTFH01000604v1_decoy 1391 TRUE decoy +chrUn_JTFH01000605v1_decoy 1389 TRUE decoy +chrUn_JTFH01000606v1_decoy 1389 TRUE decoy +chrUn_JTFH01000607v1_decoy 1388 TRUE decoy +chrUn_JTFH01000608v1_decoy 1387 TRUE decoy +chrUn_JTFH01000609v1_decoy 1384 TRUE decoy +chrUn_JTFH01000610v1_decoy 1381 TRUE decoy +chrUn_JTFH01000611v1_decoy 1381 TRUE decoy +chrUn_JTFH01000612v1_decoy 1379 TRUE decoy +chrUn_JTFH01000613v1_decoy 1377 TRUE decoy +chrUn_JTFH01000614v1_decoy 1376 TRUE decoy +chrUn_JTFH01000615v1_decoy 1376 TRUE decoy +chrUn_JTFH01000616v1_decoy 1375 TRUE decoy +chrUn_JTFH01000617v1_decoy 1374 TRUE decoy +chrUn_JTFH01000618v1_decoy 1372 TRUE decoy +chrUn_JTFH01000619v1_decoy 1371 TRUE decoy +chrUn_JTFH01000620v1_decoy 1370 TRUE decoy +chrUn_JTFH01000621v1_decoy 1370 TRUE decoy +chrUn_JTFH01000622v1_decoy 1366 TRUE decoy +chrUn_JTFH01000623v1_decoy 1363 TRUE decoy +chrUn_JTFH01000624v1_decoy 1360 TRUE decoy +chrUn_JTFH01000625v1_decoy 1356 TRUE decoy +chrUn_JTFH01000626v1_decoy 1355 TRUE decoy +chrUn_JTFH01000627v1_decoy 1355 TRUE decoy +chrUn_JTFH01000628v1_decoy 1352 TRUE decoy +chrUn_JTFH01000629v1_decoy 1345 TRUE decoy +chrUn_JTFH01000630v1_decoy 1344 TRUE decoy +chrUn_JTFH01000631v1_decoy 1344 TRUE decoy +chrUn_JTFH01000632v1_decoy 1342 TRUE decoy +chrUn_JTFH01000633v1_decoy 1342 TRUE decoy +chrUn_JTFH01000634v1_decoy 1336 TRUE decoy +chrUn_JTFH01000635v1_decoy 1334 TRUE decoy +chrUn_JTFH01000636v1_decoy 1334 TRUE decoy +chrUn_JTFH01000637v1_decoy 1333 TRUE decoy +chrUn_JTFH01000638v1_decoy 1332 TRUE decoy +chrUn_JTFH01000639v1_decoy 1328 TRUE decoy +chrUn_JTFH01000640v1_decoy 1328 TRUE decoy +chrUn_JTFH01000641v1_decoy 1328 TRUE decoy +chrUn_JTFH01000642v1_decoy 1327 TRUE decoy +chrUn_JTFH01000643v1_decoy 1325 TRUE decoy 
+chrUn_JTFH01000644v1_decoy 1322 TRUE decoy +chrUn_JTFH01000645v1_decoy 1320 TRUE decoy +chrUn_JTFH01000646v1_decoy 1319 TRUE decoy +chrUn_JTFH01000647v1_decoy 1318 TRUE decoy +chrUn_JTFH01000648v1_decoy 1315 TRUE decoy +chrUn_JTFH01000649v1_decoy 1314 TRUE decoy +chrUn_JTFH01000650v1_decoy 1313 TRUE decoy +chrUn_JTFH01000651v1_decoy 1313 TRUE decoy +chrUn_JTFH01000652v1_decoy 1312 TRUE decoy +chrUn_JTFH01000653v1_decoy 1310 TRUE decoy +chrUn_JTFH01000654v1_decoy 1309 TRUE decoy +chrUn_JTFH01000655v1_decoy 1309 TRUE decoy +chrUn_JTFH01000656v1_decoy 1307 TRUE decoy +chrUn_JTFH01000657v1_decoy 1307 TRUE decoy +chrUn_JTFH01000658v1_decoy 1305 TRUE decoy +chrUn_JTFH01000659v1_decoy 1304 TRUE decoy +chrUn_JTFH01000660v1_decoy 1303 TRUE decoy +chrUn_JTFH01000661v1_decoy 1302 TRUE decoy +chrUn_JTFH01000662v1_decoy 1302 TRUE decoy +chrUn_JTFH01000663v1_decoy 1301 TRUE decoy +chrUn_JTFH01000664v1_decoy 1301 TRUE decoy +chrUn_JTFH01000665v1_decoy 1300 TRUE decoy +chrUn_JTFH01000666v1_decoy 1299 TRUE decoy +chrUn_JTFH01000667v1_decoy 1297 TRUE decoy +chrUn_JTFH01000668v1_decoy 1295 TRUE decoy +chrUn_JTFH01000669v1_decoy 1294 TRUE decoy +chrUn_JTFH01000670v1_decoy 1293 TRUE decoy +chrUn_JTFH01000671v1_decoy 1291 TRUE decoy +chrUn_JTFH01000672v1_decoy 1291 TRUE decoy +chrUn_JTFH01000673v1_decoy 1289 TRUE decoy +chrUn_JTFH01000674v1_decoy 1288 TRUE decoy +chrUn_JTFH01000675v1_decoy 1288 TRUE decoy +chrUn_JTFH01000676v1_decoy 1287 TRUE decoy +chrUn_JTFH01000677v1_decoy 1287 TRUE decoy +chrUn_JTFH01000678v1_decoy 1287 TRUE decoy +chrUn_JTFH01000679v1_decoy 1286 TRUE decoy +chrUn_JTFH01000680v1_decoy 1283 TRUE decoy +chrUn_JTFH01000681v1_decoy 1281 TRUE decoy +chrUn_JTFH01000682v1_decoy 1277 TRUE decoy +chrUn_JTFH01000683v1_decoy 1274 TRUE decoy +chrUn_JTFH01000684v1_decoy 1270 TRUE decoy +chrUn_JTFH01000685v1_decoy 1267 TRUE decoy +chrUn_JTFH01000686v1_decoy 1266 TRUE decoy +chrUn_JTFH01000687v1_decoy 1260 TRUE decoy +chrUn_JTFH01000688v1_decoy 1259 TRUE decoy 
+chrUn_JTFH01000689v1_decoy 1258 TRUE decoy +chrUn_JTFH01000690v1_decoy 1258 TRUE decoy +chrUn_JTFH01000691v1_decoy 1258 TRUE decoy +chrUn_JTFH01000692v1_decoy 1256 TRUE decoy +chrUn_JTFH01000693v1_decoy 1255 TRUE decoy +chrUn_JTFH01000694v1_decoy 1254 TRUE decoy +chrUn_JTFH01000695v1_decoy 1254 TRUE decoy +chrUn_JTFH01000696v1_decoy 1253 TRUE decoy +chrUn_JTFH01000697v1_decoy 1250 TRUE decoy +chrUn_JTFH01000698v1_decoy 1249 TRUE decoy +chrUn_JTFH01000699v1_decoy 1248 TRUE decoy +chrUn_JTFH01000700v1_decoy 1248 TRUE decoy +chrUn_JTFH01000701v1_decoy 1247 TRUE decoy +chrUn_JTFH01000702v1_decoy 1242 TRUE decoy +chrUn_JTFH01000703v1_decoy 1242 TRUE decoy +chrUn_JTFH01000704v1_decoy 1241 TRUE decoy +chrUn_JTFH01000705v1_decoy 1241 TRUE decoy +chrUn_JTFH01000706v1_decoy 1241 TRUE decoy +chrUn_JTFH01000707v1_decoy 1239 TRUE decoy +chrUn_JTFH01000708v1_decoy 1238 TRUE decoy +chrUn_JTFH01000709v1_decoy 1237 TRUE decoy +chrUn_JTFH01000710v1_decoy 1236 TRUE decoy +chrUn_JTFH01000711v1_decoy 1235 TRUE decoy +chrUn_JTFH01000712v1_decoy 1234 TRUE decoy +chrUn_JTFH01000713v1_decoy 1234 TRUE decoy +chrUn_JTFH01000714v1_decoy 1234 TRUE decoy +chrUn_JTFH01000715v1_decoy 1233 TRUE decoy +chrUn_JTFH01000716v1_decoy 1232 TRUE decoy +chrUn_JTFH01000717v1_decoy 1232 TRUE decoy +chrUn_JTFH01000718v1_decoy 1231 TRUE decoy +chrUn_JTFH01000719v1_decoy 1230 TRUE decoy +chrUn_JTFH01000720v1_decoy 1228 TRUE decoy +chrUn_JTFH01000721v1_decoy 1227 TRUE decoy +chrUn_JTFH01000722v1_decoy 1227 TRUE decoy +chrUn_JTFH01000723v1_decoy 1226 TRUE decoy +chrUn_JTFH01000724v1_decoy 1224 TRUE decoy +chrUn_JTFH01000725v1_decoy 1224 TRUE decoy +chrUn_JTFH01000726v1_decoy 1220 TRUE decoy +chrUn_JTFH01000727v1_decoy 1220 TRUE decoy +chrUn_JTFH01000728v1_decoy 1219 TRUE decoy +chrUn_JTFH01000729v1_decoy 1217 TRUE decoy +chrUn_JTFH01000730v1_decoy 1216 TRUE decoy +chrUn_JTFH01000731v1_decoy 1215 TRUE decoy +chrUn_JTFH01000732v1_decoy 1214 TRUE decoy +chrUn_JTFH01000733v1_decoy 1214 TRUE decoy 
+chrUn_JTFH01000734v1_decoy 1214 TRUE decoy +chrUn_JTFH01000735v1_decoy 1213 TRUE decoy +chrUn_JTFH01000736v1_decoy 1212 TRUE decoy +chrUn_JTFH01000737v1_decoy 1209 TRUE decoy +chrUn_JTFH01000738v1_decoy 1208 TRUE decoy +chrUn_JTFH01000739v1_decoy 1207 TRUE decoy +chrUn_JTFH01000740v1_decoy 1207 TRUE decoy +chrUn_JTFH01000741v1_decoy 1207 TRUE decoy +chrUn_JTFH01000742v1_decoy 1206 TRUE decoy +chrUn_JTFH01000743v1_decoy 1206 TRUE decoy +chrUn_JTFH01000744v1_decoy 1205 TRUE decoy +chrUn_JTFH01000745v1_decoy 1205 TRUE decoy +chrUn_JTFH01000746v1_decoy 1204 TRUE decoy +chrUn_JTFH01000747v1_decoy 1204 TRUE decoy +chrUn_JTFH01000748v1_decoy 1204 TRUE decoy +chrUn_JTFH01000749v1_decoy 1203 TRUE decoy +chrUn_JTFH01000750v1_decoy 1201 TRUE decoy +chrUn_JTFH01000751v1_decoy 1201 TRUE decoy +chrUn_JTFH01000752v1_decoy 1200 TRUE decoy +chrUn_JTFH01000753v1_decoy 1200 TRUE decoy +chrUn_JTFH01000754v1_decoy 1199 TRUE decoy +chrUn_JTFH01000755v1_decoy 1198 TRUE decoy +chrUn_JTFH01000756v1_decoy 1197 TRUE decoy +chrUn_JTFH01000757v1_decoy 1196 TRUE decoy +chrUn_JTFH01000758v1_decoy 1195 TRUE decoy +chrUn_JTFH01000759v1_decoy 1194 TRUE decoy +chrUn_JTFH01000760v1_decoy 1194 TRUE decoy +chrUn_JTFH01000761v1_decoy 1191 TRUE decoy +chrUn_JTFH01000762v1_decoy 1189 TRUE decoy +chrUn_JTFH01000763v1_decoy 1186 TRUE decoy +chrUn_JTFH01000764v1_decoy 1186 TRUE decoy +chrUn_JTFH01000765v1_decoy 1184 TRUE decoy +chrUn_JTFH01000766v1_decoy 1183 TRUE decoy +chrUn_JTFH01000767v1_decoy 1183 TRUE decoy +chrUn_JTFH01000768v1_decoy 1182 TRUE decoy +chrUn_JTFH01000769v1_decoy 1181 TRUE decoy +chrUn_JTFH01000770v1_decoy 1181 TRUE decoy +chrUn_JTFH01000771v1_decoy 1181 TRUE decoy +chrUn_JTFH01000772v1_decoy 1181 TRUE decoy +chrUn_JTFH01000773v1_decoy 1179 TRUE decoy +chrUn_JTFH01000774v1_decoy 1178 TRUE decoy +chrUn_JTFH01000775v1_decoy 1178 TRUE decoy +chrUn_JTFH01000776v1_decoy 1177 TRUE decoy +chrUn_JTFH01000777v1_decoy 1177 TRUE decoy +chrUn_JTFH01000778v1_decoy 1171 TRUE decoy 
+chrUn_JTFH01000779v1_decoy 1171 TRUE decoy +chrUn_JTFH01000780v1_decoy 1171 TRUE decoy +chrUn_JTFH01000781v1_decoy 1170 TRUE decoy +chrUn_JTFH01000782v1_decoy 1170 TRUE decoy +chrUn_JTFH01000783v1_decoy 1167 TRUE decoy +chrUn_JTFH01000784v1_decoy 1167 TRUE decoy +chrUn_JTFH01000785v1_decoy 1167 TRUE decoy +chrUn_JTFH01000786v1_decoy 1165 TRUE decoy +chrUn_JTFH01000787v1_decoy 1165 TRUE decoy +chrUn_JTFH01000788v1_decoy 1162 TRUE decoy +chrUn_JTFH01000789v1_decoy 1157 TRUE decoy +chrUn_JTFH01000790v1_decoy 1156 TRUE decoy +chrUn_JTFH01000791v1_decoy 1156 TRUE decoy +chrUn_JTFH01000792v1_decoy 1154 TRUE decoy +chrUn_JTFH01000793v1_decoy 1154 TRUE decoy +chrUn_JTFH01000794v1_decoy 1151 TRUE decoy +chrUn_JTFH01000795v1_decoy 1151 TRUE decoy +chrUn_JTFH01000796v1_decoy 1150 TRUE decoy +chrUn_JTFH01000797v1_decoy 1150 TRUE decoy +chrUn_JTFH01000798v1_decoy 1147 TRUE decoy +chrUn_JTFH01000799v1_decoy 1147 TRUE decoy +chrUn_JTFH01000800v1_decoy 1146 TRUE decoy +chrUn_JTFH01000801v1_decoy 1144 TRUE decoy +chrUn_JTFH01000802v1_decoy 1144 TRUE decoy +chrUn_JTFH01000803v1_decoy 1143 TRUE decoy +chrUn_JTFH01000804v1_decoy 1142 TRUE decoy +chrUn_JTFH01000805v1_decoy 1141 TRUE decoy +chrUn_JTFH01000806v1_decoy 1141 TRUE decoy +chrUn_JTFH01000807v1_decoy 1140 TRUE decoy +chrUn_JTFH01000808v1_decoy 1138 TRUE decoy +chrUn_JTFH01000809v1_decoy 1134 TRUE decoy +chrUn_JTFH01000810v1_decoy 1134 TRUE decoy +chrUn_JTFH01000811v1_decoy 1132 TRUE decoy +chrUn_JTFH01000812v1_decoy 1131 TRUE decoy +chrUn_JTFH01000813v1_decoy 1131 TRUE decoy +chrUn_JTFH01000814v1_decoy 1130 TRUE decoy +chrUn_JTFH01000815v1_decoy 1127 TRUE decoy +chrUn_JTFH01000816v1_decoy 1126 TRUE decoy +chrUn_JTFH01000817v1_decoy 1124 TRUE decoy +chrUn_JTFH01000818v1_decoy 1122 TRUE decoy +chrUn_JTFH01000819v1_decoy 1122 TRUE decoy +chrUn_JTFH01000820v1_decoy 1121 TRUE decoy +chrUn_JTFH01000821v1_decoy 1119 TRUE decoy +chrUn_JTFH01000822v1_decoy 1119 TRUE decoy +chrUn_JTFH01000823v1_decoy 1119 TRUE decoy 
+chrUn_JTFH01000824v1_decoy 1119 TRUE decoy +chrUn_JTFH01000825v1_decoy 1118 TRUE decoy +chrUn_JTFH01000826v1_decoy 1116 TRUE decoy +chrUn_JTFH01000827v1_decoy 1116 TRUE decoy +chrUn_JTFH01000828v1_decoy 1115 TRUE decoy +chrUn_JTFH01000829v1_decoy 1115 TRUE decoy +chrUn_JTFH01000830v1_decoy 1115 TRUE decoy +chrUn_JTFH01000831v1_decoy 1114 TRUE decoy +chrUn_JTFH01000832v1_decoy 1113 TRUE decoy +chrUn_JTFH01000833v1_decoy 1113 TRUE decoy +chrUn_JTFH01000834v1_decoy 1110 TRUE decoy +chrUn_JTFH01000835v1_decoy 1110 TRUE decoy +chrUn_JTFH01000836v1_decoy 1109 TRUE decoy +chrUn_JTFH01000837v1_decoy 1108 TRUE decoy +chrUn_JTFH01000838v1_decoy 1107 TRUE decoy +chrUn_JTFH01000839v1_decoy 1107 TRUE decoy +chrUn_JTFH01000840v1_decoy 1107 TRUE decoy +chrUn_JTFH01000841v1_decoy 1107 TRUE decoy +chrUn_JTFH01000842v1_decoy 1106 TRUE decoy +chrUn_JTFH01000843v1_decoy 1103 TRUE decoy +chrUn_JTFH01000844v1_decoy 1103 TRUE decoy +chrUn_JTFH01000845v1_decoy 1103 TRUE decoy +chrUn_JTFH01000846v1_decoy 1100 TRUE decoy +chrUn_JTFH01000847v1_decoy 1099 TRUE decoy +chrUn_JTFH01000848v1_decoy 1098 TRUE decoy +chrUn_JTFH01000849v1_decoy 1097 TRUE decoy +chrUn_JTFH01000850v1_decoy 1096 TRUE decoy +chrUn_JTFH01000851v1_decoy 1096 TRUE decoy +chrUn_JTFH01000852v1_decoy 1094 TRUE decoy +chrUn_JTFH01000853v1_decoy 1093 TRUE decoy +chrUn_JTFH01000854v1_decoy 1090 TRUE decoy +chrUn_JTFH01000855v1_decoy 1088 TRUE decoy +chrUn_JTFH01000856v1_decoy 1087 TRUE decoy +chrUn_JTFH01000857v1_decoy 1086 TRUE decoy +chrUn_JTFH01000858v1_decoy 1085 TRUE decoy +chrUn_JTFH01000859v1_decoy 1084 TRUE decoy +chrUn_JTFH01000860v1_decoy 1084 TRUE decoy +chrUn_JTFH01000861v1_decoy 1084 TRUE decoy +chrUn_JTFH01000862v1_decoy 1084 TRUE decoy +chrUn_JTFH01000863v1_decoy 1083 TRUE decoy +chrUn_JTFH01000864v1_decoy 1083 TRUE decoy +chrUn_JTFH01000865v1_decoy 1082 TRUE decoy +chrUn_JTFH01000866v1_decoy 1082 TRUE decoy +chrUn_JTFH01000867v1_decoy 1081 TRUE decoy +chrUn_JTFH01000868v1_decoy 1081 TRUE decoy 
+chrUn_JTFH01000869v1_decoy 1079 TRUE decoy +chrUn_JTFH01000870v1_decoy 1076 TRUE decoy +chrUn_JTFH01000871v1_decoy 1074 TRUE decoy +chrUn_JTFH01000872v1_decoy 1073 TRUE decoy +chrUn_JTFH01000873v1_decoy 1073 TRUE decoy +chrUn_JTFH01000874v1_decoy 1071 TRUE decoy +chrUn_JTFH01000875v1_decoy 1069 TRUE decoy +chrUn_JTFH01000876v1_decoy 1067 TRUE decoy +chrUn_JTFH01000877v1_decoy 1067 TRUE decoy +chrUn_JTFH01000878v1_decoy 1067 TRUE decoy +chrUn_JTFH01000879v1_decoy 1066 TRUE decoy +chrUn_JTFH01000880v1_decoy 1065 TRUE decoy +chrUn_JTFH01000881v1_decoy 1065 TRUE decoy +chrUn_JTFH01000882v1_decoy 1065 TRUE decoy +chrUn_JTFH01000883v1_decoy 1065 TRUE decoy +chrUn_JTFH01000884v1_decoy 1065 TRUE decoy +chrUn_JTFH01000885v1_decoy 1064 TRUE decoy +chrUn_JTFH01000886v1_decoy 1064 TRUE decoy +chrUn_JTFH01000887v1_decoy 1064 TRUE decoy +chrUn_JTFH01000888v1_decoy 1063 TRUE decoy +chrUn_JTFH01000889v1_decoy 1062 TRUE decoy +chrUn_JTFH01000890v1_decoy 1062 TRUE decoy +chrUn_JTFH01000891v1_decoy 1062 TRUE decoy +chrUn_JTFH01000892v1_decoy 1061 TRUE decoy +chrUn_JTFH01000893v1_decoy 1060 TRUE decoy +chrUn_JTFH01000894v1_decoy 1057 TRUE decoy +chrUn_JTFH01000895v1_decoy 1057 TRUE decoy +chrUn_JTFH01000896v1_decoy 1056 TRUE decoy +chrUn_JTFH01000897v1_decoy 1055 TRUE decoy +chrUn_JTFH01000898v1_decoy 1055 TRUE decoy +chrUn_JTFH01000899v1_decoy 1055 TRUE decoy +chrUn_JTFH01000900v1_decoy 1055 TRUE decoy +chrUn_JTFH01000901v1_decoy 1054 TRUE decoy +chrUn_JTFH01000902v1_decoy 1051 TRUE decoy +chrUn_JTFH01000903v1_decoy 1050 TRUE decoy +chrUn_JTFH01000904v1_decoy 1050 TRUE decoy +chrUn_JTFH01000905v1_decoy 1049 TRUE decoy +chrUn_JTFH01000906v1_decoy 1048 TRUE decoy +chrUn_JTFH01000907v1_decoy 1047 TRUE decoy +chrUn_JTFH01000908v1_decoy 1046 TRUE decoy +chrUn_JTFH01000909v1_decoy 1046 TRUE decoy +chrUn_JTFH01000910v1_decoy 1046 TRUE decoy +chrUn_JTFH01000911v1_decoy 1045 TRUE decoy +chrUn_JTFH01000912v1_decoy 1045 TRUE decoy +chrUn_JTFH01000913v1_decoy 1045 TRUE decoy 
+chrUn_JTFH01000914v1_decoy 1044 TRUE decoy +chrUn_JTFH01000915v1_decoy 1042 TRUE decoy +chrUn_JTFH01000916v1_decoy 1041 TRUE decoy +chrUn_JTFH01000917v1_decoy 1039 TRUE decoy +chrUn_JTFH01000918v1_decoy 1039 TRUE decoy +chrUn_JTFH01000919v1_decoy 1038 TRUE decoy +chrUn_JTFH01000920v1_decoy 1036 TRUE decoy +chrUn_JTFH01000921v1_decoy 1036 TRUE decoy +chrUn_JTFH01000922v1_decoy 1035 TRUE decoy +chrUn_JTFH01000923v1_decoy 1035 TRUE decoy +chrUn_JTFH01000924v1_decoy 1033 TRUE decoy +chrUn_JTFH01000925v1_decoy 1032 TRUE decoy +chrUn_JTFH01000926v1_decoy 1031 TRUE decoy +chrUn_JTFH01000927v1_decoy 1031 TRUE decoy +chrUn_JTFH01000928v1_decoy 1031 TRUE decoy +chrUn_JTFH01000929v1_decoy 1027 TRUE decoy +chrUn_JTFH01000930v1_decoy 1027 TRUE decoy +chrUn_JTFH01000931v1_decoy 1026 TRUE decoy +chrUn_JTFH01000932v1_decoy 1026 TRUE decoy +chrUn_JTFH01000933v1_decoy 1024 TRUE decoy +chrUn_JTFH01000934v1_decoy 1024 TRUE decoy +chrUn_JTFH01000935v1_decoy 1022 TRUE decoy +chrUn_JTFH01000936v1_decoy 1022 TRUE decoy +chrUn_JTFH01000937v1_decoy 1021 TRUE decoy +chrUn_JTFH01000938v1_decoy 1020 TRUE decoy +chrUn_JTFH01000939v1_decoy 1019 TRUE decoy +chrUn_JTFH01000940v1_decoy 1018 TRUE decoy +chrUn_JTFH01000941v1_decoy 1018 TRUE decoy +chrUn_JTFH01000942v1_decoy 1018 TRUE decoy +chrUn_JTFH01000943v1_decoy 1016 TRUE decoy +chrUn_JTFH01000944v1_decoy 1010 TRUE decoy +chrUn_JTFH01000945v1_decoy 1010 TRUE decoy +chrUn_JTFH01000946v1_decoy 1009 TRUE decoy +chrUn_JTFH01000947v1_decoy 1008 TRUE decoy +chrUn_JTFH01000948v1_decoy 1007 TRUE decoy +chrUn_JTFH01000949v1_decoy 1006 TRUE decoy +chrUn_JTFH01000950v1_decoy 1005 TRUE decoy +chrUn_JTFH01000951v1_decoy 1005 TRUE decoy +chrUn_JTFH01000952v1_decoy 1004 TRUE decoy +chrUn_JTFH01000953v1_decoy 1004 TRUE decoy +chrUn_JTFH01000954v1_decoy 1003 TRUE decoy +chrUn_JTFH01000955v1_decoy 1003 TRUE decoy +chrUn_JTFH01000956v1_decoy 1003 TRUE decoy +chrUn_JTFH01000957v1_decoy 1003 TRUE decoy +chrUn_JTFH01000958v1_decoy 1002 TRUE decoy 
+chrUn_JTFH01000959v1_decoy 1002 TRUE decoy +chrUn_JTFH01000960v1_decoy 1000 TRUE decoy +chrUn_JTFH01000961v1_decoy 1000 TRUE decoy +chrUn_JTFH01000962v1_decoy 8358 TRUE decoy +chrUn_JTFH01000963v1_decoy 7932 TRUE decoy +chrUn_JTFH01000964v1_decoy 6846 TRUE decoy +chrUn_JTFH01000965v1_decoy 4591 TRUE decoy +chrUn_JTFH01000966v1_decoy 4041 TRUE decoy +chrUn_JTFH01000967v1_decoy 3841 TRUE decoy +chrUn_JTFH01000968v1_decoy 3754 TRUE decoy +chrUn_JTFH01000969v1_decoy 3743 TRUE decoy +chrUn_JTFH01000970v1_decoy 3702 TRUE decoy +chrUn_JTFH01000971v1_decoy 3625 TRUE decoy +chrUn_JTFH01000972v1_decoy 3529 TRUE decoy +chrUn_JTFH01000973v1_decoy 3508 TRUE decoy +chrUn_JTFH01000974v1_decoy 3359 TRUE decoy +chrUn_JTFH01000975v1_decoy 3320 TRUE decoy +chrUn_JTFH01000976v1_decoy 3231 TRUE decoy +chrUn_JTFH01000977v1_decoy 3220 TRUE decoy +chrUn_JTFH01000978v1_decoy 3212 TRUE decoy +chrUn_JTFH01000979v1_decoy 3192 TRUE decoy +chrUn_JTFH01000980v1_decoy 3092 TRUE decoy +chrUn_JTFH01000981v1_decoy 3087 TRUE decoy +chrUn_JTFH01000982v1_decoy 3048 TRUE decoy +chrUn_JTFH01000983v1_decoy 3005 TRUE decoy +chrUn_JTFH01000984v1_decoy 3004 TRUE decoy +chrUn_JTFH01000985v1_decoy 2959 TRUE decoy +chrUn_JTFH01000986v1_decoy 2934 TRUE decoy +chrUn_JTFH01000987v1_decoy 2933 TRUE decoy +chrUn_JTFH01000988v1_decoy 2827 TRUE decoy +chrUn_JTFH01000989v1_decoy 2794 TRUE decoy +chrUn_JTFH01000990v1_decoy 2749 TRUE decoy +chrUn_JTFH01000991v1_decoy 2745 TRUE decoy +chrUn_JTFH01000992v1_decoy 2733 TRUE decoy +chrUn_JTFH01000993v1_decoy 2698 TRUE decoy +chrUn_JTFH01000994v1_decoy 2665 TRUE decoy +chrUn_JTFH01000995v1_decoy 2634 TRUE decoy +chrUn_JTFH01000996v1_decoy 2492 TRUE decoy +chrUn_JTFH01000997v1_decoy 2489 TRUE decoy +chrUn_JTFH01000998v1_decoy 2468 TRUE decoy +chrUn_JTFH01000999v1_decoy 2414 TRUE decoy +chrUn_JTFH01001000v1_decoy 2395 TRUE decoy +chrUn_JTFH01001001v1_decoy 2356 TRUE decoy +chrUn_JTFH01001002v1_decoy 2339 TRUE decoy +chrUn_JTFH01001003v1_decoy 2310 TRUE decoy 
+chrUn_JTFH01001004v1_decoy 2288 TRUE decoy +chrUn_JTFH01001005v1_decoy 2285 TRUE decoy +chrUn_JTFH01001006v1_decoy 2269 TRUE decoy +chrUn_JTFH01001007v1_decoy 2253 TRUE decoy +chrUn_JTFH01001008v1_decoy 2203 TRUE decoy +chrUn_JTFH01001009v1_decoy 2176 TRUE decoy +chrUn_JTFH01001010v1_decoy 2159 TRUE decoy +chrUn_JTFH01001011v1_decoy 2155 TRUE decoy +chrUn_JTFH01001012v1_decoy 2149 TRUE decoy +chrUn_JTFH01001013v1_decoy 2129 TRUE decoy +chrUn_JTFH01001014v1_decoy 2116 TRUE decoy +chrUn_JTFH01001015v1_decoy 2113 TRUE decoy +chrUn_JTFH01001016v1_decoy 2098 TRUE decoy +chrUn_JTFH01001017v1_decoy 2066 TRUE decoy +chrUn_JTFH01001018v1_decoy 2066 TRUE decoy +chrUn_JTFH01001019v1_decoy 2059 TRUE decoy +chrUn_JTFH01001020v1_decoy 2047 TRUE decoy +chrUn_JTFH01001021v1_decoy 2040 TRUE decoy +chrUn_JTFH01001022v1_decoy 2030 TRUE decoy +chrUn_JTFH01001023v1_decoy 2024 TRUE decoy +chrUn_JTFH01001024v1_decoy 2001 TRUE decoy +chrUn_JTFH01001025v1_decoy 1992 TRUE decoy +chrUn_JTFH01001026v1_decoy 1981 TRUE decoy +chrUn_JTFH01001027v1_decoy 1979 TRUE decoy +chrUn_JTFH01001028v1_decoy 1957 TRUE decoy +chrUn_JTFH01001029v1_decoy 1953 TRUE decoy +chrUn_JTFH01001030v1_decoy 1944 TRUE decoy +chrUn_JTFH01001031v1_decoy 1936 TRUE decoy +chrUn_JTFH01001032v1_decoy 1932 TRUE decoy +chrUn_JTFH01001033v1_decoy 1882 TRUE decoy +chrUn_JTFH01001034v1_decoy 1878 TRUE decoy +chrUn_JTFH01001035v1_decoy 1870 TRUE decoy +chrUn_JTFH01001036v1_decoy 1821 TRUE decoy +chrUn_JTFH01001037v1_decoy 1813 TRUE decoy +chrUn_JTFH01001038v1_decoy 1809 TRUE decoy +chrUn_JTFH01001039v1_decoy 1804 TRUE decoy +chrUn_JTFH01001040v1_decoy 1797 TRUE decoy +chrUn_JTFH01001041v1_decoy 1791 TRUE decoy +chrUn_JTFH01001042v1_decoy 1781 TRUE decoy +chrUn_JTFH01001043v1_decoy 1766 TRUE decoy +chrUn_JTFH01001044v1_decoy 1764 TRUE decoy +chrUn_JTFH01001045v1_decoy 1743 TRUE decoy +chrUn_JTFH01001046v1_decoy 1741 TRUE decoy +chrUn_JTFH01001047v1_decoy 1709 TRUE decoy +chrUn_JTFH01001048v1_decoy 1706 TRUE decoy 
+chrUn_JTFH01001049v1_decoy 1701 TRUE decoy +chrUn_JTFH01001050v1_decoy 1689 TRUE decoy +chrUn_JTFH01001051v1_decoy 1646 TRUE decoy +chrUn_JTFH01001052v1_decoy 1641 TRUE decoy +chrUn_JTFH01001053v1_decoy 1639 TRUE decoy +chrUn_JTFH01001054v1_decoy 1636 TRUE decoy +chrUn_JTFH01001055v1_decoy 1632 TRUE decoy +chrUn_JTFH01001056v1_decoy 1629 TRUE decoy +chrUn_JTFH01001057v1_decoy 1623 TRUE decoy +chrUn_JTFH01001058v1_decoy 1622 TRUE decoy +chrUn_JTFH01001059v1_decoy 1622 TRUE decoy +chrUn_JTFH01001060v1_decoy 1619 TRUE decoy +chrUn_JTFH01001061v1_decoy 1606 TRUE decoy +chrUn_JTFH01001062v1_decoy 1593 TRUE decoy +chrUn_JTFH01001063v1_decoy 1592 TRUE decoy +chrUn_JTFH01001064v1_decoy 1558 TRUE decoy +chrUn_JTFH01001065v1_decoy 1545 TRUE decoy +chrUn_JTFH01001066v1_decoy 1542 TRUE decoy +chrUn_JTFH01001067v1_decoy 1540 TRUE decoy +chrUn_JTFH01001068v1_decoy 1529 TRUE decoy +chrUn_JTFH01001069v1_decoy 1518 TRUE decoy +chrUn_JTFH01001070v1_decoy 1515 TRUE decoy +chrUn_JTFH01001071v1_decoy 1513 TRUE decoy +chrUn_JTFH01001072v1_decoy 1507 TRUE decoy +chrUn_JTFH01001073v1_decoy 1504 TRUE decoy +chrUn_JTFH01001074v1_decoy 1499 TRUE decoy +chrUn_JTFH01001075v1_decoy 1495 TRUE decoy +chrUn_JTFH01001076v1_decoy 1495 TRUE decoy +chrUn_JTFH01001077v1_decoy 1492 TRUE decoy +chrUn_JTFH01001078v1_decoy 1492 TRUE decoy +chrUn_JTFH01001079v1_decoy 1489 TRUE decoy +chrUn_JTFH01001080v1_decoy 1485 TRUE decoy +chrUn_JTFH01001081v1_decoy 1483 TRUE decoy +chrUn_JTFH01001082v1_decoy 1473 TRUE decoy +chrUn_JTFH01001083v1_decoy 1470 TRUE decoy +chrUn_JTFH01001084v1_decoy 1463 TRUE decoy +chrUn_JTFH01001085v1_decoy 1460 TRUE decoy +chrUn_JTFH01001086v1_decoy 1458 TRUE decoy +chrUn_JTFH01001087v1_decoy 1456 TRUE decoy +chrUn_JTFH01001088v1_decoy 1453 TRUE decoy +chrUn_JTFH01001089v1_decoy 1443 TRUE decoy +chrUn_JTFH01001090v1_decoy 1441 TRUE decoy +chrUn_JTFH01001091v1_decoy 1426 TRUE decoy +chrUn_JTFH01001092v1_decoy 1425 TRUE decoy +chrUn_JTFH01001093v1_decoy 1418 TRUE decoy 
+chrUn_JTFH01001094v1_decoy 1413 TRUE decoy +chrUn_JTFH01001095v1_decoy 1413 TRUE decoy +chrUn_JTFH01001096v1_decoy 1412 TRUE decoy +chrUn_JTFH01001097v1_decoy 1407 TRUE decoy +chrUn_JTFH01001098v1_decoy 1406 TRUE decoy +chrUn_JTFH01001099v1_decoy 1396 TRUE decoy +chrUn_JTFH01001100v1_decoy 1390 TRUE decoy +chrUn_JTFH01001101v1_decoy 1382 TRUE decoy +chrUn_JTFH01001102v1_decoy 1376 TRUE decoy +chrUn_JTFH01001103v1_decoy 1375 TRUE decoy +chrUn_JTFH01001104v1_decoy 1371 TRUE decoy +chrUn_JTFH01001105v1_decoy 1367 TRUE decoy +chrUn_JTFH01001106v1_decoy 1364 TRUE decoy +chrUn_JTFH01001107v1_decoy 1356 TRUE decoy +chrUn_JTFH01001108v1_decoy 1355 TRUE decoy +chrUn_JTFH01001109v1_decoy 1352 TRUE decoy +chrUn_JTFH01001110v1_decoy 1350 TRUE decoy +chrUn_JTFH01001111v1_decoy 1346 TRUE decoy +chrUn_JTFH01001112v1_decoy 1345 TRUE decoy +chrUn_JTFH01001113v1_decoy 1340 TRUE decoy +chrUn_JTFH01001114v1_decoy 1330 TRUE decoy +chrUn_JTFH01001115v1_decoy 1329 TRUE decoy +chrUn_JTFH01001116v1_decoy 1324 TRUE decoy +chrUn_JTFH01001117v1_decoy 1316 TRUE decoy +chrUn_JTFH01001118v1_decoy 1307 TRUE decoy +chrUn_JTFH01001119v1_decoy 1304 TRUE decoy +chrUn_JTFH01001120v1_decoy 1304 TRUE decoy +chrUn_JTFH01001121v1_decoy 1303 TRUE decoy +chrUn_JTFH01001122v1_decoy 1301 TRUE decoy +chrUn_JTFH01001123v1_decoy 1300 TRUE decoy +chrUn_JTFH01001124v1_decoy 1297 TRUE decoy +chrUn_JTFH01001125v1_decoy 1296 TRUE decoy +chrUn_JTFH01001126v1_decoy 1290 TRUE decoy +chrUn_JTFH01001127v1_decoy 1284 TRUE decoy +chrUn_JTFH01001128v1_decoy 1282 TRUE decoy +chrUn_JTFH01001129v1_decoy 1281 TRUE decoy +chrUn_JTFH01001130v1_decoy 1280 TRUE decoy +chrUn_JTFH01001131v1_decoy 1279 TRUE decoy +chrUn_JTFH01001132v1_decoy 1272 TRUE decoy +chrUn_JTFH01001133v1_decoy 1267 TRUE decoy +chrUn_JTFH01001134v1_decoy 1267 TRUE decoy +chrUn_JTFH01001135v1_decoy 1266 TRUE decoy +chrUn_JTFH01001136v1_decoy 1264 TRUE decoy +chrUn_JTFH01001137v1_decoy 1264 TRUE decoy +chrUn_JTFH01001138v1_decoy 1264 TRUE decoy 
+chrUn_JTFH01001139v1_decoy 1263 TRUE decoy +chrUn_JTFH01001140v1_decoy 1249 TRUE decoy +chrUn_JTFH01001141v1_decoy 1240 TRUE decoy +chrUn_JTFH01001142v1_decoy 1239 TRUE decoy +chrUn_JTFH01001143v1_decoy 1235 TRUE decoy +chrUn_JTFH01001144v1_decoy 1235 TRUE decoy +chrUn_JTFH01001145v1_decoy 1233 TRUE decoy +chrUn_JTFH01001146v1_decoy 1232 TRUE decoy +chrUn_JTFH01001147v1_decoy 1230 TRUE decoy +chrUn_JTFH01001148v1_decoy 1226 TRUE decoy +chrUn_JTFH01001149v1_decoy 1223 TRUE decoy +chrUn_JTFH01001150v1_decoy 1214 TRUE decoy +chrUn_JTFH01001151v1_decoy 1213 TRUE decoy +chrUn_JTFH01001152v1_decoy 1211 TRUE decoy +chrUn_JTFH01001153v1_decoy 1209 TRUE decoy +chrUn_JTFH01001154v1_decoy 1202 TRUE decoy +chrUn_JTFH01001155v1_decoy 1199 TRUE decoy +chrUn_JTFH01001156v1_decoy 1197 TRUE decoy +chrUn_JTFH01001157v1_decoy 1193 TRUE decoy +chrUn_JTFH01001158v1_decoy 1191 TRUE decoy +chrUn_JTFH01001159v1_decoy 1187 TRUE decoy +chrUn_JTFH01001160v1_decoy 1186 TRUE decoy +chrUn_JTFH01001161v1_decoy 1184 TRUE decoy +chrUn_JTFH01001162v1_decoy 1184 TRUE decoy +chrUn_JTFH01001163v1_decoy 1182 TRUE decoy +chrUn_JTFH01001164v1_decoy 1179 TRUE decoy +chrUn_JTFH01001165v1_decoy 1173 TRUE decoy +chrUn_JTFH01001166v1_decoy 1169 TRUE decoy +chrUn_JTFH01001167v1_decoy 1167 TRUE decoy +chrUn_JTFH01001168v1_decoy 1166 TRUE decoy +chrUn_JTFH01001169v1_decoy 1165 TRUE decoy +chrUn_JTFH01001170v1_decoy 1164 TRUE decoy +chrUn_JTFH01001171v1_decoy 1163 TRUE decoy +chrUn_JTFH01001172v1_decoy 1158 TRUE decoy +chrUn_JTFH01001173v1_decoy 1158 TRUE decoy +chrUn_JTFH01001174v1_decoy 1157 TRUE decoy +chrUn_JTFH01001175v1_decoy 1157 TRUE decoy +chrUn_JTFH01001176v1_decoy 1157 TRUE decoy +chrUn_JTFH01001177v1_decoy 1155 TRUE decoy +chrUn_JTFH01001178v1_decoy 1154 TRUE decoy +chrUn_JTFH01001179v1_decoy 1149 TRUE decoy +chrUn_JTFH01001180v1_decoy 1148 TRUE decoy +chrUn_JTFH01001181v1_decoy 1148 TRUE decoy +chrUn_JTFH01001182v1_decoy 1146 TRUE decoy +chrUn_JTFH01001183v1_decoy 1144 TRUE decoy 
+chrUn_JTFH01001184v1_decoy 1140 TRUE decoy +chrUn_JTFH01001185v1_decoy 1136 TRUE decoy +chrUn_JTFH01001186v1_decoy 1134 TRUE decoy +chrUn_JTFH01001187v1_decoy 1133 TRUE decoy +chrUn_JTFH01001188v1_decoy 1129 TRUE decoy +chrUn_JTFH01001189v1_decoy 1127 TRUE decoy +chrUn_JTFH01001190v1_decoy 1127 TRUE decoy +chrUn_JTFH01001191v1_decoy 1118 TRUE decoy +chrUn_JTFH01001192v1_decoy 1110 TRUE decoy +chrUn_JTFH01001193v1_decoy 1104 TRUE decoy +chrUn_JTFH01001194v1_decoy 1104 TRUE decoy +chrUn_JTFH01001195v1_decoy 1101 TRUE decoy +chrUn_JTFH01001196v1_decoy 1098 TRUE decoy +chrUn_JTFH01001197v1_decoy 1096 TRUE decoy +chrUn_JTFH01001198v1_decoy 1094 TRUE decoy +chrUn_JTFH01001199v1_decoy 1091 TRUE decoy +chrUn_JTFH01001200v1_decoy 1089 TRUE decoy +chrUn_JTFH01001201v1_decoy 1086 TRUE decoy +chrUn_JTFH01001202v1_decoy 1085 TRUE decoy +chrUn_JTFH01001203v1_decoy 1084 TRUE decoy +chrUn_JTFH01001204v1_decoy 1083 TRUE decoy +chrUn_JTFH01001205v1_decoy 1083 TRUE decoy +chrUn_JTFH01001206v1_decoy 1079 TRUE decoy +chrUn_JTFH01001207v1_decoy 1076 TRUE decoy +chrUn_JTFH01001208v1_decoy 1069 TRUE decoy +chrUn_JTFH01001209v1_decoy 1068 TRUE decoy +chrUn_JTFH01001210v1_decoy 1067 TRUE decoy +chrUn_JTFH01001211v1_decoy 1067 TRUE decoy +chrUn_JTFH01001212v1_decoy 1067 TRUE decoy +chrUn_JTFH01001213v1_decoy 1063 TRUE decoy +chrUn_JTFH01001214v1_decoy 1062 TRUE decoy +chrUn_JTFH01001215v1_decoy 1059 TRUE decoy +chrUn_JTFH01001216v1_decoy 1058 TRUE decoy +chrUn_JTFH01001217v1_decoy 1058 TRUE decoy +chrUn_JTFH01001218v1_decoy 1055 TRUE decoy +chrUn_JTFH01001219v1_decoy 1054 TRUE decoy +chrUn_JTFH01001220v1_decoy 1054 TRUE decoy +chrUn_JTFH01001221v1_decoy 1053 TRUE decoy +chrUn_JTFH01001222v1_decoy 1053 TRUE decoy +chrUn_JTFH01001223v1_decoy 1052 TRUE decoy +chrUn_JTFH01001224v1_decoy 1051 TRUE decoy +chrUn_JTFH01001225v1_decoy 1049 TRUE decoy +chrUn_JTFH01001226v1_decoy 1047 TRUE decoy +chrUn_JTFH01001227v1_decoy 1044 TRUE decoy +chrUn_JTFH01001228v1_decoy 1043 TRUE decoy 
+chrUn_JTFH01001229v1_decoy 1043 TRUE decoy +chrUn_JTFH01001230v1_decoy 1042 TRUE decoy +chrUn_JTFH01001231v1_decoy 1042 TRUE decoy +chrUn_JTFH01001232v1_decoy 1041 TRUE decoy +chrUn_JTFH01001233v1_decoy 1040 TRUE decoy +chrUn_JTFH01001234v1_decoy 1039 TRUE decoy +chrUn_JTFH01001235v1_decoy 1038 TRUE decoy +chrUn_JTFH01001236v1_decoy 1037 TRUE decoy +chrUn_JTFH01001237v1_decoy 1037 TRUE decoy +chrUn_JTFH01001238v1_decoy 1035 TRUE decoy +chrUn_JTFH01001239v1_decoy 1027 TRUE decoy +chrUn_JTFH01001240v1_decoy 1021 TRUE decoy +chrUn_JTFH01001241v1_decoy 1021 TRUE decoy +chrUn_JTFH01001242v1_decoy 1019 TRUE decoy +chrUn_JTFH01001243v1_decoy 1019 TRUE decoy +chrUn_JTFH01001244v1_decoy 1016 TRUE decoy +chrUn_JTFH01001245v1_decoy 1014 TRUE decoy +chrUn_JTFH01001246v1_decoy 1013 TRUE decoy +chrUn_JTFH01001247v1_decoy 1009 TRUE decoy +chrUn_JTFH01001248v1_decoy 1008 TRUE decoy +chrUn_JTFH01001249v1_decoy 1007 TRUE decoy +chrUn_JTFH01001250v1_decoy 1004 TRUE decoy +chrUn_JTFH01001251v1_decoy 1004 TRUE decoy +chrUn_JTFH01001252v1_decoy 1003 TRUE decoy +chrUn_JTFH01001253v1_decoy 1001 TRUE decoy +chrUn_JTFH01001254v1_decoy 1000 TRUE decoy +chrUn_JTFH01001255v1_decoy 1000 TRUE decoy +chrUn_JTFH01001256v1_decoy 1000 TRUE decoy +chrUn_JTFH01001257v1_decoy 17929 TRUE decoy +chrUn_JTFH01001258v1_decoy 9749 TRUE decoy +chrUn_JTFH01001259v1_decoy 8053 TRUE decoy +chrUn_JTFH01001260v1_decoy 7826 TRUE decoy +chrUn_JTFH01001261v1_decoy 7768 TRUE decoy +chrUn_JTFH01001262v1_decoy 5691 TRUE decoy +chrUn_JTFH01001263v1_decoy 5444 TRUE decoy +chrUn_JTFH01001264v1_decoy 5077 TRUE decoy +chrUn_JTFH01001265v1_decoy 4990 TRUE decoy +chrUn_JTFH01001266v1_decoy 4545 TRUE decoy +chrUn_JTFH01001267v1_decoy 4544 TRUE decoy +chrUn_JTFH01001268v1_decoy 4202 TRUE decoy +chrUn_JTFH01001269v1_decoy 4195 TRUE decoy +chrUn_JTFH01001270v1_decoy 3807 TRUE decoy +chrUn_JTFH01001271v1_decoy 3741 TRUE decoy +chrUn_JTFH01001272v1_decoy 3699 TRUE decoy +chrUn_JTFH01001273v1_decoy 3640 TRUE decoy 
+chrUn_JTFH01001274v1_decoy 3531 TRUE decoy +chrUn_JTFH01001275v1_decoy 3455 TRUE decoy +chrUn_JTFH01001276v1_decoy 3411 TRUE decoy +chrUn_JTFH01001277v1_decoy 3387 TRUE decoy +chrUn_JTFH01001278v1_decoy 3358 TRUE decoy +chrUn_JTFH01001279v1_decoy 3285 TRUE decoy +chrUn_JTFH01001280v1_decoy 3273 TRUE decoy +chrUn_JTFH01001281v1_decoy 3262 TRUE decoy +chrUn_JTFH01001282v1_decoy 3259 TRUE decoy +chrUn_JTFH01001283v1_decoy 3222 TRUE decoy +chrUn_JTFH01001284v1_decoy 3127 TRUE decoy +chrUn_JTFH01001285v1_decoy 3110 TRUE decoy +chrUn_JTFH01001286v1_decoy 3104 TRUE decoy +chrUn_JTFH01001287v1_decoy 3071 TRUE decoy +chrUn_JTFH01001288v1_decoy 3063 TRUE decoy +chrUn_JTFH01001289v1_decoy 3059 TRUE decoy +chrUn_JTFH01001290v1_decoy 2990 TRUE decoy +chrUn_JTFH01001291v1_decoy 2986 TRUE decoy +chrUn_JTFH01001292v1_decoy 2928 TRUE decoy +chrUn_JTFH01001293v1_decoy 2922 TRUE decoy +chrUn_JTFH01001294v1_decoy 2875 TRUE decoy +chrUn_JTFH01001295v1_decoy 2859 TRUE decoy +chrUn_JTFH01001296v1_decoy 2850 TRUE decoy +chrUn_JTFH01001297v1_decoy 2813 TRUE decoy +chrUn_JTFH01001298v1_decoy 2785 TRUE decoy +chrUn_JTFH01001299v1_decoy 2736 TRUE decoy +chrUn_JTFH01001300v1_decoy 2688 TRUE decoy +chrUn_JTFH01001301v1_decoy 2658 TRUE decoy +chrUn_JTFH01001302v1_decoy 2643 TRUE decoy +chrUn_JTFH01001303v1_decoy 2618 TRUE decoy +chrUn_JTFH01001304v1_decoy 2605 TRUE decoy +chrUn_JTFH01001305v1_decoy 2583 TRUE decoy +chrUn_JTFH01001306v1_decoy 2534 TRUE decoy +chrUn_JTFH01001307v1_decoy 2512 TRUE decoy +chrUn_JTFH01001308v1_decoy 2500 TRUE decoy +chrUn_JTFH01001309v1_decoy 2481 TRUE decoy +chrUn_JTFH01001310v1_decoy 2478 TRUE decoy +chrUn_JTFH01001311v1_decoy 2473 TRUE decoy +chrUn_JTFH01001312v1_decoy 2467 TRUE decoy +chrUn_JTFH01001313v1_decoy 2442 TRUE decoy +chrUn_JTFH01001314v1_decoy 2430 TRUE decoy +chrUn_JTFH01001315v1_decoy 2417 TRUE decoy +chrUn_JTFH01001316v1_decoy 2408 TRUE decoy +chrUn_JTFH01001317v1_decoy 2395 TRUE decoy +chrUn_JTFH01001318v1_decoy 2352 TRUE decoy 
+chrUn_JTFH01001319v1_decoy 2337 TRUE decoy +chrUn_JTFH01001320v1_decoy 2322 TRUE decoy +chrUn_JTFH01001321v1_decoy 2307 TRUE decoy +chrUn_JTFH01001322v1_decoy 2306 TRUE decoy +chrUn_JTFH01001323v1_decoy 2292 TRUE decoy +chrUn_JTFH01001324v1_decoy 2271 TRUE decoy +chrUn_JTFH01001325v1_decoy 2265 TRUE decoy +chrUn_JTFH01001326v1_decoy 2260 TRUE decoy +chrUn_JTFH01001327v1_decoy 2240 TRUE decoy +chrUn_JTFH01001328v1_decoy 2238 TRUE decoy +chrUn_JTFH01001329v1_decoy 2228 TRUE decoy +chrUn_JTFH01001330v1_decoy 2215 TRUE decoy +chrUn_JTFH01001331v1_decoy 2205 TRUE decoy +chrUn_JTFH01001332v1_decoy 2191 TRUE decoy +chrUn_JTFH01001333v1_decoy 2191 TRUE decoy +chrUn_JTFH01001334v1_decoy 2190 TRUE decoy +chrUn_JTFH01001335v1_decoy 2184 TRUE decoy +chrUn_JTFH01001336v1_decoy 2166 TRUE decoy +chrUn_JTFH01001337v1_decoy 2165 TRUE decoy +chrUn_JTFH01001338v1_decoy 2162 TRUE decoy +chrUn_JTFH01001339v1_decoy 2146 TRUE decoy +chrUn_JTFH01001340v1_decoy 2116 TRUE decoy +chrUn_JTFH01001341v1_decoy 2112 TRUE decoy +chrUn_JTFH01001342v1_decoy 2108 TRUE decoy +chrUn_JTFH01001343v1_decoy 2106 TRUE decoy +chrUn_JTFH01001344v1_decoy 2106 TRUE decoy +chrUn_JTFH01001345v1_decoy 2106 TRUE decoy +chrUn_JTFH01001346v1_decoy 2097 TRUE decoy +chrUn_JTFH01001347v1_decoy 2081 TRUE decoy +chrUn_JTFH01001348v1_decoy 2058 TRUE decoy +chrUn_JTFH01001349v1_decoy 2055 TRUE decoy +chrUn_JTFH01001350v1_decoy 2054 TRUE decoy +chrUn_JTFH01001351v1_decoy 2037 TRUE decoy +chrUn_JTFH01001352v1_decoy 2032 TRUE decoy +chrUn_JTFH01001353v1_decoy 2032 TRUE decoy +chrUn_JTFH01001354v1_decoy 2020 TRUE decoy +chrUn_JTFH01001355v1_decoy 2018 TRUE decoy +chrUn_JTFH01001356v1_decoy 2014 TRUE decoy +chrUn_JTFH01001357v1_decoy 2001 TRUE decoy +chrUn_JTFH01001358v1_decoy 2001 TRUE decoy +chrUn_JTFH01001359v1_decoy 1991 TRUE decoy +chrUn_JTFH01001360v1_decoy 1990 TRUE decoy +chrUn_JTFH01001361v1_decoy 1983 TRUE decoy +chrUn_JTFH01001362v1_decoy 1981 TRUE decoy +chrUn_JTFH01001363v1_decoy 1981 TRUE decoy 
+chrUn_JTFH01001364v1_decoy 1979 TRUE decoy +chrUn_JTFH01001365v1_decoy 1963 TRUE decoy +chrUn_JTFH01001366v1_decoy 1932 TRUE decoy +chrUn_JTFH01001367v1_decoy 1929 TRUE decoy +chrUn_JTFH01001368v1_decoy 1881 TRUE decoy +chrUn_JTFH01001369v1_decoy 1874 TRUE decoy +chrUn_JTFH01001370v1_decoy 1849 TRUE decoy +chrUn_JTFH01001371v1_decoy 1849 TRUE decoy +chrUn_JTFH01001372v1_decoy 1833 TRUE decoy +chrUn_JTFH01001373v1_decoy 1832 TRUE decoy +chrUn_JTFH01001374v1_decoy 1826 TRUE decoy +chrUn_JTFH01001375v1_decoy 1814 TRUE decoy +chrUn_JTFH01001376v1_decoy 1814 TRUE decoy +chrUn_JTFH01001377v1_decoy 1791 TRUE decoy +chrUn_JTFH01001378v1_decoy 1789 TRUE decoy +chrUn_JTFH01001379v1_decoy 1786 TRUE decoy +chrUn_JTFH01001380v1_decoy 1778 TRUE decoy +chrUn_JTFH01001381v1_decoy 1776 TRUE decoy +chrUn_JTFH01001382v1_decoy 1762 TRUE decoy +chrUn_JTFH01001383v1_decoy 1758 TRUE decoy +chrUn_JTFH01001384v1_decoy 1757 TRUE decoy +chrUn_JTFH01001385v1_decoy 1754 TRUE decoy +chrUn_JTFH01001386v1_decoy 1752 TRUE decoy +chrUn_JTFH01001387v1_decoy 1751 TRUE decoy +chrUn_JTFH01001388v1_decoy 1749 TRUE decoy +chrUn_JTFH01001389v1_decoy 1738 TRUE decoy +chrUn_JTFH01001390v1_decoy 1729 TRUE decoy +chrUn_JTFH01001391v1_decoy 1726 TRUE decoy +chrUn_JTFH01001392v1_decoy 1716 TRUE decoy +chrUn_JTFH01001393v1_decoy 1712 TRUE decoy +chrUn_JTFH01001394v1_decoy 1711 TRUE decoy +chrUn_JTFH01001395v1_decoy 1703 TRUE decoy +chrUn_JTFH01001396v1_decoy 1702 TRUE decoy +chrUn_JTFH01001397v1_decoy 1699 TRUE decoy +chrUn_JTFH01001398v1_decoy 1686 TRUE decoy +chrUn_JTFH01001399v1_decoy 1684 TRUE decoy +chrUn_JTFH01001400v1_decoy 1680 TRUE decoy +chrUn_JTFH01001401v1_decoy 1678 TRUE decoy +chrUn_JTFH01001402v1_decoy 1678 TRUE decoy +chrUn_JTFH01001403v1_decoy 1677 TRUE decoy +chrUn_JTFH01001404v1_decoy 1676 TRUE decoy +chrUn_JTFH01001405v1_decoy 1672 TRUE decoy +chrUn_JTFH01001406v1_decoy 1669 TRUE decoy +chrUn_JTFH01001407v1_decoy 1668 TRUE decoy +chrUn_JTFH01001408v1_decoy 1663 TRUE decoy 
+chrUn_JTFH01001409v1_decoy 1660 TRUE decoy +chrUn_JTFH01001410v1_decoy 1660 TRUE decoy +chrUn_JTFH01001411v1_decoy 1658 TRUE decoy +chrUn_JTFH01001412v1_decoy 1656 TRUE decoy +chrUn_JTFH01001413v1_decoy 1656 TRUE decoy +chrUn_JTFH01001414v1_decoy 1652 TRUE decoy +chrUn_JTFH01001415v1_decoy 1647 TRUE decoy +chrUn_JTFH01001416v1_decoy 1645 TRUE decoy +chrUn_JTFH01001417v1_decoy 1641 TRUE decoy +chrUn_JTFH01001418v1_decoy 1638 TRUE decoy +chrUn_JTFH01001419v1_decoy 1633 TRUE decoy +chrUn_JTFH01001420v1_decoy 1626 TRUE decoy +chrUn_JTFH01001421v1_decoy 1614 TRUE decoy +chrUn_JTFH01001422v1_decoy 1612 TRUE decoy +chrUn_JTFH01001423v1_decoy 1605 TRUE decoy +chrUn_JTFH01001424v1_decoy 1603 TRUE decoy +chrUn_JTFH01001425v1_decoy 1599 TRUE decoy +chrUn_JTFH01001426v1_decoy 1589 TRUE decoy +chrUn_JTFH01001427v1_decoy 1588 TRUE decoy +chrUn_JTFH01001428v1_decoy 1585 TRUE decoy +chrUn_JTFH01001429v1_decoy 1584 TRUE decoy +chrUn_JTFH01001430v1_decoy 1584 TRUE decoy +chrUn_JTFH01001431v1_decoy 1580 TRUE decoy +chrUn_JTFH01001432v1_decoy 1572 TRUE decoy +chrUn_JTFH01001433v1_decoy 1570 TRUE decoy +chrUn_JTFH01001434v1_decoy 1569 TRUE decoy +chrUn_JTFH01001435v1_decoy 1568 TRUE decoy +chrUn_JTFH01001436v1_decoy 1567 TRUE decoy +chrUn_JTFH01001437v1_decoy 1565 TRUE decoy +chrUn_JTFH01001438v1_decoy 1559 TRUE decoy +chrUn_JTFH01001439v1_decoy 1559 TRUE decoy +chrUn_JTFH01001440v1_decoy 1556 TRUE decoy +chrUn_JTFH01001441v1_decoy 1554 TRUE decoy +chrUn_JTFH01001442v1_decoy 1549 TRUE decoy +chrUn_JTFH01001443v1_decoy 1542 TRUE decoy +chrUn_JTFH01001444v1_decoy 1541 TRUE decoy +chrUn_JTFH01001445v1_decoy 1538 TRUE decoy +chrUn_JTFH01001446v1_decoy 1537 TRUE decoy +chrUn_JTFH01001447v1_decoy 1535 TRUE decoy +chrUn_JTFH01001448v1_decoy 1530 TRUE decoy +chrUn_JTFH01001449v1_decoy 1528 TRUE decoy +chrUn_JTFH01001450v1_decoy 1522 TRUE decoy +chrUn_JTFH01001451v1_decoy 1514 TRUE decoy +chrUn_JTFH01001452v1_decoy 1509 TRUE decoy +chrUn_JTFH01001453v1_decoy 1507 TRUE decoy 
+chrUn_JTFH01001454v1_decoy 1500 TRUE decoy +chrUn_JTFH01001455v1_decoy 1499 TRUE decoy +chrUn_JTFH01001456v1_decoy 1499 TRUE decoy +chrUn_JTFH01001457v1_decoy 1497 TRUE decoy +chrUn_JTFH01001458v1_decoy 1496 TRUE decoy +chrUn_JTFH01001459v1_decoy 1488 TRUE decoy +chrUn_JTFH01001460v1_decoy 1486 TRUE decoy +chrUn_JTFH01001461v1_decoy 1485 TRUE decoy +chrUn_JTFH01001462v1_decoy 1481 TRUE decoy +chrUn_JTFH01001463v1_decoy 1479 TRUE decoy +chrUn_JTFH01001464v1_decoy 1472 TRUE decoy +chrUn_JTFH01001465v1_decoy 1472 TRUE decoy +chrUn_JTFH01001466v1_decoy 1470 TRUE decoy +chrUn_JTFH01001467v1_decoy 1466 TRUE decoy +chrUn_JTFH01001468v1_decoy 1465 TRUE decoy +chrUn_JTFH01001469v1_decoy 1461 TRUE decoy +chrUn_JTFH01001470v1_decoy 1458 TRUE decoy +chrUn_JTFH01001471v1_decoy 1457 TRUE decoy +chrUn_JTFH01001472v1_decoy 1448 TRUE decoy +chrUn_JTFH01001473v1_decoy 1447 TRUE decoy +chrUn_JTFH01001474v1_decoy 1444 TRUE decoy +chrUn_JTFH01001475v1_decoy 1443 TRUE decoy +chrUn_JTFH01001476v1_decoy 1443 TRUE decoy +chrUn_JTFH01001477v1_decoy 1438 TRUE decoy +chrUn_JTFH01001478v1_decoy 1432 TRUE decoy +chrUn_JTFH01001479v1_decoy 1430 TRUE decoy +chrUn_JTFH01001480v1_decoy 1430 TRUE decoy +chrUn_JTFH01001481v1_decoy 1429 TRUE decoy +chrUn_JTFH01001482v1_decoy 1429 TRUE decoy +chrUn_JTFH01001483v1_decoy 1429 TRUE decoy +chrUn_JTFH01001484v1_decoy 1426 TRUE decoy +chrUn_JTFH01001485v1_decoy 1426 TRUE decoy +chrUn_JTFH01001486v1_decoy 1420 TRUE decoy +chrUn_JTFH01001487v1_decoy 1416 TRUE decoy +chrUn_JTFH01001488v1_decoy 1416 TRUE decoy +chrUn_JTFH01001489v1_decoy 1415 TRUE decoy +chrUn_JTFH01001490v1_decoy 1415 TRUE decoy +chrUn_JTFH01001491v1_decoy 1414 TRUE decoy +chrUn_JTFH01001492v1_decoy 1413 TRUE decoy +chrUn_JTFH01001493v1_decoy 1410 TRUE decoy +chrUn_JTFH01001494v1_decoy 1405 TRUE decoy +chrUn_JTFH01001495v1_decoy 1402 TRUE decoy +chrUn_JTFH01001496v1_decoy 1398 TRUE decoy +chrUn_JTFH01001497v1_decoy 1397 TRUE decoy +chrUn_JTFH01001498v1_decoy 1395 TRUE decoy 
+chrUn_JTFH01001499v1_decoy 1392 TRUE decoy +chrUn_JTFH01001500v1_decoy 1388 TRUE decoy +chrUn_JTFH01001501v1_decoy 1386 TRUE decoy +chrUn_JTFH01001502v1_decoy 1382 TRUE decoy +chrUn_JTFH01001503v1_decoy 1381 TRUE decoy +chrUn_JTFH01001504v1_decoy 1379 TRUE decoy +chrUn_JTFH01001505v1_decoy 1376 TRUE decoy +chrUn_JTFH01001506v1_decoy 1374 TRUE decoy +chrUn_JTFH01001507v1_decoy 1374 TRUE decoy +chrUn_JTFH01001508v1_decoy 1373 TRUE decoy +chrUn_JTFH01001509v1_decoy 1373 TRUE decoy +chrUn_JTFH01001510v1_decoy 1372 TRUE decoy +chrUn_JTFH01001511v1_decoy 1370 TRUE decoy +chrUn_JTFH01001512v1_decoy 1367 TRUE decoy +chrUn_JTFH01001513v1_decoy 1365 TRUE decoy +chrUn_JTFH01001514v1_decoy 1364 TRUE decoy +chrUn_JTFH01001515v1_decoy 1361 TRUE decoy +chrUn_JTFH01001516v1_decoy 1361 TRUE decoy +chrUn_JTFH01001517v1_decoy 1355 TRUE decoy +chrUn_JTFH01001518v1_decoy 1355 TRUE decoy +chrUn_JTFH01001519v1_decoy 1354 TRUE decoy +chrUn_JTFH01001520v1_decoy 1353 TRUE decoy +chrUn_JTFH01001521v1_decoy 1349 TRUE decoy +chrUn_JTFH01001522v1_decoy 1345 TRUE decoy +chrUn_JTFH01001523v1_decoy 1344 TRUE decoy +chrUn_JTFH01001524v1_decoy 1343 TRUE decoy +chrUn_JTFH01001525v1_decoy 1338 TRUE decoy +chrUn_JTFH01001526v1_decoy 1338 TRUE decoy +chrUn_JTFH01001527v1_decoy 1338 TRUE decoy +chrUn_JTFH01001528v1_decoy 1336 TRUE decoy +chrUn_JTFH01001529v1_decoy 1333 TRUE decoy +chrUn_JTFH01001530v1_decoy 1333 TRUE decoy +chrUn_JTFH01001531v1_decoy 1332 TRUE decoy +chrUn_JTFH01001532v1_decoy 1324 TRUE decoy +chrUn_JTFH01001533v1_decoy 1323 TRUE decoy +chrUn_JTFH01001534v1_decoy 1323 TRUE decoy +chrUn_JTFH01001535v1_decoy 1320 TRUE decoy +chrUn_JTFH01001536v1_decoy 1320 TRUE decoy +chrUn_JTFH01001537v1_decoy 1317 TRUE decoy +chrUn_JTFH01001538v1_decoy 1316 TRUE decoy +chrUn_JTFH01001539v1_decoy 1304 TRUE decoy +chrUn_JTFH01001540v1_decoy 1304 TRUE decoy +chrUn_JTFH01001541v1_decoy 1303 TRUE decoy +chrUn_JTFH01001542v1_decoy 1302 TRUE decoy +chrUn_JTFH01001543v1_decoy 1301 TRUE decoy 
+chrUn_JTFH01001544v1_decoy 1300 TRUE decoy +chrUn_JTFH01001545v1_decoy 1298 TRUE decoy +chrUn_JTFH01001546v1_decoy 1297 TRUE decoy +chrUn_JTFH01001547v1_decoy 1295 TRUE decoy +chrUn_JTFH01001548v1_decoy 1284 TRUE decoy +chrUn_JTFH01001549v1_decoy 1283 TRUE decoy +chrUn_JTFH01001550v1_decoy 1283 TRUE decoy +chrUn_JTFH01001551v1_decoy 1279 TRUE decoy +chrUn_JTFH01001552v1_decoy 1278 TRUE decoy +chrUn_JTFH01001553v1_decoy 1271 TRUE decoy +chrUn_JTFH01001554v1_decoy 1271 TRUE decoy +chrUn_JTFH01001555v1_decoy 1268 TRUE decoy +chrUn_JTFH01001556v1_decoy 1264 TRUE decoy +chrUn_JTFH01001557v1_decoy 1263 TRUE decoy +chrUn_JTFH01001558v1_decoy 1262 TRUE decoy +chrUn_JTFH01001559v1_decoy 1261 TRUE decoy +chrUn_JTFH01001560v1_decoy 1260 TRUE decoy +chrUn_JTFH01001561v1_decoy 1259 TRUE decoy +chrUn_JTFH01001562v1_decoy 1259 TRUE decoy +chrUn_JTFH01001563v1_decoy 1258 TRUE decoy +chrUn_JTFH01001564v1_decoy 1256 TRUE decoy +chrUn_JTFH01001565v1_decoy 1253 TRUE decoy +chrUn_JTFH01001566v1_decoy 1248 TRUE decoy +chrUn_JTFH01001567v1_decoy 1248 TRUE decoy +chrUn_JTFH01001568v1_decoy 1246 TRUE decoy +chrUn_JTFH01001569v1_decoy 1246 TRUE decoy +chrUn_JTFH01001570v1_decoy 1244 TRUE decoy +chrUn_JTFH01001571v1_decoy 1238 TRUE decoy +chrUn_JTFH01001572v1_decoy 1238 TRUE decoy +chrUn_JTFH01001573v1_decoy 1236 TRUE decoy +chrUn_JTFH01001574v1_decoy 1234 TRUE decoy +chrUn_JTFH01001575v1_decoy 1234 TRUE decoy +chrUn_JTFH01001576v1_decoy 1231 TRUE decoy +chrUn_JTFH01001577v1_decoy 1231 TRUE decoy +chrUn_JTFH01001578v1_decoy 1230 TRUE decoy +chrUn_JTFH01001579v1_decoy 1230 TRUE decoy +chrUn_JTFH01001580v1_decoy 1228 TRUE decoy +chrUn_JTFH01001581v1_decoy 1227 TRUE decoy +chrUn_JTFH01001582v1_decoy 1222 TRUE decoy +chrUn_JTFH01001583v1_decoy 1222 TRUE decoy +chrUn_JTFH01001584v1_decoy 1221 TRUE decoy +chrUn_JTFH01001585v1_decoy 1221 TRUE decoy +chrUn_JTFH01001586v1_decoy 1220 TRUE decoy +chrUn_JTFH01001587v1_decoy 1218 TRUE decoy +chrUn_JTFH01001588v1_decoy 1218 TRUE decoy 
+chrUn_JTFH01001589v1_decoy 1216 TRUE decoy +chrUn_JTFH01001590v1_decoy 1216 TRUE decoy +chrUn_JTFH01001591v1_decoy 1212 TRUE decoy +chrUn_JTFH01001592v1_decoy 1210 TRUE decoy +chrUn_JTFH01001593v1_decoy 1209 TRUE decoy +chrUn_JTFH01001594v1_decoy 1208 TRUE decoy +chrUn_JTFH01001595v1_decoy 1208 TRUE decoy +chrUn_JTFH01001596v1_decoy 1206 TRUE decoy +chrUn_JTFH01001597v1_decoy 1205 TRUE decoy +chrUn_JTFH01001598v1_decoy 1205 TRUE decoy +chrUn_JTFH01001599v1_decoy 1202 TRUE decoy +chrUn_JTFH01001600v1_decoy 1200 TRUE decoy +chrUn_JTFH01001601v1_decoy 1199 TRUE decoy +chrUn_JTFH01001602v1_decoy 1198 TRUE decoy +chrUn_JTFH01001603v1_decoy 1198 TRUE decoy +chrUn_JTFH01001604v1_decoy 1198 TRUE decoy +chrUn_JTFH01001605v1_decoy 1195 TRUE decoy +chrUn_JTFH01001606v1_decoy 1194 TRUE decoy +chrUn_JTFH01001607v1_decoy 1191 TRUE decoy +chrUn_JTFH01001608v1_decoy 1189 TRUE decoy +chrUn_JTFH01001609v1_decoy 1188 TRUE decoy +chrUn_JTFH01001610v1_decoy 1180 TRUE decoy +chrUn_JTFH01001611v1_decoy 1180 TRUE decoy +chrUn_JTFH01001612v1_decoy 1179 TRUE decoy +chrUn_JTFH01001613v1_decoy 1172 TRUE decoy +chrUn_JTFH01001614v1_decoy 1168 TRUE decoy +chrUn_JTFH01001615v1_decoy 1166 TRUE decoy +chrUn_JTFH01001616v1_decoy 1157 TRUE decoy +chrUn_JTFH01001617v1_decoy 1156 TRUE decoy +chrUn_JTFH01001618v1_decoy 1156 TRUE decoy +chrUn_JTFH01001619v1_decoy 1155 TRUE decoy +chrUn_JTFH01001620v1_decoy 1154 TRUE decoy +chrUn_JTFH01001621v1_decoy 1154 TRUE decoy +chrUn_JTFH01001622v1_decoy 1149 TRUE decoy +chrUn_JTFH01001623v1_decoy 1143 TRUE decoy +chrUn_JTFH01001624v1_decoy 1143 TRUE decoy +chrUn_JTFH01001625v1_decoy 1140 TRUE decoy +chrUn_JTFH01001626v1_decoy 1137 TRUE decoy +chrUn_JTFH01001627v1_decoy 1135 TRUE decoy +chrUn_JTFH01001628v1_decoy 1135 TRUE decoy +chrUn_JTFH01001629v1_decoy 1135 TRUE decoy +chrUn_JTFH01001630v1_decoy 1127 TRUE decoy +chrUn_JTFH01001631v1_decoy 1127 TRUE decoy +chrUn_JTFH01001632v1_decoy 1126 TRUE decoy +chrUn_JTFH01001633v1_decoy 1123 TRUE decoy 
+chrUn_JTFH01001634v1_decoy 1123 TRUE decoy +chrUn_JTFH01001635v1_decoy 1123 TRUE decoy +chrUn_JTFH01001636v1_decoy 1122 TRUE decoy +chrUn_JTFH01001637v1_decoy 1122 TRUE decoy +chrUn_JTFH01001638v1_decoy 1121 TRUE decoy +chrUn_JTFH01001639v1_decoy 1121 TRUE decoy +chrUn_JTFH01001640v1_decoy 1119 TRUE decoy +chrUn_JTFH01001641v1_decoy 1119 TRUE decoy +chrUn_JTFH01001642v1_decoy 1119 TRUE decoy +chrUn_JTFH01001643v1_decoy 1118 TRUE decoy +chrUn_JTFH01001644v1_decoy 1115 TRUE decoy +chrUn_JTFH01001645v1_decoy 1106 TRUE decoy +chrUn_JTFH01001646v1_decoy 1106 TRUE decoy +chrUn_JTFH01001647v1_decoy 1104 TRUE decoy +chrUn_JTFH01001648v1_decoy 1102 TRUE decoy +chrUn_JTFH01001649v1_decoy 1101 TRUE decoy +chrUn_JTFH01001650v1_decoy 1098 TRUE decoy +chrUn_JTFH01001651v1_decoy 1098 TRUE decoy +chrUn_JTFH01001652v1_decoy 1096 TRUE decoy +chrUn_JTFH01001653v1_decoy 1096 TRUE decoy +chrUn_JTFH01001654v1_decoy 1095 TRUE decoy +chrUn_JTFH01001655v1_decoy 1093 TRUE decoy +chrUn_JTFH01001656v1_decoy 1090 TRUE decoy +chrUn_JTFH01001657v1_decoy 1089 TRUE decoy +chrUn_JTFH01001658v1_decoy 1087 TRUE decoy +chrUn_JTFH01001659v1_decoy 1087 TRUE decoy +chrUn_JTFH01001660v1_decoy 1085 TRUE decoy +chrUn_JTFH01001661v1_decoy 1085 TRUE decoy +chrUn_JTFH01001662v1_decoy 1085 TRUE decoy +chrUn_JTFH01001663v1_decoy 1083 TRUE decoy +chrUn_JTFH01001664v1_decoy 1080 TRUE decoy +chrUn_JTFH01001665v1_decoy 1080 TRUE decoy +chrUn_JTFH01001666v1_decoy 1079 TRUE decoy +chrUn_JTFH01001667v1_decoy 1079 TRUE decoy +chrUn_JTFH01001668v1_decoy 1079 TRUE decoy +chrUn_JTFH01001669v1_decoy 1075 TRUE decoy +chrUn_JTFH01001670v1_decoy 1074 TRUE decoy +chrUn_JTFH01001671v1_decoy 1073 TRUE decoy +chrUn_JTFH01001672v1_decoy 1070 TRUE decoy +chrUn_JTFH01001673v1_decoy 1068 TRUE decoy +chrUn_JTFH01001674v1_decoy 1067 TRUE decoy +chrUn_JTFH01001675v1_decoy 1066 TRUE decoy +chrUn_JTFH01001676v1_decoy 1066 TRUE decoy +chrUn_JTFH01001677v1_decoy 1066 TRUE decoy +chrUn_JTFH01001678v1_decoy 1063 TRUE decoy 
+chrUn_JTFH01001679v1_decoy 1063 TRUE decoy +chrUn_JTFH01001680v1_decoy 1063 TRUE decoy +chrUn_JTFH01001681v1_decoy 1062 TRUE decoy +chrUn_JTFH01001682v1_decoy 1058 TRUE decoy +chrUn_JTFH01001683v1_decoy 1056 TRUE decoy +chrUn_JTFH01001684v1_decoy 1052 TRUE decoy +chrUn_JTFH01001685v1_decoy 1051 TRUE decoy +chrUn_JTFH01001686v1_decoy 1051 TRUE decoy +chrUn_JTFH01001687v1_decoy 1050 TRUE decoy +chrUn_JTFH01001688v1_decoy 1048 TRUE decoy +chrUn_JTFH01001689v1_decoy 1046 TRUE decoy +chrUn_JTFH01001690v1_decoy 1046 TRUE decoy +chrUn_JTFH01001691v1_decoy 1045 TRUE decoy +chrUn_JTFH01001692v1_decoy 1043 TRUE decoy +chrUn_JTFH01001693v1_decoy 1038 TRUE decoy +chrUn_JTFH01001694v1_decoy 1036 TRUE decoy +chrUn_JTFH01001695v1_decoy 1035 TRUE decoy +chrUn_JTFH01001696v1_decoy 1035 TRUE decoy +chrUn_JTFH01001697v1_decoy 1035 TRUE decoy +chrUn_JTFH01001698v1_decoy 1033 TRUE decoy +chrUn_JTFH01001699v1_decoy 1032 TRUE decoy +chrUn_JTFH01001700v1_decoy 1031 TRUE decoy +chrUn_JTFH01001701v1_decoy 1026 TRUE decoy +chrUn_JTFH01001702v1_decoy 1026 TRUE decoy +chrUn_JTFH01001703v1_decoy 1026 TRUE decoy +chrUn_JTFH01001704v1_decoy 1023 TRUE decoy +chrUn_JTFH01001705v1_decoy 1022 TRUE decoy +chrUn_JTFH01001706v1_decoy 1020 TRUE decoy +chrUn_JTFH01001707v1_decoy 1020 TRUE decoy +chrUn_JTFH01001708v1_decoy 1020 TRUE decoy +chrUn_JTFH01001709v1_decoy 1019 TRUE decoy +chrUn_JTFH01001710v1_decoy 1018 TRUE decoy +chrUn_JTFH01001711v1_decoy 1018 TRUE decoy +chrUn_JTFH01001712v1_decoy 1017 TRUE decoy +chrUn_JTFH01001713v1_decoy 1015 TRUE decoy +chrUn_JTFH01001714v1_decoy 1015 TRUE decoy +chrUn_JTFH01001715v1_decoy 1015 TRUE decoy +chrUn_JTFH01001716v1_decoy 1014 TRUE decoy +chrUn_JTFH01001717v1_decoy 1014 TRUE decoy +chrUn_JTFH01001718v1_decoy 1013 TRUE decoy +chrUn_JTFH01001719v1_decoy 1013 TRUE decoy +chrUn_JTFH01001720v1_decoy 1013 TRUE decoy +chrUn_JTFH01001721v1_decoy 1012 TRUE decoy +chrUn_JTFH01001722v1_decoy 1011 TRUE decoy +chrUn_JTFH01001723v1_decoy 1011 TRUE decoy 
+chrUn_JTFH01001724v1_decoy 1009 TRUE decoy +chrUn_JTFH01001725v1_decoy 1008 TRUE decoy +chrUn_JTFH01001726v1_decoy 1008 TRUE decoy +chrUn_JTFH01001727v1_decoy 1007 TRUE decoy +chrUn_JTFH01001728v1_decoy 1007 TRUE decoy +chrUn_JTFH01001729v1_decoy 1007 TRUE decoy +chrUn_JTFH01001730v1_decoy 1006 TRUE decoy +chrUn_JTFH01001731v1_decoy 1005 TRUE decoy +chrUn_JTFH01001732v1_decoy 1003 TRUE decoy +chrUn_JTFH01001733v1_decoy 1001 TRUE decoy +chrUn_JTFH01001734v1_decoy 1000 TRUE decoy +chrUn_JTFH01001735v1_decoy 19311 TRUE decoy +chrUn_JTFH01001736v1_decoy 11713 TRUE decoy +chrUn_JTFH01001737v1_decoy 11263 TRUE decoy +chrUn_JTFH01001738v1_decoy 9779 TRUE decoy +chrUn_JTFH01001739v1_decoy 9568 TRUE decoy +chrUn_JTFH01001740v1_decoy 9344 TRUE decoy +chrUn_JTFH01001741v1_decoy 9188 TRUE decoy +chrUn_JTFH01001742v1_decoy 9100 TRUE decoy +chrUn_JTFH01001743v1_decoy 8771 TRUE decoy +chrUn_JTFH01001744v1_decoy 8690 TRUE decoy +chrUn_JTFH01001745v1_decoy 8566 TRUE decoy +chrUn_JTFH01001746v1_decoy 8058 TRUE decoy +chrUn_JTFH01001747v1_decoy 7759 TRUE decoy +chrUn_JTFH01001748v1_decoy 7585 TRUE decoy +chrUn_JTFH01001749v1_decoy 7471 TRUE decoy +chrUn_JTFH01001750v1_decoy 7461 TRUE decoy +chrUn_JTFH01001751v1_decoy 7342 TRUE decoy +chrUn_JTFH01001752v1_decoy 7223 TRUE decoy +chrUn_JTFH01001753v1_decoy 7064 TRUE decoy +chrUn_JTFH01001754v1_decoy 6916 TRUE decoy +chrUn_JTFH01001755v1_decoy 6897 TRUE decoy +chrUn_JTFH01001756v1_decoy 6880 TRUE decoy +chrUn_JTFH01001757v1_decoy 6857 TRUE decoy +chrUn_JTFH01001758v1_decoy 6840 TRUE decoy +chrUn_JTFH01001759v1_decoy 6728 TRUE decoy +chrUn_JTFH01001760v1_decoy 6688 TRUE decoy +chrUn_JTFH01001761v1_decoy 6553 TRUE decoy +chrUn_JTFH01001762v1_decoy 6396 TRUE decoy +chrUn_JTFH01001763v1_decoy 6345 TRUE decoy +chrUn_JTFH01001764v1_decoy 6295 TRUE decoy +chrUn_JTFH01001765v1_decoy 6266 TRUE decoy +chrUn_JTFH01001766v1_decoy 6173 TRUE decoy +chrUn_JTFH01001767v1_decoy 6171 TRUE decoy +chrUn_JTFH01001768v1_decoy 6120 TRUE decoy 
+chrUn_JTFH01001769v1_decoy 6105 TRUE decoy +chrUn_JTFH01001770v1_decoy 6099 TRUE decoy +chrUn_JTFH01001771v1_decoy 5893 TRUE decoy +chrUn_JTFH01001772v1_decoy 5829 TRUE decoy +chrUn_JTFH01001773v1_decoy 5793 TRUE decoy +chrUn_JTFH01001774v1_decoy 5776 TRUE decoy +chrUn_JTFH01001775v1_decoy 5759 TRUE decoy +chrUn_JTFH01001776v1_decoy 5716 TRUE decoy +chrUn_JTFH01001777v1_decoy 5708 TRUE decoy +chrUn_JTFH01001778v1_decoy 5590 TRUE decoy +chrUn_JTFH01001779v1_decoy 5566 TRUE decoy +chrUn_JTFH01001780v1_decoy 5558 TRUE decoy +chrUn_JTFH01001781v1_decoy 5418 TRUE decoy +chrUn_JTFH01001782v1_decoy 5375 TRUE decoy +chrUn_JTFH01001783v1_decoy 5300 TRUE decoy +chrUn_JTFH01001784v1_decoy 5255 TRUE decoy +chrUn_JTFH01001785v1_decoy 5157 TRUE decoy +chrUn_JTFH01001786v1_decoy 5130 TRUE decoy +chrUn_JTFH01001787v1_decoy 4978 TRUE decoy +chrUn_JTFH01001788v1_decoy 4957 TRUE decoy +chrUn_JTFH01001789v1_decoy 4947 TRUE decoy +chrUn_JTFH01001790v1_decoy 4897 TRUE decoy +chrUn_JTFH01001791v1_decoy 4867 TRUE decoy +chrUn_JTFH01001792v1_decoy 4845 TRUE decoy +chrUn_JTFH01001793v1_decoy 4678 TRUE decoy +chrUn_JTFH01001794v1_decoy 4641 TRUE decoy +chrUn_JTFH01001795v1_decoy 4592 TRUE decoy +chrUn_JTFH01001796v1_decoy 4543 TRUE decoy +chrUn_JTFH01001797v1_decoy 4532 TRUE decoy +chrUn_JTFH01001798v1_decoy 4503 TRUE decoy +chrUn_JTFH01001799v1_decoy 4495 TRUE decoy +chrUn_JTFH01001800v1_decoy 4444 TRUE decoy +chrUn_JTFH01001801v1_decoy 4414 TRUE decoy +chrUn_JTFH01001802v1_decoy 4409 TRUE decoy +chrUn_JTFH01001803v1_decoy 4302 TRUE decoy +chrUn_JTFH01001804v1_decoy 4300 TRUE decoy +chrUn_JTFH01001805v1_decoy 4277 TRUE decoy +chrUn_JTFH01001806v1_decoy 4173 TRUE decoy +chrUn_JTFH01001807v1_decoy 4169 TRUE decoy +chrUn_JTFH01001808v1_decoy 4136 TRUE decoy +chrUn_JTFH01001809v1_decoy 4101 TRUE decoy +chrUn_JTFH01001810v1_decoy 4089 TRUE decoy +chrUn_JTFH01001811v1_decoy 4015 TRUE decoy +chrUn_JTFH01001812v1_decoy 4000 TRUE decoy +chrUn_JTFH01001813v1_decoy 3973 TRUE decoy 
+chrUn_JTFH01001814v1_decoy 3732 TRUE decoy +chrUn_JTFH01001815v1_decoy 3709 TRUE decoy +chrUn_JTFH01001816v1_decoy 3686 TRUE decoy +chrUn_JTFH01001817v1_decoy 3676 TRUE decoy +chrUn_JTFH01001818v1_decoy 3673 TRUE decoy +chrUn_JTFH01001819v1_decoy 3672 TRUE decoy +chrUn_JTFH01001820v1_decoy 3633 TRUE decoy +chrUn_JTFH01001821v1_decoy 3633 TRUE decoy +chrUn_JTFH01001822v1_decoy 3613 TRUE decoy +chrUn_JTFH01001823v1_decoy 3605 TRUE decoy +chrUn_JTFH01001824v1_decoy 3592 TRUE decoy +chrUn_JTFH01001825v1_decoy 3586 TRUE decoy +chrUn_JTFH01001826v1_decoy 3584 TRUE decoy +chrUn_JTFH01001827v1_decoy 3577 TRUE decoy +chrUn_JTFH01001828v1_decoy 3537 TRUE decoy +chrUn_JTFH01001829v1_decoy 3510 TRUE decoy +chrUn_JTFH01001830v1_decoy 3509 TRUE decoy +chrUn_JTFH01001831v1_decoy 3488 TRUE decoy +chrUn_JTFH01001832v1_decoy 3473 TRUE decoy +chrUn_JTFH01001833v1_decoy 3445 TRUE decoy +chrUn_JTFH01001834v1_decoy 3427 TRUE decoy +chrUn_JTFH01001835v1_decoy 3395 TRUE decoy +chrUn_JTFH01001836v1_decoy 3367 TRUE decoy +chrUn_JTFH01001837v1_decoy 3337 TRUE decoy +chrUn_JTFH01001838v1_decoy 3324 TRUE decoy +chrUn_JTFH01001839v1_decoy 3315 TRUE decoy +chrUn_JTFH01001840v1_decoy 3313 TRUE decoy +chrUn_JTFH01001841v1_decoy 3283 TRUE decoy +chrUn_JTFH01001842v1_decoy 3250 TRUE decoy +chrUn_JTFH01001843v1_decoy 3247 TRUE decoy +chrUn_JTFH01001844v1_decoy 3237 TRUE decoy +chrUn_JTFH01001845v1_decoy 3235 TRUE decoy +chrUn_JTFH01001846v1_decoy 3200 TRUE decoy +chrUn_JTFH01001847v1_decoy 3195 TRUE decoy +chrUn_JTFH01001848v1_decoy 3175 TRUE decoy +chrUn_JTFH01001849v1_decoy 3158 TRUE decoy +chrUn_JTFH01001850v1_decoy 3143 TRUE decoy +chrUn_JTFH01001851v1_decoy 3139 TRUE decoy +chrUn_JTFH01001852v1_decoy 3138 TRUE decoy +chrUn_JTFH01001853v1_decoy 3136 TRUE decoy +chrUn_JTFH01001854v1_decoy 3132 TRUE decoy +chrUn_JTFH01001855v1_decoy 3132 TRUE decoy +chrUn_JTFH01001856v1_decoy 3095 TRUE decoy +chrUn_JTFH01001857v1_decoy 3094 TRUE decoy +chrUn_JTFH01001858v1_decoy 3093 TRUE decoy 
+chrUn_JTFH01001859v1_decoy 3059 TRUE decoy +chrUn_JTFH01001860v1_decoy 2985 TRUE decoy +chrUn_JTFH01001861v1_decoy 2975 TRUE decoy +chrUn_JTFH01001862v1_decoy 2967 TRUE decoy +chrUn_JTFH01001863v1_decoy 2961 TRUE decoy +chrUn_JTFH01001864v1_decoy 2955 TRUE decoy +chrUn_JTFH01001865v1_decoy 2935 TRUE decoy +chrUn_JTFH01001866v1_decoy 2933 TRUE decoy +chrUn_JTFH01001867v1_decoy 2909 TRUE decoy +chrUn_JTFH01001868v1_decoy 2904 TRUE decoy +chrUn_JTFH01001869v1_decoy 2892 TRUE decoy +chrUn_JTFH01001870v1_decoy 2886 TRUE decoy +chrUn_JTFH01001871v1_decoy 2885 TRUE decoy +chrUn_JTFH01001872v1_decoy 2878 TRUE decoy +chrUn_JTFH01001873v1_decoy 2875 TRUE decoy +chrUn_JTFH01001874v1_decoy 2861 TRUE decoy +chrUn_JTFH01001875v1_decoy 2856 TRUE decoy +chrUn_JTFH01001876v1_decoy 2838 TRUE decoy +chrUn_JTFH01001877v1_decoy 2801 TRUE decoy +chrUn_JTFH01001878v1_decoy 2797 TRUE decoy +chrUn_JTFH01001879v1_decoy 2788 TRUE decoy +chrUn_JTFH01001880v1_decoy 2773 TRUE decoy +chrUn_JTFH01001881v1_decoy 2755 TRUE decoy +chrUn_JTFH01001882v1_decoy 2754 TRUE decoy +chrUn_JTFH01001883v1_decoy 2743 TRUE decoy +chrUn_JTFH01001884v1_decoy 2725 TRUE decoy +chrUn_JTFH01001885v1_decoy 2722 TRUE decoy +chrUn_JTFH01001886v1_decoy 2682 TRUE decoy +chrUn_JTFH01001887v1_decoy 2669 TRUE decoy +chrUn_JTFH01001888v1_decoy 2663 TRUE decoy +chrUn_JTFH01001889v1_decoy 2652 TRUE decoy +chrUn_JTFH01001890v1_decoy 2647 TRUE decoy +chrUn_JTFH01001891v1_decoy 2635 TRUE decoy +chrUn_JTFH01001892v1_decoy 2633 TRUE decoy +chrUn_JTFH01001893v1_decoy 2629 TRUE decoy +chrUn_JTFH01001894v1_decoy 2612 TRUE decoy +chrUn_JTFH01001895v1_decoy 2599 TRUE decoy +chrUn_JTFH01001896v1_decoy 2566 TRUE decoy +chrUn_JTFH01001897v1_decoy 2556 TRUE decoy +chrUn_JTFH01001898v1_decoy 2551 TRUE decoy +chrUn_JTFH01001899v1_decoy 2551 TRUE decoy +chrUn_JTFH01001900v1_decoy 2538 TRUE decoy +chrUn_JTFH01001901v1_decoy 2538 TRUE decoy +chrUn_JTFH01001902v1_decoy 2525 TRUE decoy +chrUn_JTFH01001903v1_decoy 2498 TRUE decoy 
+chrUn_JTFH01001904v1_decoy 2496 TRUE decoy +chrUn_JTFH01001905v1_decoy 2483 TRUE decoy +chrUn_JTFH01001906v1_decoy 2475 TRUE decoy +chrUn_JTFH01001907v1_decoy 2469 TRUE decoy +chrUn_JTFH01001908v1_decoy 2455 TRUE decoy +chrUn_JTFH01001909v1_decoy 2444 TRUE decoy +chrUn_JTFH01001910v1_decoy 2437 TRUE decoy +chrUn_JTFH01001911v1_decoy 2435 TRUE decoy +chrUn_JTFH01001912v1_decoy 2427 TRUE decoy +chrUn_JTFH01001913v1_decoy 2419 TRUE decoy +chrUn_JTFH01001914v1_decoy 2413 TRUE decoy +chrUn_JTFH01001915v1_decoy 2412 TRUE decoy +chrUn_JTFH01001916v1_decoy 2400 TRUE decoy +chrUn_JTFH01001917v1_decoy 2399 TRUE decoy +chrUn_JTFH01001918v1_decoy 2396 TRUE decoy +chrUn_JTFH01001919v1_decoy 2393 TRUE decoy +chrUn_JTFH01001920v1_decoy 2386 TRUE decoy +chrUn_JTFH01001921v1_decoy 2384 TRUE decoy +chrUn_JTFH01001922v1_decoy 2382 TRUE decoy +chrUn_JTFH01001923v1_decoy 2382 TRUE decoy +chrUn_JTFH01001924v1_decoy 2367 TRUE decoy +chrUn_JTFH01001925v1_decoy 2366 TRUE decoy +chrUn_JTFH01001926v1_decoy 2362 TRUE decoy +chrUn_JTFH01001927v1_decoy 2361 TRUE decoy +chrUn_JTFH01001928v1_decoy 2353 TRUE decoy +chrUn_JTFH01001929v1_decoy 2349 TRUE decoy +chrUn_JTFH01001930v1_decoy 2348 TRUE decoy +chrUn_JTFH01001931v1_decoy 2340 TRUE decoy +chrUn_JTFH01001932v1_decoy 2339 TRUE decoy +chrUn_JTFH01001933v1_decoy 2336 TRUE decoy +chrUn_JTFH01001934v1_decoy 2333 TRUE decoy +chrUn_JTFH01001935v1_decoy 2330 TRUE decoy +chrUn_JTFH01001936v1_decoy 2327 TRUE decoy +chrUn_JTFH01001937v1_decoy 2318 TRUE decoy +chrUn_JTFH01001938v1_decoy 2293 TRUE decoy +chrUn_JTFH01001939v1_decoy 2292 TRUE decoy +chrUn_JTFH01001940v1_decoy 2287 TRUE decoy +chrUn_JTFH01001941v1_decoy 2274 TRUE decoy +chrUn_JTFH01001942v1_decoy 2274 TRUE decoy +chrUn_JTFH01001943v1_decoy 2267 TRUE decoy +chrUn_JTFH01001944v1_decoy 2260 TRUE decoy +chrUn_JTFH01001945v1_decoy 2257 TRUE decoy +chrUn_JTFH01001946v1_decoy 2240 TRUE decoy +chrUn_JTFH01001947v1_decoy 2239 TRUE decoy +chrUn_JTFH01001948v1_decoy 2232 TRUE decoy 
+chrUn_JTFH01001949v1_decoy 2230 TRUE decoy +chrUn_JTFH01001950v1_decoy 2230 TRUE decoy +chrUn_JTFH01001951v1_decoy 2222 TRUE decoy +chrUn_JTFH01001952v1_decoy 2216 TRUE decoy +chrUn_JTFH01001953v1_decoy 2214 TRUE decoy +chrUn_JTFH01001954v1_decoy 2210 TRUE decoy +chrUn_JTFH01001955v1_decoy 2203 TRUE decoy +chrUn_JTFH01001956v1_decoy 2197 TRUE decoy +chrUn_JTFH01001957v1_decoy 2196 TRUE decoy +chrUn_JTFH01001958v1_decoy 2196 TRUE decoy +chrUn_JTFH01001959v1_decoy 2179 TRUE decoy +chrUn_JTFH01001960v1_decoy 2178 TRUE decoy +chrUn_JTFH01001961v1_decoy 2178 TRUE decoy +chrUn_JTFH01001962v1_decoy 2172 TRUE decoy +chrUn_JTFH01001963v1_decoy 2170 TRUE decoy +chrUn_JTFH01001964v1_decoy 2167 TRUE decoy +chrUn_JTFH01001965v1_decoy 2167 TRUE decoy +chrUn_JTFH01001966v1_decoy 2157 TRUE decoy +chrUn_JTFH01001967v1_decoy 2153 TRUE decoy +chrUn_JTFH01001968v1_decoy 2151 TRUE decoy +chrUn_JTFH01001969v1_decoy 2147 TRUE decoy +chrUn_JTFH01001970v1_decoy 2145 TRUE decoy +chrUn_JTFH01001971v1_decoy 2142 TRUE decoy +chrUn_JTFH01001972v1_decoy 2142 TRUE decoy +chrUn_JTFH01001973v1_decoy 2136 TRUE decoy +chrUn_JTFH01001974v1_decoy 2130 TRUE decoy +chrUn_JTFH01001975v1_decoy 2128 TRUE decoy +chrUn_JTFH01001976v1_decoy 2126 TRUE decoy +chrUn_JTFH01001977v1_decoy 2126 TRUE decoy +chrUn_JTFH01001978v1_decoy 2119 TRUE decoy +chrUn_JTFH01001979v1_decoy 2107 TRUE decoy +chrUn_JTFH01001980v1_decoy 2091 TRUE decoy +chrUn_JTFH01001981v1_decoy 2087 TRUE decoy +chrUn_JTFH01001982v1_decoy 2086 TRUE decoy +chrUn_JTFH01001983v1_decoy 2083 TRUE decoy +chrUn_JTFH01001984v1_decoy 2075 TRUE decoy +chrUn_JTFH01001985v1_decoy 2075 TRUE decoy +chrUn_JTFH01001986v1_decoy 2072 TRUE decoy +chrUn_JTFH01001987v1_decoy 2068 TRUE decoy +chrUn_JTFH01001988v1_decoy 2067 TRUE decoy +chrUn_JTFH01001989v1_decoy 2055 TRUE decoy +chrUn_JTFH01001990v1_decoy 2051 TRUE decoy +chrUn_JTFH01001991v1_decoy 2050 TRUE decoy +chrUn_JTFH01001992v1_decoy 2033 TRUE decoy +chrUn_JTFH01001993v1_decoy 2024 TRUE decoy 
+chrUn_JTFH01001994v1_decoy 2016 TRUE decoy +chrUn_JTFH01001995v1_decoy 2011 TRUE decoy +chrUn_JTFH01001996v1_decoy 2009 TRUE decoy +chrUn_JTFH01001997v1_decoy 2003 TRUE decoy +chrUn_JTFH01001998v1_decoy 2001 TRUE decoy +HLA-A*01:01:01:01 3503 TRUE hla +HLA-A*01:01:01:02N 3291 TRUE hla +HLA-A*01:01:38L 3374 TRUE hla +HLA-A*01:02 3374 TRUE hla +HLA-A*01:03 3503 TRUE hla +HLA-A*01:04N 3136 TRUE hla +HLA-A*01:09 3105 TRUE hla +HLA-A*01:11N 3374 TRUE hla +HLA-A*01:14 3095 TRUE hla +HLA-A*01:16N 2985 TRUE hla +HLA-A*01:20 3105 TRUE hla +HLA-A*02:01:01:01 3517 TRUE hla +HLA-A*02:01:01:02L 3287 TRUE hla +HLA-A*02:01:01:03 3023 TRUE hla +HLA-A*02:01:01:04 3516 TRUE hla +HLA-A*02:02:01 2917 TRUE hla +HLA-A*02:03:01 3517 TRUE hla +HLA-A*02:03:03 3148 TRUE hla +HLA-A*02:05:01 3517 TRUE hla +HLA-A*02:06:01 3517 TRUE hla +HLA-A*02:07:01 3517 TRUE hla +HLA-A*02:10 3517 TRUE hla +HLA-A*02:251 3517 TRUE hla +HLA-A*02:259 2978 TRUE hla +HLA-A*02:264 3002 TRUE hla +HLA-A*02:265 3148 TRUE hla +HLA-A*02:266 3084 TRUE hla +HLA-A*02:269 3101 TRUE hla +HLA-A*02:279 3103 TRUE hla +HLA-A*02:32N 3517 TRUE hla +HLA-A*02:376 3104 TRUE hla +HLA-A*02:43N 3218 TRUE hla +HLA-A*02:455 3118 TRUE hla +HLA-A*02:48 3517 TRUE hla +HLA-A*02:51 3109 TRUE hla +HLA-A*02:533 3217 TRUE hla +HLA-A*02:53N 3305 TRUE hla +HLA-A*02:57 3054 TRUE hla +HLA-A*02:60:01 3112 TRUE hla +HLA-A*02:65 3387 TRUE hla +HLA-A*02:68 3109 TRUE hla +HLA-A*02:77 3371 TRUE hla +HLA-A*02:81 3309 TRUE hla +HLA-A*02:89 3371 TRUE hla +HLA-A*02:95 3388 TRUE hla +HLA-A*03:01:01:01 3502 TRUE hla +HLA-A*03:01:01:02N 3373 TRUE hla +HLA-A*03:01:01:03 3094 TRUE hla +HLA-A*03:02:01 3502 TRUE hla +HLA-A*03:11N 3404 TRUE hla +HLA-A*03:21N 3095 TRUE hla +HLA-A*03:36N 3142 TRUE hla +HLA-A*11:01:01 3503 TRUE hla +HLA-A*11:01:18 3503 TRUE hla +HLA-A*11:02:01 3503 TRUE hla +HLA-A*11:05 3373 TRUE hla +HLA-A*11:110 2903 TRUE hla +HLA-A*11:25 3073 TRUE hla +HLA-A*11:50Q 3362 TRUE hla +HLA-A*11:60 3241 TRUE hla +HLA-A*11:69N 3500 TRUE hla +HLA-A*11:74 
3227 TRUE hla +HLA-A*11:75 3184 TRUE hla +HLA-A*11:77 3233 TRUE hla +HLA-A*23:01:01 3502 TRUE hla +HLA-A*23:09 3104 TRUE hla +HLA-A*23:38N 3020 TRUE hla +HLA-A*24:02:01:01 3502 TRUE hla +HLA-A*24:02:01:02L 3502 TRUE hla +HLA-A*24:02:01:03 3075 TRUE hla +HLA-A*24:02:03Q 3247 TRUE hla +HLA-A*24:02:10 3356 TRUE hla +HLA-A*24:03:01 3502 TRUE hla +HLA-A*24:07:01 3502 TRUE hla +HLA-A*24:08 3502 TRUE hla +HLA-A*24:09N 3502 TRUE hla +HLA-A*24:10:01 3502 TRUE hla +HLA-A*24:11N 3503 TRUE hla +HLA-A*24:152 3176 TRUE hla +HLA-A*24:20 3502 TRUE hla +HLA-A*24:215 3116 TRUE hla +HLA-A*24:61 3043 TRUE hla +HLA-A*24:86N 3415 TRUE hla +HLA-A*25:01:01 2917 TRUE hla +HLA-A*26:01:01 3517 TRUE hla +HLA-A*26:11N 3091 TRUE hla +HLA-A*26:15 3217 TRUE hla +HLA-A*26:50 3141 TRUE hla +HLA-A*29:01:01:01 3518 TRUE hla +HLA-A*29:01:01:02N 3303 TRUE hla +HLA-A*29:02:01:01 3518 TRUE hla +HLA-A*29:02:01:02 3518 TRUE hla +HLA-A*29:46 3310 TRUE hla +HLA-A*30:01:01 3503 TRUE hla +HLA-A*30:02:01:01 2903 TRUE hla +HLA-A*30:02:01:02 3374 TRUE hla +HLA-A*30:04:01 3503 TRUE hla +HLA-A*30:89 2903 TRUE hla +HLA-A*31:01:02 3518 TRUE hla +HLA-A*31:01:23 2918 TRUE hla +HLA-A*31:04 2918 TRUE hla +HLA-A*31:14N 3090 TRUE hla +HLA-A*31:46 3075 TRUE hla +HLA-A*32:01:01 3518 TRUE hla +HLA-A*32:06 3389 TRUE hla +HLA-A*33:01:01 3518 TRUE hla +HLA-A*33:03:01 3518 TRUE hla +HLA-A*33:07 3389 TRUE hla +HLA-A*34:01:01 3517 TRUE hla +HLA-A*34:02:01 3096 TRUE hla +HLA-A*36:01 2903 TRUE hla +HLA-A*43:01 3388 TRUE hla +HLA-A*66:01:01 3517 TRUE hla +HLA-A*66:17 3075 TRUE hla +HLA-A*68:01:01:01 2930 TRUE hla +HLA-A*68:01:01:02 3517 TRUE hla +HLA-A*68:01:02:01 3517 TRUE hla +HLA-A*68:01:02:02 3388 TRUE hla +HLA-A*68:02:01:01 3517 TRUE hla +HLA-A*68:02:01:02 3506 TRUE hla +HLA-A*68:02:01:03 2909 TRUE hla +HLA-A*68:02:02 2916 TRUE hla +HLA-A*68:03:01 2917 TRUE hla +HLA-A*68:08:01 3120 TRUE hla +HLA-A*68:113 3070 TRUE hla +HLA-A*68:17 3134 TRUE hla +HLA-A*68:18N 3237 TRUE hla +HLA-A*68:22 3119 TRUE hla +HLA-A*68:71 3198 TRUE hla 
+HLA-A*69:01 2917 TRUE hla +HLA-A*74:01 2918 TRUE hla +HLA-A*74:02:01:01 2918 TRUE hla +HLA-A*74:02:01:02 3518 TRUE hla +HLA-A*80:01:01:01 3263 TRUE hla +HLA-A*80:01:01:02 3055 TRUE hla +HLA-B*07:02:01 3323 TRUE hla +HLA-B*07:05:01 2676 TRUE hla +HLA-B*07:06 2676 TRUE hla +HLA-B*07:156 2967 TRUE hla +HLA-B*07:33:01 3239 TRUE hla +HLA-B*07:41 3266 TRUE hla +HLA-B*07:44 3270 TRUE hla +HLA-B*07:50 3323 TRUE hla +HLA-B*08:01:01 3322 TRUE hla +HLA-B*08:08N 3035 TRUE hla +HLA-B*08:132 2675 TRUE hla +HLA-B*08:134 2959 TRUE hla +HLA-B*08:19N 3322 TRUE hla +HLA-B*08:20 3322 TRUE hla +HLA-B*08:33 3322 TRUE hla +HLA-B*08:79 2676 TRUE hla +HLA-B*13:01:01 3324 TRUE hla +HLA-B*13:02:01 3324 TRUE hla +HLA-B*13:02:03 3323 TRUE hla +HLA-B*13:02:09 2919 TRUE hla +HLA-B*13:08 3324 TRUE hla +HLA-B*13:15 3323 TRUE hla +HLA-B*13:25 2689 TRUE hla +HLA-B*14:01:01 3312 TRUE hla +HLA-B*14:02:01 3312 TRUE hla +HLA-B*14:07N 3255 TRUE hla +HLA-B*15:01:01:01 3336 TRUE hla +HLA-B*15:01:01:02N 1208 TRUE hla +HLA-B*15:01:01:03 3026 TRUE hla +HLA-B*15:02:01 3335 TRUE hla +HLA-B*15:03:01 2689 TRUE hla +HLA-B*15:04:01 3052 TRUE hla +HLA-B*15:07:01 3336 TRUE hla +HLA-B*15:108 3283 TRUE hla +HLA-B*15:10:01 2689 TRUE hla +HLA-B*15:11:01 3336 TRUE hla +HLA-B*15:13:01 2688 TRUE hla +HLA-B*15:16:01 2688 TRUE hla +HLA-B*15:17:01:01 3051 TRUE hla +HLA-B*15:17:01:02 3051 TRUE hla +HLA-B*15:18:01 3336 TRUE hla +HLA-B*15:220 2878 TRUE hla +HLA-B*15:25:01 3335 TRUE hla +HLA-B*15:27:01 2689 TRUE hla +HLA-B*15:32:01 3336 TRUE hla +HLA-B*15:42 3333 TRUE hla +HLA-B*15:58 3336 TRUE hla +HLA-B*15:66 2902 TRUE hla +HLA-B*15:77 3336 TRUE hla +HLA-B*15:83 3337 TRUE hla +HLA-B*18:01:01:01 3323 TRUE hla +HLA-B*18:01:01:02 3323 TRUE hla +HLA-B*18:02 2686 TRUE hla +HLA-B*18:03 3323 TRUE hla +HLA-B*18:17N 2979 TRUE hla +HLA-B*18:26 3323 TRUE hla +HLA-B*18:94N 2970 TRUE hla +HLA-B*27:04:01 3325 TRUE hla +HLA-B*27:05:02 3325 TRUE hla +HLA-B*27:05:18 3321 TRUE hla +HLA-B*27:06 3325 TRUE hla +HLA-B*27:07:01 2677 TRUE hla 
+HLA-B*27:131 3325 TRUE hla +HLA-B*27:24 2677 TRUE hla +HLA-B*27:25 2677 TRUE hla +HLA-B*27:32 3325 TRUE hla +HLA-B*35:01:01:01 3327 TRUE hla +HLA-B*35:01:01:02 3327 TRUE hla +HLA-B*35:01:22 2806 TRUE hla +HLA-B*35:02:01 3327 TRUE hla +HLA-B*35:03:01 2689 TRUE hla +HLA-B*35:05:01 2690 TRUE hla +HLA-B*35:08:01 2689 TRUE hla +HLA-B*35:14:02 3327 TRUE hla +HLA-B*35:241 3042 TRUE hla +HLA-B*35:41 3327 TRUE hla +HLA-B*37:01:01 3324 TRUE hla +HLA-B*37:01:05 2687 TRUE hla +HLA-B*38:01:01 3312 TRUE hla +HLA-B*38:02:01 3312 TRUE hla +HLA-B*38:14 2738 TRUE hla +HLA-B*39:01:01:01 3155 TRUE hla +HLA-B*39:01:01:02L 3153 TRUE hla +HLA-B*39:01:01:03 3312 TRUE hla +HLA-B*39:01:03 3155 TRUE hla +HLA-B*39:01:16 3155 TRUE hla +HLA-B*39:01:21 3312 TRUE hla +HLA-B*39:05:01 2675 TRUE hla +HLA-B*39:06:02 2674 TRUE hla +HLA-B*39:10:01 3027 TRUE hla +HLA-B*39:13:02 3255 TRUE hla +HLA-B*39:14 2765 TRUE hla +HLA-B*39:34 3254 TRUE hla +HLA-B*39:38Q 2675 TRUE hla +HLA-B*40:01:01 2676 TRUE hla +HLA-B*40:01:02 3323 TRUE hla +HLA-B*40:02:01 3258 TRUE hla +HLA-B*40:03 2677 TRUE hla +HLA-B*40:06:01:01 3325 TRUE hla +HLA-B*40:06:01:02 3299 TRUE hla +HLA-B*40:10:01 3304 TRUE hla +HLA-B*40:150 2800 TRUE hla +HLA-B*40:40 2677 TRUE hla +HLA-B*40:72:01 3283 TRUE hla +HLA-B*40:79 3257 TRUE hla +HLA-B*41:01:01 3322 TRUE hla +HLA-B*41:02:01 3322 TRUE hla +HLA-B*42:01:01 3322 TRUE hla +HLA-B*42:02 2675 TRUE hla +HLA-B*42:08 3165 TRUE hla +HLA-B*44:02:01:01 3323 TRUE hla +HLA-B*44:02:01:02S 3152 TRUE hla +HLA-B*44:02:01:03 3152 TRUE hla +HLA-B*44:02:17 3323 TRUE hla +HLA-B*44:02:27 2872 TRUE hla +HLA-B*44:03:01 3323 TRUE hla +HLA-B*44:03:02 2676 TRUE hla +HLA-B*44:04 3239 TRUE hla +HLA-B*44:09 3317 TRUE hla +HLA-B*44:138Q 3043 TRUE hla +HLA-B*44:150 2676 TRUE hla +HLA-B*44:23N 3323 TRUE hla +HLA-B*44:26 2804 TRUE hla +HLA-B*44:46 3323 TRUE hla +HLA-B*44:49 3039 TRUE hla +HLA-B*44:56N 2676 TRUE hla +HLA-B*45:01:01 3338 TRUE hla +HLA-B*45:04 3339 TRUE hla +HLA-B*46:01:01 3336 TRUE hla +HLA-B*46:01:05 2891 TRUE 
hla +HLA-B*47:01:01:01 3041 TRUE hla +HLA-B*47:01:01:02 3041 TRUE hla +HLA-B*48:01:01 3323 TRUE hla +HLA-B*48:03:01 2676 TRUE hla +HLA-B*48:04 2676 TRUE hla +HLA-B*48:08 3323 TRUE hla +HLA-B*49:01:01 3340 TRUE hla +HLA-B*49:32 3340 TRUE hla +HLA-B*50:01:01 3340 TRUE hla +HLA-B*51:01:01 3327 TRUE hla +HLA-B*51:01:02 3043 TRUE hla +HLA-B*51:02:01 3327 TRUE hla +HLA-B*51:07:01 3327 TRUE hla +HLA-B*51:42 2962 TRUE hla +HLA-B*52:01:01:01 3327 TRUE hla +HLA-B*52:01:01:02 3327 TRUE hla +HLA-B*52:01:01:03 3327 TRUE hla +HLA-B*52:01:02 3327 TRUE hla +HLA-B*53:01:01 3327 TRUE hla +HLA-B*53:11 3274 TRUE hla +HLA-B*54:01:01 3332 TRUE hla +HLA-B*54:18 2813 TRUE hla +HLA-B*55:01:01 3332 TRUE hla +HLA-B*55:01:03 3332 TRUE hla +HLA-B*55:02:01 3333 TRUE hla +HLA-B*55:12 3332 TRUE hla +HLA-B*55:24 3332 TRUE hla +HLA-B*55:48 2980 TRUE hla +HLA-B*56:01:01 2688 TRUE hla +HLA-B*56:03 2688 TRUE hla +HLA-B*56:04 2688 TRUE hla +HLA-B*57:01:01 3337 TRUE hla +HLA-B*57:03:01 2689 TRUE hla +HLA-B*57:06 3284 TRUE hla +HLA-B*57:11 3306 TRUE hla +HLA-B*57:29 3337 TRUE hla +HLA-B*58:01:01 3336 TRUE hla +HLA-B*58:31N 3004 TRUE hla +HLA-B*59:01:01:01 3333 TRUE hla +HLA-B*59:01:01:02 3332 TRUE hla +HLA-B*67:01:01 3312 TRUE hla +HLA-B*67:01:02 2675 TRUE hla +HLA-B*67:02 3307 TRUE hla +HLA-B*73:01 3323 TRUE hla +HLA-B*78:01:01 3327 TRUE hla +HLA-B*81:01 2676 TRUE hla +HLA-B*82:02:01 3050 TRUE hla +HLA-C*01:02:01 3349 TRUE hla +HLA-C*01:02:11 3057 TRUE hla +HLA-C*01:02:29 3349 TRUE hla +HLA-C*01:02:30 3333 TRUE hla +HLA-C*01:03 3349 TRUE hla +HLA-C*01:06 2895 TRUE hla +HLA-C*01:08 3349 TRUE hla +HLA-C*01:14 2895 TRUE hla +HLA-C*01:21 2895 TRUE hla +HLA-C*01:30 3349 TRUE hla +HLA-C*01:40 2968 TRUE hla +HLA-C*02:02:02:01 3347 TRUE hla +HLA-C*02:02:02:02 3347 TRUE hla +HLA-C*02:10 2893 TRUE hla +HLA-C*02:11 3320 TRUE hla +HLA-C*02:16:02 3029 TRUE hla +HLA-C*02:69 2933 TRUE hla +HLA-C*02:85 3347 TRUE hla +HLA-C*02:86 3347 TRUE hla +HLA-C*02:87 3064 TRUE hla +HLA-C*03:02:01 2894 TRUE hla +HLA-C*03:02:02:01 
3348 TRUE hla +HLA-C*03:02:02:02 2896 TRUE hla +HLA-C*03:02:02:03 3348 TRUE hla +HLA-C*03:03:01 3348 TRUE hla +HLA-C*03:04:01:01 3348 TRUE hla +HLA-C*03:04:01:02 3348 TRUE hla +HLA-C*03:04:02 2877 TRUE hla +HLA-C*03:04:04 2966 TRUE hla +HLA-C*03:05 2894 TRUE hla +HLA-C*03:06 2894 TRUE hla +HLA-C*03:100 3034 TRUE hla +HLA-C*03:13:01 3065 TRUE hla +HLA-C*03:20N 3321 TRUE hla +HLA-C*03:219 3070 TRUE hla +HLA-C*03:261 3348 TRUE hla +HLA-C*03:40:01 2894 TRUE hla +HLA-C*03:41:02 3328 TRUE hla +HLA-C*03:46 2997 TRUE hla +HLA-C*03:61 2894 TRUE hla +HLA-C*04:01:01:01 3349 TRUE hla +HLA-C*04:01:01:02 3349 TRUE hla +HLA-C*04:01:01:03 3349 TRUE hla +HLA-C*04:01:01:04 3012 TRUE hla +HLA-C*04:01:01:05 2931 TRUE hla +HLA-C*04:01:62 3329 TRUE hla +HLA-C*04:03:01 3349 TRUE hla +HLA-C*04:06 3349 TRUE hla +HLA-C*04:09N 2991 TRUE hla +HLA-C*04:128 3086 TRUE hla +HLA-C*04:161 3237 TRUE hla +HLA-C*04:177 3349 TRUE hla +HLA-C*04:70 3058 TRUE hla +HLA-C*04:71 3086 TRUE hla +HLA-C*05:01:01:01 3349 TRUE hla +HLA-C*05:01:01:02 3349 TRUE hla +HLA-C*05:08 3059 TRUE hla +HLA-C*05:09:01 3322 TRUE hla +HLA-C*05:93 2946 TRUE hla +HLA-C*06:02:01:01 3349 TRUE hla +HLA-C*06:02:01:02 3349 TRUE hla +HLA-C*06:02:01:03 3349 TRUE hla +HLA-C*06:23 3349 TRUE hla +HLA-C*06:24 3349 TRUE hla +HLA-C*06:46N 2987 TRUE hla +HLA-C*07:01:01:01 3354 TRUE hla +HLA-C*07:01:01:02 3093 TRUE hla +HLA-C*07:01:02 3352 TRUE hla +HLA-C*07:01:19 3354 TRUE hla +HLA-C*07:01:27 3195 TRUE hla +HLA-C*07:01:45 3354 TRUE hla +HLA-C*07:02:01:01 3354 TRUE hla +HLA-C*07:02:01:02 3074 TRUE hla +HLA-C*07:02:01:03 3354 TRUE hla +HLA-C*07:02:01:04 3353 TRUE hla +HLA-C*07:02:01:05 3354 TRUE hla +HLA-C*07:02:05 2903 TRUE hla +HLA-C*07:02:06 3354 TRUE hla +HLA-C*07:02:64 3354 TRUE hla +HLA-C*07:04:01 3354 TRUE hla +HLA-C*07:04:02 3343 TRUE hla +HLA-C*07:06 3354 TRUE hla +HLA-C*07:149 3098 TRUE hla +HLA-C*07:18 3353 TRUE hla +HLA-C*07:19 3222 TRUE hla +HLA-C*07:26 3069 TRUE hla +HLA-C*07:30 2903 TRUE hla +HLA-C*07:32N 3334 TRUE hla 
+HLA-C*07:384 3349 TRUE hla +HLA-C*07:385 3354 TRUE hla +HLA-C*07:386 3183 TRUE hla +HLA-C*07:391 3354 TRUE hla +HLA-C*07:392 3354 TRUE hla +HLA-C*07:49 2935 TRUE hla +HLA-C*07:56:02 3354 TRUE hla +HLA-C*07:66 3354 TRUE hla +HLA-C*07:67 3354 TRUE hla +HLA-C*08:01:01 3349 TRUE hla +HLA-C*08:01:03 2998 TRUE hla +HLA-C*08:02:01:01 3349 TRUE hla +HLA-C*08:02:01:02 3349 TRUE hla +HLA-C*08:03:01 3349 TRUE hla +HLA-C*08:04:01 2895 TRUE hla +HLA-C*08:112 3178 TRUE hla +HLA-C*08:20 3349 TRUE hla +HLA-C*08:21 3349 TRUE hla +HLA-C*08:22 3349 TRUE hla +HLA-C*08:24 2895 TRUE hla +HLA-C*08:27 3349 TRUE hla +HLA-C*08:36N 3097 TRUE hla +HLA-C*08:40 2978 TRUE hla +HLA-C*08:41 3019 TRUE hla +HLA-C*08:62 3086 TRUE hla +HLA-C*12:02:02 3349 TRUE hla +HLA-C*12:03:01:01 3349 TRUE hla +HLA-C*12:03:01:02 3348 TRUE hla +HLA-C*12:08 3066 TRUE hla +HLA-C*12:13 3058 TRUE hla +HLA-C*12:19 3349 TRUE hla +HLA-C*12:22 2895 TRUE hla +HLA-C*12:99 3349 TRUE hla +HLA-C*14:02:01 3349 TRUE hla +HLA-C*14:03 3349 TRUE hla +HLA-C*14:21N 3099 TRUE hla +HLA-C*14:23 2976 TRUE hla +HLA-C*15:02:01 3349 TRUE hla +HLA-C*15:05:01 3349 TRUE hla +HLA-C*15:05:02 3349 TRUE hla +HLA-C*15:13 2895 TRUE hla +HLA-C*15:16 3066 TRUE hla +HLA-C*15:17 3349 TRUE hla +HLA-C*15:96Q 3349 TRUE hla +HLA-C*16:01:01 3349 TRUE hla +HLA-C*16:02:01 2895 TRUE hla +HLA-C*16:04:01 3349 TRUE hla +HLA-C*17:01:01:01 3368 TRUE hla +HLA-C*17:01:01:02 3368 TRUE hla +HLA-C*17:01:01:03 3368 TRUE hla +HLA-C*17:03 3197 TRUE hla +HLA-C*18:01 3346 TRUE hla +HLA-DQA1*01:01:02 6489 TRUE hla +HLA-DQA1*01:02:01:01 6484 TRUE hla +HLA-DQA1*01:02:01:02 6485 TRUE hla +HLA-DQA1*01:02:01:03 6485 TRUE hla +HLA-DQA1*01:02:01:04 6492 TRUE hla +HLA-DQA1*01:03:01:01 6485 TRUE hla +HLA-DQA1*01:03:01:02 6492 TRUE hla +HLA-DQA1*01:04:01:01 6484 TRUE hla +HLA-DQA1*01:04:01:02 6485 TRUE hla +HLA-DQA1*01:05:01 6485 TRUE hla +HLA-DQA1*01:07 5959 TRUE hla +HLA-DQA1*01:10 5790 TRUE hla +HLA-DQA1*01:11 5926 TRUE hla +HLA-DQA1*02:01 6403 TRUE hla +HLA-DQA1*03:01:01 6437 TRUE 
hla +HLA-DQA1*03:02 6437 TRUE hla +HLA-DQA1*03:03:01 6437 TRUE hla +HLA-DQA1*04:01:02:01 5853 TRUE hla +HLA-DQA1*04:01:02:02 5666 TRUE hla +HLA-DQA1*04:02 6210 TRUE hla +HLA-DQA1*05:01:01:01 5806 TRUE hla +HLA-DQA1*05:01:01:02 6529 TRUE hla +HLA-DQA1*05:03 6121 TRUE hla +HLA-DQA1*05:05:01:01 6593 TRUE hla +HLA-DQA1*05:05:01:02 6597 TRUE hla +HLA-DQA1*05:05:01:03 6393 TRUE hla +HLA-DQA1*05:11 6589 TRUE hla +HLA-DQA1*06:01:01 5878 TRUE hla +HLA-DQB1*02:01:01 7480 TRUE hla +HLA-DQB1*02:02:01 7471 TRUE hla +HLA-DQB1*03:01:01:01 7231 TRUE hla +HLA-DQB1*03:01:01:02 7230 TRUE hla +HLA-DQB1*03:01:01:03 7231 TRUE hla +HLA-DQB1*03:02:01 7126 TRUE hla +HLA-DQB1*03:03:02:01 7126 TRUE hla +HLA-DQB1*03:03:02:02 7126 TRUE hla +HLA-DQB1*03:03:02:03 6800 TRUE hla +HLA-DQB1*03:05:01 6934 TRUE hla +HLA-DQB1*05:01:01:01 7090 TRUE hla +HLA-DQB1*05:01:01:02 7090 TRUE hla +HLA-DQB1*05:03:01:01 7089 TRUE hla +HLA-DQB1*05:03:01:02 7089 TRUE hla +HLA-DQB1*06:01:01 7111 TRUE hla +HLA-DQB1*06:02:01 7102 TRUE hla +HLA-DQB1*06:03:01 7103 TRUE hla +HLA-DQB1*06:09:01 7102 TRUE hla +HLA-DRB1*01:01:01 10741 TRUE hla +HLA-DRB1*01:02:01 11229 TRUE hla +HLA-DRB1*03:01:01:01 13908 TRUE hla +HLA-DRB1*03:01:01:02 13426 TRUE hla +HLA-DRB1*04:03:01 15246 TRUE hla +HLA-DRB1*07:01:01:01 16110 TRUE hla +HLA-DRB1*07:01:01:02 16120 TRUE hla +HLA-DRB1*08:03:02 13562 TRUE hla +HLA-DRB1*09:21 16039 TRUE hla +HLA-DRB1*10:01:01 13501 TRUE hla +HLA-DRB1*11:01:01 13921 TRUE hla +HLA-DRB1*11:01:02 13931 TRUE hla +HLA-DRB1*11:04:01 13919 TRUE hla +HLA-DRB1*12:01:01 13404 TRUE hla +HLA-DRB1*12:17 11260 TRUE hla +HLA-DRB1*13:01:01 13935 TRUE hla +HLA-DRB1*13:02:01 13941 TRUE hla +HLA-DRB1*14:05:01 13933 TRUE hla +HLA-DRB1*14:54:01 13936 TRUE hla +HLA-DRB1*15:01:01:01 11080 TRUE hla +HLA-DRB1*15:01:01:02 11571 TRUE hla +HLA-DRB1*15:01:01:03 11056 TRUE hla +HLA-DRB1*15:01:01:04 11056 TRUE hla +HLA-DRB1*15:02:01 10313 TRUE hla +HLA-DRB1*15:03:01:01 11567 TRUE hla +HLA-DRB1*15:03:01:02 11569 TRUE hla +HLA-DRB1*16:02:01 11005 
TRUE hla +phix 5386 FALSE technical diff --git a/resources/hg38_test.tsv b/resources/hg38_test.tsv new file mode 100644 index 0000000..0ac87a4 --- /dev/null +++ b/resources/hg38_test.tsv @@ -0,0 +1,3368 @@ +chromosome size compressedMref category +chr1 248956422 TRUE autosome +chr2 242193529 TRUE autosome +chr3 198295559 TRUE autosome +chr4 190214555 TRUE autosome +chr5 181538259 TRUE autosome +chr6 170805979 TRUE autosome +chr7 159345973 TRUE autosome +chr8 145138636 TRUE autosome +chr9 138394717 TRUE autosome +chr10 133797422 TRUE autosome +chr11 135086622 TRUE autosome +chr12 133275309 TRUE autosome +chr13 114364328 TRUE autosome +chr14 107043718 TRUE autosome +chr15 101991189 TRUE autosome +chr16 90338345 TRUE autosome +chr17 83257441 TRUE autosome +chr18 80373285 TRUE autosome +chr19 58617616 TRUE autosome +chr20 64444167 TRUE autosome +chr21 46709983 TRUE autosome +chr22 50818468 TRUE autosome +chrX 156040895 TRUE x +chrY 57227415 TRUE y +chrM 16569 FALSE extrachromosomal +chr1_KI270706v1_random 175055 TRUE unassigned +chr1_KI270707v1_random 32032 TRUE unassigned +chr1_KI270708v1_random 127682 TRUE unassigned +chr1_KI270709v1_random 66860 TRUE unassigned +chr1_KI270710v1_random 40176 TRUE unassigned +chr1_KI270711v1_random 42210 TRUE unassigned +chr1_KI270712v1_random 176043 TRUE unassigned +chr1_KI270713v1_random 40745 TRUE unassigned +chr1_KI270714v1_random 41717 TRUE unassigned +chr2_KI270715v1_random 161471 TRUE unassigned +chr2_KI270716v1_random 153799 TRUE unassigned +chr3_GL000221v1_random 155397 TRUE unassigned +chr4_GL000008v2_random 209709 TRUE unassigned +chr5_GL000208v1_random 92689 TRUE unassigned +chr9_KI270717v1_random 40062 TRUE unassigned +chr9_KI270718v1_random 38054 TRUE unassigned +chr9_KI270719v1_random 176845 TRUE unassigned +chr9_KI270720v1_random 39050 TRUE unassigned +chr11_KI270721v1_random 100316 TRUE unassigned +chr14_GL000009v2_random 201709 TRUE unassigned +chr14_GL000225v1_random 211173 TRUE unassigned +chr14_KI270722v1_random 
194050 TRUE unassigned +chr14_GL000194v1_random 191469 TRUE unassigned +chr14_KI270723v1_random 38115 TRUE unassigned +chr14_KI270724v1_random 39555 TRUE unassigned +chr14_KI270725v1_random 172810 TRUE unassigned +chr14_KI270726v1_random 43739 TRUE unassigned +chr15_KI270727v1_random 448248 TRUE unassigned +chr16_KI270728v1_random 1872759 TRUE unassigned +chr17_GL000205v2_random 185591 TRUE unassigned +chr17_KI270729v1_random 280839 TRUE unassigned +chr17_KI270730v1_random 112551 TRUE unassigned +chr22_KI270731v1_random 150754 TRUE unassigned +chr22_KI270732v1_random 41543 TRUE unassigned +chr22_KI270733v1_random 179772 TRUE unassigned +chr22_KI270734v1_random 165050 TRUE unassigned +chr22_KI270735v1_random 42811 TRUE unassigned +chr22_KI270736v1_random 181920 TRUE unassigned +chr22_KI270737v1_random 103838 TRUE unassigned +chr22_KI270738v1_random 99375 TRUE unassigned +chr22_KI270739v1_random 73985 TRUE unassigned +chrY_KI270740v1_random 37240 TRUE unassigned +chrUn_KI270302v1 2274 TRUE unassigned +chrUn_KI270304v1 2165 TRUE unassigned +chrUn_KI270303v1 1942 TRUE unassigned +chrUn_KI270305v1 1472 TRUE unassigned +chrUn_KI270322v1 21476 TRUE unassigned +chrUn_KI270320v1 4416 TRUE unassigned +chrUn_KI270310v1 1201 TRUE unassigned +chrUn_KI270316v1 1444 TRUE unassigned +chrUn_KI270315v1 2276 TRUE unassigned +chrUn_KI270312v1 998 TRUE unassigned +chrUn_KI270311v1 12399 TRUE unassigned +chrUn_KI270317v1 37690 TRUE unassigned +chrUn_KI270412v1 1179 TRUE unassigned +chrUn_KI270411v1 2646 TRUE unassigned +chrUn_KI270414v1 2489 TRUE unassigned +chrUn_KI270419v1 1029 TRUE unassigned +chrUn_KI270418v1 2145 TRUE unassigned +chrUn_KI270420v1 2321 TRUE unassigned +chrUn_KI270424v1 2140 TRUE unassigned +chrUn_KI270417v1 2043 TRUE unassigned +chrUn_KI270422v1 1445 TRUE unassigned +chrUn_KI270423v1 981 TRUE unassigned +chrUn_KI270425v1 1884 TRUE unassigned +chrUn_KI270429v1 1361 TRUE unassigned +chrUn_KI270442v1 392061 TRUE unassigned +chrUn_KI270466v1 1233 TRUE unassigned 
+chrUn_KI270465v1 1774 TRUE unassigned +chrUn_KI270467v1 3920 TRUE unassigned +chrUn_KI270435v1 92983 TRUE unassigned +chrUn_KI270438v1 112505 TRUE unassigned +chrUn_KI270468v1 4055 TRUE unassigned +chrUn_KI270510v1 2415 TRUE unassigned +chrUn_KI270509v1 2318 TRUE unassigned +chrUn_KI270518v1 2186 TRUE unassigned +chrUn_KI270508v1 1951 TRUE unassigned +chrUn_KI270516v1 1300 TRUE unassigned +chrUn_KI270512v1 22689 TRUE unassigned +chrUn_KI270519v1 138126 TRUE unassigned +chrUn_KI270522v1 5674 TRUE unassigned +chrUn_KI270511v1 8127 TRUE unassigned +chrUn_KI270515v1 6361 TRUE unassigned +chrUn_KI270507v1 5353 TRUE unassigned +chrUn_KI270517v1 3253 TRUE unassigned +chrUn_KI270529v1 1899 TRUE unassigned +chrUn_KI270528v1 2983 TRUE unassigned +chrUn_KI270530v1 2168 TRUE unassigned +chrUn_KI270539v1 993 TRUE unassigned +chrUn_KI270538v1 91309 TRUE unassigned +chrUn_KI270544v1 1202 TRUE unassigned +chrUn_KI270548v1 1599 TRUE unassigned +chrUn_KI270583v1 1400 TRUE unassigned +chrUn_KI270587v1 2969 TRUE unassigned +chrUn_KI270580v1 1553 TRUE unassigned +chrUn_KI270581v1 7046 TRUE unassigned +chrUn_KI270579v1 31033 TRUE unassigned +chrUn_KI270589v1 44474 TRUE unassigned +chrUn_KI270590v1 4685 TRUE unassigned +chrUn_KI270584v1 4513 TRUE unassigned +chrUn_KI270582v1 6504 TRUE unassigned +chrUn_KI270588v1 6158 TRUE unassigned +chrUn_KI270593v1 3041 TRUE unassigned +chrUn_KI270591v1 5796 TRUE unassigned +chrUn_KI270330v1 1652 TRUE unassigned +chrUn_KI270329v1 1040 TRUE unassigned +chrUn_KI270334v1 1368 TRUE unassigned +chrUn_KI270333v1 2699 TRUE unassigned +chrUn_KI270335v1 1048 TRUE unassigned +chrUn_KI270338v1 1428 TRUE unassigned +chrUn_KI270340v1 1428 TRUE unassigned +chrUn_KI270336v1 1026 TRUE unassigned +chrUn_KI270337v1 1121 TRUE unassigned +chrUn_KI270363v1 1803 TRUE unassigned +chrUn_KI270364v1 2855 TRUE unassigned +chrUn_KI270362v1 3530 TRUE unassigned +chrUn_KI270366v1 8320 TRUE unassigned +chrUn_KI270378v1 1048 TRUE unassigned +chrUn_KI270379v1 1045 TRUE unassigned 
+chrUn_KI270389v1 1298 TRUE unassigned +chrUn_KI270390v1 2387 TRUE unassigned +chrUn_KI270387v1 1537 TRUE unassigned +chrUn_KI270395v1 1143 TRUE unassigned +chrUn_KI270396v1 1880 TRUE unassigned +chrUn_KI270388v1 1216 TRUE unassigned +chrUn_KI270394v1 970 TRUE unassigned +chrUn_KI270386v1 1788 TRUE unassigned +chrUn_KI270391v1 1484 TRUE unassigned +chrUn_KI270383v1 1750 TRUE unassigned +chrUn_KI270393v1 1308 TRUE unassigned +chrUn_KI270384v1 1658 TRUE unassigned +chrUn_KI270392v1 971 TRUE unassigned +chrUn_KI270381v1 1930 TRUE unassigned +chrUn_KI270385v1 990 TRUE unassigned +chrUn_KI270382v1 4215 TRUE unassigned +chrUn_KI270376v1 1136 TRUE unassigned +chrUn_KI270374v1 2656 TRUE unassigned +chrUn_KI270372v1 1650 TRUE unassigned +chrUn_KI270373v1 1451 TRUE unassigned +chrUn_KI270375v1 2378 TRUE unassigned +chrUn_KI270371v1 2805 TRUE unassigned +chrUn_KI270448v1 7992 TRUE unassigned +chrUn_KI270521v1 7642 TRUE unassigned +chrUn_GL000195v1 182896 TRUE unassigned +chrUn_GL000219v1 179198 TRUE unassigned +chrUn_GL000220v1 161802 TRUE unassigned +chrUn_GL000224v1 179693 TRUE unassigned +chrUn_KI270741v1 157432 TRUE unassigned +chrUn_GL000226v1 15008 TRUE unassigned +chrUn_GL000213v1 164239 TRUE unassigned +chrUn_KI270743v1 210658 TRUE unassigned +chrUn_KI270744v1 168472 TRUE unassigned +chrUn_KI270745v1 41891 TRUE unassigned +chrUn_KI270746v1 66486 TRUE unassigned +chrUn_KI270747v1 198735 TRUE unassigned +chrUn_KI270748v1 93321 TRUE unassigned +chrUn_KI270749v1 158759 TRUE unassigned +chrUn_KI270750v1 148850 TRUE unassigned +chrUn_KI270751v1 150742 TRUE unassigned +chrUn_KI270752v1 27745 TRUE unassigned +chrUn_KI270753v1 62944 TRUE unassigned +chrUn_KI270754v1 40191 TRUE unassigned +chrUn_KI270755v1 36723 TRUE unassigned +chrUn_KI270756v1 79590 TRUE unassigned +chrUn_KI270757v1 71251 TRUE unassigned +chrUn_GL000214v1 137718 TRUE unassigned +chrUn_KI270742v1 186739 TRUE unassigned +chrUn_GL000216v2 176608 TRUE unassigned +chrUn_GL000218v1 161147 TRUE unassigned 
+chr1_KI270762v1_alt 354444 TRUE alt +chr1_KI270766v1_alt 256271 TRUE alt +chr1_KI270760v1_alt 109528 TRUE alt +chr1_KI270765v1_alt 185285 TRUE alt +chr1_GL383518v1_alt 182439 TRUE alt +chr1_GL383519v1_alt 110268 TRUE alt +chr1_GL383520v2_alt 366580 TRUE alt +chr1_KI270764v1_alt 50258 TRUE alt +chr1_KI270763v1_alt 911658 TRUE alt +chr1_KI270759v1_alt 425601 TRUE alt +chr1_KI270761v1_alt 165834 TRUE alt +chr2_KI270770v1_alt 136240 TRUE alt +chr2_KI270773v1_alt 70887 TRUE alt +chr2_KI270774v1_alt 223625 TRUE alt +chr2_KI270769v1_alt 120616 TRUE alt +chr2_GL383521v1_alt 143390 TRUE alt +chr2_KI270772v1_alt 133041 TRUE alt +chr2_KI270775v1_alt 138019 TRUE alt +chr2_KI270771v1_alt 110395 TRUE alt +chr2_KI270768v1_alt 110099 TRUE alt +chr2_GL582966v2_alt 96131 TRUE alt +chr2_GL383522v1_alt 123821 TRUE alt +chr2_KI270776v1_alt 174166 TRUE alt +chr2_KI270767v1_alt 161578 TRUE alt +chr3_JH636055v2_alt 173151 TRUE alt +chr3_KI270783v1_alt 109187 TRUE alt +chr3_KI270780v1_alt 224108 TRUE alt +chr3_GL383526v1_alt 180671 TRUE alt +chr3_KI270777v1_alt 173649 TRUE alt +chr3_KI270778v1_alt 248252 TRUE alt +chr3_KI270781v1_alt 113034 TRUE alt +chr3_KI270779v1_alt 205312 TRUE alt +chr3_KI270782v1_alt 162429 TRUE alt +chr3_KI270784v1_alt 184404 TRUE alt +chr4_KI270790v1_alt 220246 TRUE alt +chr4_GL383528v1_alt 376187 TRUE alt +chr4_KI270787v1_alt 111943 TRUE alt +chr4_GL000257v2_alt 586476 TRUE alt +chr4_KI270788v1_alt 158965 TRUE alt +chr4_GL383527v1_alt 164536 TRUE alt +chr4_KI270785v1_alt 119912 TRUE alt +chr4_KI270789v1_alt 205944 TRUE alt +chr4_KI270786v1_alt 244096 TRUE alt +chr5_KI270793v1_alt 126136 TRUE alt +chr5_KI270792v1_alt 179043 TRUE alt +chr5_KI270791v1_alt 195710 TRUE alt +chr5_GL383532v1_alt 82728 TRUE alt +chr5_GL949742v1_alt 226852 TRUE alt +chr5_KI270794v1_alt 164558 TRUE alt +chr5_GL339449v2_alt 1612928 TRUE alt +chr5_GL383530v1_alt 101241 TRUE alt +chr5_KI270796v1_alt 172708 TRUE alt +chr5_GL383531v1_alt 173459 TRUE alt +chr5_KI270795v1_alt 131892 TRUE alt 
+chr6_GL000250v2_alt 4672374 TRUE alt +chr6_KI270800v1_alt 175808 TRUE alt +chr6_KI270799v1_alt 152148 TRUE alt +chr6_GL383533v1_alt 124736 TRUE alt +chr6_KI270801v1_alt 870480 TRUE alt +chr6_KI270802v1_alt 75005 TRUE alt +chr6_KB021644v2_alt 185823 TRUE alt +chr6_KI270797v1_alt 197536 TRUE alt +chr6_KI270798v1_alt 271782 TRUE alt +chr7_KI270804v1_alt 157952 TRUE alt +chr7_KI270809v1_alt 209586 TRUE alt +chr7_KI270806v1_alt 158166 TRUE alt +chr7_GL383534v2_alt 119183 TRUE alt +chr7_KI270803v1_alt 1111570 TRUE alt +chr7_KI270808v1_alt 271455 TRUE alt +chr7_KI270807v1_alt 126434 TRUE alt +chr7_KI270805v1_alt 209988 TRUE alt +chr8_KI270818v1_alt 145606 TRUE alt +chr8_KI270812v1_alt 282736 TRUE alt +chr8_KI270811v1_alt 292436 TRUE alt +chr8_KI270821v1_alt 985506 TRUE alt +chr8_KI270813v1_alt 300230 TRUE alt +chr8_KI270822v1_alt 624492 TRUE alt +chr8_KI270814v1_alt 141812 TRUE alt +chr8_KI270810v1_alt 374415 TRUE alt +chr8_KI270819v1_alt 133535 TRUE alt +chr8_KI270820v1_alt 36640 TRUE alt +chr8_KI270817v1_alt 158983 TRUE alt +chr8_KI270816v1_alt 305841 TRUE alt +chr8_KI270815v1_alt 132244 TRUE alt +chr9_GL383539v1_alt 162988 TRUE alt +chr9_GL383540v1_alt 71551 TRUE alt +chr9_GL383541v1_alt 171286 TRUE alt +chr9_GL383542v1_alt 60032 TRUE alt +chr9_KI270823v1_alt 439082 TRUE alt +chr10_GL383545v1_alt 179254 TRUE alt +chr10_KI270824v1_alt 181496 TRUE alt +chr10_GL383546v1_alt 309802 TRUE alt +chr10_KI270825v1_alt 188315 TRUE alt +chr11_KI270832v1_alt 210133 TRUE alt +chr11_KI270830v1_alt 177092 TRUE alt +chr11_KI270831v1_alt 296895 TRUE alt +chr11_KI270829v1_alt 204059 TRUE alt +chr11_GL383547v1_alt 154407 TRUE alt +chr11_JH159136v1_alt 200998 TRUE alt +chr11_JH159137v1_alt 191409 TRUE alt +chr11_KI270827v1_alt 67707 TRUE alt +chr11_KI270826v1_alt 186169 TRUE alt +chr12_GL877875v1_alt 167313 TRUE alt +chr12_GL877876v1_alt 408271 TRUE alt +chr12_KI270837v1_alt 40090 TRUE alt +chr12_GL383549v1_alt 120804 TRUE alt +chr12_KI270835v1_alt 238139 TRUE alt +chr12_GL383550v2_alt 
169178 TRUE alt +chr12_GL383552v1_alt 138655 TRUE alt +chr12_GL383553v2_alt 152874 TRUE alt +chr12_KI270834v1_alt 119498 TRUE alt +chr12_GL383551v1_alt 184319 TRUE alt +chr12_KI270833v1_alt 76061 TRUE alt +chr12_KI270836v1_alt 56134 TRUE alt +chr13_KI270840v1_alt 191684 TRUE alt +chr13_KI270839v1_alt 180306 TRUE alt +chr13_KI270843v1_alt 103832 TRUE alt +chr13_KI270841v1_alt 169134 TRUE alt +chr13_KI270838v1_alt 306913 TRUE alt +chr13_KI270842v1_alt 37287 TRUE alt +chr14_KI270844v1_alt 322166 TRUE alt +chr14_KI270847v1_alt 1511111 TRUE alt +chr14_KI270845v1_alt 180703 TRUE alt +chr14_KI270846v1_alt 1351393 TRUE alt +chr15_KI270852v1_alt 478999 TRUE alt +chr15_KI270851v1_alt 263054 TRUE alt +chr15_KI270848v1_alt 327382 TRUE alt +chr15_GL383554v1_alt 296527 TRUE alt +chr15_KI270849v1_alt 244917 TRUE alt +chr15_GL383555v2_alt 388773 TRUE alt +chr15_KI270850v1_alt 430880 TRUE alt +chr16_KI270854v1_alt 134193 TRUE alt +chr16_KI270856v1_alt 63982 TRUE alt +chr16_KI270855v1_alt 232857 TRUE alt +chr16_KI270853v1_alt 2659700 TRUE alt +chr16_GL383556v1_alt 192462 TRUE alt +chr16_GL383557v1_alt 89672 TRUE alt +chr17_GL383563v3_alt 375691 TRUE alt +chr17_KI270862v1_alt 391357 TRUE alt +chr17_KI270861v1_alt 196688 TRUE alt +chr17_KI270857v1_alt 2877074 TRUE alt +chr17_JH159146v1_alt 278131 TRUE alt +chr17_JH159147v1_alt 70345 TRUE alt +chr17_GL383564v2_alt 133151 TRUE alt +chr17_GL000258v2_alt 1821992 TRUE alt +chr17_GL383565v1_alt 223995 TRUE alt +chr17_KI270858v1_alt 235827 TRUE alt +chr17_KI270859v1_alt 108763 TRUE alt +chr17_GL383566v1_alt 90219 TRUE alt +chr17_KI270860v1_alt 178921 TRUE alt +chr18_KI270864v1_alt 111737 TRUE alt +chr18_GL383567v1_alt 289831 TRUE alt +chr18_GL383570v1_alt 164789 TRUE alt +chr18_GL383571v1_alt 198278 TRUE alt +chr18_GL383568v1_alt 104552 TRUE alt +chr18_GL383569v1_alt 167950 TRUE alt +chr18_GL383572v1_alt 159547 TRUE alt +chr18_KI270863v1_alt 167999 TRUE alt +chr19_KI270868v1_alt 61734 TRUE alt +chr19_KI270865v1_alt 52969 TRUE alt 
+chr19_GL383573v1_alt 385657 TRUE alt +chr19_GL383575v2_alt 170222 TRUE alt +chr19_GL383576v1_alt 188024 TRUE alt +chr19_GL383574v1_alt 155864 TRUE alt +chr19_KI270866v1_alt 43156 TRUE alt +chr19_KI270867v1_alt 233762 TRUE alt +chr19_GL949746v1_alt 987716 TRUE alt +chr20_GL383577v2_alt 128386 TRUE alt +chr20_KI270869v1_alt 118774 TRUE alt +chr20_KI270871v1_alt 58661 TRUE alt +chr20_KI270870v1_alt 183433 TRUE alt +chr21_GL383578v2_alt 63917 TRUE alt +chr21_KI270874v1_alt 166743 TRUE alt +chr21_KI270873v1_alt 143900 TRUE alt +chr21_GL383579v2_alt 201197 TRUE alt +chr21_GL383580v2_alt 74653 TRUE alt +chr21_GL383581v2_alt 116689 TRUE alt +chr21_KI270872v1_alt 82692 TRUE alt +chr22_KI270875v1_alt 259914 TRUE alt +chr22_KI270878v1_alt 186262 TRUE alt +chr22_KI270879v1_alt 304135 TRUE alt +chr22_KI270876v1_alt 263666 TRUE alt +chr22_KI270877v1_alt 101331 TRUE alt +chr22_GL383583v2_alt 96924 TRUE alt +chr22_GL383582v2_alt 162811 TRUE alt +chrX_KI270880v1_alt 284869 TRUE alt +chrX_KI270881v1_alt 144206 TRUE alt +chr19_KI270882v1_alt 248807 TRUE alt +chr19_KI270883v1_alt 170399 TRUE alt +chr19_KI270884v1_alt 157053 TRUE alt +chr19_KI270885v1_alt 171027 TRUE alt +chr19_KI270886v1_alt 204239 TRUE alt +chr19_KI270887v1_alt 209512 TRUE alt +chr19_KI270888v1_alt 155532 TRUE alt +chr19_KI270889v1_alt 170698 TRUE alt +chr19_KI270890v1_alt 184499 TRUE alt +chr19_KI270891v1_alt 170680 TRUE alt +chr1_KI270892v1_alt 162212 TRUE alt +chr2_KI270894v1_alt 214158 TRUE alt +chr2_KI270893v1_alt 161218 TRUE alt +chr3_KI270895v1_alt 162896 TRUE alt +chr4_KI270896v1_alt 378547 TRUE alt +chr5_KI270897v1_alt 1144418 TRUE alt +chr5_KI270898v1_alt 130957 TRUE alt +chr6_GL000251v2_alt 4795265 TRUE alt +chr7_KI270899v1_alt 190869 TRUE alt +chr8_KI270901v1_alt 136959 TRUE alt +chr8_KI270900v1_alt 318687 TRUE alt +chr11_KI270902v1_alt 106711 TRUE alt +chr11_KI270903v1_alt 214625 TRUE alt +chr12_KI270904v1_alt 572349 TRUE alt +chr15_KI270906v1_alt 196384 TRUE alt +chr15_KI270905v1_alt 5161414 TRUE alt 
+chr17_KI270907v1_alt 137721 TRUE alt +chr17_KI270910v1_alt 157099 TRUE alt +chr17_KI270909v1_alt 325800 TRUE alt +chr17_JH159148v1_alt 88070 TRUE alt +chr17_KI270908v1_alt 1423190 TRUE alt +chr18_KI270912v1_alt 174061 TRUE alt +chr18_KI270911v1_alt 157710 TRUE alt +chr19_GL949747v2_alt 729520 TRUE alt +chr22_KB663609v1_alt 74013 TRUE alt +chrX_KI270913v1_alt 274009 TRUE alt +chr19_KI270914v1_alt 205194 TRUE alt +chr19_KI270915v1_alt 170665 TRUE alt +chr19_KI270916v1_alt 184516 TRUE alt +chr19_KI270917v1_alt 190932 TRUE alt +chr19_KI270918v1_alt 123111 TRUE alt +chr19_KI270919v1_alt 170701 TRUE alt +chr19_KI270920v1_alt 198005 TRUE alt +chr19_KI270921v1_alt 282224 TRUE alt +chr19_KI270922v1_alt 187935 TRUE alt +chr19_KI270923v1_alt 189352 TRUE alt +chr3_KI270924v1_alt 166540 TRUE alt +chr4_KI270925v1_alt 555799 TRUE alt +chr6_GL000252v2_alt 4604811 TRUE alt +chr8_KI270926v1_alt 229282 TRUE alt +chr11_KI270927v1_alt 218612 TRUE alt +chr19_GL949748v2_alt 1064304 TRUE alt +chr22_KI270928v1_alt 176103 TRUE alt +chr19_KI270929v1_alt 186203 TRUE alt +chr19_KI270930v1_alt 200773 TRUE alt +chr19_KI270931v1_alt 170148 TRUE alt +chr19_KI270932v1_alt 215732 TRUE alt +chr19_KI270933v1_alt 170537 TRUE alt +chr19_GL000209v2_alt 177381 TRUE alt +chr3_KI270934v1_alt 163458 TRUE alt +chr6_GL000253v2_alt 4677643 TRUE alt +chr19_GL949749v2_alt 1091841 TRUE alt +chr3_KI270935v1_alt 197351 TRUE alt +chr6_GL000254v2_alt 4827813 TRUE alt +chr19_GL949750v2_alt 1066390 TRUE alt +chr3_KI270936v1_alt 164170 TRUE alt +chr6_GL000255v2_alt 4606388 TRUE alt +chr19_GL949751v2_alt 1002683 TRUE alt +chr3_KI270937v1_alt 165607 TRUE alt +chr6_GL000256v2_alt 4929269 TRUE alt +chr19_GL949752v1_alt 987100 TRUE alt +chr6_KI270758v1_alt 76752 TRUE alt +chr19_GL949753v2_alt 796479 TRUE alt +chr19_KI270938v1_alt 1066800 TRUE alt +chrEBV 171823 TRUE virus +chrUn_KN707606v1_decoy 2200 TRUE decoy +chrUn_KN707607v1_decoy 3033 TRUE decoy +chrUn_KN707608v1_decoy 3112 TRUE decoy +chrUn_KN707609v1_decoy 1642 TRUE 
decoy +chrUn_KN707610v1_decoy 1393 TRUE decoy +chrUn_KN707611v1_decoy 1103 TRUE decoy +chrUn_KN707612v1_decoy 1039 TRUE decoy +chrUn_KN707613v1_decoy 1619 TRUE decoy +chrUn_KN707614v1_decoy 3122 TRUE decoy +chrUn_KN707615v1_decoy 1934 TRUE decoy +chrUn_KN707616v1_decoy 3111 TRUE decoy +chrUn_KN707617v1_decoy 2545 TRUE decoy +chrUn_KN707618v1_decoy 2295 TRUE decoy +chrUn_KN707619v1_decoy 1551 TRUE decoy +chrUn_KN707620v1_decoy 2046 TRUE decoy +chrUn_KN707621v1_decoy 1222 TRUE decoy +chrUn_KN707622v1_decoy 1535 TRUE decoy +chrUn_KN707623v1_decoy 3784 TRUE decoy +chrUn_KN707624v1_decoy 1329 TRUE decoy +chrUn_KN707625v1_decoy 1238 TRUE decoy +chrUn_KN707626v1_decoy 5623 TRUE decoy +chrUn_KN707627v1_decoy 5821 TRUE decoy +chrUn_KN707628v1_decoy 2960 TRUE decoy +chrUn_KN707629v1_decoy 1848 TRUE decoy +chrUn_KN707630v1_decoy 2315 TRUE decoy +chrUn_KN707631v1_decoy 1945 TRUE decoy +chrUn_KN707632v1_decoy 1424 TRUE decoy +chrUn_KN707633v1_decoy 1274 TRUE decoy +chrUn_KN707634v1_decoy 1007 TRUE decoy +chrUn_KN707635v1_decoy 1414 TRUE decoy +chrUn_KN707636v1_decoy 1725 TRUE decoy +chrUn_KN707637v1_decoy 5354 TRUE decoy +chrUn_KN707638v1_decoy 2189 TRUE decoy +chrUn_KN707639v1_decoy 1294 TRUE decoy +chrUn_KN707640v1_decoy 1831 TRUE decoy +chrUn_KN707641v1_decoy 1647 TRUE decoy +chrUn_KN707642v1_decoy 2943 TRUE decoy +chrUn_KN707643v1_decoy 2857 TRUE decoy +chrUn_KN707644v1_decoy 1030 TRUE decoy +chrUn_KN707645v1_decoy 1070 TRUE decoy +chrUn_KN707646v1_decoy 1735 TRUE decoy +chrUn_KN707647v1_decoy 1982 TRUE decoy +chrUn_KN707648v1_decoy 1564 TRUE decoy +chrUn_KN707649v1_decoy 1775 TRUE decoy +chrUn_KN707650v1_decoy 1540 TRUE decoy +chrUn_KN707651v1_decoy 2013 TRUE decoy +chrUn_KN707652v1_decoy 1176 TRUE decoy +chrUn_KN707653v1_decoy 1890 TRUE decoy +chrUn_KN707654v1_decoy 3644 TRUE decoy +chrUn_KN707655v1_decoy 2785 TRUE decoy +chrUn_KN707656v1_decoy 1017 TRUE decoy +chrUn_KN707657v1_decoy 1068 TRUE decoy +chrUn_KN707658v1_decoy 1007 TRUE decoy +chrUn_KN707659v1_decoy 2605 TRUE 
decoy +chrUn_KN707660v1_decoy 8410 TRUE decoy +chrUn_KN707661v1_decoy 5534 TRUE decoy +chrUn_KN707662v1_decoy 2173 TRUE decoy +chrUn_KN707663v1_decoy 1065 TRUE decoy +chrUn_KN707664v1_decoy 8683 TRUE decoy +chrUn_KN707665v1_decoy 2670 TRUE decoy +chrUn_KN707666v1_decoy 2420 TRUE decoy +chrUn_KN707667v1_decoy 2189 TRUE decoy +chrUn_KN707668v1_decoy 2093 TRUE decoy +chrUn_KN707669v1_decoy 1184 TRUE decoy +chrUn_KN707670v1_decoy 1205 TRUE decoy +chrUn_KN707671v1_decoy 2786 TRUE decoy +chrUn_KN707672v1_decoy 2794 TRUE decoy +chrUn_KN707673v1_decoy 19544 TRUE decoy +chrUn_KN707674v1_decoy 2848 TRUE decoy +chrUn_KN707675v1_decoy 10556 TRUE decoy +chrUn_KN707676v1_decoy 9066 TRUE decoy +chrUn_KN707677v1_decoy 7267 TRUE decoy +chrUn_KN707678v1_decoy 2462 TRUE decoy +chrUn_KN707679v1_decoy 1774 TRUE decoy +chrUn_KN707680v1_decoy 1297 TRUE decoy +chrUn_KN707681v1_decoy 4379 TRUE decoy +chrUn_KN707682v1_decoy 4208 TRUE decoy +chrUn_KN707683v1_decoy 4068 TRUE decoy +chrUn_KN707684v1_decoy 2940 TRUE decoy +chrUn_KN707685v1_decoy 3938 TRUE decoy +chrUn_KN707686v1_decoy 2072 TRUE decoy +chrUn_KN707687v1_decoy 1136 TRUE decoy +chrUn_KN707688v1_decoy 4248 TRUE decoy +chrUn_KN707689v1_decoy 5823 TRUE decoy +chrUn_KN707690v1_decoy 3715 TRUE decoy +chrUn_KN707691v1_decoy 4885 TRUE decoy +chrUn_KN707692v1_decoy 4813 TRUE decoy +chrUn_KN707693v1_decoy 2899 TRUE decoy +chrUn_KN707694v1_decoy 1228 TRUE decoy +chrUn_KN707695v1_decoy 3119 TRUE decoy +chrUn_KN707696v1_decoy 3828 TRUE decoy +chrUn_KN707697v1_decoy 1186 TRUE decoy +chrUn_KN707698v1_decoy 1908 TRUE decoy +chrUn_KN707699v1_decoy 2795 TRUE decoy +chrUn_KN707700v1_decoy 3703 TRUE decoy +chrUn_KN707701v1_decoy 6722 TRUE decoy +chrUn_KN707702v1_decoy 6466 TRUE decoy +chrUn_KN707703v1_decoy 2235 TRUE decoy +chrUn_KN707704v1_decoy 2871 TRUE decoy +chrUn_KN707705v1_decoy 4632 TRUE decoy +chrUn_KN707706v1_decoy 4225 TRUE decoy +chrUn_KN707707v1_decoy 4339 TRUE decoy +chrUn_KN707708v1_decoy 2305 TRUE decoy +chrUn_KN707709v1_decoy 3273 
TRUE decoy +chrUn_KN707710v1_decoy 5701 TRUE decoy +chrUn_KN707711v1_decoy 4154 TRUE decoy +chrUn_KN707712v1_decoy 1243 TRUE decoy +chrUn_KN707713v1_decoy 1308 TRUE decoy +chrUn_KN707714v1_decoy 2922 TRUE decoy +chrUn_KN707715v1_decoy 3044 TRUE decoy +chrUn_KN707716v1_decoy 2888 TRUE decoy +chrUn_KN707717v1_decoy 1742 TRUE decoy +chrUn_KN707718v1_decoy 4969 TRUE decoy +chrUn_KN707719v1_decoy 3270 TRUE decoy +chrUn_KN707720v1_decoy 6028 TRUE decoy +chrUn_KN707721v1_decoy 1105 TRUE decoy +chrUn_KN707722v1_decoy 2884 TRUE decoy +chrUn_KN707723v1_decoy 1124 TRUE decoy +chrUn_KN707724v1_decoy 1454 TRUE decoy +chrUn_KN707725v1_decoy 2565 TRUE decoy +chrUn_KN707726v1_decoy 2149 TRUE decoy +chrUn_KN707727v1_decoy 2630 TRUE decoy +chrUn_KN707728v1_decoy 14625 TRUE decoy +chrUn_KN707729v1_decoy 7431 TRUE decoy +chrUn_KN707730v1_decoy 5776 TRUE decoy +chrUn_KN707731v1_decoy 4820 TRUE decoy +chrUn_KN707732v1_decoy 1227 TRUE decoy +chrUn_KN707733v1_decoy 7503 TRUE decoy +chrUn_KN707734v1_decoy 9652 TRUE decoy +chrUn_KN707735v1_decoy 1091 TRUE decoy +chrUn_KN707736v1_decoy 2467 TRUE decoy +chrUn_KN707737v1_decoy 1270 TRUE decoy +chrUn_KN707738v1_decoy 4365 TRUE decoy +chrUn_KN707739v1_decoy 4284 TRUE decoy +chrUn_KN707740v1_decoy 10282 TRUE decoy +chrUn_KN707741v1_decoy 5601 TRUE decoy +chrUn_KN707742v1_decoy 4758 TRUE decoy +chrUn_KN707743v1_decoy 1624 TRUE decoy +chrUn_KN707744v1_decoy 4024 TRUE decoy +chrUn_KN707745v1_decoy 1276 TRUE decoy +chrUn_KN707746v1_decoy 5083 TRUE decoy +chrUn_KN707747v1_decoy 2075 TRUE decoy +chrUn_KN707748v1_decoy 3553 TRUE decoy +chrUn_KN707749v1_decoy 7010 TRUE decoy +chrUn_KN707750v1_decoy 4718 TRUE decoy +chrUn_KN707751v1_decoy 3546 TRUE decoy +chrUn_KN707752v1_decoy 2873 TRUE decoy +chrUn_KN707753v1_decoy 2144 TRUE decoy +chrUn_KN707754v1_decoy 2243 TRUE decoy +chrUn_KN707755v1_decoy 5343 TRUE decoy +chrUn_KN707756v1_decoy 4877 TRUE decoy +chrUn_KN707757v1_decoy 3034 TRUE decoy +chrUn_KN707758v1_decoy 2826 TRUE decoy +chrUn_KN707759v1_decoy 
1221 TRUE decoy +chrUn_KN707760v1_decoy 1169 TRUE decoy +chrUn_KN707761v1_decoy 2319 TRUE decoy +chrUn_KN707762v1_decoy 3450 TRUE decoy +chrUn_KN707763v1_decoy 2674 TRUE decoy +chrUn_KN707764v1_decoy 3912 TRUE decoy +chrUn_KN707765v1_decoy 6020 TRUE decoy +chrUn_KN707766v1_decoy 2303 TRUE decoy +chrUn_KN707767v1_decoy 2552 TRUE decoy +chrUn_KN707768v1_decoy 3656 TRUE decoy +chrUn_KN707769v1_decoy 1591 TRUE decoy +chrUn_KN707770v1_decoy 1209 TRUE decoy +chrUn_KN707771v1_decoy 3176 TRUE decoy +chrUn_KN707772v1_decoy 8915 TRUE decoy +chrUn_KN707773v1_decoy 4902 TRUE decoy +chrUn_KN707774v1_decoy 3324 TRUE decoy +chrUn_KN707775v1_decoy 5997 TRUE decoy +chrUn_KN707776v1_decoy 2618 TRUE decoy +chrUn_KN707777v1_decoy 10311 TRUE decoy +chrUn_KN707778v1_decoy 2440 TRUE decoy +chrUn_KN707779v1_decoy 12444 TRUE decoy +chrUn_KN707780v1_decoy 5691 TRUE decoy +chrUn_KN707781v1_decoy 2717 TRUE decoy +chrUn_KN707782v1_decoy 5277 TRUE decoy +chrUn_KN707783v1_decoy 4373 TRUE decoy +chrUn_KN707784v1_decoy 3224 TRUE decoy +chrUn_KN707785v1_decoy 2631 TRUE decoy +chrUn_KN707786v1_decoy 5385 TRUE decoy +chrUn_KN707787v1_decoy 3678 TRUE decoy +chrUn_KN707788v1_decoy 1412 TRUE decoy +chrUn_KN707789v1_decoy 1443 TRUE decoy +chrUn_KN707790v1_decoy 1098 TRUE decoy +chrUn_KN707791v1_decoy 3240 TRUE decoy +chrUn_KN707792v1_decoy 1915 TRUE decoy +chrUn_KN707793v1_decoy 4667 TRUE decoy +chrUn_KN707794v1_decoy 7219 TRUE decoy +chrUn_KN707795v1_decoy 3277 TRUE decoy +chrUn_KN707796v1_decoy 3473 TRUE decoy +chrUn_KN707797v1_decoy 4243 TRUE decoy +chrUn_KN707798v1_decoy 17599 TRUE decoy +chrUn_KN707799v1_decoy 5095 TRUE decoy +chrUn_KN707800v1_decoy 2237 TRUE decoy +chrUn_KN707801v1_decoy 2901 TRUE decoy +chrUn_KN707802v1_decoy 2666 TRUE decoy +chrUn_KN707803v1_decoy 5336 TRUE decoy +chrUn_KN707804v1_decoy 4383 TRUE decoy +chrUn_KN707805v1_decoy 5446 TRUE decoy +chrUn_KN707806v1_decoy 6252 TRUE decoy +chrUn_KN707807v1_decoy 4616 TRUE decoy +chrUn_KN707808v1_decoy 3021 TRUE decoy 
+chrUn_KN707809v1_decoy 3667 TRUE decoy +chrUn_KN707810v1_decoy 4563 TRUE decoy +chrUn_KN707811v1_decoy 1120 TRUE decoy +chrUn_KN707812v1_decoy 3845 TRUE decoy +chrUn_KN707813v1_decoy 2272 TRUE decoy +chrUn_KN707814v1_decoy 4764 TRUE decoy +chrUn_KN707815v1_decoy 5410 TRUE decoy +chrUn_KN707816v1_decoy 7150 TRUE decoy +chrUn_KN707817v1_decoy 1762 TRUE decoy +chrUn_KN707818v1_decoy 1207 TRUE decoy +chrUn_KN707819v1_decoy 1331 TRUE decoy +chrUn_KN707820v1_decoy 8307 TRUE decoy +chrUn_KN707821v1_decoy 2276 TRUE decoy +chrUn_KN707822v1_decoy 2575 TRUE decoy +chrUn_KN707823v1_decoy 3970 TRUE decoy +chrUn_KN707824v1_decoy 1352 TRUE decoy +chrUn_KN707825v1_decoy 3040 TRUE decoy +chrUn_KN707826v1_decoy 2070 TRUE decoy +chrUn_KN707827v1_decoy 2913 TRUE decoy +chrUn_KN707828v1_decoy 2389 TRUE decoy +chrUn_KN707829v1_decoy 1835 TRUE decoy +chrUn_KN707830v1_decoy 4807 TRUE decoy +chrUn_KN707831v1_decoy 2201 TRUE decoy +chrUn_KN707832v1_decoy 1265 TRUE decoy +chrUn_KN707833v1_decoy 1961 TRUE decoy +chrUn_KN707834v1_decoy 1064 TRUE decoy +chrUn_KN707835v1_decoy 1932 TRUE decoy +chrUn_KN707836v1_decoy 3213 TRUE decoy +chrUn_KN707837v1_decoy 1178 TRUE decoy +chrUn_KN707838v1_decoy 2926 TRUE decoy +chrUn_KN707839v1_decoy 1038 TRUE decoy +chrUn_KN707840v1_decoy 3298 TRUE decoy +chrUn_KN707841v1_decoy 8992 TRUE decoy +chrUn_KN707842v1_decoy 6698 TRUE decoy +chrUn_KN707843v1_decoy 4880 TRUE decoy +chrUn_KN707844v1_decoy 1766 TRUE decoy +chrUn_KN707845v1_decoy 3532 TRUE decoy +chrUn_KN707846v1_decoy 2297 TRUE decoy +chrUn_KN707847v1_decoy 1234 TRUE decoy +chrUn_KN707848v1_decoy 1205 TRUE decoy +chrUn_KN707849v1_decoy 2790 TRUE decoy +chrUn_KN707850v1_decoy 2006 TRUE decoy +chrUn_KN707851v1_decoy 4593 TRUE decoy +chrUn_KN707852v1_decoy 1579 TRUE decoy +chrUn_KN707853v1_decoy 9597 TRUE decoy +chrUn_KN707854v1_decoy 10451 TRUE decoy +chrUn_KN707855v1_decoy 3219 TRUE decoy +chrUn_KN707856v1_decoy 2300 TRUE decoy +chrUn_KN707857v1_decoy 5985 TRUE decoy +chrUn_KN707858v1_decoy 2959 TRUE 
decoy +chrUn_KN707859v1_decoy 1340 TRUE decoy +chrUn_KN707860v1_decoy 3148 TRUE decoy +chrUn_KN707861v1_decoy 2242 TRUE decoy +chrUn_KN707862v1_decoy 16513 TRUE decoy +chrUn_KN707863v1_decoy 7821 TRUE decoy +chrUn_KN707864v1_decoy 2159 TRUE decoy +chrUn_KN707865v1_decoy 2114 TRUE decoy +chrUn_KN707866v1_decoy 4109 TRUE decoy +chrUn_KN707867v1_decoy 1544 TRUE decoy +chrUn_KN707868v1_decoy 1005 TRUE decoy +chrUn_KN707869v1_decoy 8632 TRUE decoy +chrUn_KN707870v1_decoy 1012 TRUE decoy +chrUn_KN707871v1_decoy 4728 TRUE decoy +chrUn_KN707872v1_decoy 2165 TRUE decoy +chrUn_KN707873v1_decoy 7591 TRUE decoy +chrUn_KN707874v1_decoy 5202 TRUE decoy +chrUn_KN707875v1_decoy 4241 TRUE decoy +chrUn_KN707876v1_decoy 4131 TRUE decoy +chrUn_KN707877v1_decoy 2272 TRUE decoy +chrUn_KN707878v1_decoy 2085 TRUE decoy +chrUn_KN707879v1_decoy 4346 TRUE decoy +chrUn_KN707880v1_decoy 1208 TRUE decoy +chrUn_KN707881v1_decoy 4543 TRUE decoy +chrUn_KN707882v1_decoy 2772 TRUE decoy +chrUn_KN707883v1_decoy 2490 TRUE decoy +chrUn_KN707884v1_decoy 4568 TRUE decoy +chrUn_KN707885v1_decoy 1776 TRUE decoy +chrUn_KN707886v1_decoy 2699 TRUE decoy +chrUn_KN707887v1_decoy 3534 TRUE decoy +chrUn_KN707888v1_decoy 2424 TRUE decoy +chrUn_KN707889v1_decoy 1747 TRUE decoy +chrUn_KN707890v1_decoy 1088 TRUE decoy +chrUn_KN707891v1_decoy 1143 TRUE decoy +chrUn_KN707892v1_decoy 2530 TRUE decoy +chrUn_KN707893v1_decoy 8049 TRUE decoy +chrUn_KN707894v1_decoy 1366 TRUE decoy +chrUn_KN707895v1_decoy 4284 TRUE decoy +chrUn_KN707896v1_decoy 33125 TRUE decoy +chrUn_KN707897v1_decoy 2137 TRUE decoy +chrUn_KN707898v1_decoy 3840 TRUE decoy +chrUn_KN707899v1_decoy 3087 TRUE decoy +chrUn_KN707900v1_decoy 2041 TRUE decoy +chrUn_KN707901v1_decoy 3344 TRUE decoy +chrUn_KN707902v1_decoy 2921 TRUE decoy +chrUn_KN707903v1_decoy 6581 TRUE decoy +chrUn_KN707904v1_decoy 3968 TRUE decoy +chrUn_KN707905v1_decoy 2339 TRUE decoy +chrUn_KN707906v1_decoy 1243 TRUE decoy +chrUn_KN707907v1_decoy 7776 TRUE decoy +chrUn_KN707908v1_decoy 19837 
TRUE decoy +chrUn_KN707909v1_decoy 1737 TRUE decoy +chrUn_KN707910v1_decoy 1098 TRUE decoy +chrUn_KN707911v1_decoy 1893 TRUE decoy +chrUn_KN707912v1_decoy 1281 TRUE decoy +chrUn_KN707913v1_decoy 1527 TRUE decoy +chrUn_KN707914v1_decoy 2055 TRUE decoy +chrUn_KN707915v1_decoy 2527 TRUE decoy +chrUn_KN707916v1_decoy 3275 TRUE decoy +chrUn_KN707917v1_decoy 1265 TRUE decoy +chrUn_KN707918v1_decoy 2623 TRUE decoy +chrUn_KN707919v1_decoy 4850 TRUE decoy +chrUn_KN707920v1_decoy 3584 TRUE decoy +chrUn_KN707921v1_decoy 2561 TRUE decoy +chrUn_KN707922v1_decoy 3041 TRUE decoy +chrUn_KN707923v1_decoy 1409 TRUE decoy +chrUn_KN707924v1_decoy 4596 TRUE decoy +chrUn_KN707925v1_decoy 11555 TRUE decoy +chrUn_KN707926v1_decoy 1266 TRUE decoy +chrUn_KN707927v1_decoy 1079 TRUE decoy +chrUn_KN707928v1_decoy 1087 TRUE decoy +chrUn_KN707929v1_decoy 1226 TRUE decoy +chrUn_KN707930v1_decoy 1131 TRUE decoy +chrUn_KN707931v1_decoy 1199 TRUE decoy +chrUn_KN707932v1_decoy 1084 TRUE decoy +chrUn_KN707933v1_decoy 2038 TRUE decoy +chrUn_KN707934v1_decoy 1070 TRUE decoy +chrUn_KN707935v1_decoy 1312 TRUE decoy +chrUn_KN707936v1_decoy 4031 TRUE decoy +chrUn_KN707937v1_decoy 7445 TRUE decoy +chrUn_KN707938v1_decoy 1770 TRUE decoy +chrUn_KN707939v1_decoy 5600 TRUE decoy +chrUn_KN707940v1_decoy 1882 TRUE decoy +chrUn_KN707941v1_decoy 1170 TRUE decoy +chrUn_KN707942v1_decoy 1300 TRUE decoy +chrUn_KN707943v1_decoy 5325 TRUE decoy +chrUn_KN707944v1_decoy 2043 TRUE decoy +chrUn_KN707945v1_decoy 1072 TRUE decoy +chrUn_KN707946v1_decoy 2463 TRUE decoy +chrUn_KN707947v1_decoy 1010 TRUE decoy +chrUn_KN707948v1_decoy 1432 TRUE decoy +chrUn_KN707949v1_decoy 1162 TRUE decoy +chrUn_KN707950v1_decoy 1095 TRUE decoy +chrUn_KN707951v1_decoy 1118 TRUE decoy +chrUn_KN707952v1_decoy 1383 TRUE decoy +chrUn_KN707953v1_decoy 2289 TRUE decoy +chrUn_KN707954v1_decoy 1648 TRUE decoy +chrUn_KN707955v1_decoy 2203 TRUE decoy +chrUn_KN707956v1_decoy 3270 TRUE decoy +chrUn_KN707957v1_decoy 11499 TRUE decoy +chrUn_KN707958v1_decoy 
2474 TRUE decoy +chrUn_KN707959v1_decoy 2294 TRUE decoy +chrUn_KN707960v1_decoy 1238 TRUE decoy +chrUn_KN707961v1_decoy 3410 TRUE decoy +chrUn_KN707962v1_decoy 1523 TRUE decoy +chrUn_KN707963v1_decoy 62955 TRUE decoy +chrUn_KN707964v1_decoy 6282 TRUE decoy +chrUn_KN707965v1_decoy 3836 TRUE decoy +chrUn_KN707966v1_decoy 6486 TRUE decoy +chrUn_KN707967v1_decoy 15368 TRUE decoy +chrUn_KN707968v1_decoy 9572 TRUE decoy +chrUn_KN707969v1_decoy 6413 TRUE decoy +chrUn_KN707970v1_decoy 4104 TRUE decoy +chrUn_KN707971v1_decoy 12943 TRUE decoy +chrUn_KN707972v1_decoy 4650 TRUE decoy +chrUn_KN707973v1_decoy 3080 TRUE decoy +chrUn_KN707974v1_decoy 3134 TRUE decoy +chrUn_KN707975v1_decoy 6211 TRUE decoy +chrUn_KN707976v1_decoy 1126 TRUE decoy +chrUn_KN707977v1_decoy 1101 TRUE decoy +chrUn_KN707978v1_decoy 1101 TRUE decoy +chrUn_KN707979v1_decoy 2648 TRUE decoy +chrUn_KN707980v1_decoy 2973 TRUE decoy +chrUn_KN707981v1_decoy 2520 TRUE decoy +chrUn_KN707982v1_decoy 2318 TRUE decoy +chrUn_KN707983v1_decoy 2606 TRUE decoy +chrUn_KN707984v1_decoy 2205 TRUE decoy +chrUn_KN707985v1_decoy 2929 TRUE decoy +chrUn_KN707986v1_decoy 3869 TRUE decoy +chrUn_KN707987v1_decoy 1117 TRUE decoy +chrUn_KN707988v1_decoy 2960 TRUE decoy +chrUn_KN707989v1_decoy 1009 TRUE decoy +chrUn_KN707990v1_decoy 4048 TRUE decoy +chrUn_KN707991v1_decoy 2193 TRUE decoy +chrUn_KN707992v1_decoy 1830 TRUE decoy +chrUn_JTFH01000001v1_decoy 25139 TRUE decoy +chrUn_JTFH01000002v1_decoy 18532 TRUE decoy +chrUn_JTFH01000003v1_decoy 15240 TRUE decoy +chrUn_JTFH01000004v1_decoy 13739 TRUE decoy +chrUn_JTFH01000005v1_decoy 11297 TRUE decoy +chrUn_JTFH01000006v1_decoy 10074 TRUE decoy +chrUn_JTFH01000007v1_decoy 9891 TRUE decoy +chrUn_JTFH01000008v1_decoy 9774 TRUE decoy +chrUn_JTFH01000009v1_decoy 9727 TRUE decoy +chrUn_JTFH01000010v1_decoy 9358 TRUE decoy +chrUn_JTFH01000011v1_decoy 8920 TRUE decoy +chrUn_JTFH01000012v1_decoy 8479 TRUE decoy +chrUn_JTFH01000013v1_decoy 8312 TRUE decoy +chrUn_JTFH01000014v1_decoy 8261 TRUE 
decoy +chrUn_JTFH01000015v1_decoy 8131 TRUE decoy +chrUn_JTFH01000016v1_decoy 8051 TRUE decoy +chrUn_JTFH01000017v1_decoy 7832 TRUE decoy +chrUn_JTFH01000018v1_decoy 7710 TRUE decoy +chrUn_JTFH01000019v1_decoy 7702 TRUE decoy +chrUn_JTFH01000020v1_decoy 7479 TRUE decoy +chrUn_JTFH01000021v1_decoy 7368 TRUE decoy +chrUn_JTFH01000022v1_decoy 7162 TRUE decoy +chrUn_JTFH01000023v1_decoy 7065 TRUE decoy +chrUn_JTFH01000024v1_decoy 7019 TRUE decoy +chrUn_JTFH01000025v1_decoy 6997 TRUE decoy +chrUn_JTFH01000026v1_decoy 6994 TRUE decoy +chrUn_JTFH01000027v1_decoy 6979 TRUE decoy +chrUn_JTFH01000028v1_decoy 6797 TRUE decoy +chrUn_JTFH01000029v1_decoy 6525 TRUE decoy +chrUn_JTFH01000030v1_decoy 6246 TRUE decoy +chrUn_JTFH01000031v1_decoy 5926 TRUE decoy +chrUn_JTFH01000032v1_decoy 5914 TRUE decoy +chrUn_JTFH01000033v1_decoy 5898 TRUE decoy +chrUn_JTFH01000034v1_decoy 5879 TRUE decoy +chrUn_JTFH01000035v1_decoy 5834 TRUE decoy +chrUn_JTFH01000036v1_decoy 5743 TRUE decoy +chrUn_JTFH01000037v1_decoy 5577 TRUE decoy +chrUn_JTFH01000038v1_decoy 5413 TRUE decoy +chrUn_JTFH01000039v1_decoy 5250 TRUE decoy +chrUn_JTFH01000040v1_decoy 5246 TRUE decoy +chrUn_JTFH01000041v1_decoy 5118 TRUE decoy +chrUn_JTFH01000042v1_decoy 5058 TRUE decoy +chrUn_JTFH01000043v1_decoy 4959 TRUE decoy +chrUn_JTFH01000044v1_decoy 4853 TRUE decoy +chrUn_JTFH01000045v1_decoy 4828 TRUE decoy +chrUn_JTFH01000046v1_decoy 4819 TRUE decoy +chrUn_JTFH01000047v1_decoy 4809 TRUE decoy +chrUn_JTFH01000048v1_decoy 4710 TRUE decoy +chrUn_JTFH01000049v1_decoy 4680 TRUE decoy +chrUn_JTFH01000050v1_decoy 4645 TRUE decoy +chrUn_JTFH01000051v1_decoy 4514 TRUE decoy +chrUn_JTFH01000052v1_decoy 4439 TRUE decoy +chrUn_JTFH01000053v1_decoy 4416 TRUE decoy +chrUn_JTFH01000054v1_decoy 4409 TRUE decoy +chrUn_JTFH01000055v1_decoy 4392 TRUE decoy +chrUn_JTFH01000056v1_decoy 4359 TRUE decoy +chrUn_JTFH01000057v1_decoy 4319 TRUE decoy +chrUn_JTFH01000058v1_decoy 4290 TRUE decoy +chrUn_JTFH01000059v1_decoy 4242 TRUE decoy 
+chrUn_JTFH01000060v1_decoy 4228 TRUE decoy +chrUn_JTFH01000061v1_decoy 4222 TRUE decoy +chrUn_JTFH01000062v1_decoy 4216 TRUE decoy +chrUn_JTFH01000063v1_decoy 4210 TRUE decoy +chrUn_JTFH01000064v1_decoy 4206 TRUE decoy +chrUn_JTFH01000065v1_decoy 4102 TRUE decoy +chrUn_JTFH01000066v1_decoy 4101 TRUE decoy +chrUn_JTFH01000067v1_decoy 4083 TRUE decoy +chrUn_JTFH01000068v1_decoy 3967 TRUE decoy +chrUn_JTFH01000069v1_decoy 3955 TRUE decoy +chrUn_JTFH01000070v1_decoy 3945 TRUE decoy +chrUn_JTFH01000071v1_decoy 3930 TRUE decoy +chrUn_JTFH01000072v1_decoy 3929 TRUE decoy +chrUn_JTFH01000073v1_decoy 3924 TRUE decoy +chrUn_JTFH01000074v1_decoy 3919 TRUE decoy +chrUn_JTFH01000075v1_decoy 3908 TRUE decoy +chrUn_JTFH01000076v1_decoy 3892 TRUE decoy +chrUn_JTFH01000077v1_decoy 3890 TRUE decoy +chrUn_JTFH01000078v1_decoy 3859 TRUE decoy +chrUn_JTFH01000079v1_decoy 3846 TRUE decoy +chrUn_JTFH01000080v1_decoy 3835 TRUE decoy +chrUn_JTFH01000081v1_decoy 3830 TRUE decoy +chrUn_JTFH01000082v1_decoy 3828 TRUE decoy +chrUn_JTFH01000083v1_decoy 3825 TRUE decoy +chrUn_JTFH01000084v1_decoy 3821 TRUE decoy +chrUn_JTFH01000085v1_decoy 3809 TRUE decoy +chrUn_JTFH01000086v1_decoy 3801 TRUE decoy +chrUn_JTFH01000087v1_decoy 3799 TRUE decoy +chrUn_JTFH01000088v1_decoy 3737 TRUE decoy +chrUn_JTFH01000089v1_decoy 3701 TRUE decoy +chrUn_JTFH01000090v1_decoy 3698 TRUE decoy +chrUn_JTFH01000091v1_decoy 3692 TRUE decoy +chrUn_JTFH01000092v1_decoy 3686 TRUE decoy +chrUn_JTFH01000093v1_decoy 3677 TRUE decoy +chrUn_JTFH01000094v1_decoy 3664 TRUE decoy +chrUn_JTFH01000095v1_decoy 3613 TRUE decoy +chrUn_JTFH01000096v1_decoy 3611 TRUE decoy +chrUn_JTFH01000097v1_decoy 3606 TRUE decoy +chrUn_JTFH01000098v1_decoy 3584 TRUE decoy +chrUn_JTFH01000099v1_decoy 3581 TRUE decoy +chrUn_JTFH01000100v1_decoy 3543 TRUE decoy +chrUn_JTFH01000101v1_decoy 3528 TRUE decoy +chrUn_JTFH01000102v1_decoy 3527 TRUE decoy +chrUn_JTFH01000103v1_decoy 3496 TRUE decoy +chrUn_JTFH01000104v1_decoy 3493 TRUE decoy 
+chrUn_JTFH01000105v1_decoy 3484 TRUE decoy +chrUn_JTFH01000106v1_decoy 3435 TRUE decoy +chrUn_JTFH01000107v1_decoy 3391 TRUE decoy +chrUn_JTFH01000108v1_decoy 3374 TRUE decoy +chrUn_JTFH01000109v1_decoy 3371 TRUE decoy +chrUn_JTFH01000110v1_decoy 3361 TRUE decoy +chrUn_JTFH01000111v1_decoy 3351 TRUE decoy +chrUn_JTFH01000112v1_decoy 3340 TRUE decoy +chrUn_JTFH01000113v1_decoy 3320 TRUE decoy +chrUn_JTFH01000114v1_decoy 3282 TRUE decoy +chrUn_JTFH01000115v1_decoy 3278 TRUE decoy +chrUn_JTFH01000116v1_decoy 3260 TRUE decoy +chrUn_JTFH01000117v1_decoy 3258 TRUE decoy +chrUn_JTFH01000118v1_decoy 3253 TRUE decoy +chrUn_JTFH01000119v1_decoy 3247 TRUE decoy +chrUn_JTFH01000120v1_decoy 3230 TRUE decoy +chrUn_JTFH01000121v1_decoy 3224 TRUE decoy +chrUn_JTFH01000122v1_decoy 3216 TRUE decoy +chrUn_JTFH01000123v1_decoy 3212 TRUE decoy +chrUn_JTFH01000124v1_decoy 3194 TRUE decoy +chrUn_JTFH01000125v1_decoy 3189 TRUE decoy +chrUn_JTFH01000126v1_decoy 3177 TRUE decoy +chrUn_JTFH01000127v1_decoy 3176 TRUE decoy +chrUn_JTFH01000128v1_decoy 3173 TRUE decoy +chrUn_JTFH01000129v1_decoy 3170 TRUE decoy +chrUn_JTFH01000130v1_decoy 3166 TRUE decoy +chrUn_JTFH01000131v1_decoy 3163 TRUE decoy +chrUn_JTFH01000132v1_decoy 3143 TRUE decoy +chrUn_JTFH01000133v1_decoy 3137 TRUE decoy +chrUn_JTFH01000134v1_decoy 3116 TRUE decoy +chrUn_JTFH01000135v1_decoy 3106 TRUE decoy +chrUn_JTFH01000136v1_decoy 3093 TRUE decoy +chrUn_JTFH01000137v1_decoy 3079 TRUE decoy +chrUn_JTFH01000138v1_decoy 3053 TRUE decoy +chrUn_JTFH01000139v1_decoy 3051 TRUE decoy +chrUn_JTFH01000140v1_decoy 3015 TRUE decoy +chrUn_JTFH01000141v1_decoy 3012 TRUE decoy +chrUn_JTFH01000142v1_decoy 3009 TRUE decoy +chrUn_JTFH01000143v1_decoy 2997 TRUE decoy +chrUn_JTFH01000144v1_decoy 2997 TRUE decoy +chrUn_JTFH01000145v1_decoy 2983 TRUE decoy +chrUn_JTFH01000146v1_decoy 2979 TRUE decoy +chrUn_JTFH01000147v1_decoy 2967 TRUE decoy +chrUn_JTFH01000148v1_decoy 2967 TRUE decoy +chrUn_JTFH01000149v1_decoy 2966 TRUE decoy 
+chrUn_JTFH01000150v1_decoy 2954 TRUE decoy +chrUn_JTFH01000151v1_decoy 2952 TRUE decoy +chrUn_JTFH01000152v1_decoy 2934 TRUE decoy +chrUn_JTFH01000153v1_decoy 2918 TRUE decoy +chrUn_JTFH01000154v1_decoy 2895 TRUE decoy +chrUn_JTFH01000155v1_decoy 2887 TRUE decoy +chrUn_JTFH01000156v1_decoy 2879 TRUE decoy +chrUn_JTFH01000157v1_decoy 2878 TRUE decoy +chrUn_JTFH01000158v1_decoy 2872 TRUE decoy +chrUn_JTFH01000159v1_decoy 2868 TRUE decoy +chrUn_JTFH01000160v1_decoy 2866 TRUE decoy +chrUn_JTFH01000161v1_decoy 2865 TRUE decoy +chrUn_JTFH01000162v1_decoy 2864 TRUE decoy +chrUn_JTFH01000163v1_decoy 2859 TRUE decoy +chrUn_JTFH01000164v1_decoy 2854 TRUE decoy +chrUn_JTFH01000165v1_decoy 2830 TRUE decoy +chrUn_JTFH01000166v1_decoy 2828 TRUE decoy +chrUn_JTFH01000167v1_decoy 2824 TRUE decoy +chrUn_JTFH01000168v1_decoy 2819 TRUE decoy +chrUn_JTFH01000169v1_decoy 2813 TRUE decoy +chrUn_JTFH01000170v1_decoy 2809 TRUE decoy +chrUn_JTFH01000171v1_decoy 2802 TRUE decoy +chrUn_JTFH01000172v1_decoy 2791 TRUE decoy +chrUn_JTFH01000173v1_decoy 2783 TRUE decoy +chrUn_JTFH01000174v1_decoy 2778 TRUE decoy +chrUn_JTFH01000175v1_decoy 2777 TRUE decoy +chrUn_JTFH01000176v1_decoy 2770 TRUE decoy +chrUn_JTFH01000177v1_decoy 2769 TRUE decoy +chrUn_JTFH01000178v1_decoy 2767 TRUE decoy +chrUn_JTFH01000179v1_decoy 2763 TRUE decoy +chrUn_JTFH01000180v1_decoy 2745 TRUE decoy +chrUn_JTFH01000181v1_decoy 2742 TRUE decoy +chrUn_JTFH01000182v1_decoy 2736 TRUE decoy +chrUn_JTFH01000183v1_decoy 2729 TRUE decoy +chrUn_JTFH01000184v1_decoy 2726 TRUE decoy +chrUn_JTFH01000185v1_decoy 2719 TRUE decoy +chrUn_JTFH01000186v1_decoy 2715 TRUE decoy +chrUn_JTFH01000187v1_decoy 2708 TRUE decoy +chrUn_JTFH01000188v1_decoy 2704 TRUE decoy +chrUn_JTFH01000189v1_decoy 2692 TRUE decoy +chrUn_JTFH01000190v1_decoy 2691 TRUE decoy +chrUn_JTFH01000191v1_decoy 2690 TRUE decoy +chrUn_JTFH01000192v1_decoy 2687 TRUE decoy +chrUn_JTFH01000193v1_decoy 2677 TRUE decoy +chrUn_JTFH01000194v1_decoy 2668 TRUE decoy 
+chrUn_JTFH01000195v1_decoy 2668 TRUE decoy +chrUn_JTFH01000196v1_decoy 2663 TRUE decoy +chrUn_JTFH01000197v1_decoy 2655 TRUE decoy +chrUn_JTFH01000198v1_decoy 2644 TRUE decoy +chrUn_JTFH01000199v1_decoy 2642 TRUE decoy +chrUn_JTFH01000200v1_decoy 2632 TRUE decoy +chrUn_JTFH01000201v1_decoy 2632 TRUE decoy +chrUn_JTFH01000202v1_decoy 2628 TRUE decoy +chrUn_JTFH01000203v1_decoy 2623 TRUE decoy +chrUn_JTFH01000204v1_decoy 2622 TRUE decoy +chrUn_JTFH01000205v1_decoy 2619 TRUE decoy +chrUn_JTFH01000206v1_decoy 2605 TRUE decoy +chrUn_JTFH01000207v1_decoy 2603 TRUE decoy +chrUn_JTFH01000208v1_decoy 2601 TRUE decoy +chrUn_JTFH01000209v1_decoy 2598 TRUE decoy +chrUn_JTFH01000210v1_decoy 2597 TRUE decoy +chrUn_JTFH01000211v1_decoy 2596 TRUE decoy +chrUn_JTFH01000212v1_decoy 2594 TRUE decoy +chrUn_JTFH01000213v1_decoy 2586 TRUE decoy +chrUn_JTFH01000214v1_decoy 2585 TRUE decoy +chrUn_JTFH01000215v1_decoy 2583 TRUE decoy +chrUn_JTFH01000216v1_decoy 2578 TRUE decoy +chrUn_JTFH01000217v1_decoy 2569 TRUE decoy +chrUn_JTFH01000218v1_decoy 2569 TRUE decoy +chrUn_JTFH01000219v1_decoy 2551 TRUE decoy +chrUn_JTFH01000220v1_decoy 2548 TRUE decoy +chrUn_JTFH01000221v1_decoy 2548 TRUE decoy +chrUn_JTFH01000222v1_decoy 2546 TRUE decoy +chrUn_JTFH01000223v1_decoy 2545 TRUE decoy +chrUn_JTFH01000224v1_decoy 2534 TRUE decoy +chrUn_JTFH01000225v1_decoy 2533 TRUE decoy +chrUn_JTFH01000226v1_decoy 2522 TRUE decoy +chrUn_JTFH01000227v1_decoy 2522 TRUE decoy +chrUn_JTFH01000228v1_decoy 2515 TRUE decoy +chrUn_JTFH01000229v1_decoy 2513 TRUE decoy +chrUn_JTFH01000230v1_decoy 2507 TRUE decoy +chrUn_JTFH01000231v1_decoy 2504 TRUE decoy +chrUn_JTFH01000232v1_decoy 2497 TRUE decoy +chrUn_JTFH01000233v1_decoy 2471 TRUE decoy +chrUn_JTFH01000234v1_decoy 2465 TRUE decoy +chrUn_JTFH01000235v1_decoy 2464 TRUE decoy +chrUn_JTFH01000236v1_decoy 2459 TRUE decoy +chrUn_JTFH01000237v1_decoy 2457 TRUE decoy +chrUn_JTFH01000238v1_decoy 2450 TRUE decoy +chrUn_JTFH01000239v1_decoy 2435 TRUE decoy 
+chrUn_JTFH01000240v1_decoy 2434 TRUE decoy +chrUn_JTFH01000241v1_decoy 2432 TRUE decoy +chrUn_JTFH01000242v1_decoy 2427 TRUE decoy +chrUn_JTFH01000243v1_decoy 2421 TRUE decoy +chrUn_JTFH01000244v1_decoy 2420 TRUE decoy +chrUn_JTFH01000245v1_decoy 2414 TRUE decoy +chrUn_JTFH01000246v1_decoy 2404 TRUE decoy +chrUn_JTFH01000247v1_decoy 2403 TRUE decoy +chrUn_JTFH01000248v1_decoy 2402 TRUE decoy +chrUn_JTFH01000249v1_decoy 2397 TRUE decoy +chrUn_JTFH01000250v1_decoy 2395 TRUE decoy +chrUn_JTFH01000251v1_decoy 2394 TRUE decoy +chrUn_JTFH01000252v1_decoy 2388 TRUE decoy +chrUn_JTFH01000253v1_decoy 2382 TRUE decoy +chrUn_JTFH01000254v1_decoy 2381 TRUE decoy +chrUn_JTFH01000255v1_decoy 2380 TRUE decoy +chrUn_JTFH01000256v1_decoy 2368 TRUE decoy +chrUn_JTFH01000257v1_decoy 2364 TRUE decoy +chrUn_JTFH01000258v1_decoy 2363 TRUE decoy +chrUn_JTFH01000259v1_decoy 2348 TRUE decoy +chrUn_JTFH01000260v1_decoy 2339 TRUE decoy +chrUn_JTFH01000261v1_decoy 2335 TRUE decoy +chrUn_JTFH01000262v1_decoy 2332 TRUE decoy +chrUn_JTFH01000263v1_decoy 2331 TRUE decoy +chrUn_JTFH01000264v1_decoy 2330 TRUE decoy +chrUn_JTFH01000265v1_decoy 2323 TRUE decoy +chrUn_JTFH01000266v1_decoy 2319 TRUE decoy +chrUn_JTFH01000267v1_decoy 2314 TRUE decoy +chrUn_JTFH01000268v1_decoy 2308 TRUE decoy +chrUn_JTFH01000269v1_decoy 2306 TRUE decoy +chrUn_JTFH01000270v1_decoy 2296 TRUE decoy +chrUn_JTFH01000271v1_decoy 2287 TRUE decoy +chrUn_JTFH01000272v1_decoy 2279 TRUE decoy +chrUn_JTFH01000273v1_decoy 2276 TRUE decoy +chrUn_JTFH01000274v1_decoy 2273 TRUE decoy +chrUn_JTFH01000275v1_decoy 2262 TRUE decoy +chrUn_JTFH01000276v1_decoy 2254 TRUE decoy +chrUn_JTFH01000277v1_decoy 2252 TRUE decoy +chrUn_JTFH01000278v1_decoy 2245 TRUE decoy +chrUn_JTFH01000279v1_decoy 2239 TRUE decoy +chrUn_JTFH01000280v1_decoy 2223 TRUE decoy +chrUn_JTFH01000281v1_decoy 2220 TRUE decoy +chrUn_JTFH01000282v1_decoy 2218 TRUE decoy +chrUn_JTFH01000283v1_decoy 2215 TRUE decoy +chrUn_JTFH01000284v1_decoy 2213 TRUE decoy 
+chrUn_JTFH01000285v1_decoy 2203 TRUE decoy +chrUn_JTFH01000286v1_decoy 2200 TRUE decoy +chrUn_JTFH01000287v1_decoy 2197 TRUE decoy +chrUn_JTFH01000288v1_decoy 2194 TRUE decoy +chrUn_JTFH01000289v1_decoy 2183 TRUE decoy +chrUn_JTFH01000290v1_decoy 2179 TRUE decoy +chrUn_JTFH01000291v1_decoy 2177 TRUE decoy +chrUn_JTFH01000292v1_decoy 2177 TRUE decoy +chrUn_JTFH01000293v1_decoy 2177 TRUE decoy +chrUn_JTFH01000294v1_decoy 2168 TRUE decoy +chrUn_JTFH01000295v1_decoy 2160 TRUE decoy +chrUn_JTFH01000296v1_decoy 2155 TRUE decoy +chrUn_JTFH01000297v1_decoy 2144 TRUE decoy +chrUn_JTFH01000298v1_decoy 2143 TRUE decoy +chrUn_JTFH01000299v1_decoy 2136 TRUE decoy +chrUn_JTFH01000300v1_decoy 2134 TRUE decoy +chrUn_JTFH01000301v1_decoy 2129 TRUE decoy +chrUn_JTFH01000302v1_decoy 2128 TRUE decoy +chrUn_JTFH01000303v1_decoy 2125 TRUE decoy +chrUn_JTFH01000304v1_decoy 2125 TRUE decoy +chrUn_JTFH01000305v1_decoy 2122 TRUE decoy +chrUn_JTFH01000306v1_decoy 2111 TRUE decoy +chrUn_JTFH01000307v1_decoy 2106 TRUE decoy +chrUn_JTFH01000308v1_decoy 2094 TRUE decoy +chrUn_JTFH01000309v1_decoy 2093 TRUE decoy +chrUn_JTFH01000310v1_decoy 2088 TRUE decoy +chrUn_JTFH01000311v1_decoy 2086 TRUE decoy +chrUn_JTFH01000312v1_decoy 2086 TRUE decoy +chrUn_JTFH01000313v1_decoy 2084 TRUE decoy +chrUn_JTFH01000314v1_decoy 2080 TRUE decoy +chrUn_JTFH01000315v1_decoy 2079 TRUE decoy +chrUn_JTFH01000316v1_decoy 2076 TRUE decoy +chrUn_JTFH01000317v1_decoy 2071 TRUE decoy +chrUn_JTFH01000318v1_decoy 2066 TRUE decoy +chrUn_JTFH01000319v1_decoy 2061 TRUE decoy +chrUn_JTFH01000320v1_decoy 2055 TRUE decoy +chrUn_JTFH01000321v1_decoy 2053 TRUE decoy +chrUn_JTFH01000322v1_decoy 2040 TRUE decoy +chrUn_JTFH01000323v1_decoy 2036 TRUE decoy +chrUn_JTFH01000324v1_decoy 2035 TRUE decoy +chrUn_JTFH01000325v1_decoy 2034 TRUE decoy +chrUn_JTFH01000326v1_decoy 2032 TRUE decoy +chrUn_JTFH01000327v1_decoy 2029 TRUE decoy +chrUn_JTFH01000328v1_decoy 2025 TRUE decoy +chrUn_JTFH01000329v1_decoy 2021 TRUE decoy 
+chrUn_JTFH01000330v1_decoy 2018 TRUE decoy +chrUn_JTFH01000331v1_decoy 2015 TRUE decoy +chrUn_JTFH01000332v1_decoy 2009 TRUE decoy +chrUn_JTFH01000333v1_decoy 2007 TRUE decoy +chrUn_JTFH01000334v1_decoy 2005 TRUE decoy +chrUn_JTFH01000335v1_decoy 2003 TRUE decoy +chrUn_JTFH01000336v1_decoy 2001 TRUE decoy +chrUn_JTFH01000337v1_decoy 2001 TRUE decoy +chrUn_JTFH01000338v1_decoy 2000 TRUE decoy +chrUn_JTFH01000339v1_decoy 1996 TRUE decoy +chrUn_JTFH01000340v1_decoy 1992 TRUE decoy +chrUn_JTFH01000341v1_decoy 1985 TRUE decoy +chrUn_JTFH01000342v1_decoy 1981 TRUE decoy +chrUn_JTFH01000343v1_decoy 1977 TRUE decoy +chrUn_JTFH01000344v1_decoy 1971 TRUE decoy +chrUn_JTFH01000345v1_decoy 1968 TRUE decoy +chrUn_JTFH01000346v1_decoy 1962 TRUE decoy +chrUn_JTFH01000347v1_decoy 1961 TRUE decoy +chrUn_JTFH01000348v1_decoy 1960 TRUE decoy +chrUn_JTFH01000349v1_decoy 1960 TRUE decoy +chrUn_JTFH01000350v1_decoy 1954 TRUE decoy +chrUn_JTFH01000351v1_decoy 1952 TRUE decoy +chrUn_JTFH01000352v1_decoy 1947 TRUE decoy +chrUn_JTFH01000353v1_decoy 1944 TRUE decoy +chrUn_JTFH01000354v1_decoy 1943 TRUE decoy +chrUn_JTFH01000355v1_decoy 1941 TRUE decoy +chrUn_JTFH01000356v1_decoy 1937 TRUE decoy +chrUn_JTFH01000357v1_decoy 1934 TRUE decoy +chrUn_JTFH01000358v1_decoy 1929 TRUE decoy +chrUn_JTFH01000359v1_decoy 1924 TRUE decoy +chrUn_JTFH01000360v1_decoy 1924 TRUE decoy +chrUn_JTFH01000361v1_decoy 1923 TRUE decoy +chrUn_JTFH01000362v1_decoy 1921 TRUE decoy +chrUn_JTFH01000363v1_decoy 1918 TRUE decoy +chrUn_JTFH01000364v1_decoy 1915 TRUE decoy +chrUn_JTFH01000365v1_decoy 1915 TRUE decoy +chrUn_JTFH01000366v1_decoy 1914 TRUE decoy +chrUn_JTFH01000367v1_decoy 1912 TRUE decoy +chrUn_JTFH01000368v1_decoy 1910 TRUE decoy +chrUn_JTFH01000369v1_decoy 1907 TRUE decoy +chrUn_JTFH01000370v1_decoy 1904 TRUE decoy +chrUn_JTFH01000371v1_decoy 1897 TRUE decoy +chrUn_JTFH01000372v1_decoy 1891 TRUE decoy +chrUn_JTFH01000373v1_decoy 1890 TRUE decoy +chrUn_JTFH01000374v1_decoy 1888 TRUE decoy 
+chrUn_JTFH01000375v1_decoy 1888 TRUE decoy +chrUn_JTFH01000376v1_decoy 1885 TRUE decoy +chrUn_JTFH01000377v1_decoy 1881 TRUE decoy +chrUn_JTFH01000378v1_decoy 1879 TRUE decoy +chrUn_JTFH01000379v1_decoy 1877 TRUE decoy +chrUn_JTFH01000380v1_decoy 1876 TRUE decoy +chrUn_JTFH01000381v1_decoy 1876 TRUE decoy +chrUn_JTFH01000382v1_decoy 1874 TRUE decoy +chrUn_JTFH01000383v1_decoy 1872 TRUE decoy +chrUn_JTFH01000384v1_decoy 1869 TRUE decoy +chrUn_JTFH01000385v1_decoy 1866 TRUE decoy +chrUn_JTFH01000386v1_decoy 1865 TRUE decoy +chrUn_JTFH01000387v1_decoy 1865 TRUE decoy +chrUn_JTFH01000388v1_decoy 1865 TRUE decoy +chrUn_JTFH01000389v1_decoy 1862 TRUE decoy +chrUn_JTFH01000390v1_decoy 1862 TRUE decoy +chrUn_JTFH01000391v1_decoy 1859 TRUE decoy +chrUn_JTFH01000392v1_decoy 1856 TRUE decoy +chrUn_JTFH01000393v1_decoy 1856 TRUE decoy +chrUn_JTFH01000394v1_decoy 1854 TRUE decoy +chrUn_JTFH01000395v1_decoy 1850 TRUE decoy +chrUn_JTFH01000396v1_decoy 1849 TRUE decoy +chrUn_JTFH01000397v1_decoy 1849 TRUE decoy +chrUn_JTFH01000398v1_decoy 1847 TRUE decoy +chrUn_JTFH01000399v1_decoy 1839 TRUE decoy +chrUn_JTFH01000400v1_decoy 1834 TRUE decoy +chrUn_JTFH01000401v1_decoy 1821 TRUE decoy +chrUn_JTFH01000402v1_decoy 1815 TRUE decoy +chrUn_JTFH01000403v1_decoy 1811 TRUE decoy +chrUn_JTFH01000404v1_decoy 1808 TRUE decoy +chrUn_JTFH01000405v1_decoy 1808 TRUE decoy +chrUn_JTFH01000406v1_decoy 1807 TRUE decoy +chrUn_JTFH01000407v1_decoy 1807 TRUE decoy +chrUn_JTFH01000408v1_decoy 1802 TRUE decoy +chrUn_JTFH01000409v1_decoy 1801 TRUE decoy +chrUn_JTFH01000410v1_decoy 1800 TRUE decoy +chrUn_JTFH01000411v1_decoy 1795 TRUE decoy +chrUn_JTFH01000412v1_decoy 1794 TRUE decoy +chrUn_JTFH01000413v1_decoy 1792 TRUE decoy +chrUn_JTFH01000414v1_decoy 1788 TRUE decoy +chrUn_JTFH01000415v1_decoy 1786 TRUE decoy +chrUn_JTFH01000416v1_decoy 1782 TRUE decoy +chrUn_JTFH01000417v1_decoy 1782 TRUE decoy +chrUn_JTFH01000418v1_decoy 1781 TRUE decoy +chrUn_JTFH01000419v1_decoy 1781 TRUE decoy 
+chrUn_JTFH01000420v1_decoy 1779 TRUE decoy +chrUn_JTFH01000421v1_decoy 1777 TRUE decoy +chrUn_JTFH01000422v1_decoy 1764 TRUE decoy +chrUn_JTFH01000423v1_decoy 1762 TRUE decoy +chrUn_JTFH01000424v1_decoy 1755 TRUE decoy +chrUn_JTFH01000425v1_decoy 1749 TRUE decoy +chrUn_JTFH01000426v1_decoy 1747 TRUE decoy +chrUn_JTFH01000427v1_decoy 1746 TRUE decoy +chrUn_JTFH01000428v1_decoy 1745 TRUE decoy +chrUn_JTFH01000429v1_decoy 1744 TRUE decoy +chrUn_JTFH01000430v1_decoy 1742 TRUE decoy +chrUn_JTFH01000431v1_decoy 1740 TRUE decoy +chrUn_JTFH01000432v1_decoy 1740 TRUE decoy +chrUn_JTFH01000433v1_decoy 1736 TRUE decoy +chrUn_JTFH01000434v1_decoy 1735 TRUE decoy +chrUn_JTFH01000435v1_decoy 1732 TRUE decoy +chrUn_JTFH01000436v1_decoy 1732 TRUE decoy +chrUn_JTFH01000437v1_decoy 1730 TRUE decoy +chrUn_JTFH01000438v1_decoy 1727 TRUE decoy +chrUn_JTFH01000439v1_decoy 1722 TRUE decoy +chrUn_JTFH01000440v1_decoy 1718 TRUE decoy +chrUn_JTFH01000441v1_decoy 1716 TRUE decoy +chrUn_JTFH01000442v1_decoy 1710 TRUE decoy +chrUn_JTFH01000443v1_decoy 1708 TRUE decoy +chrUn_JTFH01000444v1_decoy 1707 TRUE decoy +chrUn_JTFH01000445v1_decoy 1706 TRUE decoy +chrUn_JTFH01000446v1_decoy 1705 TRUE decoy +chrUn_JTFH01000447v1_decoy 1704 TRUE decoy +chrUn_JTFH01000448v1_decoy 1699 TRUE decoy +chrUn_JTFH01000449v1_decoy 1698 TRUE decoy +chrUn_JTFH01000450v1_decoy 1697 TRUE decoy +chrUn_JTFH01000451v1_decoy 1697 TRUE decoy +chrUn_JTFH01000452v1_decoy 1695 TRUE decoy +chrUn_JTFH01000453v1_decoy 1695 TRUE decoy +chrUn_JTFH01000454v1_decoy 1693 TRUE decoy +chrUn_JTFH01000455v1_decoy 1687 TRUE decoy +chrUn_JTFH01000456v1_decoy 1686 TRUE decoy +chrUn_JTFH01000457v1_decoy 1680 TRUE decoy +chrUn_JTFH01000458v1_decoy 1679 TRUE decoy +chrUn_JTFH01000459v1_decoy 1679 TRUE decoy +chrUn_JTFH01000460v1_decoy 1678 TRUE decoy +chrUn_JTFH01000461v1_decoy 1674 TRUE decoy +chrUn_JTFH01000462v1_decoy 1674 TRUE decoy +chrUn_JTFH01000463v1_decoy 1671 TRUE decoy +chrUn_JTFH01000464v1_decoy 1669 TRUE decoy 
+chrUn_JTFH01000465v1_decoy 1665 TRUE decoy +chrUn_JTFH01000466v1_decoy 1663 TRUE decoy +chrUn_JTFH01000467v1_decoy 1657 TRUE decoy +chrUn_JTFH01000468v1_decoy 1653 TRUE decoy +chrUn_JTFH01000469v1_decoy 1652 TRUE decoy +chrUn_JTFH01000470v1_decoy 1650 TRUE decoy +chrUn_JTFH01000471v1_decoy 1649 TRUE decoy +chrUn_JTFH01000472v1_decoy 1649 TRUE decoy +chrUn_JTFH01000473v1_decoy 1640 TRUE decoy +chrUn_JTFH01000474v1_decoy 1638 TRUE decoy +chrUn_JTFH01000475v1_decoy 1636 TRUE decoy +chrUn_JTFH01000476v1_decoy 1632 TRUE decoy +chrUn_JTFH01000477v1_decoy 1631 TRUE decoy +chrUn_JTFH01000478v1_decoy 1630 TRUE decoy +chrUn_JTFH01000479v1_decoy 1627 TRUE decoy +chrUn_JTFH01000480v1_decoy 1624 TRUE decoy +chrUn_JTFH01000481v1_decoy 1617 TRUE decoy +chrUn_JTFH01000482v1_decoy 1616 TRUE decoy +chrUn_JTFH01000483v1_decoy 1615 TRUE decoy +chrUn_JTFH01000484v1_decoy 1611 TRUE decoy +chrUn_JTFH01000485v1_decoy 1611 TRUE decoy +chrUn_JTFH01000486v1_decoy 1606 TRUE decoy +chrUn_JTFH01000487v1_decoy 1605 TRUE decoy +chrUn_JTFH01000488v1_decoy 1605 TRUE decoy +chrUn_JTFH01000489v1_decoy 1600 TRUE decoy +chrUn_JTFH01000490v1_decoy 1598 TRUE decoy +chrUn_JTFH01000491v1_decoy 1598 TRUE decoy +chrUn_JTFH01000492v1_decoy 1597 TRUE decoy +chrUn_JTFH01000493v1_decoy 1596 TRUE decoy +chrUn_JTFH01000494v1_decoy 1595 TRUE decoy +chrUn_JTFH01000495v1_decoy 1592 TRUE decoy +chrUn_JTFH01000496v1_decoy 1589 TRUE decoy +chrUn_JTFH01000497v1_decoy 1585 TRUE decoy +chrUn_JTFH01000498v1_decoy 1579 TRUE decoy +chrUn_JTFH01000499v1_decoy 1578 TRUE decoy +chrUn_JTFH01000500v1_decoy 1577 TRUE decoy +chrUn_JTFH01000501v1_decoy 1577 TRUE decoy +chrUn_JTFH01000502v1_decoy 1577 TRUE decoy +chrUn_JTFH01000503v1_decoy 1576 TRUE decoy +chrUn_JTFH01000504v1_decoy 1575 TRUE decoy +chrUn_JTFH01000505v1_decoy 1574 TRUE decoy +chrUn_JTFH01000506v1_decoy 1572 TRUE decoy +chrUn_JTFH01000507v1_decoy 1571 TRUE decoy +chrUn_JTFH01000508v1_decoy 1563 TRUE decoy +chrUn_JTFH01000509v1_decoy 1561 TRUE decoy 
+chrUn_JTFH01000510v1_decoy 1561 TRUE decoy +chrUn_JTFH01000511v1_decoy 1560 TRUE decoy +chrUn_JTFH01000512v1_decoy 1560 TRUE decoy +chrUn_JTFH01000513v1_decoy 1554 TRUE decoy +chrUn_JTFH01000514v1_decoy 1552 TRUE decoy +chrUn_JTFH01000515v1_decoy 1548 TRUE decoy +chrUn_JTFH01000516v1_decoy 1546 TRUE decoy +chrUn_JTFH01000517v1_decoy 1541 TRUE decoy +chrUn_JTFH01000518v1_decoy 1536 TRUE decoy +chrUn_JTFH01000519v1_decoy 1533 TRUE decoy +chrUn_JTFH01000520v1_decoy 1532 TRUE decoy +chrUn_JTFH01000521v1_decoy 1532 TRUE decoy +chrUn_JTFH01000522v1_decoy 1530 TRUE decoy +chrUn_JTFH01000523v1_decoy 1527 TRUE decoy +chrUn_JTFH01000524v1_decoy 1526 TRUE decoy +chrUn_JTFH01000525v1_decoy 1524 TRUE decoy +chrUn_JTFH01000526v1_decoy 1523 TRUE decoy +chrUn_JTFH01000527v1_decoy 1523 TRUE decoy +chrUn_JTFH01000528v1_decoy 1522 TRUE decoy +chrUn_JTFH01000529v1_decoy 1522 TRUE decoy +chrUn_JTFH01000530v1_decoy 1519 TRUE decoy +chrUn_JTFH01000531v1_decoy 1513 TRUE decoy +chrUn_JTFH01000532v1_decoy 1508 TRUE decoy +chrUn_JTFH01000533v1_decoy 1508 TRUE decoy +chrUn_JTFH01000534v1_decoy 1505 TRUE decoy +chrUn_JTFH01000535v1_decoy 1503 TRUE decoy +chrUn_JTFH01000536v1_decoy 1496 TRUE decoy +chrUn_JTFH01000537v1_decoy 1491 TRUE decoy +chrUn_JTFH01000538v1_decoy 1490 TRUE decoy +chrUn_JTFH01000539v1_decoy 1490 TRUE decoy +chrUn_JTFH01000540v1_decoy 1487 TRUE decoy +chrUn_JTFH01000541v1_decoy 1486 TRUE decoy +chrUn_JTFH01000542v1_decoy 1485 TRUE decoy +chrUn_JTFH01000543v1_decoy 1484 TRUE decoy +chrUn_JTFH01000544v1_decoy 1483 TRUE decoy +chrUn_JTFH01000545v1_decoy 1479 TRUE decoy +chrUn_JTFH01000546v1_decoy 1479 TRUE decoy +chrUn_JTFH01000547v1_decoy 1476 TRUE decoy +chrUn_JTFH01000548v1_decoy 1475 TRUE decoy +chrUn_JTFH01000549v1_decoy 1472 TRUE decoy +chrUn_JTFH01000550v1_decoy 1469 TRUE decoy +chrUn_JTFH01000551v1_decoy 1468 TRUE decoy +chrUn_JTFH01000552v1_decoy 1467 TRUE decoy +chrUn_JTFH01000553v1_decoy 1465 TRUE decoy +chrUn_JTFH01000554v1_decoy 1464 TRUE decoy 
+chrUn_JTFH01000555v1_decoy 1463 TRUE decoy +chrUn_JTFH01000556v1_decoy 1463 TRUE decoy +chrUn_JTFH01000557v1_decoy 1459 TRUE decoy +chrUn_JTFH01000558v1_decoy 1459 TRUE decoy +chrUn_JTFH01000559v1_decoy 1458 TRUE decoy +chrUn_JTFH01000560v1_decoy 1458 TRUE decoy +chrUn_JTFH01000561v1_decoy 1454 TRUE decoy +chrUn_JTFH01000562v1_decoy 1449 TRUE decoy +chrUn_JTFH01000563v1_decoy 1449 TRUE decoy +chrUn_JTFH01000564v1_decoy 1448 TRUE decoy +chrUn_JTFH01000565v1_decoy 1446 TRUE decoy +chrUn_JTFH01000566v1_decoy 1442 TRUE decoy +chrUn_JTFH01000567v1_decoy 1441 TRUE decoy +chrUn_JTFH01000568v1_decoy 1440 TRUE decoy +chrUn_JTFH01000569v1_decoy 1439 TRUE decoy +chrUn_JTFH01000570v1_decoy 1437 TRUE decoy +chrUn_JTFH01000571v1_decoy 1436 TRUE decoy +chrUn_JTFH01000572v1_decoy 1429 TRUE decoy +chrUn_JTFH01000573v1_decoy 1429 TRUE decoy +chrUn_JTFH01000574v1_decoy 1427 TRUE decoy +chrUn_JTFH01000575v1_decoy 1426 TRUE decoy +chrUn_JTFH01000576v1_decoy 1425 TRUE decoy +chrUn_JTFH01000577v1_decoy 1424 TRUE decoy +chrUn_JTFH01000578v1_decoy 1424 TRUE decoy +chrUn_JTFH01000579v1_decoy 1423 TRUE decoy +chrUn_JTFH01000580v1_decoy 1423 TRUE decoy +chrUn_JTFH01000581v1_decoy 1423 TRUE decoy +chrUn_JTFH01000582v1_decoy 1414 TRUE decoy +chrUn_JTFH01000583v1_decoy 1414 TRUE decoy +chrUn_JTFH01000584v1_decoy 1413 TRUE decoy +chrUn_JTFH01000585v1_decoy 1413 TRUE decoy +chrUn_JTFH01000586v1_decoy 1410 TRUE decoy +chrUn_JTFH01000587v1_decoy 1409 TRUE decoy +chrUn_JTFH01000588v1_decoy 1409 TRUE decoy +chrUn_JTFH01000589v1_decoy 1406 TRUE decoy +chrUn_JTFH01000590v1_decoy 1405 TRUE decoy +chrUn_JTFH01000591v1_decoy 1405 TRUE decoy +chrUn_JTFH01000592v1_decoy 1404 TRUE decoy +chrUn_JTFH01000593v1_decoy 1404 TRUE decoy +chrUn_JTFH01000594v1_decoy 1402 TRUE decoy +chrUn_JTFH01000595v1_decoy 1402 TRUE decoy +chrUn_JTFH01000596v1_decoy 1402 TRUE decoy +chrUn_JTFH01000597v1_decoy 1402 TRUE decoy +chrUn_JTFH01000598v1_decoy 1400 TRUE decoy +chrUn_JTFH01000599v1_decoy 1398 TRUE decoy 
+chrUn_JTFH01000600v1_decoy 1396 TRUE decoy +chrUn_JTFH01000601v1_decoy 1395 TRUE decoy +chrUn_JTFH01000602v1_decoy 1394 TRUE decoy +chrUn_JTFH01000603v1_decoy 1393 TRUE decoy +chrUn_JTFH01000604v1_decoy 1391 TRUE decoy +chrUn_JTFH01000605v1_decoy 1389 TRUE decoy +chrUn_JTFH01000606v1_decoy 1389 TRUE decoy +chrUn_JTFH01000607v1_decoy 1388 TRUE decoy +chrUn_JTFH01000608v1_decoy 1387 TRUE decoy +chrUn_JTFH01000609v1_decoy 1384 TRUE decoy +chrUn_JTFH01000610v1_decoy 1381 TRUE decoy +chrUn_JTFH01000611v1_decoy 1381 TRUE decoy +chrUn_JTFH01000612v1_decoy 1379 TRUE decoy +chrUn_JTFH01000613v1_decoy 1377 TRUE decoy +chrUn_JTFH01000614v1_decoy 1376 TRUE decoy +chrUn_JTFH01000615v1_decoy 1376 TRUE decoy +chrUn_JTFH01000616v1_decoy 1375 TRUE decoy +chrUn_JTFH01000617v1_decoy 1374 TRUE decoy +chrUn_JTFH01000618v1_decoy 1372 TRUE decoy +chrUn_JTFH01000619v1_decoy 1371 TRUE decoy +chrUn_JTFH01000620v1_decoy 1370 TRUE decoy +chrUn_JTFH01000621v1_decoy 1370 TRUE decoy +chrUn_JTFH01000622v1_decoy 1366 TRUE decoy +chrUn_JTFH01000623v1_decoy 1363 TRUE decoy +chrUn_JTFH01000624v1_decoy 1360 TRUE decoy +chrUn_JTFH01000625v1_decoy 1356 TRUE decoy +chrUn_JTFH01000626v1_decoy 1355 TRUE decoy +chrUn_JTFH01000627v1_decoy 1355 TRUE decoy +chrUn_JTFH01000628v1_decoy 1352 TRUE decoy +chrUn_JTFH01000629v1_decoy 1345 TRUE decoy +chrUn_JTFH01000630v1_decoy 1344 TRUE decoy +chrUn_JTFH01000631v1_decoy 1344 TRUE decoy +chrUn_JTFH01000632v1_decoy 1342 TRUE decoy +chrUn_JTFH01000633v1_decoy 1342 TRUE decoy +chrUn_JTFH01000634v1_decoy 1336 TRUE decoy +chrUn_JTFH01000635v1_decoy 1334 TRUE decoy +chrUn_JTFH01000636v1_decoy 1334 TRUE decoy +chrUn_JTFH01000637v1_decoy 1333 TRUE decoy +chrUn_JTFH01000638v1_decoy 1332 TRUE decoy +chrUn_JTFH01000639v1_decoy 1328 TRUE decoy +chrUn_JTFH01000640v1_decoy 1328 TRUE decoy +chrUn_JTFH01000641v1_decoy 1328 TRUE decoy +chrUn_JTFH01000642v1_decoy 1327 TRUE decoy +chrUn_JTFH01000643v1_decoy 1325 TRUE decoy +chrUn_JTFH01000644v1_decoy 1322 TRUE decoy 
+chrUn_JTFH01000645v1_decoy 1320 TRUE decoy +chrUn_JTFH01000646v1_decoy 1319 TRUE decoy +chrUn_JTFH01000647v1_decoy 1318 TRUE decoy +chrUn_JTFH01000648v1_decoy 1315 TRUE decoy +chrUn_JTFH01000649v1_decoy 1314 TRUE decoy +chrUn_JTFH01000650v1_decoy 1313 TRUE decoy +chrUn_JTFH01000651v1_decoy 1313 TRUE decoy +chrUn_JTFH01000652v1_decoy 1312 TRUE decoy +chrUn_JTFH01000653v1_decoy 1310 TRUE decoy +chrUn_JTFH01000654v1_decoy 1309 TRUE decoy +chrUn_JTFH01000655v1_decoy 1309 TRUE decoy +chrUn_JTFH01000656v1_decoy 1307 TRUE decoy +chrUn_JTFH01000657v1_decoy 1307 TRUE decoy +chrUn_JTFH01000658v1_decoy 1305 TRUE decoy +chrUn_JTFH01000659v1_decoy 1304 TRUE decoy +chrUn_JTFH01000660v1_decoy 1303 TRUE decoy +chrUn_JTFH01000661v1_decoy 1302 TRUE decoy +chrUn_JTFH01000662v1_decoy 1302 TRUE decoy +chrUn_JTFH01000663v1_decoy 1301 TRUE decoy +chrUn_JTFH01000664v1_decoy 1301 TRUE decoy +chrUn_JTFH01000665v1_decoy 1300 TRUE decoy +chrUn_JTFH01000666v1_decoy 1299 TRUE decoy +chrUn_JTFH01000667v1_decoy 1297 TRUE decoy +chrUn_JTFH01000668v1_decoy 1295 TRUE decoy +chrUn_JTFH01000669v1_decoy 1294 TRUE decoy +chrUn_JTFH01000670v1_decoy 1293 TRUE decoy +chrUn_JTFH01000671v1_decoy 1291 TRUE decoy +chrUn_JTFH01000672v1_decoy 1291 TRUE decoy +chrUn_JTFH01000673v1_decoy 1289 TRUE decoy +chrUn_JTFH01000674v1_decoy 1288 TRUE decoy +chrUn_JTFH01000675v1_decoy 1288 TRUE decoy +chrUn_JTFH01000676v1_decoy 1287 TRUE decoy +chrUn_JTFH01000677v1_decoy 1287 TRUE decoy +chrUn_JTFH01000678v1_decoy 1287 TRUE decoy +chrUn_JTFH01000679v1_decoy 1286 TRUE decoy +chrUn_JTFH01000680v1_decoy 1283 TRUE decoy +chrUn_JTFH01000681v1_decoy 1281 TRUE decoy +chrUn_JTFH01000682v1_decoy 1277 TRUE decoy +chrUn_JTFH01000683v1_decoy 1274 TRUE decoy +chrUn_JTFH01000684v1_decoy 1270 TRUE decoy +chrUn_JTFH01000685v1_decoy 1267 TRUE decoy +chrUn_JTFH01000686v1_decoy 1266 TRUE decoy +chrUn_JTFH01000687v1_decoy 1260 TRUE decoy +chrUn_JTFH01000688v1_decoy 1259 TRUE decoy +chrUn_JTFH01000689v1_decoy 1258 TRUE decoy 
+chrUn_JTFH01000690v1_decoy 1258 TRUE decoy +chrUn_JTFH01000691v1_decoy 1258 TRUE decoy +chrUn_JTFH01000692v1_decoy 1256 TRUE decoy +chrUn_JTFH01000693v1_decoy 1255 TRUE decoy +chrUn_JTFH01000694v1_decoy 1254 TRUE decoy +chrUn_JTFH01000695v1_decoy 1254 TRUE decoy +chrUn_JTFH01000696v1_decoy 1253 TRUE decoy +chrUn_JTFH01000697v1_decoy 1250 TRUE decoy +chrUn_JTFH01000698v1_decoy 1249 TRUE decoy +chrUn_JTFH01000699v1_decoy 1248 TRUE decoy +chrUn_JTFH01000700v1_decoy 1248 TRUE decoy +chrUn_JTFH01000701v1_decoy 1247 TRUE decoy +chrUn_JTFH01000702v1_decoy 1242 TRUE decoy +chrUn_JTFH01000703v1_decoy 1242 TRUE decoy +chrUn_JTFH01000704v1_decoy 1241 TRUE decoy +chrUn_JTFH01000705v1_decoy 1241 TRUE decoy +chrUn_JTFH01000706v1_decoy 1241 TRUE decoy +chrUn_JTFH01000707v1_decoy 1239 TRUE decoy +chrUn_JTFH01000708v1_decoy 1238 TRUE decoy +chrUn_JTFH01000709v1_decoy 1237 TRUE decoy +chrUn_JTFH01000710v1_decoy 1236 TRUE decoy +chrUn_JTFH01000711v1_decoy 1235 TRUE decoy +chrUn_JTFH01000712v1_decoy 1234 TRUE decoy +chrUn_JTFH01000713v1_decoy 1234 TRUE decoy +chrUn_JTFH01000714v1_decoy 1234 TRUE decoy +chrUn_JTFH01000715v1_decoy 1233 TRUE decoy +chrUn_JTFH01000716v1_decoy 1232 TRUE decoy +chrUn_JTFH01000717v1_decoy 1232 TRUE decoy +chrUn_JTFH01000718v1_decoy 1231 TRUE decoy +chrUn_JTFH01000719v1_decoy 1230 TRUE decoy +chrUn_JTFH01000720v1_decoy 1228 TRUE decoy +chrUn_JTFH01000721v1_decoy 1227 TRUE decoy +chrUn_JTFH01000722v1_decoy 1227 TRUE decoy +chrUn_JTFH01000723v1_decoy 1226 TRUE decoy +chrUn_JTFH01000724v1_decoy 1224 TRUE decoy +chrUn_JTFH01000725v1_decoy 1224 TRUE decoy +chrUn_JTFH01000726v1_decoy 1220 TRUE decoy +chrUn_JTFH01000727v1_decoy 1220 TRUE decoy +chrUn_JTFH01000728v1_decoy 1219 TRUE decoy +chrUn_JTFH01000729v1_decoy 1217 TRUE decoy +chrUn_JTFH01000730v1_decoy 1216 TRUE decoy +chrUn_JTFH01000731v1_decoy 1215 TRUE decoy +chrUn_JTFH01000732v1_decoy 1214 TRUE decoy +chrUn_JTFH01000733v1_decoy 1214 TRUE decoy +chrUn_JTFH01000734v1_decoy 1214 TRUE decoy 
+chrUn_JTFH01000735v1_decoy 1213 TRUE decoy +chrUn_JTFH01000736v1_decoy 1212 TRUE decoy +chrUn_JTFH01000737v1_decoy 1209 TRUE decoy +chrUn_JTFH01000738v1_decoy 1208 TRUE decoy +chrUn_JTFH01000739v1_decoy 1207 TRUE decoy +chrUn_JTFH01000740v1_decoy 1207 TRUE decoy +chrUn_JTFH01000741v1_decoy 1207 TRUE decoy +chrUn_JTFH01000742v1_decoy 1206 TRUE decoy +chrUn_JTFH01000743v1_decoy 1206 TRUE decoy +chrUn_JTFH01000744v1_decoy 1205 TRUE decoy +chrUn_JTFH01000745v1_decoy 1205 TRUE decoy +chrUn_JTFH01000746v1_decoy 1204 TRUE decoy +chrUn_JTFH01000747v1_decoy 1204 TRUE decoy +chrUn_JTFH01000748v1_decoy 1204 TRUE decoy +chrUn_JTFH01000749v1_decoy 1203 TRUE decoy +chrUn_JTFH01000750v1_decoy 1201 TRUE decoy +chrUn_JTFH01000751v1_decoy 1201 TRUE decoy +chrUn_JTFH01000752v1_decoy 1200 TRUE decoy +chrUn_JTFH01000753v1_decoy 1200 TRUE decoy +chrUn_JTFH01000754v1_decoy 1199 TRUE decoy +chrUn_JTFH01000755v1_decoy 1198 TRUE decoy +chrUn_JTFH01000756v1_decoy 1197 TRUE decoy +chrUn_JTFH01000757v1_decoy 1196 TRUE decoy +chrUn_JTFH01000758v1_decoy 1195 TRUE decoy +chrUn_JTFH01000759v1_decoy 1194 TRUE decoy +chrUn_JTFH01000760v1_decoy 1194 TRUE decoy +chrUn_JTFH01000761v1_decoy 1191 TRUE decoy +chrUn_JTFH01000762v1_decoy 1189 TRUE decoy +chrUn_JTFH01000763v1_decoy 1186 TRUE decoy +chrUn_JTFH01000764v1_decoy 1186 TRUE decoy +chrUn_JTFH01000765v1_decoy 1184 TRUE decoy +chrUn_JTFH01000766v1_decoy 1183 TRUE decoy +chrUn_JTFH01000767v1_decoy 1183 TRUE decoy +chrUn_JTFH01000768v1_decoy 1182 TRUE decoy +chrUn_JTFH01000769v1_decoy 1181 TRUE decoy +chrUn_JTFH01000770v1_decoy 1181 TRUE decoy +chrUn_JTFH01000771v1_decoy 1181 TRUE decoy +chrUn_JTFH01000772v1_decoy 1181 TRUE decoy +chrUn_JTFH01000773v1_decoy 1179 TRUE decoy +chrUn_JTFH01000774v1_decoy 1178 TRUE decoy +chrUn_JTFH01000775v1_decoy 1178 TRUE decoy +chrUn_JTFH01000776v1_decoy 1177 TRUE decoy +chrUn_JTFH01000777v1_decoy 1177 TRUE decoy +chrUn_JTFH01000778v1_decoy 1171 TRUE decoy +chrUn_JTFH01000779v1_decoy 1171 TRUE decoy 
+chrUn_JTFH01000780v1_decoy 1171 TRUE decoy +chrUn_JTFH01000781v1_decoy 1170 TRUE decoy +chrUn_JTFH01000782v1_decoy 1170 TRUE decoy +chrUn_JTFH01000783v1_decoy 1167 TRUE decoy +chrUn_JTFH01000784v1_decoy 1167 TRUE decoy +chrUn_JTFH01000785v1_decoy 1167 TRUE decoy +chrUn_JTFH01000786v1_decoy 1165 TRUE decoy +chrUn_JTFH01000787v1_decoy 1165 TRUE decoy +chrUn_JTFH01000788v1_decoy 1162 TRUE decoy +chrUn_JTFH01000789v1_decoy 1157 TRUE decoy +chrUn_JTFH01000790v1_decoy 1156 TRUE decoy +chrUn_JTFH01000791v1_decoy 1156 TRUE decoy +chrUn_JTFH01000792v1_decoy 1154 TRUE decoy +chrUn_JTFH01000793v1_decoy 1154 TRUE decoy +chrUn_JTFH01000794v1_decoy 1151 TRUE decoy +chrUn_JTFH01000795v1_decoy 1151 TRUE decoy +chrUn_JTFH01000796v1_decoy 1150 TRUE decoy +chrUn_JTFH01000797v1_decoy 1150 TRUE decoy +chrUn_JTFH01000798v1_decoy 1147 TRUE decoy +chrUn_JTFH01000799v1_decoy 1147 TRUE decoy +chrUn_JTFH01000800v1_decoy 1146 TRUE decoy +chrUn_JTFH01000801v1_decoy 1144 TRUE decoy +chrUn_JTFH01000802v1_decoy 1144 TRUE decoy +chrUn_JTFH01000803v1_decoy 1143 TRUE decoy +chrUn_JTFH01000804v1_decoy 1142 TRUE decoy +chrUn_JTFH01000805v1_decoy 1141 TRUE decoy +chrUn_JTFH01000806v1_decoy 1141 TRUE decoy +chrUn_JTFH01000807v1_decoy 1140 TRUE decoy +chrUn_JTFH01000808v1_decoy 1138 TRUE decoy +chrUn_JTFH01000809v1_decoy 1134 TRUE decoy +chrUn_JTFH01000810v1_decoy 1134 TRUE decoy +chrUn_JTFH01000811v1_decoy 1132 TRUE decoy +chrUn_JTFH01000812v1_decoy 1131 TRUE decoy +chrUn_JTFH01000813v1_decoy 1131 TRUE decoy +chrUn_JTFH01000814v1_decoy 1130 TRUE decoy +chrUn_JTFH01000815v1_decoy 1127 TRUE decoy +chrUn_JTFH01000816v1_decoy 1126 TRUE decoy +chrUn_JTFH01000817v1_decoy 1124 TRUE decoy +chrUn_JTFH01000818v1_decoy 1122 TRUE decoy +chrUn_JTFH01000819v1_decoy 1122 TRUE decoy +chrUn_JTFH01000820v1_decoy 1121 TRUE decoy +chrUn_JTFH01000821v1_decoy 1119 TRUE decoy +chrUn_JTFH01000822v1_decoy 1119 TRUE decoy +chrUn_JTFH01000823v1_decoy 1119 TRUE decoy +chrUn_JTFH01000824v1_decoy 1119 TRUE decoy 
+chrUn_JTFH01000825v1_decoy 1118 TRUE decoy +chrUn_JTFH01000826v1_decoy 1116 TRUE decoy +chrUn_JTFH01000827v1_decoy 1116 TRUE decoy +chrUn_JTFH01000828v1_decoy 1115 TRUE decoy +chrUn_JTFH01000829v1_decoy 1115 TRUE decoy +chrUn_JTFH01000830v1_decoy 1115 TRUE decoy +chrUn_JTFH01000831v1_decoy 1114 TRUE decoy +chrUn_JTFH01000832v1_decoy 1113 TRUE decoy +chrUn_JTFH01000833v1_decoy 1113 TRUE decoy +chrUn_JTFH01000834v1_decoy 1110 TRUE decoy +chrUn_JTFH01000835v1_decoy 1110 TRUE decoy +chrUn_JTFH01000836v1_decoy 1109 TRUE decoy +chrUn_JTFH01000837v1_decoy 1108 TRUE decoy +chrUn_JTFH01000838v1_decoy 1107 TRUE decoy +chrUn_JTFH01000839v1_decoy 1107 TRUE decoy +chrUn_JTFH01000840v1_decoy 1107 TRUE decoy +chrUn_JTFH01000841v1_decoy 1107 TRUE decoy +chrUn_JTFH01000842v1_decoy 1106 TRUE decoy +chrUn_JTFH01000843v1_decoy 1103 TRUE decoy +chrUn_JTFH01000844v1_decoy 1103 TRUE decoy +chrUn_JTFH01000845v1_decoy 1103 TRUE decoy +chrUn_JTFH01000846v1_decoy 1100 TRUE decoy +chrUn_JTFH01000847v1_decoy 1099 TRUE decoy +chrUn_JTFH01000848v1_decoy 1098 TRUE decoy +chrUn_JTFH01000849v1_decoy 1097 TRUE decoy +chrUn_JTFH01000850v1_decoy 1096 TRUE decoy +chrUn_JTFH01000851v1_decoy 1096 TRUE decoy +chrUn_JTFH01000852v1_decoy 1094 TRUE decoy +chrUn_JTFH01000853v1_decoy 1093 TRUE decoy +chrUn_JTFH01000854v1_decoy 1090 TRUE decoy +chrUn_JTFH01000855v1_decoy 1088 TRUE decoy +chrUn_JTFH01000856v1_decoy 1087 TRUE decoy +chrUn_JTFH01000857v1_decoy 1086 TRUE decoy +chrUn_JTFH01000858v1_decoy 1085 TRUE decoy +chrUn_JTFH01000859v1_decoy 1084 TRUE decoy +chrUn_JTFH01000860v1_decoy 1084 TRUE decoy +chrUn_JTFH01000861v1_decoy 1084 TRUE decoy +chrUn_JTFH01000862v1_decoy 1084 TRUE decoy +chrUn_JTFH01000863v1_decoy 1083 TRUE decoy +chrUn_JTFH01000864v1_decoy 1083 TRUE decoy +chrUn_JTFH01000865v1_decoy 1082 TRUE decoy +chrUn_JTFH01000866v1_decoy 1082 TRUE decoy +chrUn_JTFH01000867v1_decoy 1081 TRUE decoy +chrUn_JTFH01000868v1_decoy 1081 TRUE decoy +chrUn_JTFH01000869v1_decoy 1079 TRUE decoy 
+chrUn_JTFH01000870v1_decoy 1076 TRUE decoy +chrUn_JTFH01000871v1_decoy 1074 TRUE decoy +chrUn_JTFH01000872v1_decoy 1073 TRUE decoy +chrUn_JTFH01000873v1_decoy 1073 TRUE decoy +chrUn_JTFH01000874v1_decoy 1071 TRUE decoy +chrUn_JTFH01000875v1_decoy 1069 TRUE decoy +chrUn_JTFH01000876v1_decoy 1067 TRUE decoy +chrUn_JTFH01000877v1_decoy 1067 TRUE decoy +chrUn_JTFH01000878v1_decoy 1067 TRUE decoy +chrUn_JTFH01000879v1_decoy 1066 TRUE decoy +chrUn_JTFH01000880v1_decoy 1065 TRUE decoy +chrUn_JTFH01000881v1_decoy 1065 TRUE decoy +chrUn_JTFH01000882v1_decoy 1065 TRUE decoy +chrUn_JTFH01000883v1_decoy 1065 TRUE decoy +chrUn_JTFH01000884v1_decoy 1065 TRUE decoy +chrUn_JTFH01000885v1_decoy 1064 TRUE decoy +chrUn_JTFH01000886v1_decoy 1064 TRUE decoy +chrUn_JTFH01000887v1_decoy 1064 TRUE decoy +chrUn_JTFH01000888v1_decoy 1063 TRUE decoy +chrUn_JTFH01000889v1_decoy 1062 TRUE decoy +chrUn_JTFH01000890v1_decoy 1062 TRUE decoy +chrUn_JTFH01000891v1_decoy 1062 TRUE decoy +chrUn_JTFH01000892v1_decoy 1061 TRUE decoy +chrUn_JTFH01000893v1_decoy 1060 TRUE decoy +chrUn_JTFH01000894v1_decoy 1057 TRUE decoy +chrUn_JTFH01000895v1_decoy 1057 TRUE decoy +chrUn_JTFH01000896v1_decoy 1056 TRUE decoy +chrUn_JTFH01000897v1_decoy 1055 TRUE decoy +chrUn_JTFH01000898v1_decoy 1055 TRUE decoy +chrUn_JTFH01000899v1_decoy 1055 TRUE decoy +chrUn_JTFH01000900v1_decoy 1055 TRUE decoy +chrUn_JTFH01000901v1_decoy 1054 TRUE decoy +chrUn_JTFH01000902v1_decoy 1051 TRUE decoy +chrUn_JTFH01000903v1_decoy 1050 TRUE decoy +chrUn_JTFH01000904v1_decoy 1050 TRUE decoy +chrUn_JTFH01000905v1_decoy 1049 TRUE decoy +chrUn_JTFH01000906v1_decoy 1048 TRUE decoy +chrUn_JTFH01000907v1_decoy 1047 TRUE decoy +chrUn_JTFH01000908v1_decoy 1046 TRUE decoy +chrUn_JTFH01000909v1_decoy 1046 TRUE decoy +chrUn_JTFH01000910v1_decoy 1046 TRUE decoy +chrUn_JTFH01000911v1_decoy 1045 TRUE decoy +chrUn_JTFH01000912v1_decoy 1045 TRUE decoy +chrUn_JTFH01000913v1_decoy 1045 TRUE decoy +chrUn_JTFH01000914v1_decoy 1044 TRUE decoy 
+chrUn_JTFH01000915v1_decoy 1042 TRUE decoy +chrUn_JTFH01000916v1_decoy 1041 TRUE decoy +chrUn_JTFH01000917v1_decoy 1039 TRUE decoy +chrUn_JTFH01000918v1_decoy 1039 TRUE decoy +chrUn_JTFH01000919v1_decoy 1038 TRUE decoy +chrUn_JTFH01000920v1_decoy 1036 TRUE decoy +chrUn_JTFH01000921v1_decoy 1036 TRUE decoy +chrUn_JTFH01000922v1_decoy 1035 TRUE decoy +chrUn_JTFH01000923v1_decoy 1035 TRUE decoy +chrUn_JTFH01000924v1_decoy 1033 TRUE decoy +chrUn_JTFH01000925v1_decoy 1032 TRUE decoy +chrUn_JTFH01000926v1_decoy 1031 TRUE decoy +chrUn_JTFH01000927v1_decoy 1031 TRUE decoy +chrUn_JTFH01000928v1_decoy 1031 TRUE decoy +chrUn_JTFH01000929v1_decoy 1027 TRUE decoy +chrUn_JTFH01000930v1_decoy 1027 TRUE decoy +chrUn_JTFH01000931v1_decoy 1026 TRUE decoy +chrUn_JTFH01000932v1_decoy 1026 TRUE decoy +chrUn_JTFH01000933v1_decoy 1024 TRUE decoy +chrUn_JTFH01000934v1_decoy 1024 TRUE decoy +chrUn_JTFH01000935v1_decoy 1022 TRUE decoy +chrUn_JTFH01000936v1_decoy 1022 TRUE decoy +chrUn_JTFH01000937v1_decoy 1021 TRUE decoy +chrUn_JTFH01000938v1_decoy 1020 TRUE decoy +chrUn_JTFH01000939v1_decoy 1019 TRUE decoy +chrUn_JTFH01000940v1_decoy 1018 TRUE decoy +chrUn_JTFH01000941v1_decoy 1018 TRUE decoy +chrUn_JTFH01000942v1_decoy 1018 TRUE decoy +chrUn_JTFH01000943v1_decoy 1016 TRUE decoy +chrUn_JTFH01000944v1_decoy 1010 TRUE decoy +chrUn_JTFH01000945v1_decoy 1010 TRUE decoy +chrUn_JTFH01000946v1_decoy 1009 TRUE decoy +chrUn_JTFH01000947v1_decoy 1008 TRUE decoy +chrUn_JTFH01000948v1_decoy 1007 TRUE decoy +chrUn_JTFH01000949v1_decoy 1006 TRUE decoy +chrUn_JTFH01000950v1_decoy 1005 TRUE decoy +chrUn_JTFH01000951v1_decoy 1005 TRUE decoy +chrUn_JTFH01000952v1_decoy 1004 TRUE decoy +chrUn_JTFH01000953v1_decoy 1004 TRUE decoy +chrUn_JTFH01000954v1_decoy 1003 TRUE decoy +chrUn_JTFH01000955v1_decoy 1003 TRUE decoy +chrUn_JTFH01000956v1_decoy 1003 TRUE decoy +chrUn_JTFH01000957v1_decoy 1003 TRUE decoy +chrUn_JTFH01000958v1_decoy 1002 TRUE decoy +chrUn_JTFH01000959v1_decoy 1002 TRUE decoy 
+chrUn_JTFH01000960v1_decoy 1000 TRUE decoy +chrUn_JTFH01000961v1_decoy 1000 TRUE decoy +chrUn_JTFH01000962v1_decoy 8358 TRUE decoy +chrUn_JTFH01000963v1_decoy 7932 TRUE decoy +chrUn_JTFH01000964v1_decoy 6846 TRUE decoy +chrUn_JTFH01000965v1_decoy 4591 TRUE decoy +chrUn_JTFH01000966v1_decoy 4041 TRUE decoy +chrUn_JTFH01000967v1_decoy 3841 TRUE decoy +chrUn_JTFH01000968v1_decoy 3754 TRUE decoy +chrUn_JTFH01000969v1_decoy 3743 TRUE decoy +chrUn_JTFH01000970v1_decoy 3702 TRUE decoy +chrUn_JTFH01000971v1_decoy 3625 TRUE decoy +chrUn_JTFH01000972v1_decoy 3529 TRUE decoy +chrUn_JTFH01000973v1_decoy 3508 TRUE decoy +chrUn_JTFH01000974v1_decoy 3359 TRUE decoy +chrUn_JTFH01000975v1_decoy 3320 TRUE decoy +chrUn_JTFH01000976v1_decoy 3231 TRUE decoy +chrUn_JTFH01000977v1_decoy 3220 TRUE decoy +chrUn_JTFH01000978v1_decoy 3212 TRUE decoy +chrUn_JTFH01000979v1_decoy 3192 TRUE decoy +chrUn_JTFH01000980v1_decoy 3092 TRUE decoy +chrUn_JTFH01000981v1_decoy 3087 TRUE decoy +chrUn_JTFH01000982v1_decoy 3048 TRUE decoy +chrUn_JTFH01000983v1_decoy 3005 TRUE decoy +chrUn_JTFH01000984v1_decoy 3004 TRUE decoy +chrUn_JTFH01000985v1_decoy 2959 TRUE decoy +chrUn_JTFH01000986v1_decoy 2934 TRUE decoy +chrUn_JTFH01000987v1_decoy 2933 TRUE decoy +chrUn_JTFH01000988v1_decoy 2827 TRUE decoy +chrUn_JTFH01000989v1_decoy 2794 TRUE decoy +chrUn_JTFH01000990v1_decoy 2749 TRUE decoy +chrUn_JTFH01000991v1_decoy 2745 TRUE decoy +chrUn_JTFH01000992v1_decoy 2733 TRUE decoy +chrUn_JTFH01000993v1_decoy 2698 TRUE decoy +chrUn_JTFH01000994v1_decoy 2665 TRUE decoy +chrUn_JTFH01000995v1_decoy 2634 TRUE decoy +chrUn_JTFH01000996v1_decoy 2492 TRUE decoy +chrUn_JTFH01000997v1_decoy 2489 TRUE decoy +chrUn_JTFH01000998v1_decoy 2468 TRUE decoy +chrUn_JTFH01000999v1_decoy 2414 TRUE decoy +chrUn_JTFH01001000v1_decoy 2395 TRUE decoy +chrUn_JTFH01001001v1_decoy 2356 TRUE decoy +chrUn_JTFH01001002v1_decoy 2339 TRUE decoy +chrUn_JTFH01001003v1_decoy 2310 TRUE decoy +chrUn_JTFH01001004v1_decoy 2288 TRUE decoy 
+chrUn_JTFH01001005v1_decoy 2285 TRUE decoy +chrUn_JTFH01001006v1_decoy 2269 TRUE decoy +chrUn_JTFH01001007v1_decoy 2253 TRUE decoy +chrUn_JTFH01001008v1_decoy 2203 TRUE decoy +chrUn_JTFH01001009v1_decoy 2176 TRUE decoy +chrUn_JTFH01001010v1_decoy 2159 TRUE decoy +chrUn_JTFH01001011v1_decoy 2155 TRUE decoy +chrUn_JTFH01001012v1_decoy 2149 TRUE decoy +chrUn_JTFH01001013v1_decoy 2129 TRUE decoy +chrUn_JTFH01001014v1_decoy 2116 TRUE decoy +chrUn_JTFH01001015v1_decoy 2113 TRUE decoy +chrUn_JTFH01001016v1_decoy 2098 TRUE decoy +chrUn_JTFH01001017v1_decoy 2066 TRUE decoy +chrUn_JTFH01001018v1_decoy 2066 TRUE decoy +chrUn_JTFH01001019v1_decoy 2059 TRUE decoy +chrUn_JTFH01001020v1_decoy 2047 TRUE decoy +chrUn_JTFH01001021v1_decoy 2040 TRUE decoy +chrUn_JTFH01001022v1_decoy 2030 TRUE decoy +chrUn_JTFH01001023v1_decoy 2024 TRUE decoy +chrUn_JTFH01001024v1_decoy 2001 TRUE decoy +chrUn_JTFH01001025v1_decoy 1992 TRUE decoy +chrUn_JTFH01001026v1_decoy 1981 TRUE decoy +chrUn_JTFH01001027v1_decoy 1979 TRUE decoy +chrUn_JTFH01001028v1_decoy 1957 TRUE decoy +chrUn_JTFH01001029v1_decoy 1953 TRUE decoy +chrUn_JTFH01001030v1_decoy 1944 TRUE decoy +chrUn_JTFH01001031v1_decoy 1936 TRUE decoy +chrUn_JTFH01001032v1_decoy 1932 TRUE decoy +chrUn_JTFH01001033v1_decoy 1882 TRUE decoy +chrUn_JTFH01001034v1_decoy 1878 TRUE decoy +chrUn_JTFH01001035v1_decoy 1870 TRUE decoy +chrUn_JTFH01001036v1_decoy 1821 TRUE decoy +chrUn_JTFH01001037v1_decoy 1813 TRUE decoy +chrUn_JTFH01001038v1_decoy 1809 TRUE decoy +chrUn_JTFH01001039v1_decoy 1804 TRUE decoy +chrUn_JTFH01001040v1_decoy 1797 TRUE decoy +chrUn_JTFH01001041v1_decoy 1791 TRUE decoy +chrUn_JTFH01001042v1_decoy 1781 TRUE decoy +chrUn_JTFH01001043v1_decoy 1766 TRUE decoy +chrUn_JTFH01001044v1_decoy 1764 TRUE decoy +chrUn_JTFH01001045v1_decoy 1743 TRUE decoy +chrUn_JTFH01001046v1_decoy 1741 TRUE decoy +chrUn_JTFH01001047v1_decoy 1709 TRUE decoy +chrUn_JTFH01001048v1_decoy 1706 TRUE decoy +chrUn_JTFH01001049v1_decoy 1701 TRUE decoy 
+chrUn_JTFH01001050v1_decoy 1689 TRUE decoy +chrUn_JTFH01001051v1_decoy 1646 TRUE decoy +chrUn_JTFH01001052v1_decoy 1641 TRUE decoy +chrUn_JTFH01001053v1_decoy 1639 TRUE decoy +chrUn_JTFH01001054v1_decoy 1636 TRUE decoy +chrUn_JTFH01001055v1_decoy 1632 TRUE decoy +chrUn_JTFH01001056v1_decoy 1629 TRUE decoy +chrUn_JTFH01001057v1_decoy 1623 TRUE decoy +chrUn_JTFH01001058v1_decoy 1622 TRUE decoy +chrUn_JTFH01001059v1_decoy 1622 TRUE decoy +chrUn_JTFH01001060v1_decoy 1619 TRUE decoy +chrUn_JTFH01001061v1_decoy 1606 TRUE decoy +chrUn_JTFH01001062v1_decoy 1593 TRUE decoy +chrUn_JTFH01001063v1_decoy 1592 TRUE decoy +chrUn_JTFH01001064v1_decoy 1558 TRUE decoy +chrUn_JTFH01001065v1_decoy 1545 TRUE decoy +chrUn_JTFH01001066v1_decoy 1542 TRUE decoy +chrUn_JTFH01001067v1_decoy 1540 TRUE decoy +chrUn_JTFH01001068v1_decoy 1529 TRUE decoy +chrUn_JTFH01001069v1_decoy 1518 TRUE decoy +chrUn_JTFH01001070v1_decoy 1515 TRUE decoy +chrUn_JTFH01001071v1_decoy 1513 TRUE decoy +chrUn_JTFH01001072v1_decoy 1507 TRUE decoy +chrUn_JTFH01001073v1_decoy 1504 TRUE decoy +chrUn_JTFH01001074v1_decoy 1499 TRUE decoy +chrUn_JTFH01001075v1_decoy 1495 TRUE decoy +chrUn_JTFH01001076v1_decoy 1495 TRUE decoy +chrUn_JTFH01001077v1_decoy 1492 TRUE decoy +chrUn_JTFH01001078v1_decoy 1492 TRUE decoy +chrUn_JTFH01001079v1_decoy 1489 TRUE decoy +chrUn_JTFH01001080v1_decoy 1485 TRUE decoy +chrUn_JTFH01001081v1_decoy 1483 TRUE decoy +chrUn_JTFH01001082v1_decoy 1473 TRUE decoy +chrUn_JTFH01001083v1_decoy 1470 TRUE decoy +chrUn_JTFH01001084v1_decoy 1463 TRUE decoy +chrUn_JTFH01001085v1_decoy 1460 TRUE decoy +chrUn_JTFH01001086v1_decoy 1458 TRUE decoy +chrUn_JTFH01001087v1_decoy 1456 TRUE decoy +chrUn_JTFH01001088v1_decoy 1453 TRUE decoy +chrUn_JTFH01001089v1_decoy 1443 TRUE decoy +chrUn_JTFH01001090v1_decoy 1441 TRUE decoy +chrUn_JTFH01001091v1_decoy 1426 TRUE decoy +chrUn_JTFH01001092v1_decoy 1425 TRUE decoy +chrUn_JTFH01001093v1_decoy 1418 TRUE decoy +chrUn_JTFH01001094v1_decoy 1413 TRUE decoy 
+chrUn_JTFH01001095v1_decoy 1413 TRUE decoy +chrUn_JTFH01001096v1_decoy 1412 TRUE decoy +chrUn_JTFH01001097v1_decoy 1407 TRUE decoy +chrUn_JTFH01001098v1_decoy 1406 TRUE decoy +chrUn_JTFH01001099v1_decoy 1396 TRUE decoy +chrUn_JTFH01001100v1_decoy 1390 TRUE decoy +chrUn_JTFH01001101v1_decoy 1382 TRUE decoy +chrUn_JTFH01001102v1_decoy 1376 TRUE decoy +chrUn_JTFH01001103v1_decoy 1375 TRUE decoy +chrUn_JTFH01001104v1_decoy 1371 TRUE decoy +chrUn_JTFH01001105v1_decoy 1367 TRUE decoy +chrUn_JTFH01001106v1_decoy 1364 TRUE decoy +chrUn_JTFH01001107v1_decoy 1356 TRUE decoy +chrUn_JTFH01001108v1_decoy 1355 TRUE decoy +chrUn_JTFH01001109v1_decoy 1352 TRUE decoy +chrUn_JTFH01001110v1_decoy 1350 TRUE decoy +chrUn_JTFH01001111v1_decoy 1346 TRUE decoy +chrUn_JTFH01001112v1_decoy 1345 TRUE decoy +chrUn_JTFH01001113v1_decoy 1340 TRUE decoy +chrUn_JTFH01001114v1_decoy 1330 TRUE decoy +chrUn_JTFH01001115v1_decoy 1329 TRUE decoy +chrUn_JTFH01001116v1_decoy 1324 TRUE decoy +chrUn_JTFH01001117v1_decoy 1316 TRUE decoy +chrUn_JTFH01001118v1_decoy 1307 TRUE decoy +chrUn_JTFH01001119v1_decoy 1304 TRUE decoy +chrUn_JTFH01001120v1_decoy 1304 TRUE decoy +chrUn_JTFH01001121v1_decoy 1303 TRUE decoy +chrUn_JTFH01001122v1_decoy 1301 TRUE decoy +chrUn_JTFH01001123v1_decoy 1300 TRUE decoy +chrUn_JTFH01001124v1_decoy 1297 TRUE decoy +chrUn_JTFH01001125v1_decoy 1296 TRUE decoy +chrUn_JTFH01001126v1_decoy 1290 TRUE decoy +chrUn_JTFH01001127v1_decoy 1284 TRUE decoy +chrUn_JTFH01001128v1_decoy 1282 TRUE decoy +chrUn_JTFH01001129v1_decoy 1281 TRUE decoy +chrUn_JTFH01001130v1_decoy 1280 TRUE decoy +chrUn_JTFH01001131v1_decoy 1279 TRUE decoy +chrUn_JTFH01001132v1_decoy 1272 TRUE decoy +chrUn_JTFH01001133v1_decoy 1267 TRUE decoy +chrUn_JTFH01001134v1_decoy 1267 TRUE decoy +chrUn_JTFH01001135v1_decoy 1266 TRUE decoy +chrUn_JTFH01001136v1_decoy 1264 TRUE decoy +chrUn_JTFH01001137v1_decoy 1264 TRUE decoy +chrUn_JTFH01001138v1_decoy 1264 TRUE decoy +chrUn_JTFH01001139v1_decoy 1263 TRUE decoy 
+chrUn_JTFH01001140v1_decoy 1249 TRUE decoy +chrUn_JTFH01001141v1_decoy 1240 TRUE decoy +chrUn_JTFH01001142v1_decoy 1239 TRUE decoy +chrUn_JTFH01001143v1_decoy 1235 TRUE decoy +chrUn_JTFH01001144v1_decoy 1235 TRUE decoy +chrUn_JTFH01001145v1_decoy 1233 TRUE decoy +chrUn_JTFH01001146v1_decoy 1232 TRUE decoy +chrUn_JTFH01001147v1_decoy 1230 TRUE decoy +chrUn_JTFH01001148v1_decoy 1226 TRUE decoy +chrUn_JTFH01001149v1_decoy 1223 TRUE decoy +chrUn_JTFH01001150v1_decoy 1214 TRUE decoy +chrUn_JTFH01001151v1_decoy 1213 TRUE decoy +chrUn_JTFH01001152v1_decoy 1211 TRUE decoy +chrUn_JTFH01001153v1_decoy 1209 TRUE decoy +chrUn_JTFH01001154v1_decoy 1202 TRUE decoy +chrUn_JTFH01001155v1_decoy 1199 TRUE decoy +chrUn_JTFH01001156v1_decoy 1197 TRUE decoy +chrUn_JTFH01001157v1_decoy 1193 TRUE decoy +chrUn_JTFH01001158v1_decoy 1191 TRUE decoy +chrUn_JTFH01001159v1_decoy 1187 TRUE decoy +chrUn_JTFH01001160v1_decoy 1186 TRUE decoy +chrUn_JTFH01001161v1_decoy 1184 TRUE decoy +chrUn_JTFH01001162v1_decoy 1184 TRUE decoy +chrUn_JTFH01001163v1_decoy 1182 TRUE decoy +chrUn_JTFH01001164v1_decoy 1179 TRUE decoy +chrUn_JTFH01001165v1_decoy 1173 TRUE decoy +chrUn_JTFH01001166v1_decoy 1169 TRUE decoy +chrUn_JTFH01001167v1_decoy 1167 TRUE decoy +chrUn_JTFH01001168v1_decoy 1166 TRUE decoy +chrUn_JTFH01001169v1_decoy 1165 TRUE decoy +chrUn_JTFH01001170v1_decoy 1164 TRUE decoy +chrUn_JTFH01001171v1_decoy 1163 TRUE decoy +chrUn_JTFH01001172v1_decoy 1158 TRUE decoy +chrUn_JTFH01001173v1_decoy 1158 TRUE decoy +chrUn_JTFH01001174v1_decoy 1157 TRUE decoy +chrUn_JTFH01001175v1_decoy 1157 TRUE decoy +chrUn_JTFH01001176v1_decoy 1157 TRUE decoy +chrUn_JTFH01001177v1_decoy 1155 TRUE decoy +chrUn_JTFH01001178v1_decoy 1154 TRUE decoy +chrUn_JTFH01001179v1_decoy 1149 TRUE decoy +chrUn_JTFH01001180v1_decoy 1148 TRUE decoy +chrUn_JTFH01001181v1_decoy 1148 TRUE decoy +chrUn_JTFH01001182v1_decoy 1146 TRUE decoy +chrUn_JTFH01001183v1_decoy 1144 TRUE decoy +chrUn_JTFH01001184v1_decoy 1140 TRUE decoy 
+chrUn_JTFH01001185v1_decoy 1136 TRUE decoy +chrUn_JTFH01001186v1_decoy 1134 TRUE decoy +chrUn_JTFH01001187v1_decoy 1133 TRUE decoy +chrUn_JTFH01001188v1_decoy 1129 TRUE decoy +chrUn_JTFH01001189v1_decoy 1127 TRUE decoy +chrUn_JTFH01001190v1_decoy 1127 TRUE decoy +chrUn_JTFH01001191v1_decoy 1118 TRUE decoy +chrUn_JTFH01001192v1_decoy 1110 TRUE decoy +chrUn_JTFH01001193v1_decoy 1104 TRUE decoy +chrUn_JTFH01001194v1_decoy 1104 TRUE decoy +chrUn_JTFH01001195v1_decoy 1101 TRUE decoy +chrUn_JTFH01001196v1_decoy 1098 TRUE decoy +chrUn_JTFH01001197v1_decoy 1096 TRUE decoy +chrUn_JTFH01001198v1_decoy 1094 TRUE decoy +chrUn_JTFH01001199v1_decoy 1091 TRUE decoy +chrUn_JTFH01001200v1_decoy 1089 TRUE decoy +chrUn_JTFH01001201v1_decoy 1086 TRUE decoy +chrUn_JTFH01001202v1_decoy 1085 TRUE decoy +chrUn_JTFH01001203v1_decoy 1084 TRUE decoy +chrUn_JTFH01001204v1_decoy 1083 TRUE decoy +chrUn_JTFH01001205v1_decoy 1083 TRUE decoy +chrUn_JTFH01001206v1_decoy 1079 TRUE decoy +chrUn_JTFH01001207v1_decoy 1076 TRUE decoy +chrUn_JTFH01001208v1_decoy 1069 TRUE decoy +chrUn_JTFH01001209v1_decoy 1068 TRUE decoy +chrUn_JTFH01001210v1_decoy 1067 TRUE decoy +chrUn_JTFH01001211v1_decoy 1067 TRUE decoy +chrUn_JTFH01001212v1_decoy 1067 TRUE decoy +chrUn_JTFH01001213v1_decoy 1063 TRUE decoy +chrUn_JTFH01001214v1_decoy 1062 TRUE decoy +chrUn_JTFH01001215v1_decoy 1059 TRUE decoy +chrUn_JTFH01001216v1_decoy 1058 TRUE decoy +chrUn_JTFH01001217v1_decoy 1058 TRUE decoy +chrUn_JTFH01001218v1_decoy 1055 TRUE decoy +chrUn_JTFH01001219v1_decoy 1054 TRUE decoy +chrUn_JTFH01001220v1_decoy 1054 TRUE decoy +chrUn_JTFH01001221v1_decoy 1053 TRUE decoy +chrUn_JTFH01001222v1_decoy 1053 TRUE decoy +chrUn_JTFH01001223v1_decoy 1052 TRUE decoy +chrUn_JTFH01001224v1_decoy 1051 TRUE decoy +chrUn_JTFH01001225v1_decoy 1049 TRUE decoy +chrUn_JTFH01001226v1_decoy 1047 TRUE decoy +chrUn_JTFH01001227v1_decoy 1044 TRUE decoy +chrUn_JTFH01001228v1_decoy 1043 TRUE decoy +chrUn_JTFH01001229v1_decoy 1043 TRUE decoy 
+chrUn_JTFH01001230v1_decoy 1042 TRUE decoy +chrUn_JTFH01001231v1_decoy 1042 TRUE decoy +chrUn_JTFH01001232v1_decoy 1041 TRUE decoy +chrUn_JTFH01001233v1_decoy 1040 TRUE decoy +chrUn_JTFH01001234v1_decoy 1039 TRUE decoy +chrUn_JTFH01001235v1_decoy 1038 TRUE decoy +chrUn_JTFH01001236v1_decoy 1037 TRUE decoy +chrUn_JTFH01001237v1_decoy 1037 TRUE decoy +chrUn_JTFH01001238v1_decoy 1035 TRUE decoy +chrUn_JTFH01001239v1_decoy 1027 TRUE decoy +chrUn_JTFH01001240v1_decoy 1021 TRUE decoy +chrUn_JTFH01001241v1_decoy 1021 TRUE decoy +chrUn_JTFH01001242v1_decoy 1019 TRUE decoy +chrUn_JTFH01001243v1_decoy 1019 TRUE decoy +chrUn_JTFH01001244v1_decoy 1016 TRUE decoy +chrUn_JTFH01001245v1_decoy 1014 TRUE decoy +chrUn_JTFH01001246v1_decoy 1013 TRUE decoy +chrUn_JTFH01001247v1_decoy 1009 TRUE decoy +chrUn_JTFH01001248v1_decoy 1008 TRUE decoy +chrUn_JTFH01001249v1_decoy 1007 TRUE decoy +chrUn_JTFH01001250v1_decoy 1004 TRUE decoy +chrUn_JTFH01001251v1_decoy 1004 TRUE decoy +chrUn_JTFH01001252v1_decoy 1003 TRUE decoy +chrUn_JTFH01001253v1_decoy 1001 TRUE decoy +chrUn_JTFH01001254v1_decoy 1000 TRUE decoy +chrUn_JTFH01001255v1_decoy 1000 TRUE decoy +chrUn_JTFH01001256v1_decoy 1000 TRUE decoy +chrUn_JTFH01001257v1_decoy 17929 TRUE decoy +chrUn_JTFH01001258v1_decoy 9749 TRUE decoy +chrUn_JTFH01001259v1_decoy 8053 TRUE decoy +chrUn_JTFH01001260v1_decoy 7826 TRUE decoy +chrUn_JTFH01001261v1_decoy 7768 TRUE decoy +chrUn_JTFH01001262v1_decoy 5691 TRUE decoy +chrUn_JTFH01001263v1_decoy 5444 TRUE decoy +chrUn_JTFH01001264v1_decoy 5077 TRUE decoy +chrUn_JTFH01001265v1_decoy 4990 TRUE decoy +chrUn_JTFH01001266v1_decoy 4545 TRUE decoy +chrUn_JTFH01001267v1_decoy 4544 TRUE decoy +chrUn_JTFH01001268v1_decoy 4202 TRUE decoy +chrUn_JTFH01001269v1_decoy 4195 TRUE decoy +chrUn_JTFH01001270v1_decoy 3807 TRUE decoy +chrUn_JTFH01001271v1_decoy 3741 TRUE decoy +chrUn_JTFH01001272v1_decoy 3699 TRUE decoy +chrUn_JTFH01001273v1_decoy 3640 TRUE decoy +chrUn_JTFH01001274v1_decoy 3531 TRUE decoy 
+chrUn_JTFH01001275v1_decoy 3455 TRUE decoy +chrUn_JTFH01001276v1_decoy 3411 TRUE decoy +chrUn_JTFH01001277v1_decoy 3387 TRUE decoy +chrUn_JTFH01001278v1_decoy 3358 TRUE decoy +chrUn_JTFH01001279v1_decoy 3285 TRUE decoy +chrUn_JTFH01001280v1_decoy 3273 TRUE decoy +chrUn_JTFH01001281v1_decoy 3262 TRUE decoy +chrUn_JTFH01001282v1_decoy 3259 TRUE decoy +chrUn_JTFH01001283v1_decoy 3222 TRUE decoy +chrUn_JTFH01001284v1_decoy 3127 TRUE decoy +chrUn_JTFH01001285v1_decoy 3110 TRUE decoy +chrUn_JTFH01001286v1_decoy 3104 TRUE decoy +chrUn_JTFH01001287v1_decoy 3071 TRUE decoy +chrUn_JTFH01001288v1_decoy 3063 TRUE decoy +chrUn_JTFH01001289v1_decoy 3059 TRUE decoy +chrUn_JTFH01001290v1_decoy 2990 TRUE decoy +chrUn_JTFH01001291v1_decoy 2986 TRUE decoy +chrUn_JTFH01001292v1_decoy 2928 TRUE decoy +chrUn_JTFH01001293v1_decoy 2922 TRUE decoy +chrUn_JTFH01001294v1_decoy 2875 TRUE decoy +chrUn_JTFH01001295v1_decoy 2859 TRUE decoy +chrUn_JTFH01001296v1_decoy 2850 TRUE decoy +chrUn_JTFH01001297v1_decoy 2813 TRUE decoy +chrUn_JTFH01001298v1_decoy 2785 TRUE decoy +chrUn_JTFH01001299v1_decoy 2736 TRUE decoy +chrUn_JTFH01001300v1_decoy 2688 TRUE decoy +chrUn_JTFH01001301v1_decoy 2658 TRUE decoy +chrUn_JTFH01001302v1_decoy 2643 TRUE decoy +chrUn_JTFH01001303v1_decoy 2618 TRUE decoy +chrUn_JTFH01001304v1_decoy 2605 TRUE decoy +chrUn_JTFH01001305v1_decoy 2583 TRUE decoy +chrUn_JTFH01001306v1_decoy 2534 TRUE decoy +chrUn_JTFH01001307v1_decoy 2512 TRUE decoy +chrUn_JTFH01001308v1_decoy 2500 TRUE decoy +chrUn_JTFH01001309v1_decoy 2481 TRUE decoy +chrUn_JTFH01001310v1_decoy 2478 TRUE decoy +chrUn_JTFH01001311v1_decoy 2473 TRUE decoy +chrUn_JTFH01001312v1_decoy 2467 TRUE decoy +chrUn_JTFH01001313v1_decoy 2442 TRUE decoy +chrUn_JTFH01001314v1_decoy 2430 TRUE decoy +chrUn_JTFH01001315v1_decoy 2417 TRUE decoy +chrUn_JTFH01001316v1_decoy 2408 TRUE decoy +chrUn_JTFH01001317v1_decoy 2395 TRUE decoy +chrUn_JTFH01001318v1_decoy 2352 TRUE decoy +chrUn_JTFH01001319v1_decoy 2337 TRUE decoy 
+chrUn_JTFH01001320v1_decoy 2322 TRUE decoy +chrUn_JTFH01001321v1_decoy 2307 TRUE decoy +chrUn_JTFH01001322v1_decoy 2306 TRUE decoy +chrUn_JTFH01001323v1_decoy 2292 TRUE decoy +chrUn_JTFH01001324v1_decoy 2271 TRUE decoy +chrUn_JTFH01001325v1_decoy 2265 TRUE decoy +chrUn_JTFH01001326v1_decoy 2260 TRUE decoy +chrUn_JTFH01001327v1_decoy 2240 TRUE decoy +chrUn_JTFH01001328v1_decoy 2238 TRUE decoy +chrUn_JTFH01001329v1_decoy 2228 TRUE decoy +chrUn_JTFH01001330v1_decoy 2215 TRUE decoy +chrUn_JTFH01001331v1_decoy 2205 TRUE decoy +chrUn_JTFH01001332v1_decoy 2191 TRUE decoy +chrUn_JTFH01001333v1_decoy 2191 TRUE decoy +chrUn_JTFH01001334v1_decoy 2190 TRUE decoy +chrUn_JTFH01001335v1_decoy 2184 TRUE decoy +chrUn_JTFH01001336v1_decoy 2166 TRUE decoy +chrUn_JTFH01001337v1_decoy 2165 TRUE decoy +chrUn_JTFH01001338v1_decoy 2162 TRUE decoy +chrUn_JTFH01001339v1_decoy 2146 TRUE decoy +chrUn_JTFH01001340v1_decoy 2116 TRUE decoy +chrUn_JTFH01001341v1_decoy 2112 TRUE decoy +chrUn_JTFH01001342v1_decoy 2108 TRUE decoy +chrUn_JTFH01001343v1_decoy 2106 TRUE decoy +chrUn_JTFH01001344v1_decoy 2106 TRUE decoy +chrUn_JTFH01001345v1_decoy 2106 TRUE decoy +chrUn_JTFH01001346v1_decoy 2097 TRUE decoy +chrUn_JTFH01001347v1_decoy 2081 TRUE decoy +chrUn_JTFH01001348v1_decoy 2058 TRUE decoy +chrUn_JTFH01001349v1_decoy 2055 TRUE decoy +chrUn_JTFH01001350v1_decoy 2054 TRUE decoy +chrUn_JTFH01001351v1_decoy 2037 TRUE decoy +chrUn_JTFH01001352v1_decoy 2032 TRUE decoy +chrUn_JTFH01001353v1_decoy 2032 TRUE decoy +chrUn_JTFH01001354v1_decoy 2020 TRUE decoy +chrUn_JTFH01001355v1_decoy 2018 TRUE decoy +chrUn_JTFH01001356v1_decoy 2014 TRUE decoy +chrUn_JTFH01001357v1_decoy 2001 TRUE decoy +chrUn_JTFH01001358v1_decoy 2001 TRUE decoy +chrUn_JTFH01001359v1_decoy 1991 TRUE decoy +chrUn_JTFH01001360v1_decoy 1990 TRUE decoy +chrUn_JTFH01001361v1_decoy 1983 TRUE decoy +chrUn_JTFH01001362v1_decoy 1981 TRUE decoy +chrUn_JTFH01001363v1_decoy 1981 TRUE decoy +chrUn_JTFH01001364v1_decoy 1979 TRUE decoy 
+chrUn_JTFH01001365v1_decoy 1963 TRUE decoy +chrUn_JTFH01001366v1_decoy 1932 TRUE decoy +chrUn_JTFH01001367v1_decoy 1929 TRUE decoy +chrUn_JTFH01001368v1_decoy 1881 TRUE decoy +chrUn_JTFH01001369v1_decoy 1874 TRUE decoy +chrUn_JTFH01001370v1_decoy 1849 TRUE decoy +chrUn_JTFH01001371v1_decoy 1849 TRUE decoy +chrUn_JTFH01001372v1_decoy 1833 TRUE decoy +chrUn_JTFH01001373v1_decoy 1832 TRUE decoy +chrUn_JTFH01001374v1_decoy 1826 TRUE decoy +chrUn_JTFH01001375v1_decoy 1814 TRUE decoy +chrUn_JTFH01001376v1_decoy 1814 TRUE decoy +chrUn_JTFH01001377v1_decoy 1791 TRUE decoy +chrUn_JTFH01001378v1_decoy 1789 TRUE decoy +chrUn_JTFH01001379v1_decoy 1786 TRUE decoy +chrUn_JTFH01001380v1_decoy 1778 TRUE decoy +chrUn_JTFH01001381v1_decoy 1776 TRUE decoy +chrUn_JTFH01001382v1_decoy 1762 TRUE decoy +chrUn_JTFH01001383v1_decoy 1758 TRUE decoy +chrUn_JTFH01001384v1_decoy 1757 TRUE decoy +chrUn_JTFH01001385v1_decoy 1754 TRUE decoy +chrUn_JTFH01001386v1_decoy 1752 TRUE decoy +chrUn_JTFH01001387v1_decoy 1751 TRUE decoy +chrUn_JTFH01001388v1_decoy 1749 TRUE decoy +chrUn_JTFH01001389v1_decoy 1738 TRUE decoy +chrUn_JTFH01001390v1_decoy 1729 TRUE decoy +chrUn_JTFH01001391v1_decoy 1726 TRUE decoy +chrUn_JTFH01001392v1_decoy 1716 TRUE decoy +chrUn_JTFH01001393v1_decoy 1712 TRUE decoy +chrUn_JTFH01001394v1_decoy 1711 TRUE decoy +chrUn_JTFH01001395v1_decoy 1703 TRUE decoy +chrUn_JTFH01001396v1_decoy 1702 TRUE decoy +chrUn_JTFH01001397v1_decoy 1699 TRUE decoy +chrUn_JTFH01001398v1_decoy 1686 TRUE decoy +chrUn_JTFH01001399v1_decoy 1684 TRUE decoy +chrUn_JTFH01001400v1_decoy 1680 TRUE decoy +chrUn_JTFH01001401v1_decoy 1678 TRUE decoy +chrUn_JTFH01001402v1_decoy 1678 TRUE decoy +chrUn_JTFH01001403v1_decoy 1677 TRUE decoy +chrUn_JTFH01001404v1_decoy 1676 TRUE decoy +chrUn_JTFH01001405v1_decoy 1672 TRUE decoy +chrUn_JTFH01001406v1_decoy 1669 TRUE decoy +chrUn_JTFH01001407v1_decoy 1668 TRUE decoy +chrUn_JTFH01001408v1_decoy 1663 TRUE decoy +chrUn_JTFH01001409v1_decoy 1660 TRUE decoy 
+chrUn_JTFH01001410v1_decoy 1660 TRUE decoy +chrUn_JTFH01001411v1_decoy 1658 TRUE decoy +chrUn_JTFH01001412v1_decoy 1656 TRUE decoy +chrUn_JTFH01001413v1_decoy 1656 TRUE decoy +chrUn_JTFH01001414v1_decoy 1652 TRUE decoy +chrUn_JTFH01001415v1_decoy 1647 TRUE decoy +chrUn_JTFH01001416v1_decoy 1645 TRUE decoy +chrUn_JTFH01001417v1_decoy 1641 TRUE decoy +chrUn_JTFH01001418v1_decoy 1638 TRUE decoy +chrUn_JTFH01001419v1_decoy 1633 TRUE decoy +chrUn_JTFH01001420v1_decoy 1626 TRUE decoy +chrUn_JTFH01001421v1_decoy 1614 TRUE decoy +chrUn_JTFH01001422v1_decoy 1612 TRUE decoy +chrUn_JTFH01001423v1_decoy 1605 TRUE decoy +chrUn_JTFH01001424v1_decoy 1603 TRUE decoy +chrUn_JTFH01001425v1_decoy 1599 TRUE decoy +chrUn_JTFH01001426v1_decoy 1589 TRUE decoy +chrUn_JTFH01001427v1_decoy 1588 TRUE decoy +chrUn_JTFH01001428v1_decoy 1585 TRUE decoy +chrUn_JTFH01001429v1_decoy 1584 TRUE decoy +chrUn_JTFH01001430v1_decoy 1584 TRUE decoy +chrUn_JTFH01001431v1_decoy 1580 TRUE decoy +chrUn_JTFH01001432v1_decoy 1572 TRUE decoy +chrUn_JTFH01001433v1_decoy 1570 TRUE decoy +chrUn_JTFH01001434v1_decoy 1569 TRUE decoy +chrUn_JTFH01001435v1_decoy 1568 TRUE decoy +chrUn_JTFH01001436v1_decoy 1567 TRUE decoy +chrUn_JTFH01001437v1_decoy 1565 TRUE decoy +chrUn_JTFH01001438v1_decoy 1559 TRUE decoy +chrUn_JTFH01001439v1_decoy 1559 TRUE decoy +chrUn_JTFH01001440v1_decoy 1556 TRUE decoy +chrUn_JTFH01001441v1_decoy 1554 TRUE decoy +chrUn_JTFH01001442v1_decoy 1549 TRUE decoy +chrUn_JTFH01001443v1_decoy 1542 TRUE decoy +chrUn_JTFH01001444v1_decoy 1541 TRUE decoy +chrUn_JTFH01001445v1_decoy 1538 TRUE decoy +chrUn_JTFH01001446v1_decoy 1537 TRUE decoy +chrUn_JTFH01001447v1_decoy 1535 TRUE decoy +chrUn_JTFH01001448v1_decoy 1530 TRUE decoy +chrUn_JTFH01001449v1_decoy 1528 TRUE decoy +chrUn_JTFH01001450v1_decoy 1522 TRUE decoy +chrUn_JTFH01001451v1_decoy 1514 TRUE decoy +chrUn_JTFH01001452v1_decoy 1509 TRUE decoy +chrUn_JTFH01001453v1_decoy 1507 TRUE decoy +chrUn_JTFH01001454v1_decoy 1500 TRUE decoy 
+chrUn_JTFH01001455v1_decoy 1499 TRUE decoy +chrUn_JTFH01001456v1_decoy 1499 TRUE decoy +chrUn_JTFH01001457v1_decoy 1497 TRUE decoy +chrUn_JTFH01001458v1_decoy 1496 TRUE decoy +chrUn_JTFH01001459v1_decoy 1488 TRUE decoy +chrUn_JTFH01001460v1_decoy 1486 TRUE decoy +chrUn_JTFH01001461v1_decoy 1485 TRUE decoy +chrUn_JTFH01001462v1_decoy 1481 TRUE decoy +chrUn_JTFH01001463v1_decoy 1479 TRUE decoy +chrUn_JTFH01001464v1_decoy 1472 TRUE decoy +chrUn_JTFH01001465v1_decoy 1472 TRUE decoy +chrUn_JTFH01001466v1_decoy 1470 TRUE decoy +chrUn_JTFH01001467v1_decoy 1466 TRUE decoy +chrUn_JTFH01001468v1_decoy 1465 TRUE decoy +chrUn_JTFH01001469v1_decoy 1461 TRUE decoy +chrUn_JTFH01001470v1_decoy 1458 TRUE decoy +chrUn_JTFH01001471v1_decoy 1457 TRUE decoy +chrUn_JTFH01001472v1_decoy 1448 TRUE decoy +chrUn_JTFH01001473v1_decoy 1447 TRUE decoy +chrUn_JTFH01001474v1_decoy 1444 TRUE decoy +chrUn_JTFH01001475v1_decoy 1443 TRUE decoy +chrUn_JTFH01001476v1_decoy 1443 TRUE decoy +chrUn_JTFH01001477v1_decoy 1438 TRUE decoy +chrUn_JTFH01001478v1_decoy 1432 TRUE decoy +chrUn_JTFH01001479v1_decoy 1430 TRUE decoy +chrUn_JTFH01001480v1_decoy 1430 TRUE decoy +chrUn_JTFH01001481v1_decoy 1429 TRUE decoy +chrUn_JTFH01001482v1_decoy 1429 TRUE decoy +chrUn_JTFH01001483v1_decoy 1429 TRUE decoy +chrUn_JTFH01001484v1_decoy 1426 TRUE decoy +chrUn_JTFH01001485v1_decoy 1426 TRUE decoy +chrUn_JTFH01001486v1_decoy 1420 TRUE decoy +chrUn_JTFH01001487v1_decoy 1416 TRUE decoy +chrUn_JTFH01001488v1_decoy 1416 TRUE decoy +chrUn_JTFH01001489v1_decoy 1415 TRUE decoy +chrUn_JTFH01001490v1_decoy 1415 TRUE decoy +chrUn_JTFH01001491v1_decoy 1414 TRUE decoy +chrUn_JTFH01001492v1_decoy 1413 TRUE decoy +chrUn_JTFH01001493v1_decoy 1410 TRUE decoy +chrUn_JTFH01001494v1_decoy 1405 TRUE decoy +chrUn_JTFH01001495v1_decoy 1402 TRUE decoy +chrUn_JTFH01001496v1_decoy 1398 TRUE decoy +chrUn_JTFH01001497v1_decoy 1397 TRUE decoy +chrUn_JTFH01001498v1_decoy 1395 TRUE decoy +chrUn_JTFH01001499v1_decoy 1392 TRUE decoy 
+chrUn_JTFH01001500v1_decoy 1388 TRUE decoy +chrUn_JTFH01001501v1_decoy 1386 TRUE decoy +chrUn_JTFH01001502v1_decoy 1382 TRUE decoy +chrUn_JTFH01001503v1_decoy 1381 TRUE decoy +chrUn_JTFH01001504v1_decoy 1379 TRUE decoy +chrUn_JTFH01001505v1_decoy 1376 TRUE decoy +chrUn_JTFH01001506v1_decoy 1374 TRUE decoy +chrUn_JTFH01001507v1_decoy 1374 TRUE decoy +chrUn_JTFH01001508v1_decoy 1373 TRUE decoy +chrUn_JTFH01001509v1_decoy 1373 TRUE decoy +chrUn_JTFH01001510v1_decoy 1372 TRUE decoy +chrUn_JTFH01001511v1_decoy 1370 TRUE decoy +chrUn_JTFH01001512v1_decoy 1367 TRUE decoy +chrUn_JTFH01001513v1_decoy 1365 TRUE decoy +chrUn_JTFH01001514v1_decoy 1364 TRUE decoy +chrUn_JTFH01001515v1_decoy 1361 TRUE decoy +chrUn_JTFH01001516v1_decoy 1361 TRUE decoy +chrUn_JTFH01001517v1_decoy 1355 TRUE decoy +chrUn_JTFH01001518v1_decoy 1355 TRUE decoy +chrUn_JTFH01001519v1_decoy 1354 TRUE decoy +chrUn_JTFH01001520v1_decoy 1353 TRUE decoy +chrUn_JTFH01001521v1_decoy 1349 TRUE decoy +chrUn_JTFH01001522v1_decoy 1345 TRUE decoy +chrUn_JTFH01001523v1_decoy 1344 TRUE decoy +chrUn_JTFH01001524v1_decoy 1343 TRUE decoy +chrUn_JTFH01001525v1_decoy 1338 TRUE decoy +chrUn_JTFH01001526v1_decoy 1338 TRUE decoy +chrUn_JTFH01001527v1_decoy 1338 TRUE decoy +chrUn_JTFH01001528v1_decoy 1336 TRUE decoy +chrUn_JTFH01001529v1_decoy 1333 TRUE decoy +chrUn_JTFH01001530v1_decoy 1333 TRUE decoy +chrUn_JTFH01001531v1_decoy 1332 TRUE decoy +chrUn_JTFH01001532v1_decoy 1324 TRUE decoy +chrUn_JTFH01001533v1_decoy 1323 TRUE decoy +chrUn_JTFH01001534v1_decoy 1323 TRUE decoy +chrUn_JTFH01001535v1_decoy 1320 TRUE decoy +chrUn_JTFH01001536v1_decoy 1320 TRUE decoy +chrUn_JTFH01001537v1_decoy 1317 TRUE decoy +chrUn_JTFH01001538v1_decoy 1316 TRUE decoy +chrUn_JTFH01001539v1_decoy 1304 TRUE decoy +chrUn_JTFH01001540v1_decoy 1304 TRUE decoy +chrUn_JTFH01001541v1_decoy 1303 TRUE decoy +chrUn_JTFH01001542v1_decoy 1302 TRUE decoy +chrUn_JTFH01001543v1_decoy 1301 TRUE decoy +chrUn_JTFH01001544v1_decoy 1300 TRUE decoy 
+chrUn_JTFH01001545v1_decoy 1298 TRUE decoy +chrUn_JTFH01001546v1_decoy 1297 TRUE decoy +chrUn_JTFH01001547v1_decoy 1295 TRUE decoy +chrUn_JTFH01001548v1_decoy 1284 TRUE decoy +chrUn_JTFH01001549v1_decoy 1283 TRUE decoy +chrUn_JTFH01001550v1_decoy 1283 TRUE decoy +chrUn_JTFH01001551v1_decoy 1279 TRUE decoy +chrUn_JTFH01001552v1_decoy 1278 TRUE decoy +chrUn_JTFH01001553v1_decoy 1271 TRUE decoy +chrUn_JTFH01001554v1_decoy 1271 TRUE decoy +chrUn_JTFH01001555v1_decoy 1268 TRUE decoy +chrUn_JTFH01001556v1_decoy 1264 TRUE decoy +chrUn_JTFH01001557v1_decoy 1263 TRUE decoy +chrUn_JTFH01001558v1_decoy 1262 TRUE decoy +chrUn_JTFH01001559v1_decoy 1261 TRUE decoy +chrUn_JTFH01001560v1_decoy 1260 TRUE decoy +chrUn_JTFH01001561v1_decoy 1259 TRUE decoy +chrUn_JTFH01001562v1_decoy 1259 TRUE decoy +chrUn_JTFH01001563v1_decoy 1258 TRUE decoy +chrUn_JTFH01001564v1_decoy 1256 TRUE decoy +chrUn_JTFH01001565v1_decoy 1253 TRUE decoy +chrUn_JTFH01001566v1_decoy 1248 TRUE decoy +chrUn_JTFH01001567v1_decoy 1248 TRUE decoy +chrUn_JTFH01001568v1_decoy 1246 TRUE decoy +chrUn_JTFH01001569v1_decoy 1246 TRUE decoy +chrUn_JTFH01001570v1_decoy 1244 TRUE decoy +chrUn_JTFH01001571v1_decoy 1238 TRUE decoy +chrUn_JTFH01001572v1_decoy 1238 TRUE decoy +chrUn_JTFH01001573v1_decoy 1236 TRUE decoy +chrUn_JTFH01001574v1_decoy 1234 TRUE decoy +chrUn_JTFH01001575v1_decoy 1234 TRUE decoy +chrUn_JTFH01001576v1_decoy 1231 TRUE decoy +chrUn_JTFH01001577v1_decoy 1231 TRUE decoy +chrUn_JTFH01001578v1_decoy 1230 TRUE decoy +chrUn_JTFH01001579v1_decoy 1230 TRUE decoy +chrUn_JTFH01001580v1_decoy 1228 TRUE decoy +chrUn_JTFH01001581v1_decoy 1227 TRUE decoy +chrUn_JTFH01001582v1_decoy 1222 TRUE decoy +chrUn_JTFH01001583v1_decoy 1222 TRUE decoy +chrUn_JTFH01001584v1_decoy 1221 TRUE decoy +chrUn_JTFH01001585v1_decoy 1221 TRUE decoy +chrUn_JTFH01001586v1_decoy 1220 TRUE decoy +chrUn_JTFH01001587v1_decoy 1218 TRUE decoy +chrUn_JTFH01001588v1_decoy 1218 TRUE decoy +chrUn_JTFH01001589v1_decoy 1216 TRUE decoy 
+chrUn_JTFH01001590v1_decoy 1216 TRUE decoy +chrUn_JTFH01001591v1_decoy 1212 TRUE decoy +chrUn_JTFH01001592v1_decoy 1210 TRUE decoy +chrUn_JTFH01001593v1_decoy 1209 TRUE decoy +chrUn_JTFH01001594v1_decoy 1208 TRUE decoy +chrUn_JTFH01001595v1_decoy 1208 TRUE decoy +chrUn_JTFH01001596v1_decoy 1206 TRUE decoy +chrUn_JTFH01001597v1_decoy 1205 TRUE decoy +chrUn_JTFH01001598v1_decoy 1205 TRUE decoy +chrUn_JTFH01001599v1_decoy 1202 TRUE decoy +chrUn_JTFH01001600v1_decoy 1200 TRUE decoy +chrUn_JTFH01001601v1_decoy 1199 TRUE decoy +chrUn_JTFH01001602v1_decoy 1198 TRUE decoy +chrUn_JTFH01001603v1_decoy 1198 TRUE decoy +chrUn_JTFH01001604v1_decoy 1198 TRUE decoy +chrUn_JTFH01001605v1_decoy 1195 TRUE decoy +chrUn_JTFH01001606v1_decoy 1194 TRUE decoy +chrUn_JTFH01001607v1_decoy 1191 TRUE decoy +chrUn_JTFH01001608v1_decoy 1189 TRUE decoy +chrUn_JTFH01001609v1_decoy 1188 TRUE decoy +chrUn_JTFH01001610v1_decoy 1180 TRUE decoy +chrUn_JTFH01001611v1_decoy 1180 TRUE decoy +chrUn_JTFH01001612v1_decoy 1179 TRUE decoy +chrUn_JTFH01001613v1_decoy 1172 TRUE decoy +chrUn_JTFH01001614v1_decoy 1168 TRUE decoy +chrUn_JTFH01001615v1_decoy 1166 TRUE decoy +chrUn_JTFH01001616v1_decoy 1157 TRUE decoy +chrUn_JTFH01001617v1_decoy 1156 TRUE decoy +chrUn_JTFH01001618v1_decoy 1156 TRUE decoy +chrUn_JTFH01001619v1_decoy 1155 TRUE decoy +chrUn_JTFH01001620v1_decoy 1154 TRUE decoy +chrUn_JTFH01001621v1_decoy 1154 TRUE decoy +chrUn_JTFH01001622v1_decoy 1149 TRUE decoy +chrUn_JTFH01001623v1_decoy 1143 TRUE decoy +chrUn_JTFH01001624v1_decoy 1143 TRUE decoy +chrUn_JTFH01001625v1_decoy 1140 TRUE decoy +chrUn_JTFH01001626v1_decoy 1137 TRUE decoy +chrUn_JTFH01001627v1_decoy 1135 TRUE decoy +chrUn_JTFH01001628v1_decoy 1135 TRUE decoy +chrUn_JTFH01001629v1_decoy 1135 TRUE decoy +chrUn_JTFH01001630v1_decoy 1127 TRUE decoy +chrUn_JTFH01001631v1_decoy 1127 TRUE decoy +chrUn_JTFH01001632v1_decoy 1126 TRUE decoy +chrUn_JTFH01001633v1_decoy 1123 TRUE decoy +chrUn_JTFH01001634v1_decoy 1123 TRUE decoy 
+chrUn_JTFH01001635v1_decoy 1123 TRUE decoy +chrUn_JTFH01001636v1_decoy 1122 TRUE decoy +chrUn_JTFH01001637v1_decoy 1122 TRUE decoy +chrUn_JTFH01001638v1_decoy 1121 TRUE decoy +chrUn_JTFH01001639v1_decoy 1121 TRUE decoy +chrUn_JTFH01001640v1_decoy 1119 TRUE decoy +chrUn_JTFH01001641v1_decoy 1119 TRUE decoy +chrUn_JTFH01001642v1_decoy 1119 TRUE decoy +chrUn_JTFH01001643v1_decoy 1118 TRUE decoy +chrUn_JTFH01001644v1_decoy 1115 TRUE decoy +chrUn_JTFH01001645v1_decoy 1106 TRUE decoy +chrUn_JTFH01001646v1_decoy 1106 TRUE decoy +chrUn_JTFH01001647v1_decoy 1104 TRUE decoy +chrUn_JTFH01001648v1_decoy 1102 TRUE decoy +chrUn_JTFH01001649v1_decoy 1101 TRUE decoy +chrUn_JTFH01001650v1_decoy 1098 TRUE decoy +chrUn_JTFH01001651v1_decoy 1098 TRUE decoy +chrUn_JTFH01001652v1_decoy 1096 TRUE decoy +chrUn_JTFH01001653v1_decoy 1096 TRUE decoy +chrUn_JTFH01001654v1_decoy 1095 TRUE decoy +chrUn_JTFH01001655v1_decoy 1093 TRUE decoy +chrUn_JTFH01001656v1_decoy 1090 TRUE decoy +chrUn_JTFH01001657v1_decoy 1089 TRUE decoy +chrUn_JTFH01001658v1_decoy 1087 TRUE decoy +chrUn_JTFH01001659v1_decoy 1087 TRUE decoy +chrUn_JTFH01001660v1_decoy 1085 TRUE decoy +chrUn_JTFH01001661v1_decoy 1085 TRUE decoy +chrUn_JTFH01001662v1_decoy 1085 TRUE decoy +chrUn_JTFH01001663v1_decoy 1083 TRUE decoy +chrUn_JTFH01001664v1_decoy 1080 TRUE decoy +chrUn_JTFH01001665v1_decoy 1080 TRUE decoy +chrUn_JTFH01001666v1_decoy 1079 TRUE decoy +chrUn_JTFH01001667v1_decoy 1079 TRUE decoy +chrUn_JTFH01001668v1_decoy 1079 TRUE decoy +chrUn_JTFH01001669v1_decoy 1075 TRUE decoy +chrUn_JTFH01001670v1_decoy 1074 TRUE decoy +chrUn_JTFH01001671v1_decoy 1073 TRUE decoy +chrUn_JTFH01001672v1_decoy 1070 TRUE decoy +chrUn_JTFH01001673v1_decoy 1068 TRUE decoy +chrUn_JTFH01001674v1_decoy 1067 TRUE decoy +chrUn_JTFH01001675v1_decoy 1066 TRUE decoy +chrUn_JTFH01001676v1_decoy 1066 TRUE decoy +chrUn_JTFH01001677v1_decoy 1066 TRUE decoy +chrUn_JTFH01001678v1_decoy 1063 TRUE decoy +chrUn_JTFH01001679v1_decoy 1063 TRUE decoy 
+chrUn_JTFH01001680v1_decoy 1063 TRUE decoy +chrUn_JTFH01001681v1_decoy 1062 TRUE decoy +chrUn_JTFH01001682v1_decoy 1058 TRUE decoy +chrUn_JTFH01001683v1_decoy 1056 TRUE decoy +chrUn_JTFH01001684v1_decoy 1052 TRUE decoy +chrUn_JTFH01001685v1_decoy 1051 TRUE decoy +chrUn_JTFH01001686v1_decoy 1051 TRUE decoy +chrUn_JTFH01001687v1_decoy 1050 TRUE decoy +chrUn_JTFH01001688v1_decoy 1048 TRUE decoy +chrUn_JTFH01001689v1_decoy 1046 TRUE decoy +chrUn_JTFH01001690v1_decoy 1046 TRUE decoy +chrUn_JTFH01001691v1_decoy 1045 TRUE decoy +chrUn_JTFH01001692v1_decoy 1043 TRUE decoy +chrUn_JTFH01001693v1_decoy 1038 TRUE decoy +chrUn_JTFH01001694v1_decoy 1036 TRUE decoy +chrUn_JTFH01001695v1_decoy 1035 TRUE decoy +chrUn_JTFH01001696v1_decoy 1035 TRUE decoy +chrUn_JTFH01001697v1_decoy 1035 TRUE decoy +chrUn_JTFH01001698v1_decoy 1033 TRUE decoy +chrUn_JTFH01001699v1_decoy 1032 TRUE decoy +chrUn_JTFH01001700v1_decoy 1031 TRUE decoy +chrUn_JTFH01001701v1_decoy 1026 TRUE decoy +chrUn_JTFH01001702v1_decoy 1026 TRUE decoy +chrUn_JTFH01001703v1_decoy 1026 TRUE decoy +chrUn_JTFH01001704v1_decoy 1023 TRUE decoy +chrUn_JTFH01001705v1_decoy 1022 TRUE decoy +chrUn_JTFH01001706v1_decoy 1020 TRUE decoy +chrUn_JTFH01001707v1_decoy 1020 TRUE decoy +chrUn_JTFH01001708v1_decoy 1020 TRUE decoy +chrUn_JTFH01001709v1_decoy 1019 TRUE decoy +chrUn_JTFH01001710v1_decoy 1018 TRUE decoy +chrUn_JTFH01001711v1_decoy 1018 TRUE decoy +chrUn_JTFH01001712v1_decoy 1017 TRUE decoy +chrUn_JTFH01001713v1_decoy 1015 TRUE decoy +chrUn_JTFH01001714v1_decoy 1015 TRUE decoy +chrUn_JTFH01001715v1_decoy 1015 TRUE decoy +chrUn_JTFH01001716v1_decoy 1014 TRUE decoy +chrUn_JTFH01001717v1_decoy 1014 TRUE decoy +chrUn_JTFH01001718v1_decoy 1013 TRUE decoy +chrUn_JTFH01001719v1_decoy 1013 TRUE decoy +chrUn_JTFH01001720v1_decoy 1013 TRUE decoy +chrUn_JTFH01001721v1_decoy 1012 TRUE decoy +chrUn_JTFH01001722v1_decoy 1011 TRUE decoy +chrUn_JTFH01001723v1_decoy 1011 TRUE decoy +chrUn_JTFH01001724v1_decoy 1009 TRUE decoy 
+chrUn_JTFH01001725v1_decoy 1008 TRUE decoy +chrUn_JTFH01001726v1_decoy 1008 TRUE decoy +chrUn_JTFH01001727v1_decoy 1007 TRUE decoy +chrUn_JTFH01001728v1_decoy 1007 TRUE decoy +chrUn_JTFH01001729v1_decoy 1007 TRUE decoy +chrUn_JTFH01001730v1_decoy 1006 TRUE decoy +chrUn_JTFH01001731v1_decoy 1005 TRUE decoy +chrUn_JTFH01001732v1_decoy 1003 TRUE decoy +chrUn_JTFH01001733v1_decoy 1001 TRUE decoy +chrUn_JTFH01001734v1_decoy 1000 TRUE decoy +chrUn_JTFH01001735v1_decoy 19311 TRUE decoy +chrUn_JTFH01001736v1_decoy 11713 TRUE decoy +chrUn_JTFH01001737v1_decoy 11263 TRUE decoy +chrUn_JTFH01001738v1_decoy 9779 TRUE decoy +chrUn_JTFH01001739v1_decoy 9568 TRUE decoy +chrUn_JTFH01001740v1_decoy 9344 TRUE decoy +chrUn_JTFH01001741v1_decoy 9188 TRUE decoy +chrUn_JTFH01001742v1_decoy 9100 TRUE decoy +chrUn_JTFH01001743v1_decoy 8771 TRUE decoy +chrUn_JTFH01001744v1_decoy 8690 TRUE decoy +chrUn_JTFH01001745v1_decoy 8566 TRUE decoy +chrUn_JTFH01001746v1_decoy 8058 TRUE decoy +chrUn_JTFH01001747v1_decoy 7759 TRUE decoy +chrUn_JTFH01001748v1_decoy 7585 TRUE decoy +chrUn_JTFH01001749v1_decoy 7471 TRUE decoy +chrUn_JTFH01001750v1_decoy 7461 TRUE decoy +chrUn_JTFH01001751v1_decoy 7342 TRUE decoy +chrUn_JTFH01001752v1_decoy 7223 TRUE decoy +chrUn_JTFH01001753v1_decoy 7064 TRUE decoy +chrUn_JTFH01001754v1_decoy 6916 TRUE decoy +chrUn_JTFH01001755v1_decoy 6897 TRUE decoy +chrUn_JTFH01001756v1_decoy 6880 TRUE decoy +chrUn_JTFH01001757v1_decoy 6857 TRUE decoy +chrUn_JTFH01001758v1_decoy 6840 TRUE decoy +chrUn_JTFH01001759v1_decoy 6728 TRUE decoy +chrUn_JTFH01001760v1_decoy 6688 TRUE decoy +chrUn_JTFH01001761v1_decoy 6553 TRUE decoy +chrUn_JTFH01001762v1_decoy 6396 TRUE decoy +chrUn_JTFH01001763v1_decoy 6345 TRUE decoy +chrUn_JTFH01001764v1_decoy 6295 TRUE decoy +chrUn_JTFH01001765v1_decoy 6266 TRUE decoy +chrUn_JTFH01001766v1_decoy 6173 TRUE decoy +chrUn_JTFH01001767v1_decoy 6171 TRUE decoy +chrUn_JTFH01001768v1_decoy 6120 TRUE decoy +chrUn_JTFH01001769v1_decoy 6105 TRUE decoy 
+chrUn_JTFH01001770v1_decoy 6099 TRUE decoy +chrUn_JTFH01001771v1_decoy 5893 TRUE decoy +chrUn_JTFH01001772v1_decoy 5829 TRUE decoy +chrUn_JTFH01001773v1_decoy 5793 TRUE decoy +chrUn_JTFH01001774v1_decoy 5776 TRUE decoy +chrUn_JTFH01001775v1_decoy 5759 TRUE decoy +chrUn_JTFH01001776v1_decoy 5716 TRUE decoy +chrUn_JTFH01001777v1_decoy 5708 TRUE decoy +chrUn_JTFH01001778v1_decoy 5590 TRUE decoy +chrUn_JTFH01001779v1_decoy 5566 TRUE decoy +chrUn_JTFH01001780v1_decoy 5558 TRUE decoy +chrUn_JTFH01001781v1_decoy 5418 TRUE decoy +chrUn_JTFH01001782v1_decoy 5375 TRUE decoy +chrUn_JTFH01001783v1_decoy 5300 TRUE decoy +chrUn_JTFH01001784v1_decoy 5255 TRUE decoy +chrUn_JTFH01001785v1_decoy 5157 TRUE decoy +chrUn_JTFH01001786v1_decoy 5130 TRUE decoy +chrUn_JTFH01001787v1_decoy 4978 TRUE decoy +chrUn_JTFH01001788v1_decoy 4957 TRUE decoy +chrUn_JTFH01001789v1_decoy 4947 TRUE decoy +chrUn_JTFH01001790v1_decoy 4897 TRUE decoy +chrUn_JTFH01001791v1_decoy 4867 TRUE decoy +chrUn_JTFH01001792v1_decoy 4845 TRUE decoy +chrUn_JTFH01001793v1_decoy 4678 TRUE decoy +chrUn_JTFH01001794v1_decoy 4641 TRUE decoy +chrUn_JTFH01001795v1_decoy 4592 TRUE decoy +chrUn_JTFH01001796v1_decoy 4543 TRUE decoy +chrUn_JTFH01001797v1_decoy 4532 TRUE decoy +chrUn_JTFH01001798v1_decoy 4503 TRUE decoy +chrUn_JTFH01001799v1_decoy 4495 TRUE decoy +chrUn_JTFH01001800v1_decoy 4444 TRUE decoy +chrUn_JTFH01001801v1_decoy 4414 TRUE decoy +chrUn_JTFH01001802v1_decoy 4409 TRUE decoy +chrUn_JTFH01001803v1_decoy 4302 TRUE decoy +chrUn_JTFH01001804v1_decoy 4300 TRUE decoy +chrUn_JTFH01001805v1_decoy 4277 TRUE decoy +chrUn_JTFH01001806v1_decoy 4173 TRUE decoy +chrUn_JTFH01001807v1_decoy 4169 TRUE decoy +chrUn_JTFH01001808v1_decoy 4136 TRUE decoy +chrUn_JTFH01001809v1_decoy 4101 TRUE decoy +chrUn_JTFH01001810v1_decoy 4089 TRUE decoy +chrUn_JTFH01001811v1_decoy 4015 TRUE decoy +chrUn_JTFH01001812v1_decoy 4000 TRUE decoy +chrUn_JTFH01001813v1_decoy 3973 TRUE decoy +chrUn_JTFH01001814v1_decoy 3732 TRUE decoy 
+chrUn_JTFH01001815v1_decoy 3709 TRUE decoy +chrUn_JTFH01001816v1_decoy 3686 TRUE decoy +chrUn_JTFH01001817v1_decoy 3676 TRUE decoy +chrUn_JTFH01001818v1_decoy 3673 TRUE decoy +chrUn_JTFH01001819v1_decoy 3672 TRUE decoy +chrUn_JTFH01001820v1_decoy 3633 TRUE decoy +chrUn_JTFH01001821v1_decoy 3633 TRUE decoy +chrUn_JTFH01001822v1_decoy 3613 TRUE decoy +chrUn_JTFH01001823v1_decoy 3605 TRUE decoy +chrUn_JTFH01001824v1_decoy 3592 TRUE decoy +chrUn_JTFH01001825v1_decoy 3586 TRUE decoy +chrUn_JTFH01001826v1_decoy 3584 TRUE decoy +chrUn_JTFH01001827v1_decoy 3577 TRUE decoy +chrUn_JTFH01001828v1_decoy 3537 TRUE decoy +chrUn_JTFH01001829v1_decoy 3510 TRUE decoy +chrUn_JTFH01001830v1_decoy 3509 TRUE decoy +chrUn_JTFH01001831v1_decoy 3488 TRUE decoy +chrUn_JTFH01001832v1_decoy 3473 TRUE decoy +chrUn_JTFH01001833v1_decoy 3445 TRUE decoy +chrUn_JTFH01001834v1_decoy 3427 TRUE decoy +chrUn_JTFH01001835v1_decoy 3395 TRUE decoy +chrUn_JTFH01001836v1_decoy 3367 TRUE decoy +chrUn_JTFH01001837v1_decoy 3337 TRUE decoy +chrUn_JTFH01001838v1_decoy 3324 TRUE decoy +chrUn_JTFH01001839v1_decoy 3315 TRUE decoy +chrUn_JTFH01001840v1_decoy 3313 TRUE decoy +chrUn_JTFH01001841v1_decoy 3283 TRUE decoy +chrUn_JTFH01001842v1_decoy 3250 TRUE decoy +chrUn_JTFH01001843v1_decoy 3247 TRUE decoy +chrUn_JTFH01001844v1_decoy 3237 TRUE decoy +chrUn_JTFH01001845v1_decoy 3235 TRUE decoy +chrUn_JTFH01001846v1_decoy 3200 TRUE decoy +chrUn_JTFH01001847v1_decoy 3195 TRUE decoy +chrUn_JTFH01001848v1_decoy 3175 TRUE decoy +chrUn_JTFH01001849v1_decoy 3158 TRUE decoy +chrUn_JTFH01001850v1_decoy 3143 TRUE decoy +chrUn_JTFH01001851v1_decoy 3139 TRUE decoy +chrUn_JTFH01001852v1_decoy 3138 TRUE decoy +chrUn_JTFH01001853v1_decoy 3136 TRUE decoy +chrUn_JTFH01001854v1_decoy 3132 TRUE decoy +chrUn_JTFH01001855v1_decoy 3132 TRUE decoy +chrUn_JTFH01001856v1_decoy 3095 TRUE decoy +chrUn_JTFH01001857v1_decoy 3094 TRUE decoy +chrUn_JTFH01001858v1_decoy 3093 TRUE decoy +chrUn_JTFH01001859v1_decoy 3059 TRUE decoy 
+chrUn_JTFH01001860v1_decoy 2985 TRUE decoy +chrUn_JTFH01001861v1_decoy 2975 TRUE decoy +chrUn_JTFH01001862v1_decoy 2967 TRUE decoy +chrUn_JTFH01001863v1_decoy 2961 TRUE decoy +chrUn_JTFH01001864v1_decoy 2955 TRUE decoy +chrUn_JTFH01001865v1_decoy 2935 TRUE decoy +chrUn_JTFH01001866v1_decoy 2933 TRUE decoy +chrUn_JTFH01001867v1_decoy 2909 TRUE decoy +chrUn_JTFH01001868v1_decoy 2904 TRUE decoy +chrUn_JTFH01001869v1_decoy 2892 TRUE decoy +chrUn_JTFH01001870v1_decoy 2886 TRUE decoy +chrUn_JTFH01001871v1_decoy 2885 TRUE decoy +chrUn_JTFH01001872v1_decoy 2878 TRUE decoy +chrUn_JTFH01001873v1_decoy 2875 TRUE decoy +chrUn_JTFH01001874v1_decoy 2861 TRUE decoy +chrUn_JTFH01001875v1_decoy 2856 TRUE decoy +chrUn_JTFH01001876v1_decoy 2838 TRUE decoy +chrUn_JTFH01001877v1_decoy 2801 TRUE decoy +chrUn_JTFH01001878v1_decoy 2797 TRUE decoy +chrUn_JTFH01001879v1_decoy 2788 TRUE decoy +chrUn_JTFH01001880v1_decoy 2773 TRUE decoy +chrUn_JTFH01001881v1_decoy 2755 TRUE decoy +chrUn_JTFH01001882v1_decoy 2754 TRUE decoy +chrUn_JTFH01001883v1_decoy 2743 TRUE decoy +chrUn_JTFH01001884v1_decoy 2725 TRUE decoy +chrUn_JTFH01001885v1_decoy 2722 TRUE decoy +chrUn_JTFH01001886v1_decoy 2682 TRUE decoy +chrUn_JTFH01001887v1_decoy 2669 TRUE decoy +chrUn_JTFH01001888v1_decoy 2663 TRUE decoy +chrUn_JTFH01001889v1_decoy 2652 TRUE decoy +chrUn_JTFH01001890v1_decoy 2647 TRUE decoy +chrUn_JTFH01001891v1_decoy 2635 TRUE decoy +chrUn_JTFH01001892v1_decoy 2633 TRUE decoy +chrUn_JTFH01001893v1_decoy 2629 TRUE decoy +chrUn_JTFH01001894v1_decoy 2612 TRUE decoy +chrUn_JTFH01001895v1_decoy 2599 TRUE decoy +chrUn_JTFH01001896v1_decoy 2566 TRUE decoy +chrUn_JTFH01001897v1_decoy 2556 TRUE decoy +chrUn_JTFH01001898v1_decoy 2551 TRUE decoy +chrUn_JTFH01001899v1_decoy 2551 TRUE decoy +chrUn_JTFH01001900v1_decoy 2538 TRUE decoy +chrUn_JTFH01001901v1_decoy 2538 TRUE decoy +chrUn_JTFH01001902v1_decoy 2525 TRUE decoy +chrUn_JTFH01001903v1_decoy 2498 TRUE decoy +chrUn_JTFH01001904v1_decoy 2496 TRUE decoy 
+chrUn_JTFH01001905v1_decoy 2483 TRUE decoy +chrUn_JTFH01001906v1_decoy 2475 TRUE decoy +chrUn_JTFH01001907v1_decoy 2469 TRUE decoy +chrUn_JTFH01001908v1_decoy 2455 TRUE decoy +chrUn_JTFH01001909v1_decoy 2444 TRUE decoy +chrUn_JTFH01001910v1_decoy 2437 TRUE decoy +chrUn_JTFH01001911v1_decoy 2435 TRUE decoy +chrUn_JTFH01001912v1_decoy 2427 TRUE decoy +chrUn_JTFH01001913v1_decoy 2419 TRUE decoy +chrUn_JTFH01001914v1_decoy 2413 TRUE decoy +chrUn_JTFH01001915v1_decoy 2412 TRUE decoy +chrUn_JTFH01001916v1_decoy 2400 TRUE decoy +chrUn_JTFH01001917v1_decoy 2399 TRUE decoy +chrUn_JTFH01001918v1_decoy 2396 TRUE decoy +chrUn_JTFH01001919v1_decoy 2393 TRUE decoy +chrUn_JTFH01001920v1_decoy 2386 TRUE decoy +chrUn_JTFH01001921v1_decoy 2384 TRUE decoy +chrUn_JTFH01001922v1_decoy 2382 TRUE decoy +chrUn_JTFH01001923v1_decoy 2382 TRUE decoy +chrUn_JTFH01001924v1_decoy 2367 TRUE decoy +chrUn_JTFH01001925v1_decoy 2366 TRUE decoy +chrUn_JTFH01001926v1_decoy 2362 TRUE decoy +chrUn_JTFH01001927v1_decoy 2361 TRUE decoy +chrUn_JTFH01001928v1_decoy 2353 TRUE decoy +chrUn_JTFH01001929v1_decoy 2349 TRUE decoy +chrUn_JTFH01001930v1_decoy 2348 TRUE decoy +chrUn_JTFH01001931v1_decoy 2340 TRUE decoy +chrUn_JTFH01001932v1_decoy 2339 TRUE decoy +chrUn_JTFH01001933v1_decoy 2336 TRUE decoy +chrUn_JTFH01001934v1_decoy 2333 TRUE decoy +chrUn_JTFH01001935v1_decoy 2330 TRUE decoy +chrUn_JTFH01001936v1_decoy 2327 TRUE decoy +chrUn_JTFH01001937v1_decoy 2318 TRUE decoy +chrUn_JTFH01001938v1_decoy 2293 TRUE decoy +chrUn_JTFH01001939v1_decoy 2292 TRUE decoy +chrUn_JTFH01001940v1_decoy 2287 TRUE decoy +chrUn_JTFH01001941v1_decoy 2274 TRUE decoy +chrUn_JTFH01001942v1_decoy 2274 TRUE decoy +chrUn_JTFH01001943v1_decoy 2267 TRUE decoy +chrUn_JTFH01001944v1_decoy 2260 TRUE decoy +chrUn_JTFH01001945v1_decoy 2257 TRUE decoy +chrUn_JTFH01001946v1_decoy 2240 TRUE decoy +chrUn_JTFH01001947v1_decoy 2239 TRUE decoy +chrUn_JTFH01001948v1_decoy 2232 TRUE decoy +chrUn_JTFH01001949v1_decoy 2230 TRUE decoy 
+chrUn_JTFH01001950v1_decoy 2230 TRUE decoy +chrUn_JTFH01001951v1_decoy 2222 TRUE decoy +chrUn_JTFH01001952v1_decoy 2216 TRUE decoy +chrUn_JTFH01001953v1_decoy 2214 TRUE decoy +chrUn_JTFH01001954v1_decoy 2210 TRUE decoy +chrUn_JTFH01001955v1_decoy 2203 TRUE decoy +chrUn_JTFH01001956v1_decoy 2197 TRUE decoy +chrUn_JTFH01001957v1_decoy 2196 TRUE decoy +chrUn_JTFH01001958v1_decoy 2196 TRUE decoy +chrUn_JTFH01001959v1_decoy 2179 TRUE decoy +chrUn_JTFH01001960v1_decoy 2178 TRUE decoy +chrUn_JTFH01001961v1_decoy 2178 TRUE decoy +chrUn_JTFH01001962v1_decoy 2172 TRUE decoy +chrUn_JTFH01001963v1_decoy 2170 TRUE decoy +chrUn_JTFH01001964v1_decoy 2167 TRUE decoy +chrUn_JTFH01001965v1_decoy 2167 TRUE decoy +chrUn_JTFH01001966v1_decoy 2157 TRUE decoy +chrUn_JTFH01001967v1_decoy 2153 TRUE decoy +chrUn_JTFH01001968v1_decoy 2151 TRUE decoy +chrUn_JTFH01001969v1_decoy 2147 TRUE decoy +chrUn_JTFH01001970v1_decoy 2145 TRUE decoy +chrUn_JTFH01001971v1_decoy 2142 TRUE decoy +chrUn_JTFH01001972v1_decoy 2142 TRUE decoy +chrUn_JTFH01001973v1_decoy 2136 TRUE decoy +chrUn_JTFH01001974v1_decoy 2130 TRUE decoy +chrUn_JTFH01001975v1_decoy 2128 TRUE decoy +chrUn_JTFH01001976v1_decoy 2126 TRUE decoy +chrUn_JTFH01001977v1_decoy 2126 TRUE decoy +chrUn_JTFH01001978v1_decoy 2119 TRUE decoy +chrUn_JTFH01001979v1_decoy 2107 TRUE decoy +chrUn_JTFH01001980v1_decoy 2091 TRUE decoy +chrUn_JTFH01001981v1_decoy 2087 TRUE decoy +chrUn_JTFH01001982v1_decoy 2086 TRUE decoy +chrUn_JTFH01001983v1_decoy 2083 TRUE decoy +chrUn_JTFH01001984v1_decoy 2075 TRUE decoy +chrUn_JTFH01001985v1_decoy 2075 TRUE decoy +chrUn_JTFH01001986v1_decoy 2072 TRUE decoy +chrUn_JTFH01001987v1_decoy 2068 TRUE decoy +chrUn_JTFH01001988v1_decoy 2067 TRUE decoy +chrUn_JTFH01001989v1_decoy 2055 TRUE decoy +chrUn_JTFH01001990v1_decoy 2051 TRUE decoy +chrUn_JTFH01001991v1_decoy 2050 TRUE decoy +chrUn_JTFH01001992v1_decoy 2033 TRUE decoy +chrUn_JTFH01001993v1_decoy 2024 TRUE decoy +chrUn_JTFH01001994v1_decoy 2016 TRUE decoy 
+chrUn_JTFH01001995v1_decoy 2011 TRUE decoy +chrUn_JTFH01001996v1_decoy 2009 TRUE decoy +chrUn_JTFH01001997v1_decoy 2003 TRUE decoy +chrUn_JTFH01001998v1_decoy 2001 TRUE decoy +HLA-A*01:01:01:01 3503 TRUE hla +HLA-A*01:01:01:02N 3291 TRUE hla +HLA-A*01:01:38L 3374 TRUE hla +HLA-A*01:02 3374 TRUE hla +HLA-A*01:03 3503 TRUE hla +HLA-A*01:04N 3136 TRUE hla +HLA-A*01:09 3105 TRUE hla +HLA-A*01:11N 3374 TRUE hla +HLA-A*01:14 3095 TRUE hla +HLA-A*01:16N 2985 TRUE hla +HLA-A*01:20 3105 TRUE hla +HLA-A*02:01:01:01 3517 TRUE hla +HLA-A*02:01:01:02L 3287 TRUE hla +HLA-A*02:01:01:03 3023 TRUE hla +HLA-A*02:01:01:04 3516 TRUE hla +HLA-A*02:02:01 2917 TRUE hla +HLA-A*02:03:01 3517 TRUE hla +HLA-A*02:03:03 3148 TRUE hla +HLA-A*02:05:01 3517 TRUE hla +HLA-A*02:06:01 3517 TRUE hla +HLA-A*02:07:01 3517 TRUE hla +HLA-A*02:10 3517 TRUE hla +HLA-A*02:251 3517 TRUE hla +HLA-A*02:259 2978 TRUE hla +HLA-A*02:264 3002 TRUE hla +HLA-A*02:265 3148 TRUE hla +HLA-A*02:266 3084 TRUE hla +HLA-A*02:269 3101 TRUE hla +HLA-A*02:279 3103 TRUE hla +HLA-A*02:32N 3517 TRUE hla +HLA-A*02:376 3104 TRUE hla +HLA-A*02:43N 3218 TRUE hla +HLA-A*02:455 3118 TRUE hla +HLA-A*02:48 3517 TRUE hla +HLA-A*02:51 3109 TRUE hla +HLA-A*02:533 3217 TRUE hla +HLA-A*02:53N 3305 TRUE hla +HLA-A*02:57 3054 TRUE hla +HLA-A*02:60:01 3112 TRUE hla +HLA-A*02:65 3387 TRUE hla +HLA-A*02:68 3109 TRUE hla +HLA-A*02:77 3371 TRUE hla +HLA-A*02:81 3309 TRUE hla +HLA-A*02:89 3371 TRUE hla +HLA-A*02:95 3388 TRUE hla +HLA-A*03:01:01:01 3502 TRUE hla +HLA-A*03:01:01:02N 3373 TRUE hla +HLA-A*03:01:01:03 3094 TRUE hla +HLA-A*03:02:01 3502 TRUE hla +HLA-A*03:11N 3404 TRUE hla +HLA-A*03:21N 3095 TRUE hla +HLA-A*03:36N 3142 TRUE hla +HLA-A*11:01:01 3503 TRUE hla +HLA-A*11:01:18 3503 TRUE hla +HLA-A*11:02:01 3503 TRUE hla +HLA-A*11:05 3373 TRUE hla +HLA-A*11:110 2903 TRUE hla +HLA-A*11:25 3073 TRUE hla +HLA-A*11:50Q 3362 TRUE hla +HLA-A*11:60 3241 TRUE hla +HLA-A*11:69N 3500 TRUE hla +HLA-A*11:74 3227 TRUE hla +HLA-A*11:75 3184 TRUE hla 
+HLA-A*11:77 3233 TRUE hla +HLA-A*23:01:01 3502 TRUE hla +HLA-A*23:09 3104 TRUE hla +HLA-A*23:38N 3020 TRUE hla +HLA-A*24:02:01:01 3502 TRUE hla +HLA-A*24:02:01:02L 3502 TRUE hla +HLA-A*24:02:01:03 3075 TRUE hla +HLA-A*24:02:03Q 3247 TRUE hla +HLA-A*24:02:10 3356 TRUE hla +HLA-A*24:03:01 3502 TRUE hla +HLA-A*24:07:01 3502 TRUE hla +HLA-A*24:08 3502 TRUE hla +HLA-A*24:09N 3502 TRUE hla +HLA-A*24:10:01 3502 TRUE hla +HLA-A*24:11N 3503 TRUE hla +HLA-A*24:152 3176 TRUE hla +HLA-A*24:20 3502 TRUE hla +HLA-A*24:215 3116 TRUE hla +HLA-A*24:61 3043 TRUE hla +HLA-A*24:86N 3415 TRUE hla +HLA-A*25:01:01 2917 TRUE hla +HLA-A*26:01:01 3517 TRUE hla +HLA-A*26:11N 3091 TRUE hla +HLA-A*26:15 3217 TRUE hla +HLA-A*26:50 3141 TRUE hla +HLA-A*29:01:01:01 3518 TRUE hla +HLA-A*29:01:01:02N 3303 TRUE hla +HLA-A*29:02:01:01 3518 TRUE hla +HLA-A*29:02:01:02 3518 TRUE hla +HLA-A*29:46 3310 TRUE hla +HLA-A*30:01:01 3503 TRUE hla +HLA-A*30:02:01:01 2903 TRUE hla +HLA-A*30:02:01:02 3374 TRUE hla +HLA-A*30:04:01 3503 TRUE hla +HLA-A*30:89 2903 TRUE hla +HLA-A*31:01:02 3518 TRUE hla +HLA-A*31:01:23 2918 TRUE hla +HLA-A*31:04 2918 TRUE hla +HLA-A*31:14N 3090 TRUE hla +HLA-A*31:46 3075 TRUE hla +HLA-A*32:01:01 3518 TRUE hla +HLA-A*32:06 3389 TRUE hla +HLA-A*33:01:01 3518 TRUE hla +HLA-A*33:03:01 3518 TRUE hla +HLA-A*33:07 3389 TRUE hla +HLA-A*34:01:01 3517 TRUE hla +HLA-A*34:02:01 3096 TRUE hla +HLA-A*36:01 2903 TRUE hla +HLA-A*43:01 3388 TRUE hla +HLA-A*66:01:01 3517 TRUE hla +HLA-A*66:17 3075 TRUE hla +HLA-A*68:01:01:01 2930 TRUE hla +HLA-A*68:01:01:02 3517 TRUE hla +HLA-A*68:01:02:01 3517 TRUE hla +HLA-A*68:01:02:02 3388 TRUE hla +HLA-A*68:02:01:01 3517 TRUE hla +HLA-A*68:02:01:02 3506 TRUE hla +HLA-A*68:02:01:03 2909 TRUE hla +HLA-A*68:02:02 2916 TRUE hla +HLA-A*68:03:01 2917 TRUE hla +HLA-A*68:08:01 3120 TRUE hla +HLA-A*68:113 3070 TRUE hla +HLA-A*68:17 3134 TRUE hla +HLA-A*68:18N 3237 TRUE hla +HLA-A*68:22 3119 TRUE hla +HLA-A*68:71 3198 TRUE hla +HLA-A*69:01 2917 TRUE hla +HLA-A*74:01 2918 
TRUE hla +HLA-A*74:02:01:01 2918 TRUE hla +HLA-A*74:02:01:02 3518 TRUE hla +HLA-A*80:01:01:01 3263 TRUE hla +HLA-A*80:01:01:02 3055 TRUE hla +HLA-B*07:02:01 3323 TRUE hla +HLA-B*07:05:01 2676 TRUE hla +HLA-B*07:06 2676 TRUE hla +HLA-B*07:156 2967 TRUE hla +HLA-B*07:33:01 3239 TRUE hla +HLA-B*07:41 3266 TRUE hla +HLA-B*07:44 3270 TRUE hla +HLA-B*07:50 3323 TRUE hla +HLA-B*08:01:01 3322 TRUE hla +HLA-B*08:08N 3035 TRUE hla +HLA-B*08:132 2675 TRUE hla +HLA-B*08:134 2959 TRUE hla +HLA-B*08:19N 3322 TRUE hla +HLA-B*08:20 3322 TRUE hla +HLA-B*08:33 3322 TRUE hla +HLA-B*08:79 2676 TRUE hla +HLA-B*13:01:01 3324 TRUE hla +HLA-B*13:02:01 3324 TRUE hla +HLA-B*13:02:03 3323 TRUE hla +HLA-B*13:02:09 2919 TRUE hla +HLA-B*13:08 3324 TRUE hla +HLA-B*13:15 3323 TRUE hla +HLA-B*13:25 2689 TRUE hla +HLA-B*14:01:01 3312 TRUE hla +HLA-B*14:02:01 3312 TRUE hla +HLA-B*14:07N 3255 TRUE hla +HLA-B*15:01:01:01 3336 TRUE hla +HLA-B*15:01:01:02N 1208 TRUE hla +HLA-B*15:01:01:03 3026 TRUE hla +HLA-B*15:02:01 3335 TRUE hla +HLA-B*15:03:01 2689 TRUE hla +HLA-B*15:04:01 3052 TRUE hla +HLA-B*15:07:01 3336 TRUE hla +HLA-B*15:108 3283 TRUE hla +HLA-B*15:10:01 2689 TRUE hla +HLA-B*15:11:01 3336 TRUE hla +HLA-B*15:13:01 2688 TRUE hla +HLA-B*15:16:01 2688 TRUE hla +HLA-B*15:17:01:01 3051 TRUE hla +HLA-B*15:17:01:02 3051 TRUE hla +HLA-B*15:18:01 3336 TRUE hla +HLA-B*15:220 2878 TRUE hla +HLA-B*15:25:01 3335 TRUE hla +HLA-B*15:27:01 2689 TRUE hla +HLA-B*15:32:01 3336 TRUE hla +HLA-B*15:42 3333 TRUE hla +HLA-B*15:58 3336 TRUE hla +HLA-B*15:66 2902 TRUE hla +HLA-B*15:77 3336 TRUE hla +HLA-B*15:83 3337 TRUE hla +HLA-B*18:01:01:01 3323 TRUE hla +HLA-B*18:01:01:02 3323 TRUE hla +HLA-B*18:02 2686 TRUE hla +HLA-B*18:03 3323 TRUE hla +HLA-B*18:17N 2979 TRUE hla +HLA-B*18:26 3323 TRUE hla +HLA-B*18:94N 2970 TRUE hla +HLA-B*27:04:01 3325 TRUE hla +HLA-B*27:05:02 3325 TRUE hla +HLA-B*27:05:18 3321 TRUE hla +HLA-B*27:06 3325 TRUE hla +HLA-B*27:07:01 2677 TRUE hla +HLA-B*27:131 3325 TRUE hla +HLA-B*27:24 2677 TRUE 
hla +HLA-B*27:25 2677 TRUE hla +HLA-B*27:32 3325 TRUE hla +HLA-B*35:01:01:01 3327 TRUE hla +HLA-B*35:01:01:02 3327 TRUE hla +HLA-B*35:01:22 2806 TRUE hla +HLA-B*35:02:01 3327 TRUE hla +HLA-B*35:03:01 2689 TRUE hla +HLA-B*35:05:01 2690 TRUE hla +HLA-B*35:08:01 2689 TRUE hla +HLA-B*35:14:02 3327 TRUE hla +HLA-B*35:241 3042 TRUE hla +HLA-B*35:41 3327 TRUE hla +HLA-B*37:01:01 3324 TRUE hla +HLA-B*37:01:05 2687 TRUE hla +HLA-B*38:01:01 3312 TRUE hla +HLA-B*38:02:01 3312 TRUE hla +HLA-B*38:14 2738 TRUE hla +HLA-B*39:01:01:01 3155 TRUE hla +HLA-B*39:01:01:02L 3153 TRUE hla +HLA-B*39:01:01:03 3312 TRUE hla +HLA-B*39:01:03 3155 TRUE hla +HLA-B*39:01:16 3155 TRUE hla +HLA-B*39:01:21 3312 TRUE hla +HLA-B*39:05:01 2675 TRUE hla +HLA-B*39:06:02 2674 TRUE hla +HLA-B*39:10:01 3027 TRUE hla +HLA-B*39:13:02 3255 TRUE hla +HLA-B*39:14 2765 TRUE hla +HLA-B*39:34 3254 TRUE hla +HLA-B*39:38Q 2675 TRUE hla +HLA-B*40:01:01 2676 TRUE hla +HLA-B*40:01:02 3323 TRUE hla +HLA-B*40:02:01 3258 TRUE hla +HLA-B*40:03 2677 TRUE hla +HLA-B*40:06:01:01 3325 TRUE hla +HLA-B*40:06:01:02 3299 TRUE hla +HLA-B*40:10:01 3304 TRUE hla +HLA-B*40:150 2800 TRUE hla +HLA-B*40:40 2677 TRUE hla +HLA-B*40:72:01 3283 TRUE hla +HLA-B*40:79 3257 TRUE hla +HLA-B*41:01:01 3322 TRUE hla +HLA-B*41:02:01 3322 TRUE hla +HLA-B*42:01:01 3322 TRUE hla +HLA-B*42:02 2675 TRUE hla +HLA-B*42:08 3165 TRUE hla +HLA-B*44:02:01:01 3323 TRUE hla +HLA-B*44:02:01:02S 3152 TRUE hla +HLA-B*44:02:01:03 3152 TRUE hla +HLA-B*44:02:17 3323 TRUE hla +HLA-B*44:02:27 2872 TRUE hla +HLA-B*44:03:01 3323 TRUE hla +HLA-B*44:03:02 2676 TRUE hla +HLA-B*44:04 3239 TRUE hla +HLA-B*44:09 3317 TRUE hla +HLA-B*44:138Q 3043 TRUE hla +HLA-B*44:150 2676 TRUE hla +HLA-B*44:23N 3323 TRUE hla +HLA-B*44:26 2804 TRUE hla +HLA-B*44:46 3323 TRUE hla +HLA-B*44:49 3039 TRUE hla +HLA-B*44:56N 2676 TRUE hla +HLA-B*45:01:01 3338 TRUE hla +HLA-B*45:04 3339 TRUE hla +HLA-B*46:01:01 3336 TRUE hla +HLA-B*46:01:05 2891 TRUE hla +HLA-B*47:01:01:01 3041 TRUE hla 
+HLA-B*47:01:01:02 3041 TRUE hla +HLA-B*48:01:01 3323 TRUE hla +HLA-B*48:03:01 2676 TRUE hla +HLA-B*48:04 2676 TRUE hla +HLA-B*48:08 3323 TRUE hla +HLA-B*49:01:01 3340 TRUE hla +HLA-B*49:32 3340 TRUE hla +HLA-B*50:01:01 3340 TRUE hla +HLA-B*51:01:01 3327 TRUE hla +HLA-B*51:01:02 3043 TRUE hla +HLA-B*51:02:01 3327 TRUE hla +HLA-B*51:07:01 3327 TRUE hla +HLA-B*51:42 2962 TRUE hla +HLA-B*52:01:01:01 3327 TRUE hla +HLA-B*52:01:01:02 3327 TRUE hla +HLA-B*52:01:01:03 3327 TRUE hla +HLA-B*52:01:02 3327 TRUE hla +HLA-B*53:01:01 3327 TRUE hla +HLA-B*53:11 3274 TRUE hla +HLA-B*54:01:01 3332 TRUE hla +HLA-B*54:18 2813 TRUE hla +HLA-B*55:01:01 3332 TRUE hla +HLA-B*55:01:03 3332 TRUE hla +HLA-B*55:02:01 3333 TRUE hla +HLA-B*55:12 3332 TRUE hla +HLA-B*55:24 3332 TRUE hla +HLA-B*55:48 2980 TRUE hla +HLA-B*56:01:01 2688 TRUE hla +HLA-B*56:03 2688 TRUE hla +HLA-B*56:04 2688 TRUE hla +HLA-B*57:01:01 3337 TRUE hla +HLA-B*57:03:01 2689 TRUE hla +HLA-B*57:06 3284 TRUE hla +HLA-B*57:11 3306 TRUE hla +HLA-B*57:29 3337 TRUE hla +HLA-B*58:01:01 3336 TRUE hla +HLA-B*58:31N 3004 TRUE hla +HLA-B*59:01:01:01 3333 TRUE hla +HLA-B*59:01:01:02 3332 TRUE hla +HLA-B*67:01:01 3312 TRUE hla +HLA-B*67:01:02 2675 TRUE hla +HLA-B*67:02 3307 TRUE hla +HLA-B*73:01 3323 TRUE hla +HLA-B*78:01:01 3327 TRUE hla +HLA-B*81:01 2676 TRUE hla +HLA-B*82:02:01 3050 TRUE hla +HLA-C*01:02:01 3349 TRUE hla +HLA-C*01:02:11 3057 TRUE hla +HLA-C*01:02:29 3349 TRUE hla +HLA-C*01:02:30 3333 TRUE hla +HLA-C*01:03 3349 TRUE hla +HLA-C*01:06 2895 TRUE hla +HLA-C*01:08 3349 TRUE hla +HLA-C*01:14 2895 TRUE hla +HLA-C*01:21 2895 TRUE hla +HLA-C*01:30 3349 TRUE hla +HLA-C*01:40 2968 TRUE hla +HLA-C*02:02:02:01 3347 TRUE hla +HLA-C*02:02:02:02 3347 TRUE hla +HLA-C*02:10 2893 TRUE hla +HLA-C*02:11 3320 TRUE hla +HLA-C*02:16:02 3029 TRUE hla +HLA-C*02:69 2933 TRUE hla +HLA-C*02:85 3347 TRUE hla +HLA-C*02:86 3347 TRUE hla +HLA-C*02:87 3064 TRUE hla +HLA-C*03:02:01 2894 TRUE hla +HLA-C*03:02:02:01 3348 TRUE hla +HLA-C*03:02:02:02 2896 
TRUE hla +HLA-C*03:02:02:03 3348 TRUE hla +HLA-C*03:03:01 3348 TRUE hla +HLA-C*03:04:01:01 3348 TRUE hla +HLA-C*03:04:01:02 3348 TRUE hla +HLA-C*03:04:02 2877 TRUE hla +HLA-C*03:04:04 2966 TRUE hla +HLA-C*03:05 2894 TRUE hla +HLA-C*03:06 2894 TRUE hla +HLA-C*03:100 3034 TRUE hla +HLA-C*03:13:01 3065 TRUE hla +HLA-C*03:20N 3321 TRUE hla +HLA-C*03:219 3070 TRUE hla +HLA-C*03:261 3348 TRUE hla +HLA-C*03:40:01 2894 TRUE hla +HLA-C*03:41:02 3328 TRUE hla +HLA-C*03:46 2997 TRUE hla +HLA-C*03:61 2894 TRUE hla +HLA-C*04:01:01:01 3349 TRUE hla +HLA-C*04:01:01:02 3349 TRUE hla +HLA-C*04:01:01:03 3349 TRUE hla +HLA-C*04:01:01:04 3012 TRUE hla +HLA-C*04:01:01:05 2931 TRUE hla +HLA-C*04:01:62 3329 TRUE hla +HLA-C*04:03:01 3349 TRUE hla +HLA-C*04:06 3349 TRUE hla +HLA-C*04:09N 2991 TRUE hla +HLA-C*04:128 3086 TRUE hla +HLA-C*04:161 3237 TRUE hla +HLA-C*04:177 3349 TRUE hla +HLA-C*04:70 3058 TRUE hla +HLA-C*04:71 3086 TRUE hla +HLA-C*05:01:01:01 3349 TRUE hla +HLA-C*05:01:01:02 3349 TRUE hla +HLA-C*05:08 3059 TRUE hla +HLA-C*05:09:01 3322 TRUE hla +HLA-C*05:93 2946 TRUE hla +HLA-C*06:02:01:01 3349 TRUE hla +HLA-C*06:02:01:02 3349 TRUE hla +HLA-C*06:02:01:03 3349 TRUE hla +HLA-C*06:23 3349 TRUE hla +HLA-C*06:24 3349 TRUE hla +HLA-C*06:46N 2987 TRUE hla +HLA-C*07:01:01:01 3354 TRUE hla +HLA-C*07:01:01:02 3093 TRUE hla +HLA-C*07:01:02 3352 TRUE hla +HLA-C*07:01:19 3354 TRUE hla +HLA-C*07:01:27 3195 TRUE hla +HLA-C*07:01:45 3354 TRUE hla +HLA-C*07:02:01:01 3354 TRUE hla +HLA-C*07:02:01:02 3074 TRUE hla +HLA-C*07:02:01:03 3354 TRUE hla +HLA-C*07:02:01:04 3353 TRUE hla +HLA-C*07:02:01:05 3354 TRUE hla +HLA-C*07:02:05 2903 TRUE hla +HLA-C*07:02:06 3354 TRUE hla +HLA-C*07:02:64 3354 TRUE hla +HLA-C*07:04:01 3354 TRUE hla +HLA-C*07:04:02 3343 TRUE hla +HLA-C*07:06 3354 TRUE hla +HLA-C*07:149 3098 TRUE hla +HLA-C*07:18 3353 TRUE hla +HLA-C*07:19 3222 TRUE hla +HLA-C*07:26 3069 TRUE hla +HLA-C*07:30 2903 TRUE hla +HLA-C*07:32N 3334 TRUE hla +HLA-C*07:384 3349 TRUE hla +HLA-C*07:385 3354 
TRUE hla +HLA-C*07:386 3183 TRUE hla +HLA-C*07:391 3354 TRUE hla +HLA-C*07:392 3354 TRUE hla +HLA-C*07:49 2935 TRUE hla +HLA-C*07:56:02 3354 TRUE hla +HLA-C*07:66 3354 TRUE hla +HLA-C*07:67 3354 TRUE hla +HLA-C*08:01:01 3349 TRUE hla +HLA-C*08:01:03 2998 TRUE hla +HLA-C*08:02:01:01 3349 TRUE hla +HLA-C*08:02:01:02 3349 TRUE hla +HLA-C*08:03:01 3349 TRUE hla +HLA-C*08:04:01 2895 TRUE hla +HLA-C*08:112 3178 TRUE hla +HLA-C*08:20 3349 TRUE hla +HLA-C*08:21 3349 TRUE hla +HLA-C*08:22 3349 TRUE hla +HLA-C*08:24 2895 TRUE hla +HLA-C*08:27 3349 TRUE hla +HLA-C*08:36N 3097 TRUE hla +HLA-C*08:40 2978 TRUE hla +HLA-C*08:41 3019 TRUE hla +HLA-C*08:62 3086 TRUE hla +HLA-C*12:02:02 3349 TRUE hla +HLA-C*12:03:01:01 3349 TRUE hla +HLA-C*12:03:01:02 3348 TRUE hla +HLA-C*12:08 3066 TRUE hla +HLA-C*12:13 3058 TRUE hla +HLA-C*12:19 3349 TRUE hla +HLA-C*12:22 2895 TRUE hla +HLA-C*12:99 3349 TRUE hla +HLA-C*14:02:01 3349 TRUE hla +HLA-C*14:03 3349 TRUE hla +HLA-C*14:21N 3099 TRUE hla +HLA-C*14:23 2976 TRUE hla +HLA-C*15:02:01 3349 TRUE hla +HLA-C*15:05:01 3349 TRUE hla +HLA-C*15:05:02 3349 TRUE hla +HLA-C*15:13 2895 TRUE hla +HLA-C*15:16 3066 TRUE hla +HLA-C*15:17 3349 TRUE hla +HLA-C*15:96Q 3349 TRUE hla +HLA-C*16:01:01 3349 TRUE hla +HLA-C*16:02:01 2895 TRUE hla +HLA-C*16:04:01 3349 TRUE hla +HLA-C*17:01:01:01 3368 TRUE hla +HLA-C*17:01:01:02 3368 TRUE hla +HLA-C*17:01:01:03 3368 TRUE hla +HLA-C*17:03 3197 TRUE hla +HLA-C*18:01 3346 TRUE hla +HLA-DQA1*01:01:02 6489 TRUE hla +HLA-DQA1*01:02:01:01 6484 TRUE hla +HLA-DQA1*01:02:01:02 6485 TRUE hla +HLA-DQA1*01:02:01:03 6485 TRUE hla +HLA-DQA1*01:02:01:04 6492 TRUE hla +HLA-DQA1*01:03:01:01 6485 TRUE hla +HLA-DQA1*01:03:01:02 6492 TRUE hla +HLA-DQA1*01:04:01:01 6484 TRUE hla +HLA-DQA1*01:04:01:02 6485 TRUE hla +HLA-DQA1*01:05:01 6485 TRUE hla +HLA-DQA1*01:07 5959 TRUE hla +HLA-DQA1*01:10 5790 TRUE hla +HLA-DQA1*01:11 5926 TRUE hla +HLA-DQA1*02:01 6403 TRUE hla +HLA-DQA1*03:01:01 6437 TRUE hla +HLA-DQA1*03:02 6437 TRUE hla 
+HLA-DQA1*03:03:01 6437 TRUE hla +HLA-DQA1*04:01:02:01 5853 TRUE hla +HLA-DQA1*04:01:02:02 5666 TRUE hla +HLA-DQA1*04:02 6210 TRUE hla +HLA-DQA1*05:01:01:01 5806 TRUE hla +HLA-DQA1*05:01:01:02 6529 TRUE hla +HLA-DQA1*05:03 6121 TRUE hla +HLA-DQA1*05:05:01:01 6593 TRUE hla +HLA-DQA1*05:05:01:02 6597 TRUE hla +HLA-DQA1*05:05:01:03 6393 TRUE hla +HLA-DQA1*05:11 6589 TRUE hla +HLA-DQA1*06:01:01 5878 TRUE hla +HLA-DQB1*02:01:01 7480 TRUE hla +HLA-DQB1*02:02:01 7471 TRUE hla +HLA-DQB1*03:01:01:01 7231 TRUE hla +HLA-DQB1*03:01:01:02 7230 TRUE hla +HLA-DQB1*03:01:01:03 7231 TRUE hla +HLA-DQB1*03:02:01 7126 TRUE hla +HLA-DQB1*03:03:02:01 7126 TRUE hla +HLA-DQB1*03:03:02:02 7126 TRUE hla +HLA-DQB1*03:03:02:03 6800 TRUE hla +HLA-DQB1*03:05:01 6934 TRUE hla +HLA-DQB1*05:01:01:01 7090 TRUE hla +HLA-DQB1*05:01:01:02 7090 TRUE hla +HLA-DQB1*05:03:01:01 7089 TRUE hla +HLA-DQB1*05:03:01:02 7089 TRUE hla +HLA-DQB1*06:01:01 7111 TRUE hla +HLA-DQB1*06:02:01 7102 TRUE hla +HLA-DQB1*06:03:01 7103 TRUE hla +HLA-DQB1*06:09:01 7102 TRUE hla +HLA-DRB1*01:01:01 10741 TRUE hla +HLA-DRB1*01:02:01 11229 TRUE hla +HLA-DRB1*03:01:01:01 13908 TRUE hla +HLA-DRB1*03:01:01:02 13426 TRUE hla +HLA-DRB1*04:03:01 15246 TRUE hla +HLA-DRB1*07:01:01:01 16110 TRUE hla +HLA-DRB1*07:01:01:02 16120 TRUE hla +HLA-DRB1*08:03:02 13562 TRUE hla +HLA-DRB1*09:21 16039 TRUE hla +HLA-DRB1*10:01:01 13501 TRUE hla +HLA-DRB1*11:01:01 13921 TRUE hla +HLA-DRB1*11:01:02 13931 TRUE hla +HLA-DRB1*11:04:01 13919 TRUE hla +HLA-DRB1*12:01:01 13404 TRUE hla +HLA-DRB1*12:17 11260 TRUE hla +HLA-DRB1*13:01:01 13935 TRUE hla +HLA-DRB1*13:02:01 13941 TRUE hla +HLA-DRB1*14:05:01 13933 TRUE hla +HLA-DRB1*14:54:01 13936 TRUE hla +HLA-DRB1*15:01:01:01 11080 TRUE hla +HLA-DRB1*15:01:01:02 11571 TRUE hla +HLA-DRB1*15:01:01:03 11056 TRUE hla +HLA-DRB1*15:01:01:04 11056 TRUE hla +HLA-DRB1*15:02:01 10313 TRUE hla +HLA-DRB1*15:03:01:01 11567 TRUE hla +HLA-DRB1*15:03:01:02 11569 TRUE hla +HLA-DRB1*16:02:01 11005 TRUE hla +phix 5386 FALSE 
technical diff --git a/resources/hs37d5+phix.tsv b/resources/hs37d5+phix.tsv new file mode 100644 index 0000000..c8b2a38 --- /dev/null +++ b/resources/hs37d5+phix.tsv @@ -0,0 +1,88 @@ +chromosome size compressedMref category +1 249250621 TRUE autosome +2 243199373 TRUE autosome +3 198022430 TRUE autosome +4 191154276 TRUE autosome +5 180915260 TRUE autosome +6 171115067 TRUE autosome +7 159138663 TRUE autosome +8 146364022 TRUE autosome +9 141213431 TRUE autosome +10 135534747 TRUE autosome +11 135006516 TRUE autosome +12 133851895 TRUE autosome +13 115169878 TRUE autosome +14 107349540 TRUE autosome +15 102531392 TRUE autosome +16 90354753 TRUE autosome +17 81195210 TRUE autosome +18 78077248 TRUE autosome +19 59128983 TRUE autosome +20 63025520 TRUE autosome +21 48129895 TRUE autosome +22 51304566 TRUE autosome +X 155270560 TRUE x +Y 59373566 TRUE y +MT 16569 FALSE extrachromosomal +GL000207.1 4262 TRUE unassigned +GL000226.1 15008 TRUE unassigned +GL000229.1 19913 TRUE unassigned +GL000231.1 27386 TRUE unassigned +GL000210.1 27682 TRUE unassigned +GL000239.1 33824 TRUE unassigned +GL000235.1 34474 TRUE unassigned +GL000201.1 36148 TRUE unassigned +GL000247.1 36422 TRUE unassigned +GL000245.1 36651 TRUE unassigned +GL000197.1 37175 TRUE unassigned +GL000203.1 37498 TRUE unassigned +GL000246.1 38154 TRUE unassigned +GL000249.1 38502 TRUE unassigned +GL000196.1 38914 TRUE unassigned +GL000248.1 39786 TRUE unassigned +GL000244.1 39929 TRUE unassigned +GL000238.1 39939 TRUE unassigned +GL000202.1 40103 TRUE unassigned +GL000234.1 40531 TRUE unassigned +GL000232.1 40652 TRUE unassigned +GL000206.1 41001 TRUE unassigned +GL000240.1 41933 TRUE unassigned +GL000236.1 41934 TRUE unassigned +GL000241.1 42152 TRUE unassigned +GL000243.1 43341 TRUE unassigned +GL000242.1 43523 TRUE unassigned +GL000230.1 43691 TRUE unassigned +GL000237.1 45867 TRUE unassigned +GL000233.1 45941 TRUE unassigned +GL000204.1 81310 TRUE unassigned +GL000198.1 90085 TRUE unassigned +GL000208.1 
92689 TRUE unassigned +GL000191.1 106433 TRUE unassigned +GL000227.1 128374 TRUE unassigned +GL000228.1 129120 TRUE unassigned +GL000214.1 137718 TRUE unassigned +GL000221.1 155397 TRUE unassigned +GL000209.1 159169 TRUE unassigned +GL000218.1 161147 TRUE unassigned +GL000220.1 161802 TRUE unassigned +GL000213.1 164239 TRUE unassigned +GL000211.1 166566 TRUE unassigned +GL000199.1 169874 TRUE unassigned +GL000217.1 172149 TRUE unassigned +GL000216.1 172294 TRUE unassigned +GL000215.1 172545 TRUE unassigned +GL000205.1 174588 TRUE unassigned +GL000219.1 179198 TRUE unassigned +GL000224.1 179693 TRUE unassigned +GL000223.1 180455 TRUE unassigned +GL000195.1 182896 TRUE unassigned +GL000212.1 186858 TRUE unassigned +GL000222.1 186861 TRUE unassigned +GL000200.1 187035 TRUE unassigned +GL000193.1 189789 TRUE unassigned +GL000194.1 191469 TRUE unassigned +GL000225.1 211173 TRUE unassigned +GL000192.1 547496 TRUE unassigned +NC_007605 171823 TRUE virus +hs37d5 35477943 TRUE decoy +phiX174 5386 FALSE technical diff --git a/sophia.cpp b/sophia.cpp deleted file mode 100644 index 5afecba..0000000 --- a/sophia.cpp +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "Alignment.h" -#include "SuppAlignment.h" -#include "Breakpoint.h" -#include "SamSegmentMapper.h" -#include "ChrConverter.h" -#include "HelperFunctions.h" - -std::pair getIsizeParameters(const std::string &ISIZEFILE); -int main(int argc, char** argv) { - std::ios_base::sync_with_stdio(false); - std::cin.tie(nullptr); - boost::program_options::options_description desc("Allowed options"); - desc.add_options() // - ("help", "produce help message") // - ("mergedisizes", boost::program_options::value(), "insertsize distribution file for the merged bam") // - ("medianisize", boost::program_options::value(), "median insert size for the merged bam") // - ("stdisizepercentage", boost::program_options::value(), 
"percentage standard deviation of the insert size for the merged bam") // - ("defaultreadlength", boost::program_options::value(), "Default read length for the technology used in sequencing 101,151 etc.") // - ("clipsize", boost::program_options::value(), "Minimum length of soft/hard clips in the alignment. (10)") // - ("basequality", boost::program_options::value(), "Minimum median quality of split read overhangs. (23)") // - ("basequalitylow", boost::program_options::value(), "If 5 consecutive bases in a split read overhang have lower quality than this strict threshold, it will be low-qual. (12)") // - ("lowqualclipsize", boost::program_options::value(), "Maximum length of a low qality split read overhang for discarding. (5)") // - ("isizesigma", boost::program_options::value(), "The number of sds a s's mate has to be away to be called as discordant. (5)") // - ("bpsupport", boost::program_options::value(), "Minimum number of reads supporting a discordant contig. (5)") // - ("properpairpercentage", boost::program_options::value(), "Proper pair ratio as a percentage (100.0)"); - boost::program_options::variables_map inputVariables { }; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), inputVariables); - boost::program_options::notify(inputVariables); - if (inputVariables.count("help")) { - std::cout << desc << std::endl; - return 0; - } - int defaultReadLength { 0 }; - if (inputVariables.count("defaultreadlength")) { - defaultReadLength = inputVariables["defaultreadlength"].as(); - } else { - std::cerr << "Default read Length not given, exiting" << std::endl; - return 1; - } - auto clipSize = 10, baseQuality = 23, baseQualityLow = 12, lowQualClipSize = 5, isizeSigmaLevel = 5; - auto bpSupport = 5; - auto properPairRatio = 1.0; - if (inputVariables.count("clipsize")) { - clipSize = inputVariables["clipsize"].as(); - } - if (inputVariables.count("basequality")) { - baseQuality = inputVariables["basequality"].as(); - } - if 
(inputVariables.count("basequalitylow")) { - baseQualityLow = inputVariables["basequalitylow"].as(); - } - if (inputVariables.count("lowqualclipsize")) { - lowQualClipSize = inputVariables["lowqualclipsize"].as(); - } - if (inputVariables.count("isizesigma")) { - isizeSigmaLevel = inputVariables["isizesigma"].as(); - } - if (inputVariables.count("bpsupport")) { - bpSupport = inputVariables["bpsupport"].as(); - } - if (inputVariables.count("properpairpercentage")) { - properPairRatio = inputVariables["properpairpercentage"].as(); - properPairRatio /= 100; - if (properPairRatio < 0.9) { - sophia::Breakpoint::PROPERPAIRCOMPENSATIONMODE = true; - sophia::Breakpoint::IMPROPERPAIRRATIO = 0.9 - properPairRatio; - } - } - std::string mergedIsizeFile; - if (inputVariables.count("mergedisizes")) { - mergedIsizeFile = inputVariables["mergedisizes"].as(); - auto isizeparams = getIsizeParameters(mergedIsizeFile); - sophia::Alignment::ISIZEMAX = std::min(4000.0, isizeparams.first + isizeSigmaLevel * isizeparams.second); - sophia::SuppAlignment::ISIZEMAX = sophia::Alignment::ISIZEMAX; - } else { - if (inputVariables.count("medianisize") && inputVariables.count("stdisizepercentage")) { - auto medianIsize = inputVariables["medianisize"].as(); - auto isizeStdPercentage = inputVariables["stdisizepercentage"].as(); - sophia::Alignment::ISIZEMAX = std::min(4000.0, medianIsize + isizeSigmaLevel * medianIsize * isizeStdPercentage * 0.01); - sophia::SuppAlignment::ISIZEMAX = sophia::Alignment::ISIZEMAX; - } else { - sophia::Alignment::ISIZEMAX = 2000.0; - sophia::SuppAlignment::ISIZEMAX = 2000.0; - std::cerr << "No insert size distribution file given, using a dummy default value of 2000 as the min insert size of a distant event" << std::endl; - } - } - sophia::Alignment::CLIPPEDNUCLEOTIDECOUNTTHRESHOLD = clipSize; - sophia::Alignment::BASEQUALITYTHRESHOLD = baseQuality + 33; - sophia::Alignment::BASEQUALITYTHRESHOLDLOW = baseQualityLow + 33; - sophia::Alignment::LOWQUALCLIPTHRESHOLD = 
lowQualClipSize; - sophia::Breakpoint::BPSUPPORTTHRESHOLD = bpSupport; - sophia::Breakpoint::DEFAULTREADLENGTH = defaultReadLength; - sophia::Breakpoint::DISCORDANTLOWQUALLEFTRANGE = static_cast(std::round(defaultReadLength * 1.11)); - sophia::Breakpoint::DISCORDANTLOWQUALRIGHTRANGE = static_cast(std::round(defaultReadLength * 0.51)); - - sophia::SuppAlignment::DEFAULTREADLENGTH = defaultReadLength; - sophia::ChosenBp::BPSUPPORTTHRESHOLD = bpSupport; - std::cout << sophia::Breakpoint::COLUMNSSTR; - sophia::SamSegmentMapper segmentRefMaster { defaultReadLength }; - segmentRefMaster.parseSamStream(); - return 0; -} -std::pair getIsizeParameters(const std::string &ISIZEFILE) { - std::pair isizeMedianStd { }; - std::ifstream infile { ISIZEFILE }; - std::string line; - auto i = 0; - while (sophia::error_terminating_getline(infile, line)) { - boost::algorithm::trim_right(line); - switch (i) { - case 0: - isizeMedianStd.first = boost::lexical_cast(line); - break; - case 2: - isizeMedianStd.second = boost::lexical_cast(line); - break; - default: - break; - } - ++i; - } - return isizeMedianStd; -} diff --git a/sophiaAnnotate.cpp b/sophiaAnnotate.cpp deleted file mode 100644 index da1f2f5..0000000 --- a/sophiaAnnotate.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * sophiaAnnotate.cpp - * - * Created on: 28 Apr 2016 - * Author: umuttoprak - */ -#include -#include -#include -#include "cxxopts.hpp" -#include "BreakpointReduced.h" -#include "AnnotationProcessor.h" -#include "SuppAlignment.h" -#include "SuppAlignmentAnno.h" -#include "SvEvent.h" -#include "strtk.hpp" -#include -#include -#include "MrefEntryAnno.h" -#include -#include -#include "ChrConverter.h" -#include "HelperFunctions.h" - - -int main(int argc, char** argv) { - using namespace std; - - ios_base::sync_with_stdio(false); - cin.tie(nullptr); - cxxopts::Options options("SophiaAnnotate", "Annotates SOPHIA output"); - options.add_options() // - ("help", "produce help message") // - ("mref", "mref file", cxxopts::value()) 
// - ("tumorresults", "_bps.bed.gz file from sophia for the tumor, or control for a no-tumor analysis", cxxopts::value()) // - ("controlresults", "_bps.bed.gz file from sophia for the control", cxxopts::value()) // - ("defaultreadlengthtumor", "Default read length for the technology used in sequencing 101,151 etc., tumor", cxxopts::value()) // - ("defaultreadlengthcontrol", "Default read length for the technology used in sequencing 101,151 etc., tumor", cxxopts::value()) // - ("pidsinmref", "Number of PIDs in the MREF", cxxopts::value()) // - ("artifactlofreq", "PERCENTAGE frequency of artifact supports for bps to be considered as artifact_like (33)", cxxopts::value()) // - ("artifacthifreq", "PERCENTAGE frequency of artifact supports for bps to be considered as artifacts (50)", cxxopts::value()) // - ("clonalitylofreq", "PERCENTAGE clonality for bps to be considered as extreme_subclonal (10)", cxxopts::value()) // - ("clonalitystrictlofreq", "PERCENTAGE clonality for bps to be considered as extreme_subclonal (20)", cxxopts::value()) // - ("clonalityhifreq", "PERCENTAGE clonality for bps to be considered as homozygous (85)", cxxopts::value()) // - ("bpfreq", "PERCENTAGE frequency of a BP for consideration as rare. (3)", cxxopts::value()) // - ("germlineoffset", "Minimum offset a germline bp and a control bp. (5)", cxxopts::value()) // - ("germlinedblimit", "Maximum occurrence of germline variants in the db. 
(5)", cxxopts::value()) // - ("debugmode", "debugmode"); - options.parse(argc, argv); - vector> mref { 85, vector { } }; - if (!options.count("mref")) { - cerr << "No mref file given, exiting" << endl; - return 1; - } - - string tumorResults; - if (options.count("tumorresults")) { - tumorResults = options["tumorresults"].as(); - } else { - cerr << "No input file given, exiting" << endl; - return 1; - } - int pidsInMref { 0 }; - if (options.count("pidsinmref")) { - pidsInMref = options["pidsinmref"].as(); - } else { - cerr << "number of PIDS in the MREF not given, exiting" << endl; - return 1; - } - int defaultReadLengthTumor { 0 }; - if (options.count("defaultreadlengthtumor")) { - defaultReadLengthTumor = options["defaultreadlengthtumor"].as(); - } else { - cerr << "Default read Length not given, exiting" << endl; - return 1; - } - int artifactlofreq { 33 }; - if (options.count("artifactlofreq")) { - artifactlofreq = options["artifactlofreq"].as(); - } - int artifacthifreq { 50 }; - if (options.count("artifacthifreq")) { - artifacthifreq = options["artifacthifreq"].as(); - } - int clonalitylofreq { 5 }; - if (options.count("clonalitylofreq")) { - clonalitylofreq = options["clonalitylofreq"].as(); - } - int clonalitystrictlofreq { 20 }; - if (options.count("clonalitystrictlofreq")) { - clonalitystrictlofreq = options["clonalitystrictlofreq"].as(); - } - int clonalityhifreq { 85 }; - if (options.count("clonalityhifreq")) { - clonalityhifreq = options["clonalityhifreq"].as(); - } - int bpFreq { 3 }; - if (options.count("bpfreq")) { - bpFreq = options["bpfreq"].as(); - } - int germlineOffset { 5 }; - if (options.count("germlineoffset")) { - germlineOffset = options["germlineoffset"].as(); - } - int germlineDbLimit { 5 }; - if (options.count("germlinedblimit")) { - germlineDbLimit = options["germlinedblimit"].as(); - } - sophia::MrefEntryAnno::PIDSINMREF = pidsInMref; - unique_ptr mrefInputHandle { make_unique(options["mref"].as(), ios_base::in | ios_base::binary) }; - 
unique_ptr mrefGzHandle { make_unique() }; - mrefGzHandle->push(boost::iostreams::gzip_decompressor()); - mrefGzHandle->push(*mrefInputHandle); - cerr << "m\n"; - string line { }; - while (sophia::error_terminating_getline(*mrefGzHandle, line)) { - if (line.front() == '#') { - continue; - }; - auto chrIndex = sophia::ChrConverter::indexConverter[sophia::ChrConverter::readChromosomeIndex(line.cbegin(), '\t')]; - if (chrIndex < 0) { - continue; - } - mref[chrIndex].emplace_back(line); - } - sophia::SvEvent::ARTIFACTFREQLOWTHRESHOLD = (artifactlofreq + 0.0) / 100; - sophia::SvEvent::ARTIFACTFREQHIGHTHRESHOLD = (artifacthifreq + 0.0) / 100; - sophia::BreakpointReduced::ARTIFACTFREQHIGHTHRESHOLD = sophia::SvEvent::ARTIFACTFREQHIGHTHRESHOLD; - sophia::SvEvent::CLONALITYLOWTHRESHOLD = (clonalitylofreq + 0.0) / 100; - sophia::SvEvent::CLONALITYSTRICTLOWTHRESHOLD = (clonalitystrictlofreq + 0.0) / 100; - sophia::BreakpointReduced::CLONALITYSTRICTLOWTHRESHOLD = sophia::SvEvent::CLONALITYSTRICTLOWTHRESHOLD; - sophia::SvEvent::CLONALITYHIGHTHRESHOLD = (clonalityhifreq + 0.0) / 100; - sophia::SvEvent::BPFREQTHRESHOLD = pidsInMref * (bpFreq + 0.0) / 100; - sophia::SvEvent::RELAXEDBPFREQTHRESHOLD = 3 * sophia::SvEvent::BPFREQTHRESHOLD; - sophia::SvEvent::PIDSINMREFSTR = strtk::type_to_string(pidsInMref); - sophia::BreakpointReduced::PIDSINMREFSTR = sophia::SvEvent::PIDSINMREFSTR; - sophia::BreakpointReduced::DEFAULTREADLENGTH = defaultReadLengthTumor; - sophia::Breakpoint::DEFAULTREADLENGTH = defaultReadLengthTumor; - sophia::SuppAlignment::DEFAULTREADLENGTH = defaultReadLengthTumor; - sophia::SuppAlignmentAnno::DEFAULTREADLENGTH = defaultReadLengthTumor; - sophia::SvEvent::HALFDEFAULTREADLENGTH = round(defaultReadLengthTumor / 2.0); - sophia::SvEvent::GERMLINEOFFSETTHRESHOLD = germlineOffset; - sophia::SvEvent::GERMLINEDBLIMIT = germlineDbLimit; - sophia::SvEvent::ABRIDGEDOUTPUT = true; - if (options.count("debugmode")) { - sophia::SvEvent::DEBUGMODE = true; - } else { - 
sophia::SvEvent::DEBUGMODE = false; - } - sophia::AnnotationProcessor::ABRIDGEDOUTPUT = true; - sophia::Breakpoint::BPSUPPORTTHRESHOLD = 3; - if (options.count("controlresults")) { - string controlResults { options["controlresults"].as() }; - int defaultReadLengthControl { 0 }; - if (options.count("defaultreadlengthcontrol")) { - defaultReadLengthControl = options["defaultreadlengthtumor"].as(); - } else { - cerr << "Default read Length not given, exiting" << endl; - return 1; - } - auto lowQualControl = 0; - auto pathogenInControl = false; - { - sophia::SvEvent::NOCONTROLMODE = true; - sophia::AnnotationProcessor annotationProcessorControlCheck { controlResults, mref, defaultReadLengthControl, true, germlineDbLimit }; - lowQualControl = annotationProcessorControlCheck.getMassiveInvFilteringLevel(); - pathogenInControl = annotationProcessorControlCheck.isContaminationObserved(); - sophia::SvEvent::NOCONTROLMODE = false; - } - sophia::AnnotationProcessor annotationProcessor { tumorResults, mref, controlResults, defaultReadLengthTumor, defaultReadLengthControl, germlineDbLimit, lowQualControl, pathogenInControl }; - annotationProcessor.printFilteredResults(pathogenInControl, lowQualControl); - } else { - sophia::SvEvent::NOCONTROLMODE = true; - sophia::AnnotationProcessor annotationProcessor { tumorResults, mref, defaultReadLengthTumor, false, germlineDbLimit }; - annotationProcessor.printFilteredResults(false, 0); - } - return 0; -} diff --git a/sophiaMref.cpp b/sophiaMref.cpp deleted file mode 100644 index 15cbb32..0000000 --- a/sophiaMref.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * sophiaMref.cpp - * - * Created on: 27 Apr 2016 - * Author: umuttoprak - */ - -#include -#include -#include -#include -#include -#include "MasterRefProcessor.h" -#include "MrefEntry.h" -#include "HelperFunctions.h" - - -int main(int argc, char** argv) { - using namespace std; - - boost::program_options::options_description desc("Allowed options"); - desc.add_options() // - ("help", 
"produce help message") // - ("gzins", boost::program_options::value(), "list of all gzipped control beds") // - ("version", boost::program_options::value(), "version") // - ("defaultreadlength", boost::program_options::value(), "Default read length for the technology used in sequencing 101,151 etc.") // - ("outputrootname", boost::program_options::value(), "outputrootname"); - boost::program_options::variables_map inputVariables { }; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), inputVariables); - boost::program_options::notify(inputVariables); - if (inputVariables.count("help")) { - cout << desc << endl; - return 0; - } - string gzInFilesList; - if (inputVariables.count("gzins")) { - gzInFilesList = inputVariables["gzins"].as(); - } else { - cerr << "No gzipped control bed list file given, exiting" << endl; - return 1; - } - ifstream gzInFilesHandle { gzInFilesList }; - vector gzListIn; - for (string line; error_terminating_getline(gzInFilesHandle, line);) { - gzListIn.push_back(line); - } - string version { }; - if (inputVariables.count("version")) { - version = inputVariables["version"].as(); - } else { - cerr << "No input version given, exiting" << endl; - return 1; - } - int defaultReadLength { 0 }; - if (inputVariables.count("defaultreadlength")) { - defaultReadLength = inputVariables["defaultreadlength"].as(); - } else { - cerr << "Default read Length not given, exiting" << endl; - return 1; - } - string outputRoot { }; - if (inputVariables.count("outputrootname")) { - outputRoot = inputVariables["outputrootname"].as(); - } else { - cerr << "No output file root name given, exiting" << endl; - return 1; - } - sophia::SuppAlignment::DEFAULTREADLENGTH = defaultReadLength; - sophia::SuppAlignmentAnno::DEFAULTREADLENGTH = defaultReadLength; - sophia::MrefEntry::NUMPIDS = gzListIn.size(); - sophia::MasterRefProcessor mRefProcessor { gzListIn, outputRoot, version, defaultReadLength }; -} diff --git 
a/sophiaMref/makefile b/sophiaMref/makefile deleted file mode 100644 index fd0bb29..0000000 --- a/sophiaMref/makefile +++ /dev/null @@ -1,58 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! -################################################################################ - --include ../makefile.init - -RM := rm -rf - -# All of the sources participating in the build are defined here --include sources.mk --include src/subdir.mk --include subdir.mk --include objects.mk - -ifneq ($(MAKECMDGOALS),clean) -ifneq ($(strip $(CC_DEPS)),) --include $(CC_DEPS) -endif -ifneq ($(strip $(C++_DEPS)),) --include $(C++_DEPS) -endif -ifneq ($(strip $(C_UPPER_DEPS)),) --include $(C_UPPER_DEPS) -endif -ifneq ($(strip $(CXX_DEPS)),) --include $(CXX_DEPS) -endif -ifneq ($(strip $(CPP_DEPS)),) --include $(CPP_DEPS) -endif -ifneq ($(strip $(C_DEPS)),) --include $(C_DEPS) -endif -endif - --include ../makefile.defs - -# Add inputs and outputs from these tool invocations to the build variables - -# All Target -all: sophiaMref - -# Tool invocations -sophiaMref: $(OBJS) $(USER_OBJS) - @echo 'Building target: $@' - @echo 'Invoking: GCC C++ Linker' - g++ -static -static-libgcc -static-libstdc++ -flto -o "sophiaMref" $(OBJS) $(USER_OBJS) $(LIBS) - @echo 'Finished building target: $@' - @echo ' ' - -# Other Targets -clean: - -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CPP_DEPS)$(C_DEPS) sophiaMref - -@echo ' ' - -.PHONY: all clean dependents - --include ../makefile.targets diff --git a/sophiaMref/src/Alignment.d b/sophiaMref/src/Alignment.d deleted file mode 100644 index 25c1c20..0000000 --- a/sophiaMref/src/Alignment.d +++ /dev/null @@ -1,28 +0,0 @@ -src/Alignment.o: ../src/Alignment.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - 
/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Sdust.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Sdust.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/Alignment.o b/sophiaMref/src/Alignment.o deleted file mode 100644 index 0400b31..0000000 Binary files a/sophiaMref/src/Alignment.o and /dev/null differ diff --git a/sophiaMref/src/AnnotationProcessor.d b/sophiaMref/src/AnnotationProcessor.d deleted file mode 100644 index 1b0f812..0000000 --- a/sophiaMref/src/AnnotationProcessor.d +++ /dev/null @@ -1,64 +0,0 @@ -src/AnnotationProcessor.o: ../src/AnnotationProcessor.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/AnnotationProcessor.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - 
/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntryAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SvEvent.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/AnnotationProcessor.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntryAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SvEvent.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: diff --git a/sophiaMref/src/AnnotationProcessor.o b/sophiaMref/src/AnnotationProcessor.o deleted file mode 100644 index 5bab101..0000000 Binary files a/sophiaMref/src/AnnotationProcessor.o and /dev/null differ diff --git a/sophiaMref/src/Breakpoint.d b/sophiaMref/src/Breakpoint.d deleted file mode 100644 index a0d669d..0000000 --- a/sophiaMref/src/Breakpoint.d +++ /dev/null @@ -1,31 +0,0 @@ -src/Breakpoint.o: ../src/Breakpoint.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/Breakpoint.o b/sophiaMref/src/Breakpoint.o deleted file mode 100644 index 1defed6..0000000 Binary files a/sophiaMref/src/Breakpoint.o and /dev/null differ diff --git a/sophiaMref/src/BreakpointReduced.d b/sophiaMref/src/BreakpointReduced.d deleted file mode 100644 index 95b5921..0000000 --- a/sophiaMref/src/BreakpointReduced.d +++ /dev/null @@ -1,43 +0,0 @@ -src/BreakpointReduced.o: ../src/BreakpointReduced.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/BreakpointReduced.o b/sophiaMref/src/BreakpointReduced.o deleted file mode 100644 index 50b88c1..0000000 Binary files a/sophiaMref/src/BreakpointReduced.o and /dev/null differ diff --git a/sophiaMref/src/ChosenBp.d b/sophiaMref/src/ChosenBp.d deleted file mode 100644 index 1956f4f..0000000 --- a/sophiaMref/src/ChosenBp.d +++ /dev/null @@ -1,10 +0,0 @@ -src/ChosenBp.o: ../src/ChosenBp.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: diff --git a/sophiaMref/src/ChosenBp.o b/sophiaMref/src/ChosenBp.o deleted file mode 100644 index 41108b4..0000000 Binary files a/sophiaMref/src/ChosenBp.o and /dev/null differ diff --git a/sophiaMref/src/ChrConverter.d b/sophiaMref/src/ChrConverter.d deleted file mode 100644 index ec95d25..0000000 --- a/sophiaMref/src/ChrConverter.d 
+++ /dev/null @@ -1,4 +0,0 @@ -src/ChrConverter.o: ../src/ChrConverter.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/ChrConverter.o b/sophiaMref/src/ChrConverter.o deleted file mode 100644 index aa40cbd..0000000 Binary files a/sophiaMref/src/ChrConverter.o and /dev/null differ diff --git a/sophiaMref/src/DeFuzzier.d b/sophiaMref/src/DeFuzzier.d deleted file mode 100644 index c986851..0000000 --- a/sophiaMref/src/DeFuzzier.d +++ /dev/null @@ -1,46 +0,0 @@ -src/DeFuzzier.o: ../src/DeFuzzier.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: diff --git a/sophiaMref/src/DeFuzzier.o b/sophiaMref/src/DeFuzzier.o deleted file mode 100644 index 48ab1de..0000000 Binary files a/sophiaMref/src/DeFuzzier.o and /dev/null differ diff --git a/sophiaMref/src/GermlineMatch.d b/sophiaMref/src/GermlineMatch.d deleted file mode 100644 index 0e37833..0000000 --- a/sophiaMref/src/GermlineMatch.d +++ /dev/null @@ -1,13 +0,0 @@ -src/GermlineMatch.o: ../src/GermlineMatch.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: diff --git a/sophiaMref/src/GermlineMatch.o b/sophiaMref/src/GermlineMatch.o deleted file mode 100644 index 275e11f..0000000 Binary files a/sophiaMref/src/GermlineMatch.o and /dev/null differ diff --git a/sophiaMref/src/MasterRefProcessor.d 
b/sophiaMref/src/MasterRefProcessor.d deleted file mode 100644 index 8f8be8d..0000000 --- a/sophiaMref/src/MasterRefProcessor.d +++ /dev/null @@ -1,52 +0,0 @@ -src/MasterRefProcessor.o: ../src/MasterRefProcessor.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MasterRefProcessor.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MasterRefProcessor.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/DeFuzzier.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/MasterRefProcessor.o b/sophiaMref/src/MasterRefProcessor.o deleted file mode 100644 index a08e23b..0000000 Binary files a/sophiaMref/src/MasterRefProcessor.o and /dev/null differ diff --git a/sophiaMref/src/MrefEntry.d b/sophiaMref/src/MrefEntry.d deleted file mode 100644 index 85a6b33..0000000 --- a/sophiaMref/src/MrefEntry.d +++ /dev/null @@ -1,49 +0,0 @@ -src/MrefEntry.o: ../src/MrefEntry.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - 
/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntry.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: diff --git a/sophiaMref/src/MrefEntry.o b/sophiaMref/src/MrefEntry.o deleted file mode 100644 index 4bd9277..0000000 Binary files a/sophiaMref/src/MrefEntry.o and /dev/null differ diff --git a/sophiaMref/src/MrefEntryAnno.d b/sophiaMref/src/MrefEntryAnno.d deleted file mode 100644 index 544c973..0000000 --- a/sophiaMref/src/MrefEntryAnno.d +++ /dev/null @@ -1,46 +0,0 @@ -src/MrefEntryAnno.o: ../src/MrefEntryAnno.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - 
/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntryAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefEntryAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/MrefEntryAnno.o b/sophiaMref/src/MrefEntryAnno.o deleted file mode 100644 index 
6ab9b9e..0000000 Binary files a/sophiaMref/src/MrefEntryAnno.o and /dev/null differ diff --git a/sophiaMref/src/MrefMatch.d b/sophiaMref/src/MrefMatch.d deleted file mode 100644 index 75109fd..0000000 --- a/sophiaMref/src/MrefMatch.d +++ /dev/null @@ -1,13 +0,0 @@ -src/MrefMatch.o: ../src/MrefMatch.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: diff --git a/sophiaMref/src/MrefMatch.o b/sophiaMref/src/MrefMatch.o deleted file mode 100644 index f119ae8..0000000 Binary files a/sophiaMref/src/MrefMatch.o and /dev/null differ diff --git a/sophiaMref/src/SamSegmentMapper.d b/sophiaMref/src/SamSegmentMapper.d deleted file mode 100644 index 419a80e..0000000 --- a/sophiaMref/src/SamSegmentMapper.d +++ /dev/null @@ -1,31 +0,0 @@ -src/SamSegmentMapper.o: ../src/SamSegmentMapper.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SamSegmentMapper.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h 
- -/home/umuttoprak/cppProjectsCevelop/sophia/include/SamSegmentMapper.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: diff --git a/sophiaMref/src/SamSegmentMapper.o b/sophiaMref/src/SamSegmentMapper.o deleted file mode 100644 index b189c5f..0000000 Binary files a/sophiaMref/src/SamSegmentMapper.o and /dev/null differ diff --git a/sophiaMref/src/Sdust.d b/sophiaMref/src/Sdust.d deleted file mode 100644 index 5b6db3d..0000000 --- a/sophiaMref/src/Sdust.d +++ /dev/null @@ -1,4 +0,0 @@ -src/Sdust.o: ../src/Sdust.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Sdust.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Sdust.h: diff --git a/sophiaMref/src/Sdust.o b/sophiaMref/src/Sdust.o deleted file mode 100644 index 8d5ea8a..0000000 Binary files a/sophiaMref/src/Sdust.o and /dev/null differ diff --git a/sophiaMref/src/SuppAlignment.d b/sophiaMref/src/SuppAlignment.d deleted file mode 100644 index 885c68e..0000000 --- a/sophiaMref/src/SuppAlignment.d +++ /dev/null @@ -1,10 +0,0 @@ -src/SuppAlignment.o: ../src/SuppAlignment.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - 
-/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/SuppAlignment.o b/sophiaMref/src/SuppAlignment.o deleted file mode 100644 index e0606f3..0000000 Binary files a/sophiaMref/src/SuppAlignment.o and /dev/null differ diff --git a/sophiaMref/src/SuppAlignmentAnno.d b/sophiaMref/src/SuppAlignmentAnno.d deleted file mode 100644 index 45af36a..0000000 --- a/sophiaMref/src/SuppAlignmentAnno.d +++ /dev/null @@ -1,13 +0,0 @@ -src/SuppAlignmentAnno.o: ../src/SuppAlignmentAnno.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/SuppAlignmentAnno.o b/sophiaMref/src/SuppAlignmentAnno.o deleted file mode 100644 index 479b93d..0000000 Binary files a/sophiaMref/src/SuppAlignmentAnno.o and /dev/null differ diff --git a/sophiaMref/src/SvEvent.d b/sophiaMref/src/SvEvent.d deleted file mode 100644 index 99cdc10..0000000 --- a/sophiaMref/src/SvEvent.d +++ /dev/null @@ -1,43 +0,0 @@ -src/SvEvent.o: ../src/SvEvent.cpp \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SvEvent.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h \ - 
/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h \ - /home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SvEvent.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Breakpoint.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CigarChunk.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/SuppAlignmentAnno.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/Alignment.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/OverhangRange.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChosenBp.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/CoverageAtBase.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MateInfo.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/BreakpointReduced.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/MrefMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/GermlineMatch.h: - -/home/umuttoprak/cppProjectsCevelop/sophia/include/ChrConverter.h: diff --git a/sophiaMref/src/SvEvent.o b/sophiaMref/src/SvEvent.o deleted file mode 100644 index 9dfce09..0000000 Binary files a/sophiaMref/src/SvEvent.o and /dev/null differ diff --git a/sophiaMref/src/subdir.mk b/sophiaMref/src/subdir.mk deleted file mode 100644 index 98b7fe7..0000000 --- a/sophiaMref/src/subdir.mk +++ /dev/null @@ -1,72 +0,0 @@ -################################################################################ -# Automatically-generated file. Do not edit! 
-################################################################################ - -# Add inputs and outputs from these tool invocations to the build variables -CPP_SRCS += \ -../src/Alignment.cpp \ -../src/AnnotationProcessor.cpp \ -../src/Breakpoint.cpp \ -../src/BreakpointReduced.cpp \ -../src/ChosenBp.cpp \ -../src/ChrConverter.cpp \ -../src/DeFuzzier.cpp \ -../src/GermlineMatch.cpp \ -../src/MasterRefProcessor.cpp \ -../src/MrefEntry.cpp \ -../src/MrefEntryAnno.cpp \ -../src/MrefMatch.cpp \ -../src/SamSegmentMapper.cpp \ -../src/Sdust.cpp \ -../src/SuppAlignment.cpp \ -../src/SuppAlignmentAnno.cpp \ -../src/SvEvent.cpp - -OBJS += \ -./src/Alignment.o \ -./src/AnnotationProcessor.o \ -./src/Breakpoint.o \ -./src/BreakpointReduced.o \ -./src/ChosenBp.o \ -./src/ChrConverter.o \ -./src/DeFuzzier.o \ -./src/GermlineMatch.o \ -./src/MasterRefProcessor.o \ -./src/MrefEntry.o \ -./src/MrefEntryAnno.o \ -./src/MrefMatch.o \ -./src/SamSegmentMapper.o \ -./src/Sdust.o \ -./src/SuppAlignment.o \ -./src/SuppAlignmentAnno.o \ -./src/SvEvent.o - -CPP_DEPS += \ -./src/Alignment.d \ -./src/AnnotationProcessor.d \ -./src/Breakpoint.d \ -./src/BreakpointReduced.d \ -./src/ChosenBp.d \ -./src/ChrConverter.d \ -./src/DeFuzzier.d \ -./src/GermlineMatch.d \ -./src/MasterRefProcessor.d \ -./src/MrefEntry.d \ -./src/MrefEntryAnno.d \ -./src/MrefMatch.d \ -./src/SamSegmentMapper.d \ -./src/Sdust.d \ -./src/SuppAlignment.d \ -./src/SuppAlignmentAnno.d \ -./src/SvEvent.d - - -# Each subdirectory must supply rules for building sources it contributes -src/%.o: ../src/%.cpp - @echo 'Building file: $<' - @echo 'Invoking: GCC C++ Compiler' - g++ -std=c++1y -I"/home/umuttoprak/cppProjectsCevelop/sophia/include" -O3 -Wall -c -fmessage-length=0 -static -flto -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" - @echo 'Finished building: $<' - @echo ' ' - - diff --git a/src/Alignment.cpp b/src/Alignment.cpp index a9e049a..3150a43 100644 --- a/src/Alignment.cpp +++ b/src/Alignment.cpp @@ -22,691 
+22,810 @@ * LICENSE: GPL */ +#include "global.h" #include "Alignment.h" -#include "ChrConverter.h" +#include "GlobalAppConfig.h" #include "HelperFunctions.h" #include "MateInfo.h" #include "Sdust.h" -#include "strtk.hpp" +#include "strtk-wrap.h" #include #include +#include +#include namespace sophia { -using namespace std; - -int Alignment::LOWQUALCLIPTHRESHOLD{}, Alignment::BASEQUALITYTHRESHOLD{}, - Alignment::BASEQUALITYTHRESHOLDLOW{}, // - Alignment::CLIPPEDNUCLEOTIDECOUNTTHRESHOLD{}, - Alignment::INDELNUCLEOTIDECOUNTTHRESHOLD{}; - -double Alignment::ISIZEMAX{}; -Alignment::Alignment() - : lowMapq{false}, nullMapq{true}, distantMate{0}, chosenBp{nullptr}, - chrIndex{0}, readType{0}, startPos{0}, endPos{0}, mateChrIndex{0}, - matePos{0}, samLine{}, validLine{error_terminating_getline(cin, samLine)}, - samChunkPositions{}, saCbegin{}, saCend{}, hasSa{false}, - supplementary{false}, fwdStrand{true}, invertedMate{false}, qualChecked{ - false} { - if (validLine) { - auto index = 0; - for (auto it = samLine.cbegin(); it != samLine.cend(); ++it) { - if (*it == '\t') { - samChunkPositions.push_back(index); + ChrSize Alignment::LOW_QUAL_CLIP_THRESHOLD{}; + + int Alignment::BASE_QUALITY_THRESHOLD{}, + Alignment::BASE_QUALITY_THRESHOLD_LOW{}; + + ChrSize Alignment::CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD{}, + Alignment::INDEL_NUCLEOTIDE_COUNT_THRESHOLD{}; + + double Alignment::ISIZEMAX{}; + Alignment::Alignment() + : lowMapq(false), + nullMapq(true), + distantMate(0), + chosenBp(nullptr), + chrIndex(0), + readType(0), + startPos(0), + endPos(0), + mateChrIndex(0), + matePos(0), + samLine(), + validLine(error_terminating_getline(std::cin, samLine)), + samTabPositions(), + saCbegin(), + saCend(), + hasSa(false), + supplementary(false), + fwdStrand(true), + invertedMate(false), + qualChecked(false) { + + if (validLine) { + unsigned int index = 0; + for (auto it = samLine.cbegin(); it != samLine.cend(); ++it) { + if (*it == '\t') { + samTabPositions.push_back(index); + } + 
++index; + } + try { + chrIndex = GlobalAppConfig::getInstance().getChrConverter().parseChrAndReturnIndex( + next(samLine.cbegin(), static_cast(samTabPositions[1]) + 1), + samLine.cend(), + '\t'); + } catch (DomainError &e) { + e << error_info_string("line = " + + std::string(next(samLine.cbegin(), + static_cast(samTabPositions[1]) + 1), + samLine.cend())); + throw e; } - ++index; } - chrIndex = ChrConverter::readChromosomeIndex( - next(samLine.cbegin(), samChunkPositions[1] + 1), '\t'); } -} - -void -Alignment::continueConstruction() { - mappingQualityCheck(); - for (auto startPos_cit = samLine.cbegin() + 1 + samChunkPositions[2]; - startPos_cit != samLine.cbegin() + samChunkPositions[3]; - ++startPos_cit) { - startPos = startPos * 10 + (*startPos_cit - '0'); - } - auto readLength = (samChunkPositions[9] - samChunkPositions[8] - 1); - endPos = startPos + readLength; - auto flag = 0; - for (auto flag_cit = samLine.cbegin() + 1 + samChunkPositions[0]; - flag_cit != samLine.cbegin() + samChunkPositions[1]; ++flag_cit) { - flag = flag * 10 + (*flag_cit - '0'); - } - auto flags = bitset<12>(flag); - supplementary = (flags[11] == true); - fwdStrand = (flags[4] == false); - auto mateFwdStrand = (flags[5] == false); - invertedMate = (fwdStrand == mateFwdStrand); - - bool eventCandidate = isEventCandidate(); - if (eventCandidate) { - createCigarChunks(); - assignBreakpointsAndOverhangs(); - if (supplementary) { - auto startCit = next(samLine.cbegin(), 1 + samChunkPositions[9]); - auto endCit = next(samLine.cbegin(), samChunkPositions[10]); - vector overhangPerBaseQuality{}; - fullMedianQuality(startCit, endCit, overhangPerBaseQuality); - if (overhangPerBaseQuality.empty() || - getMedian(overhangPerBaseQuality.begin(), - overhangPerBaseQuality.end()) < - BASEQUALITYTHRESHOLD) { - readType = 5; + void + Alignment::continueConstruction() { + mappingQualityCheck(); // May set the readType to 7! 
+ for (auto startPos_cit = samLine.cbegin() + 1 + static_cast(samTabPositions[2]); + startPos_cit != samLine.cbegin() + static_cast(samTabPositions[3]); + ++startPos_cit) { + startPos = startPos * 10 + ChrSize(*startPos_cit - '0'); + } + ChrSize readLength = static_cast(samTabPositions[9] - samTabPositions[8] - 1); + if (readLength < 0) { + throw_with_trace(std::logic_error("Invalid calculated readLength < 0: " + + std::to_string(readLength))); + } + endPos = startPos + ChrSize(readLength); + + unsigned short flag = 0; + for (auto flag_cit = samLine.cbegin() + 1 + static_cast(samTabPositions[0]); + flag_cit != samLine.cbegin() + static_cast(samTabPositions[1]); ++flag_cit) { + if (*flag_cit >= '0') { + flag = flag * 10 + (unsigned short) ((signed short) *flag_cit - '0'); } else { - readType = 2; + throw_with_trace(std::logic_error("Invalid flag in SAM file: " + samLine)); } } - if (readType == 7) { + auto flags = std::bitset<12>(flag); + supplementary = (flags[11] == true); + fwdStrand = (flags[4] == false); + auto mateFwdStrand = (flags[5] == false); + invertedMate = (fwdStrand == mateFwdStrand); + + // Alignment ends in match, or read contains soft-clip, hard-clip, insertion, or deletion. 
+ bool eventCandidate = isEventCandidate(); + if (eventCandidate) { + createCigarChunks(); + assignBreakpointsAndOverhangs(); if (supplementary) { - if (uniqueSuppCheck() && hasSa) { - readType = 2; - } else { + auto startCit = next(samLine.cbegin(), 1 + static_cast(samTabPositions[9])); + auto endCit = next(samLine.cbegin(), static_cast(samTabPositions[10])); + std::vector overhangPerBaseQuality{}; + fullMedianQuality(startCit, endCit, overhangPerBaseQuality); + if (overhangPerBaseQuality.empty() || + getMedian(overhangPerBaseQuality.begin(), + overhangPerBaseQuality.end()) < + BASE_QUALITY_THRESHOLD) { + // eventCandidate, supplementary, medianOverhangPerBaseQualities < BASQUALITYTHRESHOLD readType = 5; + } else { + // eventCandidate, supplementary, medianOverhangPerBaseQualities >= BASQUALITYTHRESHOLD + readType = 2; } - } else { - readType = 5; - auto rescueCandidate = false; - for (const auto &cigarChunk : cigarChunks) { - if (cigarChunk.chunkType == 'S') { - auto medianQual = overhangMedianQuality(cigarChunk); - if (cigarChunk.length > LOWQUALCLIPTHRESHOLD && - medianQual < BASEQUALITYTHRESHOLD) { - rescueCandidate = false; - break; - } - if (cigarChunk.length / (readLength + 0.0) > 0.5) { - if (medianQual >= BASEQUALITYTHRESHOLD) { - rescueCandidate = true; + } + if (readType == 7) { // mapq != 0 && mapq < 13 + if (supplementary) { + if (uniqueSuppCheck() && hasSa) { + // eventCandidate, lowMapqCheckFailed, supplementary, uniqueSuppCheck, hasSa + readType = 2; + } else { + // eventCandidate, lowMapqCheckFailed, supplementary, (!uniqueSuppCheck || !hasSa) + readType = 5; + } + } else { + readType = 5; // eventCandidate, lowMapqCheckFailed, !supplementary + auto rescueCandidate = false; + for (const auto &cigarChunk : cigarChunks) { + if (cigarChunk.chunkType == 'S') { + auto medianQual = overhangMedianQuality(cigarChunk); + if (cigarChunk.length > LOW_QUAL_CLIP_THRESHOLD && + medianQual < BASE_QUALITY_THRESHOLD) { + rescueCandidate = false; + break; + } + if 
(cigarChunk.length / (readLength + 0.0) > 0.5) { + if (medianQual >= BASE_QUALITY_THRESHOLD) { + rescueCandidate = true; + } } } } + if (rescueCandidate) { + // eventCandidate, lowMapqCheckFailed, !supplementary, rescueCandidate + readType = 1; + } + qualChecked = true; } - if (rescueCandidate) { - readType = 1; - } - qualChecked = true; } + if (readType < 5) { + // Note that here readType is used as ordinal. It might be a score, ...? + qualityCheckCascade(); + } + } else if (readType == 7) { + // !eventCandidate, mapq != 0 && mapq < 13 + readType = 5; } - if (readType < 5) { - qualityCheckCascade(); - } - } else if (readType == 7) { - readType = 5; - } - switch (readType) { - case 0: - case 3: - case 5: - assessOutlierMateDistance(); - if (distantMate == 1 && readType != 5) { - readType = 4; - } - break; - default: - break; - } - for (auto mpos_cit = samLine.cbegin() + 1 + samChunkPositions[6]; - mpos_cit != samLine.cbegin() + samChunkPositions[7]; ++mpos_cit) { - matePos = matePos * 10 + (*mpos_cit - '0'); - } - if (samLine[1 + samChunkPositions[5]] == '=') { - mateChrIndex = chrIndex; - } else { - mateChrIndex = ChrConverter::readChromosomeIndex( - next(samLine.cbegin(), 1 + samChunkPositions[5]), '\t'); - } -} - -void -Alignment::mappingQualityCheck() { - if (samLine[1 + samChunkPositions[3]] != - '0') { // mapq 0 is treated as a special case, where number of SAs and - // base qualities will be the sole determinants of read quality - nullMapq = false; - switch (samChunkPositions[4] - samChunkPositions[3]) { - case 2: // this checks if the mapq is a single-digit number - readType = 7; - lowMapq = true; - break; + + switch (readType) { + case 0: case 3: - if (samLine[1 + samChunkPositions[3]] == '1' && - ((samLine[2 + samChunkPositions[3]] - '0') < 3)) { - // this checks if the mapq is a two-digit number, and if the - // first digit is a "1", that the second digit is less than 3 - readType = 7; - lowMapq = true; + case 5: + assessOutlierMateDistance(); + if 
(distantMate == 1 && readType != 5) { + readType = 4; } break; default: break; } + + for (auto mpos_cit = samLine.cbegin() + 1 + static_cast(samTabPositions[6]); + mpos_cit != samLine.cbegin() + static_cast(samTabPositions[7]); ++mpos_cit) { + matePos = matePos * 10 + ChrSize(*mpos_cit - '0'); + } + if (samLine[1 + samTabPositions[5]] == '=') { + mateChrIndex = chrIndex; + } else { + try { + mateChrIndex = GlobalAppConfig::getInstance().getChrConverter(). + parseChrAndReturnIndex( + next(samLine.cbegin(), 1 + static_cast(samTabPositions[5])), + samLine.cend(), + '\t'); + } catch (const DomainError &e) { + throw e << error_info_string( + "from = " + std::string(next(samLine.cbegin(), + 1 + static_cast(samTabPositions[5])), + samLine.cend())); + } + } } -} -bool -Alignment::isEventCandidate() const { - if (samLine[samChunkPositions[5] - 1] != 'M') { - return true; - } else { - for (auto cigarString_cit = samLine.cbegin() + 1 + samChunkPositions[4]; - cigarString_cit != samLine.cbegin() + samChunkPositions[5] - 1; - ++cigarString_cit) { - switch (*cigarString_cit) { - case 'S': - case 'H': - case 'I': - case 'D': - return true; - default: - break; + void + Alignment::mappingQualityCheck() { + int mapq = boost::lexical_cast( + samLine.substr(samTabPositions[3] + 1, + samTabPositions[4] - samTabPositions[3] - 1)); + if (mapq == 0) { + nullMapq = true; + // readType = 0; lowMapq = true; see constructor + } else { + nullMapq = false; + + if (mapq < 13) { + readType = 7; + lowMapq = true; } } - return false; } -} - -void -Alignment::createCigarChunks() { - auto encounteredM = false; - auto cumulativeNucleotideCount = 0, currentNucleotideCount = 0, - indelAdjustment = 0, leftClipAdjustment = 0, rightClipAdjustment = 0; - for (auto cigarString_cit = samLine.cbegin() + 1 + samChunkPositions[4]; - cigarString_cit != samLine.cbegin() + samChunkPositions[5]; - ++cigarString_cit) { - if (isdigit(*cigarString_cit)) { - currentNucleotideCount = - currentNucleotideCount * 10 + 
(*cigarString_cit - '0'); + + /** The `Alignment` isEventCandidate` is true, if the last CIGAR code indicates a match, + * or if the CIGAR indicates a soft-clip, hard-clip, insertion, or deletion. + */ + bool + Alignment::isEventCandidate() const { + // samTabPositions[0] is the position of the first tabulator. Zero-based index of the + // CIGAR column in SAM is 5. Therefore, this means: The CIGAR string ends with a match. + if (samLine[samTabPositions[5] - 1] != 'M') { + return true; } else { - switch (*cigarString_cit) { - case 'M': - encounteredM = true; - cumulativeNucleotideCount += currentNucleotideCount; - break; + // If the CIGAR does not end with a match, then (continue parsing the CIGAR string). + // Return true, if there is a soft-clip, hard-clip, insertion, or deletion. + for (auto cigarString_it = samLine.cbegin() + static_cast(samTabPositions[4]) + 1; + cigarString_it != samLine.cbegin() + static_cast(samTabPositions[5]) - 1; + ++cigarString_it) { + switch (*cigarString_it) { + case 'S': // soft-clipped + case 'H': // hard-clipped + case 'I': // insertion + case 'D': // deletion + // The CIGAR string starts with a soft-clip, hard-clip, insertion or deletion. + return true; + default: + // Continue with the next CIGAR code. 
+ break; + } + } + return false; + } + } + + void + Alignment::createCigarChunks() { + auto encounteredM = false; + auto cumulativeNucleotideCount = 0, + currentNucleotideCount = 0, + indelAdjustment = 0, + leftClipAdjustment = 0, + rightClipAdjustment = 0; + for (auto cigarString_cit = samLine.cbegin() + 1 + static_cast(samTabPositions[4]); + cigarString_cit != samLine.cbegin() + static_cast(samTabPositions[5]); + ++cigarString_cit) { + if (isdigit(*cigarString_cit)) { + currentNucleotideCount = + currentNucleotideCount * 10 + (*cigarString_cit - '0'); + } else { + switch (*cigarString_cit) { + case 'M': + encounteredM = true; + cumulativeNucleotideCount += currentNucleotideCount; + break; + case 'S': + cigarChunks.emplace_back( + *cigarString_cit, + encounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount, + indelAdjustment - leftClipAdjustment); + cumulativeNucleotideCount += currentNucleotideCount; + if (!encounteredM) { + leftClipAdjustment = currentNucleotideCount; + } else { + rightClipAdjustment = currentNucleotideCount; + } + break; + case 'H': + cigarChunks.emplace_back( + *cigarString_cit, encounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount); + break; + case 'I': + cigarChunks.emplace_back( + *cigarString_cit, + encounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount); + cumulativeNucleotideCount += currentNucleotideCount; + indelAdjustment -= currentNucleotideCount; + break; + case 'D': + cigarChunks.emplace_back( + *cigarString_cit, + encounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount); + indelAdjustment += currentNucleotideCount; + break; + default: + break; + } + currentNucleotideCount = 0; + } + } + endPos += ChrSize(indelAdjustment - leftClipAdjustment - rightClipAdjustment); + } + + void + Alignment::assignBreakpointsAndOverhangs() { + for 
(const auto &chunk : cigarChunks) { + switch (chunk.chunkType) { case 'S': - cigarChunks.emplace_back(*cigarString_cit, encounteredM, - cumulativeNucleotideCount + - indelAdjustment - - leftClipAdjustment, - currentNucleotideCount, - indelAdjustment - leftClipAdjustment); - cumulativeNucleotideCount += currentNucleotideCount; - if (!encounteredM) { - leftClipAdjustment = currentNucleotideCount; + readBreakpointTypes.push_back(chunk.chunkType); + readBreakpointSizes.push_back(chunk.length); + readBreakpointsEncounteredM.push_back(chunk.encounteredM); + if (chunk.encounteredM) { + readBreakpoints.push_back(endPos); + readOverhangCoords.emplace_back( + chunk.encounteredM, + endPos, + static_cast(chunk.startPosOnRead) - static_cast(chunk.indelAdjustment), + chunk.length); } else { - rightClipAdjustment = currentNucleotideCount; + readBreakpoints.push_back(startPos); + readOverhangCoords.emplace_back( + chunk.encounteredM, + startPos, + static_cast(chunk.startPosOnRead) - static_cast(chunk.indelAdjustment), + chunk.length); } break; case 'H': - cigarChunks.emplace_back(*cigarString_cit, encounteredM, - cumulativeNucleotideCount + - indelAdjustment - - leftClipAdjustment, - currentNucleotideCount); + if (chunk.encounteredM) { + readBreakpoints.push_back(endPos); + } else { + readBreakpoints.push_back(startPos); + } + readBreakpointSizes.push_back(chunk.length); + readBreakpointTypes.push_back(chunk.chunkType); + readBreakpointsEncounteredM.push_back(chunk.encounteredM); break; case 'I': - cigarChunks.emplace_back(*cigarString_cit, encounteredM, - cumulativeNucleotideCount + - indelAdjustment - - leftClipAdjustment, - currentNucleotideCount); - cumulativeNucleotideCount += currentNucleotideCount; - indelAdjustment -= currentNucleotideCount; + readBreakpoints.push_back(startPos + ChrSize(chunk.startPosOnRead)); + readBreakpointTypes.push_back(chunk.chunkType); + readBreakpointSizes.push_back(chunk.length); + readBreakpointsEncounteredM.push_back(chunk.encounteredM); break; 
case 'D': - cigarChunks.emplace_back(*cigarString_cit, encounteredM, - cumulativeNucleotideCount + - indelAdjustment - - leftClipAdjustment, - currentNucleotideCount); - indelAdjustment += currentNucleotideCount; + readBreakpoints.push_back(startPos + ChrSize(chunk.startPosOnRead)); + readBreakpointTypes.push_back(chunk.chunkType); + readBreakpointSizes.push_back(chunk.length); + readBreakpointsEncounteredM.push_back(chunk.encounteredM); + readBreakpoints.push_back(startPos + + chunk.startPosOnRead + + chunk.length); + readBreakpointSizes.push_back(-1); + readBreakpointTypes.push_back('#'); + readBreakpointsEncounteredM.push_back(chunk.encounteredM); break; default: break; } - currentNucleotideCount = 0; } } - endPos += indelAdjustment - leftClipAdjustment - rightClipAdjustment; -} - -void -Alignment::assignBreakpointsAndOverhangs() { - for (const auto &chunk : cigarChunks) { - switch (chunk.chunkType) { - case 'S': - readBreakpointTypes.push_back(chunk.chunkType); - readBreakpointSizes.push_back(chunk.length); - readBreakpointsEncounteredM.push_back(chunk.encounteredM); - if (chunk.encounteredM) { - readBreakpoints.push_back(endPos); - readOverhangCoords.emplace_back( - chunk.encounteredM, endPos, - chunk.startPosOnRead - chunk.indelAdjustment, chunk.length); - } else { - readBreakpoints.push_back(startPos); - readOverhangCoords.emplace_back( - chunk.encounteredM, startPos, - chunk.startPosOnRead - chunk.indelAdjustment, chunk.length); - } - break; - case 'H': - if (chunk.encounteredM) { - readBreakpoints.push_back(endPos); - } else { - readBreakpoints.push_back(startPos); + + void + Alignment::qualityCheckCascade() { + if (!clipCountCheck()) { + readType = 5; + return; + } + if (!uniqueSuppCheck()) { + readType = 5; + return; + } + if (!qualChecked) { + for (const auto &cigarChunk : cigarChunks) { + if (cigarChunk.chunkType == 'S' && + cigarChunk.length > LOW_QUAL_CLIP_THRESHOLD && + overhangMedianQuality(cigarChunk) < BASE_QUALITY_THRESHOLD) { + readType = 5; + 
return; + } } - readBreakpointSizes.push_back(chunk.length); - readBreakpointTypes.push_back(chunk.chunkType); - readBreakpointsEncounteredM.push_back(chunk.encounteredM); - break; - case 'I': - readBreakpoints.push_back(startPos + chunk.startPosOnRead); - readBreakpointTypes.push_back(chunk.chunkType); - readBreakpointSizes.push_back(chunk.length); - readBreakpointsEncounteredM.push_back(chunk.encounteredM); - break; - case 'D': - readBreakpoints.push_back(startPos + chunk.startPosOnRead); - readBreakpointTypes.push_back(chunk.chunkType); - readBreakpointSizes.push_back(chunk.length); - readBreakpointsEncounteredM.push_back(chunk.encounteredM); - readBreakpoints.push_back(startPos + chunk.startPosOnRead + - chunk.length); - readBreakpointSizes.push_back(-1); - readBreakpointTypes.push_back('#'); - readBreakpointsEncounteredM.push_back(chunk.encounteredM); - break; - default: - break; } + assessReadType(); } -} - -void -Alignment::qualityCheckCascade() { - // cerr << "a\n"; - if (!clipCountCheck()) { - readType = 5; - return; - } - // cerr << "b\n"; - if (!uniqueSuppCheck()) { - readType = 5; - return; - } - // cerr << "c\n"; - if (!qualChecked) { + + bool + Alignment::clipCountCheck() { + auto hCounts = 0; + auto sCounts = 0; for (const auto &cigarChunk : cigarChunks) { - if (cigarChunk.chunkType == 'S' && - cigarChunk.length > LOWQUALCLIPTHRESHOLD && - overhangMedianQuality(cigarChunk) < BASEQUALITYTHRESHOLD) { - readType = 5; - return; + switch (cigarChunk.chunkType) { + case 'H': + ++hCounts; + break; + case 'S': + ++sCounts; + break; + default: + break; } } - } - // cerr << "d\n"; - assessReadType(); - // cerr << "e\n"; -} - -bool -Alignment::clipCountCheck() { - auto hCounts = 0; - auto sCounts = 0; - for (const auto &cigarChunk : cigarChunks) { - switch (cigarChunk.chunkType) { - case 'H': - ++hCounts; - break; - case 'S': - ++sCounts; - break; - default: - break; - } - } - if (hCounts + sCounts > 1 && nullMapq) { - lowMapq = true; - return false; - } - 
return (hCounts < 2 && !(hCounts > 0 && sCounts > 0)); -} - -bool -Alignment::uniqueSuppCheck() { - auto hCounts = 0, sCounts = 0; - for (const auto &cigarChunk : cigarChunks) { - switch (cigarChunk.chunkType) { - case 'H': - ++hCounts; - break; - case 'S': - ++sCounts; - break; - default: - break; + if (hCounts + sCounts > 1 && nullMapq) { + // mapq 0 is treated as a special case, where number of SAs and + // base qualities will be the sole determinants of read quality + lowMapq = true; + return false; } + return (hCounts < 2 && !(hCounts > 0 && sCounts > 0)); } - saCbegin = samLine.cend(); - saCend = samLine.cend(); - if (samLine.back() == ';' && samLine[samChunkPositions.back() + 1] == 'S' && - samLine[samChunkPositions.back() + 2] == 'A') { - saCbegin = samLine.cbegin() + samChunkPositions.back() + 6; - saCend = samLine.cend() - 1; - hasSa = true; - } else { - for (auto i = 10u; i < samChunkPositions.size() - 1; ++i) { - if (samLine[samChunkPositions[i + 1] - 1] == ';' && - samLine[samChunkPositions[i] + 1] == 'S' && - samLine[samChunkPositions[i] + 2] == 'A') { - saCbegin = samLine.cbegin() + samChunkPositions[i] + 6; - saCend = samLine.cbegin() + samChunkPositions[i + 1] - 1; - hasSa = true; + + bool + Alignment::uniqueSuppCheck() { + auto hCounts = 0, sCounts = 0; + for (const auto &cigarChunk : cigarChunks) { + switch (cigarChunk.chunkType) { + case 'H': + ++hCounts; + break; + case 'S': + ++sCounts; + break; + default: break; } } - } - if (hasSa) { - auto lowQualSacounts = 0; - auto block = 0; - auto mapq = 0; - auto highQualSa = false; - for (auto saCit = saCbegin; saCit != saCend; ++saCit) { - //"SA:Z:10,24753146,+,68S33M,48,1;X,135742083,-,47S22M32S,0,0;8,72637925,-,29S19M53S,0,0;" - // 0-chr,1-pos,2-orientation,3-cigar,4-mapq,5-whatever - switch (*saCit) { - case ',': - ++block; - if (block == 4) { - ++saCit; - while (*saCit != ',') { - mapq = mapq * 10 + (*saCit - '0'); + saCbegin = samLine.cend(); + saCend = samLine.cend(); + if (samLine.back() == 
';' && samLine[samTabPositions.back() + 1] == 'S' && + samLine[samTabPositions.back() + 2] == 'A') { + saCbegin = samLine.cbegin() + static_cast(samTabPositions.back()) + 6; + saCend = samLine.cend() - 1; + hasSa = true; + } else { + for (auto i = 10u; i < samTabPositions.size() - 1; ++i) { + if (samLine[samTabPositions[i + 1] - 1] == ';' && + samLine[samTabPositions[i] + 1] == 'S' && + samLine[samTabPositions[i] + 2] == 'A') { + saCbegin = samLine.cbegin() + static_cast(samTabPositions[i]) + 6; + saCend = samLine.cbegin() + static_cast(samTabPositions[i + 1]) - 1; + hasSa = true; + break; + } + } + } + if (hasSa) { + auto lowQualSacounts = 0; + auto block = 0; + auto mapq = 0; + auto highQualSa = false; + for (auto saCit = saCbegin; saCit != saCend; ++saCit) { + //"SA:Z:10,24753146,+,68S33M,48,1;X,135742083,-,47S22M32S,0,0;8,72637925,-,29S19M53S,0,0;" + // 0-chr,1-pos,2-orientation,3-cigar,4-mapq,5-whatever + switch (*saCit) { + case ',': + ++block; + if (block == 4) { ++saCit; + while (*saCit != ',') { + mapq = mapq * 10 + (*saCit - '0'); + ++saCit; + } + } + break; + case ';': + if (mapq < 13) { + ++lowQualSacounts; + } else if (mapq > 20) { + highQualSa = true; } + if (!highQualSa && ((sCounts == 1 && lowQualSacounts == 2) || + (hCounts == 1 && lowQualSacounts == 2) || + (sCounts == 2 && lowQualSacounts == 4))) { + return false; + } + block = 0; + mapq = 0; + break; + default: + break; } - break; - case ';': - if (mapq < 13) { - ++lowQualSacounts; - } else if (mapq > 20) { - highQualSa = true; + } + } + return true; + } + + double + Alignment::overhangMedianQuality(const CigarChunk &cigarChunk) const { + std::vector overhangPerBaseQuality{}; + if (!cigarChunk.encounteredM) { + auto startCit = next( + samLine.cbegin(), + 1 + static_cast(samTabPositions[9]) + + (static_cast(cigarChunk.startPosOnRead) - static_cast(cigarChunk.indelAdjustment))); + auto endCit = + next(samLine.cbegin(), + 1 + static_cast(samTabPositions[9]) + + 
(static_cast(cigarChunk.startPosOnRead) - static_cast(cigarChunk.indelAdjustment)) + + static_cast(cigarChunk.length)); + fullMedianQuality(startCit, endCit, overhangPerBaseQuality); + } else { + std::string::const_reverse_iterator startCrit{ + next(samLine.cbegin(), + 1 + static_cast(samTabPositions[9]) + + (static_cast(cigarChunk.startPosOnRead) - static_cast(cigarChunk.indelAdjustment)) + + static_cast(cigarChunk.length)) + }; // dito + std::string::const_reverse_iterator endCrit{ + next(samLine.cbegin(), + 1 + static_cast(samTabPositions[9]) + + (static_cast(cigarChunk.startPosOnRead) - static_cast(cigarChunk.indelAdjustment)))}; + fullMedianQuality(startCrit, endCrit, overhangPerBaseQuality); + } + if (overhangPerBaseQuality.empty()) { + return -1.0; + } else { + return getMedian(overhangPerBaseQuality.begin(), + overhangPerBaseQuality.end()); + } + } + + void + Alignment::assessReadType() { + /* 0 for non-split, + * 1 for softSplit, + * 2 for hardSplit, + * 3 for indel, + * 4 for distant mate, + * 5 for low quality overhang + * 6 for low quality hardClipped + * (precedence: + * decoy mate> + * low qual > + * soft clips > + * hard clips > + * distant mate > + * indels > + * normal) + */ + auto hardLongClip = false, indelStatus = false; + for (const auto &chunk : cigarChunks) { + switch (chunk.chunkType) { + case 'S': + if (static_cast(chunk.length) >= CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD) { + readType = 1; + return; } - if (!highQualSa && ((sCounts == 1 && lowQualSacounts == 2) || - (hCounts == 1 && lowQualSacounts == 2) || - (sCounts == 2 && lowQualSacounts == 4))) { - return false; + break; + case 'H': + if (static_cast(chunk.length) >= CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD) { + hardLongClip = true; } - block = 0; - mapq = 0; + break; + case 'I': + case 'D': + indelStatus = true; break; default: break; } } - } - return true; -} - -double -Alignment::overhangMedianQuality(const CigarChunk &cigarChunk) const { - vector overhangPerBaseQuality{}; - if 
(!cigarChunk.encounteredM) { - auto startCit = next(samLine.cbegin(), 1 + samChunkPositions[9] + - cigarChunk.startPosOnRead - - cigarChunk.indelAdjustment); - auto endCit = - next(samLine.cbegin(), - 1 + samChunkPositions[9] + cigarChunk.startPosOnRead - - cigarChunk.indelAdjustment + cigarChunk.length); - fullMedianQuality(startCit, endCit, overhangPerBaseQuality); - } else { - string::const_reverse_iterator startCrit{ - next(samLine.cbegin(), - 1 + samChunkPositions[9] + cigarChunk.startPosOnRead - - cigarChunk.indelAdjustment + cigarChunk.length)}; - string::const_reverse_iterator endCrit{ - next(samLine.cbegin(), 1 + samChunkPositions[9] + - cigarChunk.startPosOnRead - - cigarChunk.indelAdjustment)}; - fullMedianQuality(startCrit, endCrit, overhangPerBaseQuality); - } - if (overhangPerBaseQuality.empty()) { - return -1.0; - } else { - return getMedian(overhangPerBaseQuality.begin(), - overhangPerBaseQuality.end()); - } -} - -void -Alignment::assessReadType() { - /* 0 for non-split, - * 1 for softSplit, - * 2 for hardSplit, - * 3 for indel, - * 4 for distant mate, - * 5 for low quality overhang - * 6 for low quality hardClipped - * (precedence: - * decoy mate> - * low qual > - * soft clips > - * hard clips > - * distant mate > - * indels > - * normal) - */ - auto hardLongClip = false, indelStatus = false; - for (const auto &chunk : cigarChunks) { - switch (chunk.chunkType) { - case 'S': - if (chunk.length >= CLIPPEDNUCLEOTIDECOUNTTHRESHOLD) { - readType = 1; - return; - } - break; - case 'H': - if (chunk.length >= CLIPPEDNUCLEOTIDECOUNTTHRESHOLD) { - hardLongClip = true; - } - break; - case 'I': - case 'D': - indelStatus = true; - break; - default: - break; + if (hardLongClip) { + readType = 2; + } else if (indelStatus) { + readType = 3; } } - if (hardLongClip) { - readType = 2; - } else if (indelStatus) { - readType = 3; - } -} - -bool -Alignment::assessOutlierMateDistance() { - switch (distantMate) { - case -1: - return false; - case 1: - return true; - 
default: - if (*(samLine.cbegin() + 1 + samChunkPositions[5]) != '=') { - distantMate = 1; + + bool + Alignment::assessOutlierMateDistance() { + switch (distantMate) { + case -1: + return false; + case 1: return true; - } else { - auto isize_cit = samLine.cbegin() + 1 + samChunkPositions[7]; - if (*isize_cit == '-') { - ++isize_cit; - } - auto isize = 0; - for (; isize_cit != samLine.cbegin() + samChunkPositions[8]; - ++isize_cit) { - isize = isize * 10 + (*isize_cit - '0'); - } - if (isize > ISIZEMAX) { + default: + if (*(samLine.cbegin() + 1 + static_cast(samTabPositions[5])) != '=') { distantMate = 1; return true; + } else { + auto isize_cit = samLine.cbegin() + 1 + static_cast(samTabPositions[7]); + if (*isize_cit == '-') { + ++isize_cit; + } + auto isize = 0; + for (; isize_cit != samLine.cbegin() + static_cast(samTabPositions[8]); + ++isize_cit) { + isize = isize * 10 + (*isize_cit - '0'); + } + if (isize > ISIZEMAX) { + distantMate = 1; + return true; + } } + distantMate = -1; + return false; } - distantMate = -1; - return false; } -} - -void -Alignment::setChosenBp(int chosenBpLoc, int alignmentIndex) { - auto overhangStartIndex = 0; - auto overhangLength = 0; - char bpType{}; - auto bpEncounteredM = false; - auto bpSize = 0; - for (auto i = 0u; i < readBreakpoints.size(); ++i) { - if (readBreakpoints[i] == chosenBpLoc) { - bpEncounteredM = readBreakpointsEncounteredM[i]; - bpType = readBreakpointTypes[i]; - if (bpType == 'S') { - for (const auto &overhang : readOverhangCoords) { - if (overhang.bpPos == chosenBpLoc) { - overhangStartIndex = - 1 + samChunkPositions[8] + overhang.startPosOnRead; - overhangLength = overhang.length; - break; + + void + Alignment::setChosenBp(ChrSize chosenBpLoc, int alignmentIndex) { + auto overhangStartIndex = 0; + ChrSize overhangLength = 0; + char bpType{}; + auto bpEncounteredM = false; + auto bpSize = 0; + for (auto i = 0u; i < readBreakpoints.size(); ++i) { + if (readBreakpoints[i] == chosenBpLoc) { + bpEncounteredM = 
readBreakpointsEncounteredM[i]; + bpType = readBreakpointTypes[i]; + if (bpType == 'S') { + for (const auto &overhang : readOverhangCoords) { + if (overhang.bpPos == chosenBpLoc) { + overhangStartIndex = 1 + static_cast(samTabPositions[8]) + overhang.startPosOnRead; + overhangLength = overhang.length; + break; + } } } + bpSize = readBreakpointSizes[i]; + break; } - bpSize = readBreakpointSizes[i]; - break; } + chosenBp.reset(); + chosenBp = std::make_unique(ChosenBp(bpType, + bpSize, + bpEncounteredM, + overhangStartIndex, + overhangLength, + alignmentIndex /* origin index */)); } - chosenBp.reset(); - chosenBp = make_unique(bpType, bpSize, bpEncounteredM, - overhangStartIndex, overhangLength, - alignmentIndex); -} -vector -Alignment::generateSuppAlignments(int bpChrIndex, int bpPos) { - vector suppAlignmentsTmp; - if (hasSa) { - vector saBegins = {saCbegin}; - vector saEnds; - for (auto it = saCbegin; it != saCend; ++it) { - if (*it == ';') { - saEnds.push_back(it); - saBegins.push_back(it + 1); + + std::vector + Alignment::generateSuppAlignments(ChrIndex bpChrIndex, int bpPos) { + std::vector suppAlignmentsTmp; + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + + if (hasSa) { + std::vector saBegins = {saCbegin}; + std::vector saEnds; + for (auto it = saCbegin; it != saCend; ++it) { + if (*it == ';') { + saEnds.push_back(it); + saBegins.push_back(it + 1); + } } - } - saEnds.push_back(saCend); - for (auto i = 0u; i < saBegins.size(); ++i) { - SuppAlignment saTmp{saBegins[i], - saEnds[i], - !supplementary, - lowMapq, - nullMapq, - fwdStrand, - chosenBp->bpEncounteredM, - chosenBp->selfNodeIndex, - bpChrIndex, - bpPos}; - if (saTmp.getChrIndex() < 1002) { - suppAlignmentsTmp.push_back(saTmp); + saEnds.push_back(saCend); + for (auto i = 0u; i < saBegins.size(); ++i) { + SuppAlignment saTmp = SuppAlignment::parseSamSaTag( + saBegins[i], + saEnds[i], + !supplementary, + lowMapq, + nullMapq, + fwdStrand, + chosenBp->bpEncounteredM, + 
chosenBp->selfNodeIndex, + bpChrIndex, + bpPos); + if (!chrConverter.isTechnical(saTmp.getChrIndex())) { + suppAlignmentsTmp.push_back(saTmp); + } } } - } - if (assessOutlierMateDistance()) { - if (getMateChrIndex() < 1002) { - auto foundMatch = false; - MateInfo tmpPairDummy{ - 0, 0, getMateChrIndex(), getMatePos(), true, invertedMate}; - for (const auto &sa : suppAlignmentsTmp) { - if (tmpPairDummy.suppAlignmentFuzzyMatch(sa)) { - foundMatch = true; - break; + if (assessOutlierMateDistance()) { + if (!chrConverter.isTechnical(getMateChrIndex())) { + auto foundMatch = false; + MateInfo tmpPairDummy{ + 0, 0, getMateChrIndex(), getMatePos(), true, invertedMate}; + for (const auto &sa : suppAlignmentsTmp) { + if (tmpPairDummy.suppAlignmentFuzzyMatch(sa)) { + foundMatch = true; + break; + } + } + if (!foundMatch) { + suppAlignmentsTmp.emplace_back(SuppAlignment::create( + getMateChrIndex(), + getMatePos(), + 0, + 0, + chosenBp->bpEncounteredM, + invertedMate, + getMatePos() + 1, + !supplementary, + lowMapq, + nullMapq, + chosenBp->selfNodeIndex /* origin index */)); } - } - if (!foundMatch) { - suppAlignmentsTmp.emplace_back( - getMateChrIndex(), getMatePos(), 0, 0, - chosenBp->bpEncounteredM, invertedMate, getMatePos() + 1, - !supplementary, lowMapq, nullMapq, chosenBp->selfNodeIndex); } } + return suppAlignmentsTmp; } - return suppAlignmentsTmp; -} -string -Alignment::printOverhang() const { - string res{}; - res.reserve(chosenBp->overhangLength + 9); - if (chosenBp->bpEncounteredM) { - res.append("|").append(samLine.substr(chosenBp->overhangStartIndex, - chosenBp->overhangLength)); - } else { - res.append(samLine.substr(chosenBp->overhangStartIndex, - chosenBp->overhangLength)) - .append("|"); + + std::string + Alignment::printOverhang() const { + std::string res{}; + res.reserve(static_cast(chosenBp->overhangLength) + 9); + if (chosenBp->bpEncounteredM) { + res.append("|").append(samLine.substr(static_cast(chosenBp->overhangStartIndex), + 
static_cast(chosenBp->overhangLength))); + } else { + res.append(samLine.substr(static_cast(chosenBp->overhangStartIndex), + static_cast(chosenBp->overhangLength))) + .append("|"); + } + res.append("(") + .append(strtk::type_to_string(chosenBp->childrenNodes.size())) + .append(")"); + return res; } - res.append("(") - .append(strtk::type_to_string(chosenBp->childrenNodes.size())) - .append(")"); - return res; -} - -double -Alignment::overhangComplexityMaskRatio() const { - auto fullSizesTotal = 0.0; - auto maskedIntervalsTotal = 0.0; - vector overhang; - for (auto i = 0; i < chosenBp->overhangLength; ++i) { - switch (samLine[chosenBp->overhangStartIndex + i]) { - case 'A': - overhang.push_back(0); - break; - case 'T': - overhang.push_back(1); - break; - case 'G': - overhang.push_back(2); - break; - case 'C': - overhang.push_back(3); - break; - case 'N': - if (!overhang.empty()) { - fullSizesTotal += overhang.size(); - auto res = Sdust{overhang}.getRes(); - if (!res.empty()) { - for (const auto &resInterval : res) { - maskedIntervalsTotal += - resInterval.endIndex - resInterval.startIndex + 1; + + double + Alignment::overhangComplexityMaskRatio() const { + auto fullSizesTotal = 0.0; + auto maskedIntervalsTotal = 0.0; + std::vector overhang; + for (unsigned long i = 0; i < static_cast(chosenBp->overhangLength); ++i) { + switch (samLine[static_cast(chosenBp->overhangStartIndex) + i]) { + case 'A': + overhang.push_back(0); + break; + case 'T': + overhang.push_back(1); + break; + case 'G': + overhang.push_back(2); + break; + case 'C': + overhang.push_back(3); + break; + case 'N': + if (!overhang.empty()) { + fullSizesTotal += overhang.size(); + auto res = Sdust{overhang}.getRes(); + if (!res.empty()) { + for (const auto &resInterval : res) { + maskedIntervalsTotal += + resInterval.endIndex - resInterval.startIndex + 1; + } } + overhang.clear(); + } + break; + default: + break; + } + } + if (!overhang.empty()) { + fullSizesTotal += overhang.size(); + auto res = 
Sdust{overhang}.getRes(); + if (!res.empty()) { + for (const auto &resInterval : res) { + maskedIntervalsTotal += + resInterval.endIndex - resInterval.startIndex + 1; } - overhang.clear(); } - break; - default: - break; } + return maskedIntervalsTotal / fullSizesTotal; } - if (!overhang.empty()) { - fullSizesTotal += overhang.size(); - auto res = Sdust{overhang}.getRes(); - if (!res.empty()) { - for (const auto &resInterval : res) { - maskedIntervalsTotal += - resInterval.endIndex - resInterval.startIndex + 1; + + + template + void + Alignment::fullMedianQuality(Iterator qualBegin, Iterator qualEnd, + std::vector &overhangPerBaseQuality) const { + overhangPerBaseQuality.reserve((size_t) distance(qualBegin, qualEnd)); + auto consecutiveLowQuals = 0; + for (auto cit = qualBegin; cit != qualEnd; ++cit) { + if (*cit < BASE_QUALITY_THRESHOLD_LOW) { // 33 + phred 11 + if (consecutiveLowQuals == 5) { + overhangPerBaseQuality.clear(); + return; + } + ++consecutiveLowQuals; + } else { + consecutiveLowQuals = 0; } + overhangPerBaseQuality.push_back(*cit); + } + } + + // Median Code taken from http://rosettacode.org/wiki/Averages/Median#C.2B.2B + template + double + Alignment::getMedian(Iterator begin, Iterator end) const { + // this is middle for odd-length, and "upper-middle" for even length + Iterator middle = begin + (end - begin) / 2; + // This function runs in O(n) on average, according to the standard + nth_element(begin, middle, end); + if ((end - begin) % 2 != 0) { // odd length + return *middle; + } else { // even length + // the "lower middle" is the max of the lower half + Iterator lower_middle = max_element(begin, middle); + return (*middle + *lower_middle) / 2.0; } } - return maskedIntervalsTotal / fullSizesTotal; -} } /* namespace sophia */ diff --git a/src/AnnotationProcessor.cpp b/src/AnnotationProcessor.cpp index 7f5a9b1..553e87c 100644 --- a/src/AnnotationProcessor.cpp +++ b/src/AnnotationProcessor.cpp @@ -25,6 +25,7 @@ #include "Breakpoint.h" #include 
"HelperFunctions.h" #include "SuppAlignment.h" +#include "GlobalAppConfig.h" #include #include #include @@ -36,556 +37,591 @@ namespace sophia { -using namespace std; + bool AnnotationProcessor::ABRIDGED_OUTPUT{false}; -bool AnnotationProcessor::ABRIDGEDOUTPUT{false}; + AnnotationProcessor::AnnotationProcessor(const std::string &tumorResultsIn, + std::vector> &mref, + ChrSize defaultReadLengthTumorIn, + bool controlCheckMode, + int GERMLINE_DB_LIMIT) + : NO_CONTROL_MODE{true}, + GERMLINE_DB_LIMIT{GERMLINE_DB_LIMIT}, + contaminationObserved{false}, + massiveInvFilteringLevel{0}, + filteredResults{}, + visitedLineIndices{} { -AnnotationProcessor::AnnotationProcessor(const string &tumorResultsIn, - vector> &mref, - int defaultReadLengthTumorIn, - bool controlCheckMode, - int germlineDbLimit) - : NOCONTROLMODE{true}, GERMLINEDBLIMIT{germlineDbLimit}, - contaminationObserved{false}, massiveInvFilteringLevel{0}, - filteredResults{}, tumorResults{85, vector{}}, - controlResults{85, vector{}}, visitedLineIndices{} { - unique_ptr tumorInputHandle{ - make_unique(tumorResultsIn, ios_base::in | ios_base::binary)}; - unique_ptr tumorGzHandle{ - make_unique()}; - tumorGzHandle->push(boost::iostreams::gzip_decompressor()); - tumorGzHandle->push(*tumorInputHandle); - string line; - auto lineIndex = 0; - while (error_terminating_getline(*tumorGzHandle, line)) { - if (line.front() == '#') { - continue; - }; - Breakpoint tmpBp{line, true}; - auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()]; - if (chrIndex < 0) { - continue; - } - auto hasOverhang = line.back() != '.' && line.back() != '#'; - tumorResults[chrIndex].emplace_back(tmpBp, lineIndex, hasOverhang); - if (hasOverhang) { - string overhang{}; - for (auto it = line.rbegin(); it != line.rend(); ++it) { - if (*it == '\t') { - break; - } else { - overhang.push_back(*it); + CompressedMrefIndex nCompressedMrefChromosomes = GlobalAppConfig::getInstance(). 
+ getChrConverter().nChromosomesCompressedMref(); + std::vector>::size_type vectorSize = + std::vector>::size_type(nCompressedMrefChromosomes); + + tumorResults = std::vector> + { vectorSize, std::vector{} }; + controlResults = std::vector> + { vectorSize, std::vector{} }; + + std::unique_ptr tumorInputHandle{ + std::make_unique(tumorResultsIn, std::ios_base::in | std::ios_base::binary)}; + std::unique_ptr tumorGzHandle{ + std::make_unique()}; + tumorGzHandle->push(boost::iostreams::gzip_decompressor()); + tumorGzHandle->push(*tumorInputHandle); + std::string line; + int lineIndex = 0; + const ChrConverter& chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + while (error_terminating_getline(*tumorGzHandle, line)) { + if (line.front() == '#') { + continue; + }; + + Breakpoint tmpBp = Breakpoint::parse(line, true); + CompressedMrefIndex compressedMrefChrIndex; + if (!chrConverter.isCompressedMref(tmpBp.getChrIndex())) { + continue; + } else { + compressedMrefChrIndex = chrConverter.indexToCompressedMrefIndex(tmpBp.getChrIndex()); + } + auto hasOverhang = line.back() != '.' && line.back() != '#'; + tumorResults[static_cast(compressedMrefChrIndex)]. 
+ emplace_back(BreakpointReduced(tmpBp, lineIndex, hasOverhang)); + if (hasOverhang) { + std::string overhang{}; + for (auto it = line.rbegin(); it != line.rend(); ++it) { + if (*it == '\t') { + break; + } else { + overhang.push_back(*it); + } } + reverse(overhang.begin(), overhang.end()); + overhangs.emplace_back(lineIndex, overhang); + } else { + visitedLineIndices.push_back(lineIndex); } - reverse(overhang.begin(), overhang.end()); - overhangs.emplace_back(lineIndex, overhang); - } else { - visitedLineIndices.push_back(lineIndex); + ++lineIndex; } - ++lineIndex; - } - for (auto &tres : tumorResults) { - DeFuzzier deFuzzier{defaultReadLengthTumorIn * 6, false}; - deFuzzier.deFuzzyDb(tres); - } - searchMatches(mref); - if (applyMassiveInversionFiltering(false, controlCheckMode)) { - ++massiveInvFilteringLevel; - } - if (massiveInvFilteringLevel == 1) { - if (applyMassiveInversionFiltering(true, controlCheckMode)) { + for (auto &tres : tumorResults) { + DeFuzzier deFuzzier { defaultReadLengthTumorIn * 6, false }; + deFuzzier.deFuzzyDb(tres); + } + searchMatches(mref); + if (applyMassiveInversionFiltering(false, controlCheckMode)) { ++massiveInvFilteringLevel; } + if (massiveInvFilteringLevel == 1) { + if (applyMassiveInversionFiltering(true, controlCheckMode)) { + ++massiveInvFilteringLevel; + } + } + contaminationObserved = applyPathogenContaminationFiltering(); + if (!controlCheckMode && !contaminationObserved && + massiveInvFilteringLevel == 0) { + printUnresolvedRareOverhangs(mref); + } } - contaminationObserved = applyPathogenContaminationFiltering(); - if (!controlCheckMode && !contaminationObserved && - massiveInvFilteringLevel == 0) { - printUnresolvedRareOverhangs(mref); - } -} -AnnotationProcessor::AnnotationProcessor( - const string &tumorResultsIn, vector> &mref, - const string &controlResultsIn, int defaultReadLengthTumorIn, - int defaultReadLengthControlIn, int germlineDbLimit, int lowQualControlIn, - bool pathogenInControlIn) - : 
NOCONTROLMODE{false}, GERMLINEDBLIMIT{germlineDbLimit}, - contaminationObserved{false}, massiveInvFilteringLevel{0}, - filteredResults{}, tumorResults{85, vector{}}, - controlResults{85, vector{}} { - unique_ptr controlInputHandle{make_unique( - controlResultsIn, ios_base::in | ios_base::binary)}; - unique_ptr controlGzHandle{ - make_unique()}; - controlGzHandle->push(boost::iostreams::gzip_decompressor()); - controlGzHandle->push(*controlInputHandle); - string line; - auto lineIndex = 0; - while (error_terminating_getline(*controlGzHandle, line)) { - if (line.front() == '#') { - continue; - }; - Breakpoint tmpBpPre{line, true}; - BreakpointReduced tmpBp{tmpBpPre, lineIndex, false}; - if (tmpBp.getChrIndex() > 1001) { - continue; - } - if (pathogenInControlIn) { - if ((tmpBp.getPairedBreaksSoft() + tmpBp.getUnpairedBreaksSoft()) > - 19 && - (tmpBp.getPairedBreaksHard() + tmpBp.getUnpairedBreaksHard() < - 3)) { - if (line.back() != '.' && line.back() != '#') { - string overhang{}; - auto overhangLength = 0; - auto maxOverhangLength = 0; - for (auto it = line.rbegin(); *it != '\t'; ++it) { - switch (*it) { - case '(': - overhangLength = 0; - break; - case ':': - maxOverhangLength = - max(maxOverhangLength, overhangLength); - overhangLength = 0; - break; - default: - ++overhangLength; - break; + AnnotationProcessor::AnnotationProcessor( + const std::string &tumorResultsIn, + std::vector> &mref, + const std::string &controlResultsIn, + ChrSize defaultReadLengthTumorIn, + ChrSize defaultReadLengthControlIn, + int germlineDbLimit, + int lowQualControlIn, + bool pathogenInControlIn) + : NO_CONTROL_MODE{false}, + GERMLINE_DB_LIMIT{germlineDbLimit}, + contaminationObserved{false}, + massiveInvFilteringLevel{0}, + filteredResults{} { + + CompressedMrefIndex nCompressedMrefChromosomes = GlobalAppConfig::getInstance(). 
+ getChrConverter().nChromosomesCompressedMref(); + std::vector>::size_type vectorSize = + std::vector>::size_type(nCompressedMrefChromosomes); + + tumorResults = std::vector> + { vectorSize, std::vector{} }; + controlResults = std::vector> + { vectorSize, std::vector{} }; + + std::unique_ptr controlInputHandle{ + std::make_unique(controlResultsIn, std::ios_base::in | std::ios_base::binary)}; + std::unique_ptr controlGzHandle{ + std::make_unique()}; + + controlGzHandle->push(boost::iostreams::gzip_decompressor()); + controlGzHandle->push(*controlInputHandle); + std::string line; + auto lineIndex = 0; + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + while (error_terminating_getline(*controlGzHandle, line)) { + if (line.front() == '#') { + continue; + }; + Breakpoint tmpBpPre = Breakpoint::parse(line, true); + BreakpointReduced tmpBp{tmpBpPre, lineIndex, false}; + if (chrConverter.isTechnical(tmpBp.getChrIndex())) { + continue; + } + if (pathogenInControlIn) { + if ((tmpBp.getPairedBreaksSoft() + tmpBp.getUnpairedBreaksSoft()) > + 19 && + (tmpBp.getPairedBreaksHard() + tmpBp.getUnpairedBreaksHard() < + 3)) { + if (line.back() != '.' 
&& line.back() != '#') { + std::string overhang{}; + ChrSize overhangLength = 0; + ChrSize maxOverhangLength = 0; + for (auto it = line.rbegin(); *it != '\t'; ++it) { + switch (*it) { + case '(': + overhangLength = 0; + break; + case ':': + maxOverhangLength = std::max(maxOverhangLength, overhangLength); + overhangLength = 0; + break; + default: + ++overhangLength; + break; + } + } + auto maxOverhangLengthRatio = + (maxOverhangLength + 0.0) / defaultReadLengthControlIn; + if (maxOverhangLengthRatio > 0.7) { + continue; } - } - auto maxOverhangLengthRatio = - (maxOverhangLength + 0.0) / defaultReadLengthControlIn; - if (maxOverhangLengthRatio > 0.7) { - continue; } } } - } - if (lowQualControlIn > 0) { - SuppAlignmentAnno *bestSa = nullptr; - auto bestSaSupport = 0; - if (!tmpBp.getSuppAlignments().empty()) { - for (const auto sa : tmpBp.getSupplementsPtr()) { - if (!sa->isSuspicious()) { - if (bestSa) { - auto saSupport = sa->getSupport() + - sa->getSecondarySupport() + - sa->getMateSupport(); - if (saSupport > bestSaSupport) { + if (lowQualControlIn > 0) { + SuppAlignmentAnno *bestSa = nullptr; + auto bestSaSupport = 0; + if (!tmpBp.getSuppAlignments().empty()) { + for (const auto sa : tmpBp.getSupplementsPtr()) { + if (!sa->isSuspicious()) { + if (bestSa) { + auto saSupport = sa->getSupport() + + sa->getSecondarySupport() + + sa->getMateSupport(); + if (saSupport > bestSaSupport) { + bestSa = sa; + bestSaSupport = saSupport; + } + } else { bestSa = sa; - bestSaSupport = saSupport; } - } else { - bestSa = sa; } } } - } - auto clipTotal = - tmpBp.getPairedBreaksHard() + tmpBp.getPairedBreaksSoft() + - tmpBp.getUnpairedBreaksHard() + tmpBp.getUnpairedBreaksSoft(); - if (!bestSa) { - if (clipTotal < 10) { - continue; - } - } else { - if (bestSa->isInverted()) { - auto suppDist = tmpBp.distanceToSupp(*bestSa); - if (suppDist > 0) { - if (suppDist < 10000) { - if (lowQualControlIn == 2) { - continue; + auto clipTotal = + tmpBp.getPairedBreaksHard() + 
tmpBp.getPairedBreaksSoft() + + tmpBp.getUnpairedBreaksHard() + tmpBp.getUnpairedBreaksSoft(); + if (!bestSa) { + if (clipTotal < 10) { + continue; + } + } else { + if (bestSa->isInverted()) { + ChrDistance suppDist = tmpBp.distanceToSupp(*bestSa); + if (suppDist > 0) { + if (suppDist < 10000) { + if (lowQualControlIn == 2) { + continue; + } else { + if (bestSa->getSupport() < 5 || + bestSa->getSecondarySupport() < 5) { + continue; + } + } } else { - if (bestSa->getSupport() < 5 || + if (bestSa->getSupport() < 5 && bestSa->getSecondarySupport() < 5) { continue; } } - } else { + } else if (suppDist < 0) { if (bestSa->getSupport() < 5 && bestSa->getSecondarySupport() < 5) { continue; } } - } else if (suppDist < 0) { - if (bestSa->getSupport() < 5 && - bestSa->getSecondarySupport() < 5) { - continue; - } } } } + controlResults[static_cast(chrConverter.indexToCompressedMrefIndex(tmpBp.getChrIndex()))] + .push_back(tmpBp); + ++lineIndex; } - auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()]; - controlResults[chrIndex].push_back(tmpBp); - ++lineIndex; - } - for (auto &cres : controlResults) { - DeFuzzier deFuzzierControl{defaultReadLengthControlIn * 6, false}; - deFuzzierControl.deFuzzyDb(cres); - } - unique_ptr tumorInputHandle{ - make_unique(tumorResultsIn, ios_base::in | ios_base::binary)}; - unique_ptr tumorGzHandle{ - make_unique()}; - tumorGzHandle->push(boost::iostreams::gzip_decompressor()); - tumorGzHandle->push(*tumorInputHandle); - lineIndex = 0; - while (error_terminating_getline(*tumorGzHandle, line)) { - if (line.front() == '#') { - continue; - }; - Breakpoint tmpBp{line, true}; - auto chrIndex = ChrConverter::indexConverter[tmpBp.getChrIndex()]; - if (chrIndex < 0) { - continue; + for (auto &cres : controlResults) { + DeFuzzier deFuzzierControl{defaultReadLengthControlIn * 6, false}; + deFuzzierControl.deFuzzyDb(cres); } - auto hasOverhang = line.back() != '.' 
&& line.back() != '#'; - tumorResults[chrIndex].emplace_back(tmpBp, lineIndex, hasOverhang); - if (line.back() != '.' && line.back() != '#') { - string overhang{}; - for (auto it = line.rbegin(); it != line.rend(); ++it) { - if (*it == '\t') { - break; - } else { - overhang.push_back(*it); + std::unique_ptr tumorInputHandle{ + std::make_unique(tumorResultsIn, std::ios_base::in | std::ios_base::binary)}; + std::unique_ptr tumorGzHandle{ + std::make_unique()}; + + tumorGzHandle->push(boost::iostreams::gzip_decompressor()); + tumorGzHandle->push(*tumorInputHandle); + lineIndex = 0; + while (error_terminating_getline(*tumorGzHandle, line)) { + if (line.front() == '#') { + continue; + }; + Breakpoint tmpBp = Breakpoint::parse(line, true); + if (!chrConverter.isCompressedMref(tmpBp.getChrIndex())) { + continue; + } + CompressedMrefIndex compressedMrefChrIndex = + chrConverter.indexToCompressedMrefIndex(tmpBp.getChrIndex()); + auto hasOverhang = line.back() != '.' && line.back() != '#'; + tumorResults[static_cast(compressedMrefChrIndex)]. + emplace_back(tmpBp, lineIndex, hasOverhang); + if (line.back() != '.' 
&& line.back() != '#') { + std::string overhang{}; + for (auto it = line.rbegin(); it != line.rend(); ++it) { + if (*it == '\t') { + break; + } else { + overhang.push_back(*it); + } } + reverse(overhang.begin(), overhang.end()); + overhangs.emplace_back(lineIndex, overhang); + } else { + visitedLineIndices.push_back(lineIndex); } - reverse(overhang.begin(), overhang.end()); - overhangs.emplace_back(lineIndex, overhang); - } else { - visitedLineIndices.push_back(lineIndex); + ++lineIndex; } - ++lineIndex; - } - for (auto &tres : tumorResults) { - DeFuzzier deFuzzierTumor{defaultReadLengthTumorIn * 6, false}; - deFuzzierTumor.deFuzzyDb(tres); - } - searchMatches(mref); - if (applyMassiveInversionFiltering(false, false)) { - ++massiveInvFilteringLevel; - } - if (massiveInvFilteringLevel == 1) { - if (applyMassiveInversionFiltering(true, false)) { + for (auto &tres : tumorResults) { + DeFuzzier deFuzzierTumor{defaultReadLengthTumorIn * 6, false}; + deFuzzierTumor.deFuzzyDb(tres); + } + searchMatches(mref); + if (applyMassiveInversionFiltering(false, false)) { ++massiveInvFilteringLevel; } + if (massiveInvFilteringLevel == 1) { + if (applyMassiveInversionFiltering(true, false)) { + ++massiveInvFilteringLevel; + } + } + contaminationObserved = applyPathogenContaminationFiltering(); + if (!contaminationObserved && massiveInvFilteringLevel == 0) { + printUnresolvedRareOverhangs(mref); + } } - contaminationObserved = applyPathogenContaminationFiltering(); - if (!contaminationObserved && massiveInvFilteringLevel == 0) { - printUnresolvedRareOverhangs(mref); - } -} -void -AnnotationProcessor::searchMatches(vector> &mref) { - for (auto j = 0; j < 85; ++j) { - for (auto i = 0u; i < tumorResults[j].size(); ++i) { - for (const auto &sa : tumorResults[j][i].getSuppAlignments()) { - if (SvEvent::DEBUGMODE || !sa.isSuspicious()) { - if (sa.getSecondarySupport() > 0 || - (sa.getSupport() > 0 && sa.getMateSupport() > 0)) { - searchSa(j, i, sa, true, mref); - } else { - searchSa(j, i, 
sa, false, mref); + void + AnnotationProcessor::searchMatches(std::vector> &mref) { + CompressedMrefIndex nCompressedMrefChromosomes = GlobalAppConfig::getInstance(). + getChrConverter().nChromosomesCompressedMref(); + for (CompressedMrefIndex mrefIdx = 0; mrefIdx < nCompressedMrefChromosomes; ++mrefIdx) { + for (size_t dbIdx = 0; dbIdx < tumorResults[static_cast(mrefIdx)].size(); ++dbIdx) { + for (const auto &sa : tumorResults[static_cast(mrefIdx)][dbIdx].getSuppAlignments()) { + if (SvEvent::DEBUG_MODE || !sa.isSuspicious()) { + if (sa.getSecondarySupport() > 0 || + (sa.getSupport() > 0 && sa.getMateSupport() > 0)) { + searchSa(mrefIdx, dbIdx, sa, true, mref); + } else { + searchSa(mrefIdx, dbIdx, sa, false, mref); + } } } } } } -} -void -AnnotationProcessor::searchSa(int chrIndex, int dbIndex, - const SuppAlignmentAnno &sa, bool doubleSupportSa, - vector> &mref) { - if (sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < 3) { - return; - } - if (sa.getChrIndex() == 1001) { - if (createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { - createUnknownMatchSv(tumorResults[chrIndex][dbIndex], sa, mref, - doubleSupportSa); + void + AnnotationProcessor::searchSa(CompressedMrefIndex compressedMrefIndex, + size_t dbIndex, + const SuppAlignmentAnno &sa, + bool doubleSupportSa, + std::vector> &mref) { + if (sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < 3) { + return; } - return; - } - auto saChrIndex = ChrConverter::indexConverter[sa.getChrIndex()]; - if (saChrIndex < 0) { - return; - } - auto fuzziness = 3 * SuppAlignmentAnno::DEFAULTREADLENGTH; - vector::iterator>> dbHits{}; - - if (!tumorResults[saChrIndex].empty()) { - auto itStart = lower_bound(tumorResults[saChrIndex].begin(), - tumorResults[saChrIndex].end(), sa); - if (itStart == tumorResults[saChrIndex].end()) { - --itStart; + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (chrConverter.isExtrachromosomal(sa.getChrIndex())) { + if 
(createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { + createUnknownMatchSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], sa, mref, + doubleSupportSa); + } + return; } - if (itStart != tumorResults[saChrIndex].begin() && - !itStart->closeToSupp(sa, fuzziness)) { - --itStart; - if (!itStart->closeToSupp(sa, fuzziness)) { - if (createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { + if (!chrConverter.isCompressedMref(sa.getChrIndex())) { + return; + } + CompressedMrefIndex saChrIndex = chrConverter.indexToCompressedMrefIndex(sa.getChrIndex()); + ChrDistance fuzziness = ChrDistance(3 * SuppAlignmentAnno::DEFAULT_READ_LENGTH); + std::vector::iterator>> dbHits{}; - createUnknownMatchSv(tumorResults[chrIndex][dbIndex], sa, - mref, doubleSupportSa); - } - return; + if (!tumorResults[static_cast(saChrIndex)].empty()) { + auto itStart = lower_bound(tumorResults[static_cast(saChrIndex)].begin(), + tumorResults[static_cast(saChrIndex)].end(), sa); + if (itStart == tumorResults[static_cast(saChrIndex)].end()) { + --itStart; } - } - auto it = itStart; - while (it != tumorResults[saChrIndex].begin()) { - auto distance = it->distanceToSupp(sa); - if (distance <= fuzziness) { - dbHits.emplace_back(distance, it); - --it; - } else { - break; + if (itStart != tumorResults[static_cast(saChrIndex)].begin() && + !itStart->closeToSupp(sa, fuzziness)) { + --itStart; + if (!itStart->closeToSupp(sa, fuzziness)) { + if (createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { + + createUnknownMatchSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], + sa, + mref, + doubleSupportSa); + } + return; + } } - } - if (itStart != tumorResults[saChrIndex].end()) { - auto it = next(itStart); - while (it != tumorResults[saChrIndex].end()) { - auto distance = it->distanceToSupp(sa); + auto it = itStart; + while (it != tumorResults[static_cast(saChrIndex)].begin()) { + ChrDistance distance = it->distanceToSupp(sa); if (distance <= fuzziness) { dbHits.emplace_back(distance, it); - ++it; + 
--it; } else { break; } } - } - sort(dbHits.begin(), dbHits.end()); - } - if (dbHits.empty()) { - if (createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { - createUnknownMatchSv(tumorResults[chrIndex][dbIndex], sa, mref, - doubleSupportSa); - } - return; - } else { - auto createdMatch = false; - for (auto &res : dbHits) { - for (const auto &saMatch : res.second->getSuppAlignments()) { - if (tumorResults[chrIndex][dbIndex].closeToSupp(saMatch, - fuzziness)) { - if (createDoubleMatchSvPreCheck(saMatch)) { - createDoubleMatchSv(tumorResults[chrIndex][dbIndex], - *res.second, sa, saMatch, true, - mref); - createdMatch = true; + if (itStart != tumorResults[static_cast(saChrIndex)].end()) { + auto it = next(itStart); + while (it != tumorResults[static_cast(saChrIndex)].end()) { + auto distance = it->distanceToSupp(sa); + if (distance <= fuzziness) { + dbHits.emplace_back(distance, it); + ++it; + } else { + break; } } } + sort(dbHits.begin(), dbHits.end()); } - if (!createdMatch) { - auto res = dbHits[0]; - for (const auto &saMatch : res.second->getSuppAlignments()) { - if (tumorResults[chrIndex][dbIndex].closeToSupp( - saMatch, fuzziness * 3)) { - if (createDoubleMatchSvPreCheck(saMatch)) { - createDoubleMatchSv(tumorResults[chrIndex][dbIndex], - *res.second, sa, saMatch, true, - mref); - return; + if (dbHits.empty()) { + if (createUnknownMatchSvPreCheck(sa, doubleSupportSa)) { + createUnknownMatchSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], + sa, + mref, + doubleSupportSa); + } + return; + } else { + auto createdMatch = false; + for (auto &res : dbHits) { + for (const auto &saMatch : res.second->getSuppAlignments()) { + if (tumorResults[static_cast(compressedMrefIndex)][dbIndex].closeToSupp( + saMatch, fuzziness)) { + if (createDoubleMatchSvPreCheck(saMatch)) { + createDoubleMatchSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], + *res.second, sa, saMatch, true, + mref); + createdMatch = true; + } + } + } + } + if (!createdMatch) { + auto res = 
dbHits[0]; + for (const auto &saMatch : res.second->getSuppAlignments()) { + if (tumorResults[static_cast(compressedMrefIndex)][dbIndex].closeToSupp( + saMatch, fuzziness * 3)) { + if (createDoubleMatchSvPreCheck(saMatch)) { + createDoubleMatchSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], + *res.second, sa, saMatch, true, + mref); + return; + } } } + createUnmatchedSaSv(tumorResults[static_cast(compressedMrefIndex)][dbIndex], *res.second, + sa, mref); } - createUnmatchedSaSv(tumorResults[chrIndex][dbIndex], *res.second, - sa, mref); } } -} -void -AnnotationProcessor::createDoubleMatchSv(BreakpointReduced &sourceBp, - BreakpointReduced &targetBp, - const SuppAlignmentAnno &sa, - const SuppAlignmentAnno &saMatch, - bool checkOrder, - vector> &mref) { - if (checkOrder) { + void + AnnotationProcessor::createDoubleMatchSv(BreakpointReduced &sourceBp, + BreakpointReduced &targetBp, + const SuppAlignmentAnno &sa, + const SuppAlignmentAnno &saMatch, + bool checkOrder, + std::vector> &mref) { + if (checkOrder) { + if (sourceBp.getMrefHits().getNumConsevativeHits() == -1) { + auto germlineInfo = searchGermlineHitsNew( + sourceBp, + SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD); + sourceBp.setGermlineInfo(germlineInfo); + auto mrefInfo = searchMrefHitsNew( + sourceBp, + SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD, + mref); + sourceBp.setMrefHits(mrefInfo); + } + if (targetBp.getMrefHits().getNumConsevativeHits() == -1) { + auto germlineInfo = searchGermlineHitsNew( + targetBp, + SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD); + targetBp.setGermlineInfo(germlineInfo); + auto mrefInfo = searchMrefHitsNew( + targetBp, + SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD, + mref); + targetBp.setMrefHits(mrefInfo); + } + if (targetBp.fullSmaller(sourceBp)) { + createDoubleMatchSv(targetBp, sourceBp, saMatch, sa, false, mref); + } + 
} + visitedLineIndices.push_back(sourceBp.getLineIndex()); + visitedLineIndices.push_back(targetBp.getLineIndex()); + filteredResults.emplace_back(sourceBp, targetBp, sa, saMatch, overhangs); + checkSvQuality(); + } + bool + AnnotationProcessor::createDoubleMatchSvPreCheck( + const SuppAlignmentAnno &saMatch) { + if (SvEvent::DEBUG_MODE || !saMatch.isSuspicious()) { + return true; + } + return false; + } + void + AnnotationProcessor::createUnmatchedSaSv(BreakpointReduced &sourceBp, + BreakpointReduced &targetBp, + const SuppAlignmentAnno &sa, + std::vector> &mref) { if (sourceBp.getMrefHits().getNumConsevativeHits() == -1) { auto germlineInfo = searchGermlineHitsNew( - sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD); + sourceBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD); sourceBp.setGermlineInfo(germlineInfo); auto mrefInfo = searchMrefHitsNew( - sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); + sourceBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD, mref); sourceBp.setMrefHits(mrefInfo); } - if (targetBp.getMrefHits().getNumConsevativeHits() == -1) { - auto germlineInfo = searchGermlineHitsNew( - targetBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD); - targetBp.setGermlineInfo(germlineInfo); - auto mrefInfo = searchMrefHitsNew( - targetBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); - targetBp.setMrefHits(mrefInfo); - } - if (targetBp.fullSmaller(sourceBp)) { - createDoubleMatchSv(targetBp, sourceBp, saMatch, sa, false, mref); - } + targetBp.addDummySa(sa, sourceBp); + auto germlineInfo = searchGermlineHitsNew( + targetBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD); + auto mrefHits = + searchMrefHitsNew(targetBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + 
SvEvent::GERMLINE_OFFSET_THRESHOLD, mref); + targetBp.setMrefHits(mrefHits); + targetBp.setGermlineInfo(germlineInfo); + visitedLineIndices.push_back(sourceBp.getLineIndex()); + visitedLineIndices.push_back(targetBp.getLineIndex()); + filteredResults.emplace_back(sourceBp, targetBp, sa, overhangs, + targetBp.getDummySa()); + checkSvQuality(); + targetBp.removeMarkedFuzzies(); } - visitedLineIndices.push_back(sourceBp.getLineIndex()); - visitedLineIndices.push_back(targetBp.getLineIndex()); - filteredResults.emplace_back(sourceBp, targetBp, sa, saMatch, overhangs); - checkSvQuality(); -} -bool -AnnotationProcessor::createDoubleMatchSvPreCheck( - const SuppAlignmentAnno &saMatch) { - if (SvEvent::DEBUGMODE || !saMatch.isSuspicious()) { - return true; + bool + AnnotationProcessor::createUnknownMatchSvPreCheck(const SuppAlignmentAnno &sa, + bool doubleSupportSa) { + if (SvEvent::DEBUG_MODE || !sa.isSemiSuspicious()) { + if (doubleSupportSa || + (!sa.isFuzzy() && + (sa.getSupport() > 0 || sa.getSecondarySupport() > 0))) { + return true; + } + } + return false; } - return false; -} -void -AnnotationProcessor::createUnmatchedSaSv(BreakpointReduced &sourceBp, - BreakpointReduced &targetBp, - const SuppAlignmentAnno &sa, - vector> &mref) { - if (sourceBp.getMrefHits().getNumConsevativeHits() == -1) { + void + AnnotationProcessor::createUnknownMatchSv(BreakpointReduced &sourceBp, + const SuppAlignmentAnno &sa, + std::vector> &mref, + bool doubleSupportSa[[gnu::unused]] // TODO: remove + ) { auto germlineInfo = searchGermlineHitsNew( - sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD); + sourceBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD); sourceBp.setGermlineInfo(germlineInfo); - auto mrefInfo = searchMrefHitsNew( - sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); + auto mrefInfo = + searchMrefHitsNew(sourceBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 
6, + SvEvent::GERMLINE_OFFSET_THRESHOLD, mref); sourceBp.setMrefHits(mrefInfo); - } - targetBp.addDummySa(sa, sourceBp); - auto germlineInfo = searchGermlineHitsNew( - targetBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD); - auto mrefHits = - searchMrefHitsNew(targetBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); - targetBp.setMrefHits(mrefHits); - targetBp.setGermlineInfo(germlineInfo); - visitedLineIndices.push_back(sourceBp.getLineIndex()); - visitedLineIndices.push_back(targetBp.getLineIndex()); - filteredResults.emplace_back(sourceBp, targetBp, sa, overhangs, - targetBp.getDummySa()); - checkSvQuality(); - targetBp.removeMarkedFuzzies(); -} -bool -AnnotationProcessor::createUnknownMatchSvPreCheck(const SuppAlignmentAnno &sa, - bool doubleSupportSa) { - if (SvEvent::DEBUGMODE || !sa.isSemiSuspicious()) { - if (doubleSupportSa || - (!sa.isFuzzy() && - (sa.getSupport() > 0 || sa.getSecondarySupport() > 0))) { - return true; - } - } - return false; -} -void -AnnotationProcessor::createUnknownMatchSv(BreakpointReduced &sourceBp, - const SuppAlignmentAnno &sa, - vector> &mref, - bool doubleSupportSa) { - auto germlineInfo = searchGermlineHitsNew( - sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD); - sourceBp.setGermlineInfo(germlineInfo); - auto mrefInfo = - searchMrefHitsNew(sourceBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); - sourceBp.setMrefHits(mrefInfo); - BreakpointReduced dummyBp{sa, sourceBp, false}; - auto dummyGermline = - searchGermlineHitsNew(dummyBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD * 3); - auto dummyMref = - searchMrefHitsNew(dummyBp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD * 3, mref); - if (sa.isFuzzy() && sa.getExtendedPos() - sa.getPos() > - 3 * SvEvent::GERMLINEOFFSETTHRESHOLD) { - BreakpointReduced dummyBp2{sa, 
sourceBp, true}; - auto dummyGermline2 = searchGermlineHitsNew( - dummyBp2, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD * 3); - auto dummyMref2 = searchMrefHitsNew( - dummyBp2, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD * 3, mref); - if (dummyMref2.getNumConsevativeHits() > - dummyMref.getNumConsevativeHits()) { - dummyMref = dummyMref2; - dummyGermline = dummyGermline2; + BreakpointReduced dummyBp{sa, sourceBp, false}; + auto dummyGermline = + searchGermlineHitsNew(dummyBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD * 3); + auto dummyMref = + searchMrefHitsNew(dummyBp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD * 3, mref); + if (sa.isFuzzy() && + static_cast(sa.getExtendedPos()) - static_cast(sa.getPos()) > 3 * SvEvent::GERMLINE_OFFSET_THRESHOLD) { + BreakpointReduced dummyBp2{sa, sourceBp, true}; + auto dummyGermline2 = searchGermlineHitsNew( + dummyBp2, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD * 3); + auto dummyMref2 = searchMrefHitsNew( + dummyBp2, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD * 3, mref); + if (dummyMref2.getNumConsevativeHits() > + dummyMref.getNumConsevativeHits()) { + dummyMref = dummyMref2; + dummyGermline = dummyGermline2; + } } + visitedLineIndices.push_back(sourceBp.getLineIndex()); + filteredResults.emplace_back(sourceBp, sa, dummyGermline, dummyMref, + overhangs, dummyBp.getDummySa()); + checkSvQuality(); } - visitedLineIndices.push_back(sourceBp.getLineIndex()); - filteredResults.emplace_back(sourceBp, sa, dummyGermline, dummyMref, - overhangs, dummyBp.getDummySa()); - checkSvQuality(); -} -void -AnnotationProcessor::checkSvQuality() { - auto key = filteredResults.back().getKey(); - if (!key.empty()) { - if (filteredResultKeys.count(key) == 0) { - filteredResultKeys.insert(key); - return; + void + 
AnnotationProcessor::checkSvQuality() { + auto key = filteredResults.back().getKey(); + if (!key.empty()) { + if (filteredResultKeys.count(key) == 0) { + filteredResultKeys.insert(key); + return; + } } + filteredResults.pop_back(); } - filteredResults.pop_back(); -} -MrefMatch -AnnotationProcessor::searchMrefHitsNew(const BreakpointReduced &bpIn, - int distanceThreshold, - int conservativeDistanceThreshold, - vector> &mref) { - auto convertedChrIndex = ChrConverter::indexConverter[bpIn.getChrIndex()]; - vector suppMatches{}; - if (convertedChrIndex < 0) { - return MrefMatch{0, 0, 10000, suppMatches}; - } - auto itStart = lower_bound(mref[convertedChrIndex].begin(), - mref[convertedChrIndex].end(), bpIn); - if (itStart == mref[convertedChrIndex].end()) { - return MrefMatch{0, 0, 10000, suppMatches}; - } - if (itStart != mref[convertedChrIndex].begin() && - !(itStart->distanceTo(bpIn) < SvEvent::GERMLINEOFFSETTHRESHOLD) && - prev(itStart)->distanceTo(bpIn) < SvEvent::GERMLINEOFFSETTHRESHOLD) { - --itStart; - } - auto it = itStart; - - vector::iterator> dbHits{}; - vector::iterator> dbHitsConservative{}; - while (true) { - auto tmpDistance = it->distanceTo(bpIn); - if (tmpDistance < SvEvent::GERMLINEOFFSETTHRESHOLD) { - dbHitsConservative.push_back(it); + MrefMatch + AnnotationProcessor::searchMrefHitsNew(const BreakpointReduced &bpIn, + int distanceThreshold, + int conservativeDistanceThreshold[[gnu::unused]], // TODO: remove + std::vector> &mref) { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + std::vector suppMatches{}; + if (!chrConverter.isCompressedMref(bpIn.getChrIndex())) { + return MrefMatch{0, 0, 10000, suppMatches}; } - if (tmpDistance < distanceThreshold) { - dbHits.push_back(it); - } else { - break; + unsigned int compressedMrefIndex = + static_cast(chrConverter.indexToCompressedMrefIndex(bpIn.getChrIndex())); + + auto itStart = lower_bound(mref[compressedMrefIndex].begin(), + mref[compressedMrefIndex].end(), 
bpIn); + if (itStart == mref[compressedMrefIndex].end()) { + return MrefMatch{0, 0, 10000, suppMatches}; } - if (it == mref[convertedChrIndex].begin()) { - break; + if (itStart != mref[compressedMrefIndex].begin() && + !(itStart->distanceTo(bpIn) < SvEvent::GERMLINE_OFFSET_THRESHOLD) && + prev(itStart)->distanceTo( bpIn) < SvEvent::GERMLINE_OFFSET_THRESHOLD) { + --itStart; } - --it; - } - if (itStart != mref[convertedChrIndex].end()) { - it = next(itStart); + auto it = itStart; + + std::vector::iterator> dbHits{}; + std::vector::iterator> dbHitsConservative{}; while (true) { - if (it == mref[convertedChrIndex].end()) { - break; - } auto tmpDistance = it->distanceTo(bpIn); - if (tmpDistance < SvEvent::GERMLINEOFFSETTHRESHOLD) { + if (tmpDistance < SvEvent::GERMLINE_OFFSET_THRESHOLD) { dbHitsConservative.push_back(it); } if (tmpDistance < distanceThreshold) { @@ -593,138 +629,144 @@ AnnotationProcessor::searchMrefHitsNew(const BreakpointReduced &bpIn, } else { break; } - ++it; + if (it == mref[compressedMrefIndex].begin()) { + break; + } + --it; } - } - if (dbHits.empty()) { - return MrefMatch{0, 0, 0, suppMatches}; - } - short score{0}; - auto offset = 0; - for (auto res : dbHits) { - auto saMatch = false; - for (const auto &saRef : res->getSuppAlignments()) { - for (const auto &sa : bpIn.getSuppAlignments()) { - if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULTREADLENGTH / - 2)) { - saMatch = true; - auto previouslyRecorded = false; - for (auto &saTmp : suppMatches) { - if (saRef.saCloseness( - saTmp, - SuppAlignmentAnno::DEFAULTREADLENGTH / 2)) { - previouslyRecorded = true; - if (saTmp.isFuzzy()) { - if (saRef.isFuzzy()) { - saTmp.extendSuppAlignment( - saRef.getPos(), saRef.getExtendedPos()); - } else { - saTmp.removeFuzziness(saRef); + if (itStart != mref[compressedMrefIndex].end()) { + it = next(itStart); + while (true) { + if (it == mref[compressedMrefIndex].end()) { + break; + } + auto tmpDistance = it->distanceTo(bpIn); + if (tmpDistance < 
SvEvent::GERMLINE_OFFSET_THRESHOLD) { + dbHitsConservative.push_back(it); + } + if (tmpDistance < distanceThreshold) { + dbHits.push_back(it); + } else { + break; + } + ++it; + } + } + if (dbHits.empty()) { + return MrefMatch{0, 0, 0, suppMatches}; + } + short score{0}; + auto offset = 0; + for (auto res : dbHits) { + auto saMatch = false; + for (const auto &saRef : res->getSuppAlignments()) { + for (const auto &sa : bpIn.getSuppAlignments()) { + if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULT_READ_LENGTH / + 2)) { + saMatch = true; + auto previouslyRecorded = false; + for (auto &saTmp : suppMatches) { + if (saRef.saCloseness( + saTmp, + SuppAlignmentAnno::DEFAULT_READ_LENGTH / 2)) { + previouslyRecorded = true; + if (saTmp.isFuzzy()) { + if (saRef.isFuzzy()) { + saTmp.extendSuppAlignment( + saRef.getPos(), saRef.getExtendedPos()); + } else { + saTmp.removeFuzziness(saRef); + } + } + if (saRef.getSupport() > saTmp.getSupport()) { + saTmp.setSupport(saRef.getSupport()); + } + if (saRef.getSecondarySupport() > + saTmp.getSecondarySupport()) { + saTmp.setSecondarySupport( + saRef.getSecondarySupport()); + } + if (saTmp.getSecondarySupport() < + saTmp.getSupport()) { + saTmp.setSecondarySupport(saTmp.getSupport()); } - } - if (saRef.getSupport() > saTmp.getSupport()) { - saTmp.setSupport(saRef.getSupport()); - } - if (saRef.getSecondarySupport() > - saTmp.getSecondarySupport()) { - saTmp.setSecondarySupport( - saRef.getSecondarySupport()); - } - if (saTmp.getSecondarySupport() < - saTmp.getSupport()) { - saTmp.setSecondarySupport(saTmp.getSupport()); } } - } - if (!previouslyRecorded) { - suppMatches.push_back(saRef); + if (!previouslyRecorded) { + suppMatches.push_back(saRef); + } } } } - } - if (saMatch) { - auto tmpScore = res->getNumHits(); - if (tmpScore > score) { - score = tmpScore; - offset = res->distanceTo(bpIn); + if (saMatch) { + auto tmpScore = res->getNumHits(); + if (tmpScore > score) { + score = tmpScore; + offset = res->distanceTo(bpIn); + } } } - 
} - short conservativeScore{0}; - for (const auto res : dbHitsConservative) { - auto tmpScore = res->getNumHits(); - if (tmpScore < SvEvent::RELAXEDBPFREQTHRESHOLD) { - auto sas = res->getSuppAlignments(); - if (sas.size() == 1) { - if ((sas[0].getSupport() + 0.0) / tmpScore > 0.8) { - continue; + short conservativeScore{0}; + for (const auto res : dbHitsConservative) { + auto tmpScore = res->getNumHits(); + if (tmpScore < SvEvent::RELAXED_BP_FREQ_THRESHOLD) { + auto sas = res->getSuppAlignments(); + if (sas.size() == 1) { + if ((sas[0].getSupport() + 0.0) / tmpScore > 0.8) { + continue; + } } } + if (tmpScore > conservativeScore) { + conservativeScore = tmpScore; + } } - if (tmpScore > conservativeScore) { - conservativeScore = tmpScore; - } + return MrefMatch{std::max(score, conservativeScore), + conservativeScore, + offset, + suppMatches}; } - return MrefMatch{max(score, conservativeScore), conservativeScore, offset, - suppMatches}; -} -GermlineMatch -AnnotationProcessor::searchGermlineHitsNew(const BreakpointReduced &bpIn, - int distanceThreshold, - int conservativeDistanceThreshold) { - GermlineMatch dummyMatchTrue{1.0, 1.0, - vector>{}}; - GermlineMatch dummyMatchFalse{0.0, 0.0, - vector>{}}; - if (NOCONTROLMODE) { - return dummyMatchTrue; - } - auto convertedChrIndex = ChrConverter::indexConverter[bpIn.getChrIndex()]; - if (convertedChrIndex < 0) { - return dummyMatchFalse; - } - if (controlResults[convertedChrIndex].empty()) { - return dummyMatchFalse; - } - auto itStart = lower_bound(controlResults[convertedChrIndex].begin(), - controlResults[convertedChrIndex].end(), bpIn); - if (itStart == controlResults[convertedChrIndex].end()) { - return dummyMatchFalse; - } - if (itStart != controlResults[convertedChrIndex].cbegin() && - !(itStart->distanceToBp(bpIn) < SvEvent::GERMLINEOFFSETTHRESHOLD) && - prev(itStart)->distanceToBp(bpIn) < SvEvent::GERMLINEOFFSETTHRESHOLD) { - --itStart; - } - auto it = itStart; - vector::iterator> dbHits{}; - vector::iterator> 
dbHitsConservative{}; + GermlineMatch + AnnotationProcessor::searchGermlineHitsNew(const BreakpointReduced &bpIn, + int distanceThreshold, + int conservativeDistanceThreshold) { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); - while (true) { - auto tmpDistance = it->distanceToBp(bpIn); - if (tmpDistance < 0) { - break; + GermlineMatch dummyMatchTrue{1.0, 1.0, + std::vector>{}}; + GermlineMatch dummyMatchFalse{0.0, 0.0, + std::vector>{}}; + if (NO_CONTROL_MODE) { + return dummyMatchTrue; } - if (tmpDistance < conservativeDistanceThreshold) { - dbHitsConservative.push_back(it); + + if (!chrConverter.isCompressedMref(bpIn.getChrIndex())) { + return dummyMatchFalse; } - if (tmpDistance < distanceThreshold) { - dbHits.push_back(it); - } else { - break; + + unsigned int compressedMrefIndex = + static_cast(GlobalAppConfig::getInstance().getChrConverter(). + indexToCompressedMrefIndex(bpIn.getChrIndex())); + + if (controlResults[compressedMrefIndex].empty()) { + return dummyMatchFalse; } - if (it == controlResults[convertedChrIndex].begin()) { - break; + auto itStart = lower_bound(controlResults[compressedMrefIndex].begin(), + controlResults[compressedMrefIndex].end(), bpIn); + if (itStart == controlResults[compressedMrefIndex].end()) { + return dummyMatchFalse; } - --it; - } - if (itStart != controlResults[convertedChrIndex].end()) { - it = next(itStart); + if (itStart != controlResults[compressedMrefIndex].cbegin() && + !(itStart->distanceToBp(bpIn) < SvEvent::GERMLINE_OFFSET_THRESHOLD) && + prev(itStart)->distanceToBp(bpIn) < SvEvent::GERMLINE_OFFSET_THRESHOLD) { + --itStart; + } + auto it = itStart; + std::vector::iterator> dbHits{}; + std::vector::iterator> dbHitsConservative{}; + while (true) { - if (it == controlResults[convertedChrIndex].end()) { - break; - } auto tmpDistance = it->distanceToBp(bpIn); if (tmpDistance < 0) { break; @@ -737,362 +779,371 @@ AnnotationProcessor::searchGermlineHitsNew(const BreakpointReduced 
&bpIn, } else { break; } - ++it; + if (it == controlResults[compressedMrefIndex].begin()) { + break; + } + --it; } - } - if (dbHits.empty()) { - return dummyMatchFalse; - } - auto conservativeClonality = 0.0; - if (!dbHitsConservative.empty()) { - auto maxSupport = 0.0; - for (const auto res : dbHitsConservative) { - auto mateSupSa = 0; - for (const auto sa : res->getSuppAlignments()) { - if (sa.getMateSupport() > mateSupSa) { - mateSupSa = sa.getMateSupport(); + + if (itStart != controlResults[compressedMrefIndex].end()) { + it = next(itStart); + while (true) { + if (it == controlResults[compressedMrefIndex].end()) { + break; } + auto tmpDistance = it->distanceToBp(bpIn); + if (tmpDistance < 0) { + break; + } + if (tmpDistance < conservativeDistanceThreshold) { + dbHitsConservative.push_back(it); + } + if (tmpDistance < distanceThreshold) { + dbHits.push_back(it); + } else { + break; + } + ++it; } - if (res->getMateSupport() > mateSupSa) { - mateSupSa = res->getMateSupport(); - } - auto support = - 0.0 + res->getPairedBreaksSoft() + res->getBreaksShortIndel() + - res->getPairedBreaksHard() + res->getUnpairedBreaksSoft() + - res->getUnpairedBreaksHard() + mateSupSa; - if (support > maxSupport) { - maxSupport = support; - if (support + res->getNormalSpans() > 0) { - conservativeClonality = - support / (support + res->getNormalSpans()); + } + if (dbHits.empty()) { + return dummyMatchFalse; + } + auto conservativeClonality = 0.0; + if (!dbHitsConservative.empty()) { + auto maxSupport = 0.0; + for (const auto res : dbHitsConservative) { + auto mateSupSa = 0; + for (const SuppAlignmentAnno &sa : res->getSuppAlignments()) { + if (sa.getMateSupport() > mateSupSa) { + mateSupSa = sa.getMateSupport(); + } + } + if (res->getMateSupport() > mateSupSa) { + mateSupSa = res->getMateSupport(); + } + auto support = + 0.0 + res->getPairedBreaksSoft() + res->getBreaksShortIndel() + + res->getPairedBreaksHard() + res->getUnpairedBreaksSoft() + + res->getUnpairedBreaksHard() + 
mateSupSa; + if (support > maxSupport) { + maxSupport = support; + if (support + res->getNormalSpans() > 0) { + conservativeClonality = + support / (support + res->getNormalSpans()); + } } } } - } - auto clonality = conservativeClonality; - vector> suppMatches{}; - for (auto res : dbHits) { - auto breakSupportSoft = - res->getPairedBreaksSoft() + res->getUnpairedBreaksSoft(); - auto breakSupportHard = - res->getPairedBreaksHard() + res->getUnpairedBreaksHard(); - for (const auto &saRef : res->getSuppAlignments()) { - for (const auto &sa : bpIn.getSuppAlignments()) { - if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULTREADLENGTH / - 2)) { - auto previouslyRecorded = false; - for (auto &saTmp : suppMatches) { - if (saRef.saClosenessDirectional( - saTmp.first, - SuppAlignmentAnno::DEFAULTREADLENGTH / 2)) { - previouslyRecorded = true; - if (saTmp.first.isFuzzy()) { - if (saRef.isFuzzy()) { - saTmp.first.extendSuppAlignment( - saRef.getPos(), saRef.getExtendedPos()); - } else { - saTmp.first.removeFuzziness(saRef); + auto clonality = conservativeClonality; + std::vector> suppMatches{}; + for (auto res : dbHits) { + auto breakSupportSoft = + res->getPairedBreaksSoft() + res->getUnpairedBreaksSoft(); + auto breakSupportHard = + res->getPairedBreaksHard() + res->getUnpairedBreaksHard(); + for (const auto &saRef : res->getSuppAlignments()) { + for (const auto &sa : bpIn.getSuppAlignments()) { + if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULT_READ_LENGTH / + 2)) { + auto previouslyRecorded = false; + for (auto &saTmp : suppMatches) { + if (saRef.saClosenessDirectional( + saTmp.first, + SuppAlignmentAnno::DEFAULT_READ_LENGTH / 2)) { + previouslyRecorded = true; + if (saTmp.first.isFuzzy()) { + if (saRef.isFuzzy()) { + saTmp.first.extendSuppAlignment( + saRef.getPos(), saRef.getExtendedPos()); + } else { + saTmp.first.removeFuzziness(saRef); + } } + if (saRef.getSupport() > saTmp.first.getSupport()) { + saTmp.first.setSupport(saRef.getSupport()); + } + if 
(saRef.getSecondarySupport() > + saTmp.first.getSecondarySupport()) { + saTmp.first.setSecondarySupport( + saRef.getSecondarySupport()); + } + if (saRef.getMateSupport() > + saTmp.first.getMateSupport()) { + saTmp.first.setMateSupport( + saRef.getMateSupport()); + saTmp.first.setExpectedDiscordants( + saRef.getExpectedDiscordants()); + } + auto currentSoftSupport = + std::max(saTmp.first.getSupport(), breakSupportSoft); + auto currentHardSupport = + std::max(saTmp.first.getSecondarySupport(), + breakSupportHard); + auto breakSupport = + currentSoftSupport + currentHardSupport + 0.0; + auto currentClonality = + (breakSupport + saTmp.first.getMateSupport()) / + (breakSupport + saTmp.first.getMateSupport() + + res->getNormalSpans()); + saTmp.second = std::max(currentClonality, saTmp.second); } - if (saRef.getSupport() > saTmp.first.getSupport()) { - saTmp.first.setSupport(saRef.getSupport()); - } - if (saRef.getSecondarySupport() > - saTmp.first.getSecondarySupport()) { - saTmp.first.setSecondarySupport( - saRef.getSecondarySupport()); - } - if (saRef.getMateSupport() > - saTmp.first.getMateSupport()) { - saTmp.first.setMateSupport( - saRef.getMateSupport()); - saTmp.first.setExpectedDiscordants( - saRef.getExpectedDiscordants()); - } + } + if (!previouslyRecorded) { auto currentSoftSupport = - max(saTmp.first.getSupport(), breakSupportSoft); + std::max(saRef.getSupport(), breakSupportSoft); auto currentHardSupport = - max(saTmp.first.getSecondarySupport(), - breakSupportHard); + std::max(saRef.getSecondarySupport(), breakSupportHard); auto breakSupport = currentSoftSupport + currentHardSupport + 0.0; auto currentClonality = - (breakSupport + saTmp.first.getMateSupport()) / - (breakSupport + saTmp.first.getMateSupport() + + (breakSupport + saRef.getMateSupport()) / + (breakSupport + saRef.getMateSupport() + res->getNormalSpans()); - saTmp.second = max(currentClonality, saTmp.second); + suppMatches.push_back({saRef, currentClonality}); } } - if (!previouslyRecorded) { 
- auto currentSoftSupport = - max(saRef.getSupport(), breakSupportSoft); - auto currentHardSupport = - max(saRef.getSecondarySupport(), breakSupportHard); - auto breakSupport = - currentSoftSupport + currentHardSupport + 0.0; - auto currentClonality = - (breakSupport + saRef.getMateSupport()) / - (breakSupport + saRef.getMateSupport() + - res->getNormalSpans()); - suppMatches.push_back({saRef, currentClonality}); - } } } } + return GermlineMatch{clonality, conservativeClonality, suppMatches}; } - return GermlineMatch{clonality, conservativeClonality, suppMatches}; -} -bool -AnnotationProcessor::applyMassiveInversionFiltering(bool stricterMode, - bool controlCheckMode) { - auto deletionCandidateCount = 0; - auto totalCount = 0; - for (const auto &sv : filteredResults) { - if (sv.isToRemove()) { - continue; - } - if (sv.getContaminationCandidate() > 0) { - continue; - } - if (sv.getSuspicious() == 0 && sv.getEventScore() > 2) { - if (sv.isInverted() || sv.isSemiSuspicious()) { - ++deletionCandidateCount; - } else if (stricterMode && sv.getEvidenceLevel2() == 0) { - ++deletionCandidateCount; - } - ++totalCount; - } - } - auto totalCountThreshold = 300; - if (stricterMode || controlCheckMode) { - totalCountThreshold = 200; - } - auto invRatio = (deletionCandidateCount + 0.0) / totalCount; - if (invRatio > 0.7 && totalCount > totalCountThreshold) { - for (auto &sv : filteredResults) { + bool + AnnotationProcessor::applyMassiveInversionFiltering(bool stricterMode, + bool controlCheckMode) { + auto deletionCandidateCount = 0; + auto totalCount = 0; + for (const auto &sv : filteredResults) { if (sv.isToRemove()) { continue; } if (sv.getContaminationCandidate() > 0) { continue; } - if (sv.getSuspicious() != 0) { - continue; + if (sv.getSuspicious() == 0 && sv.getEventScore() > 2) { + if (sv.isInverted() || sv.isSemiSuspicious()) { + ++deletionCandidateCount; + } else if (stricterMode && sv.getEvidenceLevel2() == 0) { + ++deletionCandidateCount; + } + ++totalCount; } - if 
(sv.isDistant()) { - if (sv.getSelectedSa1().isProperPairErrorProne() && - sv.getSelectedSa1().getMateSupport() < 6) { - sv.setToRemove(true); + } + auto totalCountThreshold = 300; + if (stricterMode || controlCheckMode) { + totalCountThreshold = 200; + } + auto invRatio = (deletionCandidateCount + 0.0) / totalCount; + if (invRatio > 0.7 && totalCount > totalCountThreshold) { + for (auto &sv : filteredResults) { + if (sv.isToRemove()) { continue; } - if (sv.getSelectedSa2().isProperPairErrorProne() && - sv.getSelectedSa2().getMateSupport() < 6) { - sv.setToRemove(true); + if (sv.getContaminationCandidate() > 0) { continue; } - if (sv.isSemiSuspicious()) { - if (sv.getEventScore() < 3) { + if (sv.getSuspicious() != 0) { + continue; + } + if (sv.isDistant()) { + if (sv.getSelectedSa1().isProperPairErrorProne() && + sv.getSelectedSa1().getMateSupport() < 6) { sv.setToRemove(true); continue; } - if (sv.getSelectedSa1().isSemiSuspicious() && - sv.getSelectedSa2().isSemiSuspicious()) { + if (sv.getSelectedSa2().isProperPairErrorProne() && + sv.getSelectedSa2().getMateSupport() < 6) { sv.setToRemove(true); continue; } - if (sv.getEvidenceLevel2() < 3 && - sv.getSelectedSa1().isSemiSuspicious()) { - if (sv.getEvidenceLevel1() < 3 || - sv.getSelectedSa1().getSupport() < 5) { + if (sv.isSemiSuspicious()) { + if (sv.getEventScore() < 3) { sv.setToRemove(true); continue; } - } - if (sv.getEvidenceLevel1() < 3 && - sv.getSelectedSa2().isSemiSuspicious()) { - if (sv.getEvidenceLevel2() < 3 || - sv.getSelectedSa2().getSupport() < 5) { + if (sv.getSelectedSa1().isSemiSuspicious() && + sv.getSelectedSa2().isSemiSuspicious()) { sv.setToRemove(true); continue; } - } - } - } else { - if (sv.getEventScore() == 3 || sv.getEvidenceLevel2() == 0) { - sv.setToRemove(true); - continue; - } - } - if (sv.isInverted()) { - if (stricterMode) { - if (sv.getSelectedSa1().isStrictFuzzyCandidate() || - sv.getSelectedSa2().isStrictFuzzyCandidate()) { - sv.setToRemove(true); - continue; - } - if 
(sv.isSemiSuspicious()) { - sv.setToRemove(true); - continue; - } - } else { - if (sv.getSelectedSa1().isStrictFuzzyCandidate() && - sv.getSelectedSa2().isStrictFuzzyCandidate()) { - sv.setToRemove(true); - continue; - } - } - if (sv.getEventScore() < 3) { - sv.setToRemove(true); - continue; - } - if (sv.getMateRatio1() < 0.6 && sv.getMateRatio2() < 0.6) { - sv.setToRemove(true); - continue; - } - if (sv.getEventSize() > 0 && sv.getEventSize() < 10000) { - if (stricterMode || sv.getTotalEvidence1() < 5 || - sv.getTotalEvidence2() < 5) { - sv.setToRemove(true); - continue; + if (sv.getEvidenceLevel2() < 3 && + sv.getSelectedSa1().isSemiSuspicious()) { + if (sv.getEvidenceLevel1() < 3 || + sv.getSelectedSa1().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + } + if (sv.getEvidenceLevel1() < 3 && + sv.getSelectedSa2().isSemiSuspicious()) { + if (sv.getEvidenceLevel2() < 3 || + sv.getSelectedSa2().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + } } } else { - if (sv.getTotalEvidence1() < 5 && - sv.getTotalEvidence2() < 5) { + if (sv.getEventScore() == 3 || sv.getEvidenceLevel2() == 0) { sv.setToRemove(true); continue; } } - } - if (stricterMode) { - if (sv.getEvidenceLevel2() == 0) { - if (sv.getSelectedSa1().getSupport() < 10 || - !sv.isOverhang1Compensation()) { - sv.setToRemove(true); - continue; + if (sv.isInverted()) { + if (stricterMode) { + if (sv.getSelectedSa1().isStrictFuzzyCandidate() || + sv.getSelectedSa2().isStrictFuzzyCandidate()) { + sv.setToRemove(true); + continue; + } + if (sv.isSemiSuspicious()) { + sv.setToRemove(true); + continue; + } + } else { + if (sv.getSelectedSa1().isStrictFuzzyCandidate() && + sv.getSelectedSa2().isStrictFuzzyCandidate()) { + sv.setToRemove(true); + continue; + } } - } - if (sv.getEventScore() == 3) { - if (sv.getSelectedSa1().getSupport() < 10) { + if (sv.getEventScore() < 3) { sv.setToRemove(true); continue; } - if (sv.getSelectedSa1().getSecondarySupport() < 5) { + if (sv.getMateRatio1() < 0.6 
&& sv.getMateRatio2() < 0.6) { sv.setToRemove(true); continue; } - if (sv.isDistant()) { - if (sv.getSelectedSa1().getMateSupport() < 10) { + if (sv.getEventSize() > 0 && sv.getEventSize() < 10000) { + if (stricterMode || sv.getTotalEvidence1() < 5 || + sv.getTotalEvidence2() < 5) { sv.setToRemove(true); continue; } - } - } - if (sv.isDistant()) { - if (sv.getSelectedSa1().isProperPairErrorProne() && - (sv.getSelectedSa2().isProperPairErrorProne() || - sv.getEvidenceLevel2() == 0)) { - if (sv.getSelectedSa1().getSupport() < 5 || - sv.getSelectedSa2().getSupport() < 5) { + } else { + if (sv.getTotalEvidence1() < 5 && + sv.getTotalEvidence2() < 5) { sv.setToRemove(true); continue; } - if (sv.getSelectedSa1().getSecondarySupport() < 3 && - sv.getSelectedSa2().getSecondarySupport() < 3) { + } + } + if (stricterMode) { + if (sv.getEvidenceLevel2() == 0) { + if (sv.getSelectedSa1().getSupport() < 10 || + !sv.isOverhang1Compensation()) { sv.setToRemove(true); continue; } } - if (sv.getSelectedSa1().isSemiSuspicious()) { - if (sv.getEvidenceLevel1() < 3 || - sv.getSelectedSa1().getSupport() < 5) { + if (sv.getEventScore() == 3) { + if (sv.getSelectedSa1().getSupport() < 10) { sv.setToRemove(true); continue; } - } - if (sv.getSelectedSa2().isSemiSuspicious()) { - if (sv.getEvidenceLevel2() < 3 || - sv.getSelectedSa2().getSupport() < 5) { + if (sv.getSelectedSa1().getSecondarySupport() < 5) { sv.setToRemove(true); continue; } + if (sv.isDistant()) { + if (sv.getSelectedSa1().getMateSupport() < 10) { + sv.setToRemove(true); + continue; + } + } } + if (sv.isDistant()) { + if (sv.getSelectedSa1().isProperPairErrorProne() && + (sv.getSelectedSa2().isProperPairErrorProne() || + sv.getEvidenceLevel2() == 0)) { + if (sv.getSelectedSa1().getSupport() < 5 || + sv.getSelectedSa2().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + if (sv.getSelectedSa1().getSecondarySupport() < 3 && + sv.getSelectedSa2().getSecondarySupport() < 3) { + sv.setToRemove(true); + continue; + } + 
} + if (sv.getSelectedSa1().isSemiSuspicious()) { + if (sv.getEvidenceLevel1() < 3 || + sv.getSelectedSa1().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + } + if (sv.getSelectedSa2().isSemiSuspicious()) { + if (sv.getEvidenceLevel2() < 3 || + sv.getSelectedSa2().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + } - if (sv.getEvidenceLevel2() == 0) { - if (sv.getEvidenceLevel1() < 3 || - sv.getSelectedSa1().getSupport() < 5) { + if (sv.getEvidenceLevel2() == 0) { + if (sv.getEvidenceLevel1() < 3 || + sv.getSelectedSa1().getSupport() < 5) { + sv.setToRemove(true); + continue; + } + } + if (sv.getEvidenceLevel1() < 3 && + sv.getEvidenceLevel2() < 3) { + sv.setToRemove(true); + continue; + } + } else { + if (sv.getEvidenceLevel1() < 2 || + sv.getEvidenceLevel2() < 2) { + sv.setToRemove(true); + continue; + } + if (sv.getTotalEvidence1() < 5 && + sv.getTotalEvidence2() < 5) { sv.setToRemove(true); continue; } } - if (sv.getEvidenceLevel1() < 3 && - sv.getEvidenceLevel2() < 3) { - sv.setToRemove(true); - continue; - } - } else { - if (sv.getEvidenceLevel1() < 2 || - sv.getEvidenceLevel2() < 2) { - sv.setToRemove(true); - continue; - } - if (sv.getTotalEvidence1() < 5 && - sv.getTotalEvidence2() < 5) { - sv.setToRemove(true); - continue; - } - } - if (sv.getTotalEvidence2() < 5) { - auto evidenceLevelThreshold = sv.isDistant() ? 3 : 2; - if (sv.getEvidenceLevel1() < evidenceLevelThreshold) { - sv.setToRemove(true); - continue; + if (sv.getTotalEvidence2() < 5) { + auto evidenceLevelThreshold = sv.isDistant() ? 
3 : 2; + if (sv.getEvidenceLevel1() < evidenceLevelThreshold) { + sv.setToRemove(true); + continue; + } } } } + return true; } - return true; - } - return false; -} -bool -AnnotationProcessor::applyPathogenContaminationFiltering() { - auto likelyContaminants = 0; - for (auto &sv : filteredResults) { - if (sv.isToRemove()) { - continue; - } - if (sv.getContaminationCandidate() != 2) { - continue; - } - if (sv.getSuspicious() != 0) { - continue; - } - if (sv.getEventScore() < 3) { - continue; - } - ++likelyContaminants; - if (likelyContaminants > 9) { - break; - } + return false; } - if (likelyContaminants > 9) { - auto cleanedContaminants = 0; + bool + AnnotationProcessor::applyPathogenContaminationFiltering() { + auto likelyContaminants = 0; for (auto &sv : filteredResults) { if (sv.isToRemove()) { continue; } + if (sv.getContaminationCandidate() != 2) { + continue; + } if (sv.getSuspicious() != 0) { continue; } if (sv.getEventScore() < 3) { continue; } - if (sv.getContaminationCandidate() == 2) { - ++cleanedContaminants; + ++likelyContaminants; + if (likelyContaminants > 9) { + break; } } - if (cleanedContaminants > 19) { + if (likelyContaminants > 9) { + auto cleanedContaminants = 0; for (auto &sv : filteredResults) { if (sv.isToRemove()) { continue; @@ -1100,116 +1151,131 @@ AnnotationProcessor::applyPathogenContaminationFiltering() { if (sv.getSuspicious() != 0) { continue; } - if (sv.getEventScore() < 2) { - continue; - } - if (sv.getContaminationCandidate() > 0) { - sv.setEventScore(1); - sv.setEventType(5); - continue; - } - if (sv.getSelectedSa1().getSupport() > 19 && - sv.getSelectedSa1().getSecondarySupport() < 3) { - sv.setEventScore(1); - sv.setEventType(5); + if (sv.getEventScore() < 3) { continue; } - if (sv.getSelectedSa2().getSupport() > 19 && - sv.getSelectedSa2().getSecondarySupport() < 3) { - sv.setEventScore(1); - sv.setEventType(5); - continue; + if (sv.getContaminationCandidate() == 2) { + ++cleanedContaminants; } - if 
(sv.getOverhang1lengthRatio() > 0.7 || - sv.getOverhang2lengthRatio() > 0.7) { - sv.setEventScore(1); - sv.setEventType(5); - continue; + } + if (cleanedContaminants > 19) { + for (auto &sv : filteredResults) { + if (sv.isToRemove()) { + continue; + } + if (sv.getSuspicious() != 0) { + continue; + } + if (sv.getEventScore() < 2) { + continue; + } + if (sv.getContaminationCandidate() > 0) { + sv.setEventScore(1); + sv.setEventType(5); + continue; + } + if (sv.getSelectedSa1().getSupport() > 19 && + sv.getSelectedSa1().getSecondarySupport() < 3) { + sv.setEventScore(1); + sv.setEventType(5); + continue; + } + if (sv.getSelectedSa2().getSupport() > 19 && + sv.getSelectedSa2().getSecondarySupport() < 3) { + sv.setEventScore(1); + sv.setEventType(5); + continue; + } + if (sv.getOverhang1lengthRatio() > 0.7 || + sv.getOverhang2lengthRatio() > 0.7) { + sv.setEventScore(1); + sv.setEventType(5); + continue; + } } } + return true; } - return true; - } - return false; -} -void -AnnotationProcessor::printFilteredResults(bool contaminationInControl, - int controlPrefilteringLevel) const { - if (controlPrefilteringLevel > 0) { - cout << "#controlMassiveInvPrefilteringLevel\t" - << controlPrefilteringLevel << endl; - } - if (massiveInvFilteringLevel > 0) { - cout << "#tumorMassiveInvFilteringLevel\t" << massiveInvFilteringLevel - << endl; - } - if (contaminationInControl) { - cout << "#likelyPathogenInControl\tTRUE" << endl; + return false; } - if (contaminationObserved) { - cout << "#likelyPathogenInTumor\tTRUE" << endl; - } - for (const auto &sv : filteredResults) { - if (!sv.isToRemove()) { - cout << sv.printMatch(overhangs); + void + AnnotationProcessor::printFilteredResults(bool contaminationInControl, + int controlPrefilteringLevel) const { + if (controlPrefilteringLevel > 0) { + std::cout << "#controlMassiveInvPrefilteringLevel\t" + << controlPrefilteringLevel << std::endl; + } + if (massiveInvFilteringLevel > 0) { + std::cout << "#tumorMassiveInvFilteringLevel\t" << 
massiveInvFilteringLevel + << std::endl; + } + if (contaminationInControl) { + std::cout << "#likelyPathogenInControl\tTRUE" << std::endl; + } + if (contaminationObserved) { + std::cout << "#likelyPathogenInTumor\tTRUE" << std::endl; + } + for (const auto &sv : filteredResults) { + if (!sv.isToRemove()) { + std::cout << sv.printMatch(overhangs); + } } } -} -void -AnnotationProcessor::printUnresolvedRareOverhangs( - vector> &mref) { - if (massiveInvFilteringLevel != 0) { - return; - } - sort(visitedLineIndices.begin(), visitedLineIndices.end()); - unordered_set visitedLineIndicesSet{visitedLineIndices.begin(), - visitedLineIndices.end()}; - for (const auto &tumorChromosome : tumorResults) { - for (const auto &bp : tumorChromosome) { - if (visitedLineIndicesSet.count(bp.getLineIndex())) { - continue; - } - if (bp.testOverhangBasedCandidacy()) { - auto mrefHits = searchMrefHitsNew( - bp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD, mref); - if (mrefHits.getNumHits() < SvEvent::GERMLINEDBLIMIT) { - auto germlineClonality = 1.0; - if (!NOCONTROLMODE) { - germlineClonality = - searchGermlineHitsNew( - bp, SuppAlignmentAnno::DEFAULTREADLENGTH * 6, - SvEvent::GERMLINEOFFSETTHRESHOLD) - .getClonality(); - } - string overhang{""}; - { - pair dummy{bp.getLineIndex(), ""}; - auto lower = lower_bound(overhangs.cbegin(), - overhangs.cend(), dummy); - if (lower != overhangs.cend()) { - if (lower->first == bp.getLineIndex()) { - overhang = lower->second; - } else if (next(lower) != overhangs.cend() && - next(lower)->first == - bp.getLineIndex()) { - overhang = next(lower)->second; - } else if (lower != overhangs.cbegin() && - prev(lower)->first == - bp.getLineIndex()) { - overhang = prev(lower)->second; + void + AnnotationProcessor::printUnresolvedRareOverhangs( + std::vector> &mref) { + if (massiveInvFilteringLevel != 0) { + return; + } + sort(visitedLineIndices.begin(), visitedLineIndices.end()); + std::unordered_set 
visitedLineIndicesSet{visitedLineIndices.begin(), + visitedLineIndices.end()}; + for (const auto &tumorChromosome : tumorResults) { + for (const auto &bp : tumorChromosome) { + if (visitedLineIndicesSet.count(bp.getLineIndex())) { + continue; + } + if (bp.testOverhangBasedCandidacy()) { + auto mrefHits = searchMrefHitsNew( + bp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD, mref); + if (mrefHits.getNumHits() < SvEvent::GERMLINE_DB_LIMIT) { + auto germlineClonality = 1.0; + if (!NO_CONTROL_MODE) { + germlineClonality = + searchGermlineHitsNew( + bp, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6, + SvEvent::GERMLINE_OFFSET_THRESHOLD) + .getClonality(); + } + std::string overhang{""}; + { + std::pair dummy{bp.getLineIndex(), ""}; + auto lower = lower_bound(overhangs.cbegin(), + overhangs.cend(), dummy); + if (lower != overhangs.cend()) { + if (lower->first == bp.getLineIndex()) { + overhang = lower->second; + } else if (next(lower) != overhangs.cend() && + next(lower)->first == + bp.getLineIndex()) { + overhang = next(lower)->second; + } else if (lower != overhangs.cbegin() && + prev(lower)->first == + bp.getLineIndex()) { + overhang = prev(lower)->second; + } } } - } - if (!overhang.empty()) { - cout << bp.printOverhang( - germlineClonality, mrefHits.getNumHits(), overhang); + if (!overhang.empty()) { + std::cout << bp.printOverhang( + germlineClonality, mrefHits.getNumHits(), overhang); + } } } } } } -} } /* namespace sophia */ diff --git a/src/Breakpoint.cpp b/src/Breakpoint.cpp index 1cfccc5..90b826c 100644 --- a/src/Breakpoint.cpp +++ b/src/Breakpoint.cpp @@ -23,474 +23,552 @@ */ #include "Breakpoint.h" -#include "ChrConverter.h" -#include "strtk.hpp" +#include "GlobalAppConfig.h" +#include "strtk-wrap.h" +#include "global.h" #include +#include #include +#include #include #include #include namespace sophia { -using namespace std; + const std::string Breakpoint::COLUMN_STR = boost::join( + std::vector{ + "#chr", "start", "end", + 
"covTypes(pairedBreaksSoft,pairedBreaksHard,mateReadSupport," + "unpairedBreaksSoft,unpairedBreaksHard,shortIndelReads,normalSpans," + "lowQualSpansSoft,lowQualSpansHard,lowQualBreaksSoft,lowQualBreaksHard," + "repetitiveOverhangs)", + "leftCoverage,rightCoverage", + "suppAlignmentsDoubleSupport(primary,secondary,mate)", + "suppAlignments(primary,0,mate)", "significantOverhangs\n"}, + "\t"); -const string Breakpoint::COLUMNSSTR = boost::join( - vector{ - // - "#chr", "start", "end", // - "covTypes(pairedBreaksSoft,pairedBreaksHard,mateReadSupport," - "unpairedBreaksSoft,unpairedBreaksHard,shortIndelReads,normalSpans," - "lowQualSpansSoft,lowQualSpansHard,lowQualBreaksSoft,lowQualBreaksHard," - "repetitiveOverhangs)", // - "leftCoverage,rightCoverage", // - "suppAlignmentsDoubleSupport(primary,secondary,mate)", - "suppAlignments(primary,0,mate)", "significantOverhangs\n"}, - "\t"); -int Breakpoint::BPSUPPORTTHRESHOLD{}; -int Breakpoint::DEFAULTREADLENGTH{}; -int Breakpoint::DISCORDANTLOWQUALLEFTRANGE{}; -int Breakpoint::DISCORDANTLOWQUALRIGHTRANGE{}; -double Breakpoint::IMPROPERPAIRRATIO{0.0}; -bool Breakpoint::PROPERPAIRCOMPENSATIONMODE{false}; -int Breakpoint::bpindex{0}; + int Breakpoint::BP_SUPPORT_THRESHOLD{}; -Breakpoint::Breakpoint(int chrIndexIn, int posIn) - : covFinalized{false}, missingInfoBp{false}, chrIndex{chrIndexIn}, - pos{posIn}, normalSpans{0}, lowQualSpansSoft{0}, lowQualSpansHard{0}, - unpairedBreaksSoft{0}, unpairedBreaksHard{0}, breaksShortIndel{0}, - lowQualBreaksSoft{0}, lowQualBreaksHard{0}, repetitiveOverhangBreaks{0}, - pairedBreaksSoft{0}, pairedBreaksHard{0}, leftSideDiscordantCandidates{0}, - rightSideDiscordantCandidates{0}, mateSupport{0}, leftCoverage{0}, - rightCoverage{0}, totalLowMapqHardClips{0}, - hitsInMref{-1}, germline{false}, poolLeft{}, poolRight{}, - poolLowQualLeft{}, poolLowQualRight{} {} + ChrSize Breakpoint::DEFAULT_READ_LENGTH{}; -void -Breakpoint::addSoftAlignment(shared_ptr alignmentIn) { - if 
(!alignmentIn->isSupplementary()) { - if (supportingSoftAlignments.size() <= MAXPERMISSIBLESOFTCLIPS) { - supportingSoftAlignments.push_back(alignmentIn); + ChrSize Breakpoint::DISCORDANT_LOW_QUAL_LEFT_RANGE{}; + + ChrSize Breakpoint::DISCORDANT_LOW_QUAL_RIGHT_RANGE{}; + + double Breakpoint::IMPROPER_PAIR_RATIO{0.0}; + + bool Breakpoint::PROPER_PAIR_COMPENSATION_MODE{false}; + + int Breakpoint::bpindex{0}; + + + template + inline void + Breakpoint::cleanUpVector(std::vector &objectPool) { + while (!objectPool.empty() && objectPool.back().isToRemove()) { + objectPool.pop_back(); + } + for (auto saIt = objectPool.begin(); saIt != objectPool.end(); ++saIt) { + if (saIt->isToRemove()) { + std::swap(*saIt, objectPool.back()); + } + while (!objectPool.empty() && objectPool.back().isToRemove()) { + objectPool.pop_back(); + } } } -} -void -Breakpoint::addHardAlignment(shared_ptr alignmentIn) { - if (alignmentIn->isSupplementary()) { - if (!(alignmentIn->isLowMapq() || alignmentIn->isNullMapq())) { - if (supportingHardAlignments.size() <= MAXPERMISSIBLEHARDCLIPS) { - supportingHardAlignments.push_back(alignmentIn); + void + Breakpoint::addSoftAlignment(std::shared_ptr alignmentIn) { + if (!alignmentIn->isSupplementary()) { + if (supportingSoftAlignments.size() <= MAX_PERMISSIBLE_SOFTCLIPS) { + supportingSoftAlignments.push_back(alignmentIn); } - } else { - if (totalLowMapqHardClips < MAXPERMISSIBLELOWMAPQHARDCLIPS) { - supportingHardLowMapqAlignments.push_back(alignmentIn); - ++totalLowMapqHardClips; + } + } + + void + Breakpoint::addHardAlignment(std::shared_ptr alignmentIn) { + if (alignmentIn->isSupplementary()) { + if (!(alignmentIn->isLowMapq() || alignmentIn->isNullMapq())) { + if (supportingHardAlignments.size() <= MAX_PERMISSIBLE_HARDCLIPS) { + supportingHardAlignments.push_back(alignmentIn); + } } else { - supportingHardLowMapqAlignments.clear(); + if (totalLowMapqHardClips < MAX_PERMISSIBLE_LOW_MAPQ_HARDCLIPS) { + 
supportingHardLowMapqAlignments.push_back(alignmentIn); + ++totalLowMapqHardClips; + } else { + supportingHardLowMapqAlignments.clear(); + } } } } -} -bool -Breakpoint::finalizeBreakpoint( - const deque &discordantAlignmentsPool, - const deque &discordantLowQualAlignmentsPool, - const deque &discordantAlignmentCandidatesPool) { - auto overhangStr = string(); - auto eventTotal = - unpairedBreaksSoft + unpairedBreaksHard + breaksShortIndel; - auto artifactTotal = - lowQualBreaksSoft + lowQualSpansSoft + lowQualSpansHard; - if ((eventTotal + artifactTotal > 50) && - (artifactTotal / (0.0 + eventTotal + artifactTotal)) > 0.85) { - ++bpindex; - missingInfoBp = true; - } else if (static_cast(supportingSoftAlignments.size()) == - MAXPERMISSIBLESOFTCLIPS && - eventTotal + normalSpans + artifactTotal > - MAXPERMISSIBLEHARDCLIPS * 20) { - ++bpindex; - missingInfoBp = true; - } else { - fillMatePool(discordantAlignmentsPool, discordantLowQualAlignmentsPool, - discordantAlignmentCandidatesPool); - if (eventTotal < BPSUPPORTTHRESHOLD && - static_cast(poolLeft.size()) < BPSUPPORTTHRESHOLD && - static_cast(poolLowQualLeft.size()) < BPSUPPORTTHRESHOLD && - static_cast(poolRight.size()) < BPSUPPORTTHRESHOLD && - static_cast(poolLowQualRight.size()) < BPSUPPORTTHRESHOLD) { - if (artifactTotal < normalSpans) { - return false; + // TODO Check for hard-coded cutoffs. Centralize these in GlobalAppConfig. 
+ bool + Breakpoint::finalizeBreakpoint( + const std::deque &discordantAlignmentsPool, + const std::deque &discordantLowQualAlignmentsPool, + const std::deque &discordantAlignmentCandidatesPool) { + auto overhangStr = std::string(); + auto eventTotal = + unpairedBreaksSoft + unpairedBreaksHard + breaksShortIndel; + auto artifactTotal = + lowQualBreaksSoft + lowQualSpansSoft + lowQualSpansHard; + int branch = 0; + if ((eventTotal + artifactTotal > 50) && + (artifactTotal / (0.0 + eventTotal + artifactTotal)) > 0.85) { + branch = 1; + ++bpindex; + missingInfoBp = true; + } else if (static_cast(supportingSoftAlignments.size()) == MAX_PERMISSIBLE_SOFTCLIPS && + eventTotal + normalSpans + artifactTotal > MAX_PERMISSIBLE_HARDCLIPS * 20) { + branch = 2; + ++bpindex; + missingInfoBp = true; + } else { + branch = 3; + fillMatePool(discordantAlignmentsPool, discordantLowQualAlignmentsPool, + discordantAlignmentCandidatesPool); + if (eventTotal < BP_SUPPORT_THRESHOLD && + static_cast(poolLeft.size()) < BP_SUPPORT_THRESHOLD && + static_cast(poolLowQualLeft.size()) < BP_SUPPORT_THRESHOLD && + static_cast(poolRight.size()) < BP_SUPPORT_THRESHOLD && + static_cast(poolLowQualRight.size()) < BP_SUPPORT_THRESHOLD) { + if (artifactTotal < normalSpans) { + return false; + } else { + ++bpindex; + missingInfoBp = true; + } } else { - ++bpindex; - missingInfoBp = true; + overhangStr = finalizeOverhangs(); + detectDoubleSupportSupps(); + collectMateSupport(); } - } else { - overhangStr = finalizeOverhangs(); - detectDoubleSupportSupps(); - collectMateSupport(); } - } - if (eventTotal + mateSupport + artifactTotal < BPSUPPORTTHRESHOLD || - (eventTotal + artifactTotal < BPSUPPORTTHRESHOLD && - doubleSidedMatches.empty() && supplementsPrimary.empty())) { - return false; - } - if (missingInfoBp || - (doubleSidedMatches.empty() && supplementsPrimary.empty() && - overhangStr.size() < 2)) { - auto artifactTotal2 = lowQualBreaksSoft + lowQualSpansHard + - lowQualSpansSoft + 
repetitiveOverhangBreaks; - auto artifactTotal2Relaxed = - lowQualBreaksSoft + lowQualSpansSoft + repetitiveOverhangBreaks; - auto eventTotal2 = pairedBreaksSoft + pairedBreaksHard + - unpairedBreaksSoft + unpairedBreaksHard + - breaksShortIndel; - auto eventTotal2Strict = - pairedBreaksSoft + unpairedBreaksSoft + pairedBreaksHard; - auto covCriterion = (eventTotal2 + artifactTotal2) > 10; - if (!(covCriterion && eventTotal2Strict + artifactTotal2Relaxed > 0)) { + if (eventTotal + mateSupport + artifactTotal < BP_SUPPORT_THRESHOLD || + (eventTotal + artifactTotal < BP_SUPPORT_THRESHOLD && + doubleSidedMatches.empty() && supplementsPrimary.empty())) { + if (chrIndex == 999 && pos == 19404) { + std::cerr << "Breakpoint: " << chrIndex << ":" << pos << std::endl + << "OverhangStr: " << overhangStr << std::endl + << "MissingInfoBp: " << missingInfoBp << std::endl + << "Branch: " << branch << std::endl + << "Short circuited 1!" << std::endl; + } return false; } + if (missingInfoBp || + (doubleSidedMatches.empty() && supplementsPrimary.empty() && + overhangStr.size() < 2)) { + auto artifactTotal2 = lowQualBreaksSoft + lowQualSpansHard + + lowQualSpansSoft + repetitiveOverhangBreaks; + auto artifactTotal2Relaxed = + lowQualBreaksSoft + lowQualSpansSoft + repetitiveOverhangBreaks; + auto eventTotal2 = pairedBreaksSoft + pairedBreaksHard + + unpairedBreaksSoft + unpairedBreaksHard + + breaksShortIndel; + auto eventTotal2Strict = + pairedBreaksSoft + unpairedBreaksSoft + pairedBreaksHard; + auto covCriterion = (eventTotal2 + artifactTotal2) > 10; + if (!(covCriterion && eventTotal2Strict + artifactTotal2Relaxed > 0)) { + if (chrIndex == 999 && pos == 19404) { + std::cerr << "Breakpoint: " << chrIndex << ":" << pos << std::endl + << "OverhangStr: " << overhangStr << std::endl + << "MissingInfoBp: " << missingInfoBp << std::endl + << "Branch: " << branch << std::endl + << "Short circuited 2!" 
<< std::endl; + } + return false; + } + } + if (chrIndex == 999 && pos == 19404) { + std::cerr << "Breakpoint: " << chrIndex << ":" << pos << std::endl + << "OverhangStr: " << overhangStr << std::endl + << "MissingInfoBp: " << missingInfoBp << std::endl + << "Branch: " << branch << std::endl + << "Printed!" << std::endl; + } + printBreakpointReport(overhangStr); + return true; } - printBreakpointReport(overhangStr); - return true; -} -void -Breakpoint::printBreakpointReport(const string &overhangStr) { - string res{}; - res.reserve(350); - res.append(ChrConverter::indexToChr[chrIndex]).append("\t"); - res.append(strtk::type_to_string(pos)).append("\t"); - res.append(strtk::type_to_string(pos + 1)).append("\t"); + void + Breakpoint::printBreakpointReport(const std::string &overhangStr) { + std::string res{}; + res.reserve(350); + res.append(GlobalAppConfig::getInstance().getChrConverter(). + indexToChrName(chrIndex)). + append("\t"); + res.append(strtk::type_to_string(pos)). + append("\t"); + res.append(strtk::type_to_string(pos + 1)). 
+ append("\t"); - res.append(strtk::type_to_string(pairedBreaksSoft)).append(","); - res.append(strtk::type_to_string(pairedBreaksHard)).append(","); - res.append(strtk::type_to_string(mateSupport)).append(","); - res.append(strtk::type_to_string(unpairedBreaksSoft)).append(","); - res.append(strtk::type_to_string(unpairedBreaksHard)).append(","); - res.append(strtk::type_to_string(breaksShortIndel)).append(","); + res.append(strtk::type_to_string(pairedBreaksSoft)).append(","); + res.append(strtk::type_to_string(pairedBreaksHard)).append(","); + res.append(strtk::type_to_string(mateSupport)).append(","); + res.append(strtk::type_to_string(unpairedBreaksSoft)).append(","); + res.append(strtk::type_to_string(unpairedBreaksHard)).append(","); + res.append(strtk::type_to_string(breaksShortIndel)).append(","); - res.append(strtk::type_to_string(normalSpans)).append(","); + res.append(strtk::type_to_string(normalSpans)).append(","); - res.append(strtk::type_to_string(lowQualSpansSoft)).append(","); - res.append(strtk::type_to_string(lowQualSpansHard)).append(","); - res.append(strtk::type_to_string(lowQualBreaksSoft)).append(","); - res.append(strtk::type_to_string(lowQualBreaksHard)).append(","); - res.append(strtk::type_to_string(repetitiveOverhangBreaks)) - .append("\t"); + res.append(strtk::type_to_string(lowQualSpansSoft)).append(","); + res.append(strtk::type_to_string(lowQualSpansHard)).append(","); + res.append(strtk::type_to_string(lowQualBreaksSoft)).append(","); + res.append(strtk::type_to_string(lowQualBreaksHard)).append(","); + res.append(strtk::type_to_string(repetitiveOverhangBreaks)). 
+ append("\t"); - res.append(strtk::type_to_string(leftCoverage)).append(","); - res.append(strtk::type_to_string(rightCoverage)).append("\t"); - if (missingInfoBp) { - res.append("#\t#\t#\n"); - } else { - collapseSuppRange(res, doubleSidedMatches); - res.append("\t"); - collapseSuppRange(res, supplementsPrimary); - res.append("\t"); - if (overhangStr.empty()) { - res.append(".").append("\n"); + res.append(strtk::type_to_string(leftCoverage)).append(","); + res.append(strtk::type_to_string(rightCoverage)). + append("\t"); + + if (missingInfoBp) { + res.append("#\t#\t#\n"); } else { - res.append(overhangStr).append("\n"); + collapseSuppRange(res, doubleSidedMatches); + res.append("\t"); + collapseSuppRange(res, supplementsPrimary); + res.append("\t"); + if (overhangStr.empty()) { + res.append(".").append("\n"); + } else { + res.append(overhangStr).append("\n"); + } } + std::cout << res; } - cout << res; -} -void -Breakpoint::collapseSuppRange(string &res, - const vector &vec) const { - if (vec.empty()) { - res.append("."); - } else { - for (const auto &suppAlignment : vec) { - res.append(suppAlignment.print()).append(";"); + void + Breakpoint::collapseSuppRange(std::string &res, + const std::vector &vec) const { + if (vec.empty()) { + res.append("."); + } else { + for (const auto &suppAlignment : vec) { + res.append(suppAlignment.print()).append(";"); + } + res.pop_back(); } - res.pop_back(); } -} -string -Breakpoint::finalizeOverhangs() { - ++bpindex; - for (auto i = 0u; i < supportingSoftAlignments.size(); ++i) { - supportingSoftAlignments[i]->setChosenBp(pos, i); - if (supportingSoftAlignments[i]->assessOutlierMateDistance()) { - if (supportingSoftAlignments[i]->getMateChrIndex() < 1002) { - if (supportingSoftAlignments[i]->isOverhangEncounteredM()) { - if (!(supportingSoftAlignments[i]->isNullMapq() || - supportingSoftAlignments[i]->isLowMapq())) { - poolLeft.emplace_back( - supportingSoftAlignments[i]->getStartPos(), - supportingSoftAlignments[i]->getEndPos(), 
- supportingSoftAlignments[i]->getMateChrIndex(), - supportingSoftAlignments[i]->getMatePos(), 0, - supportingSoftAlignments[i]->isInvertedMate()); - } else { - poolLowQualLeft.emplace_back( - supportingSoftAlignments[i]->getStartPos(), - supportingSoftAlignments[i]->getEndPos(), - supportingSoftAlignments[i]->getMateChrIndex(), - supportingSoftAlignments[i]->getMatePos(), 0, - supportingSoftAlignments[i]->isInvertedMate()); - } - } else { - if (!(supportingSoftAlignments[i]->isNullMapq() || - supportingSoftAlignments[i]->isLowMapq())) { - poolRight.emplace_back( - supportingSoftAlignments[i]->getStartPos(), - supportingSoftAlignments[i]->getEndPos(), - supportingSoftAlignments[i]->getMateChrIndex(), - supportingSoftAlignments[i]->getMatePos(), 0, - supportingSoftAlignments[i]->isInvertedMate()); + std::string + Breakpoint::finalizeOverhangs() { + ++bpindex; + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (chrIndex == 999 && pos == 19404) { + std::cerr << "Breakpoint: " << chrIndex << ":" << pos << std::endl + << "supportingSoftAlignments.size()" << supportingSoftAlignments.size() << std::endl; + } + for (size_t i = 0u; i < supportingSoftAlignments.size(); ++i) { + supportingSoftAlignments[i]->setChosenBp(pos, i); + if (supportingSoftAlignments[i]->assessOutlierMateDistance()) { + if (!chrConverter.isTechnical( + supportingSoftAlignments[i]->getMateChrIndex())) { + if (supportingSoftAlignments[i]->isOverhangEncounteredM()) { + if (!(supportingSoftAlignments[i]->isNullMapq() || + supportingSoftAlignments[i]->isLowMapq())) { + poolLeft.emplace_back( + supportingSoftAlignments[i]->getStartPos(), + supportingSoftAlignments[i]->getEndPos(), + supportingSoftAlignments[i]->getMateChrIndex(), + supportingSoftAlignments[i]->getMatePos(), 0, + supportingSoftAlignments[i]->isInvertedMate()); + } else { + poolLowQualLeft.emplace_back( + supportingSoftAlignments[i]->getStartPos(), + supportingSoftAlignments[i]->getEndPos(), + 
supportingSoftAlignments[i]->getMateChrIndex(), + supportingSoftAlignments[i]->getMatePos(), 0, + supportingSoftAlignments[i]->isInvertedMate()); + } } else { - poolLowQualRight.emplace_back( - supportingSoftAlignments[i]->getStartPos(), - supportingSoftAlignments[i]->getEndPos(), - supportingSoftAlignments[i]->getMateChrIndex(), - supportingSoftAlignments[i]->getMatePos(), 0, - supportingSoftAlignments[i]->isInvertedMate()); + if (!(supportingSoftAlignments[i]->isNullMapq() || + supportingSoftAlignments[i]->isLowMapq())) { + poolRight.emplace_back( + supportingSoftAlignments[i]->getStartPos(), + supportingSoftAlignments[i]->getEndPos(), + supportingSoftAlignments[i]->getMateChrIndex(), + supportingSoftAlignments[i]->getMatePos(), 0, + supportingSoftAlignments[i]->isInvertedMate()); + } else { + poolLowQualRight.emplace_back( + supportingSoftAlignments[i]->getStartPos(), + supportingSoftAlignments[i]->getEndPos(), + supportingSoftAlignments[i]->getMateChrIndex(), + supportingSoftAlignments[i]->getMatePos(), 0, + supportingSoftAlignments[i]->isInvertedMate()); + } } } } } - } - vector supplementsPrimaryTmp{}; - sort(supportingSoftAlignments.begin(), supportingSoftAlignments.end(), - [](const shared_ptr &a, const shared_ptr &b) { - return a->getOverhangLength() < b->getOverhangLength(); - }); - vector> supportingSoftParentAlignments{}; - while (!supportingSoftAlignments.empty()) { - auto substrCheck = false; - auto tmpSas = supportingSoftAlignments.back()->generateSuppAlignments( - chrIndex, pos); - for (auto overhangParent : supportingSoftParentAlignments) { - if (matchDetector(overhangParent, - supportingSoftAlignments.back())) { - substrCheck = true; - overhangParent->addChildNode( - supportingSoftAlignments.back()->getOriginIndex()); - overhangParent->addSupplementaryAlignments(tmpSas); + std::vector supplementsPrimaryTmp {}; + sort(supportingSoftAlignments.begin(), supportingSoftAlignments.end(), + [](const std::shared_ptr &a, const std::shared_ptr &b) { + 
return a->getOverhangLength() < b->getOverhangLength(); + }); + std::vector> supportingSoftParentAlignments{}; + while (!supportingSoftAlignments.empty()) { + auto substrCheck = false; + auto tmpSas = supportingSoftAlignments.back()->generateSuppAlignments(chrIndex, pos); + for (auto overhangParent : supportingSoftParentAlignments) { + if (matchDetector(overhangParent, + supportingSoftAlignments.back())) { + substrCheck = true; + overhangParent->addChildNode( + supportingSoftAlignments.back()->getOriginIndex()); + overhangParent->addSupplementaryAlignments(tmpSas); + } } - } - if (!substrCheck) { - auto allDistant = - !tmpSas.empty() && - all_of(cbegin(tmpSas), cend(tmpSas), - [](const SuppAlignment &sa) { return sa.isDistant(); }); - if (allDistant || supportingSoftAlignments.back() - ->overhangComplexityMaskRatio() <= 0.5) { - if (supportingSoftAlignments.back()->getOverhangLength() >= - 20) { - supportingSoftAlignments.back()->addSupplementaryAlignments( - tmpSas); - supportingSoftParentAlignments.push_back( - supportingSoftAlignments.back()); - } else { - for (const auto &sa : tmpSas) { - auto it = - find_if(supplementsPrimaryTmp.begin(), - supplementsPrimaryTmp.end(), - [&](const SuppAlignment &suppAlignment) { - return suppAlignment.saCloseness(sa, 5); - }); - if (it == supplementsPrimaryTmp.end()) { - supplementsPrimaryTmp.push_back(sa); - } else { - if (it->isFuzzy() && !sa.isFuzzy()) { - it->removeFuzziness(sa); - } else if (it->isFuzzy() && sa.isFuzzy()) { - it->extendSuppAlignment(sa.getPos(), - sa.getExtendedPos()); - } - it->addSupportingIndices( - supportingSoftAlignments.back() - ->getChildrenNodes()); - if (it->isNullMapqSource() && - !supportingSoftAlignments.back() - ->isNullMapq()) { - it->setNullMapqSource(false); + if (!substrCheck) { + auto allDistant = + !tmpSas.empty() && + all_of(cbegin(tmpSas), cend(tmpSas), + [](const SuppAlignment &sa) { return sa.isDistant(); }); + if (allDistant || 
supportingSoftAlignments.back()->overhangComplexityMaskRatio() <= 0.5) { + if (supportingSoftAlignments.back()->getOverhangLength() >= 20) { + supportingSoftAlignments.back()->addSupplementaryAlignments(tmpSas); + supportingSoftParentAlignments.push_back(supportingSoftAlignments.back()); + } else { + for (const auto &sa : tmpSas) { + auto it = + find_if(supplementsPrimaryTmp.begin(), + supplementsPrimaryTmp.end(), + [&](const SuppAlignment &suppAlignment) { + return suppAlignment.saCloseness(sa, 5); + }); + if (it == supplementsPrimaryTmp.end()) { + supplementsPrimaryTmp.push_back(sa); + } else { + if (it->isFuzzy() && !sa.isFuzzy()) { + it->removeFuzziness(sa); + } else if (it->isFuzzy() && sa.isFuzzy()) { + it->extendSuppAlignment(sa.getPos(), + sa.getExtendedPos()); + } + it->addSupportingIndices( + supportingSoftAlignments.back() + ->getChildrenNodes()); + if (it->isNullMapqSource() && + !supportingSoftAlignments.back() + ->isNullMapq()) { + it->setNullMapqSource(false); + } } } } + } else { + --unpairedBreaksSoft; + ++repetitiveOverhangBreaks; } - } else { - --unpairedBreaksSoft; - ++repetitiveOverhangBreaks; } + supportingSoftAlignments.pop_back(); } - supportingSoftAlignments.pop_back(); - } - string consensusOverhangsTmp{}; - consensusOverhangsTmp.reserve(250); - { - auto i = 1; - auto indexStr = strtk::type_to_string(bpindex); - for (const auto &overhangParent : supportingSoftParentAlignments) { - if (static_cast(overhangParent->getChildrenNodes().size()) >= - BPSUPPORTTHRESHOLD) { - consensusOverhangsTmp.append(">") - .append(indexStr) - .append("_") - .append(strtk::type_to_string(i)) - .append(":") - .append(overhangParent->printOverhang()) - .append(";"); - ++i; - } - for (const auto &sa : - overhangParent->getSupplementaryAlignments()) { - if (sa.isToRemove()) { - continue; + + std::string consensusOverhangsTmp {}; + consensusOverhangsTmp.reserve(250); + { + auto i = 1; + auto indexStr = strtk::type_to_string(bpindex); + for (const auto 
&overhangParent : supportingSoftParentAlignments) { + if (static_cast(overhangParent->getChildrenNodes().size()) >= BP_SUPPORT_THRESHOLD) { + consensusOverhangsTmp.append(">") + .append(indexStr) + .append("_") + .append(strtk::type_to_string(i)) + .append(":") + .append(overhangParent->printOverhang()) + .append(";"); + ++i; } - auto it = find_if(supplementsPrimary.begin(), - supplementsPrimary.end(), - [&](const SuppAlignment &suppAlignment) { - return suppAlignment.saCloseness(sa, 5); - }); - if (it == supplementsPrimary.end()) { - supplementsPrimary.push_back(sa); - supplementsPrimary.back().addSupportingIndices( - overhangParent->getChildrenNodes()); - } else { - if (it->isFuzzy() && !sa.isFuzzy()) { - it->removeFuzziness(sa); - } else if (it->isFuzzy() && sa.isFuzzy()) { - it->extendSuppAlignment(sa.getPos(), - sa.getExtendedPos()); + for (const auto &sa : + overhangParent->getSupplementaryAlignments()) { + if (sa.isToRemove()) { + continue; } - it->addSupportingIndices( - overhangParent->getChildrenNodes()); - if (it->isNullMapqSource() && - !supplementsPrimary.back().isNullMapqSource()) { - it->setNullMapqSource(false); + auto it = find_if(supplementsPrimary.begin(), + supplementsPrimary.end(), + [&](const SuppAlignment &suppAlignment) { + return suppAlignment.saCloseness(sa, 5); + }); + if (it == supplementsPrimary.end()) { + supplementsPrimary.push_back(sa); + supplementsPrimary.back().addSupportingIndices( + overhangParent->getChildrenNodes()); + } else { + if (it->isFuzzy() && !sa.isFuzzy()) { + it->removeFuzziness(sa); + } else if (it->isFuzzy() && sa.isFuzzy()) { + it->extendSuppAlignment(sa.getPos(), + sa.getExtendedPos()); + } + it->addSupportingIndices( + overhangParent->getChildrenNodes()); + if (it->isNullMapqSource() && + !supplementsPrimary.back().isNullMapqSource()) { + it->setNullMapqSource(false); + } } } } - } - supportingSoftParentAlignments.clear(); - if (!consensusOverhangsTmp.empty()) { - consensusOverhangsTmp.pop_back(); - } else { - 
return string(); - } - } - for (const auto &sa : supplementsPrimaryTmp) { - auto it = find_if(supplementsPrimary.begin(), supplementsPrimary.end(), - [&](const SuppAlignment &suppAlignment) { - return suppAlignment.saCloseness(sa, 5); - }); - if (it == supplementsPrimary.end()) { - supplementsPrimary.push_back(sa); - } else { - if (it->isFuzzy() && !sa.isFuzzy()) { - it->removeFuzziness(sa); - } else if (it->isFuzzy() && sa.isFuzzy()) { - it->extendSuppAlignment(sa.getPos(), sa.getExtendedPos()); + supportingSoftParentAlignments.clear(); + if (!consensusOverhangsTmp.empty()) { + consensusOverhangsTmp.pop_back(); + } else { + if (chrIndex == 999 && pos == 19404) { + std::cerr << "Breakpoint: " << chrIndex << ":" << pos << std::endl + << "consensusOverhangsTmp is empty" << std::endl; + } + return std::string(); } - it->addSupportingIndices(sa.getSupportingIndices()); - if (it->isNullMapqSource() && !sa.isNullMapqSource()) { - it->setNullMapqSource(false); + } + for (const auto &sa : supplementsPrimaryTmp) { + auto it = find_if(supplementsPrimary.begin(), supplementsPrimary.end(), + [&](const SuppAlignment &suppAlignment) { + return suppAlignment.saCloseness(sa, 5); + }); + if (it == supplementsPrimary.end()) { + supplementsPrimary.push_back(sa); + } else { + if (it->isFuzzy() && !sa.isFuzzy()) { + it->removeFuzziness(sa); + } else if (it->isFuzzy() && sa.isFuzzy()) { + it->extendSuppAlignment(sa.getPos(), sa.getExtendedPos()); + } + it->addSupportingIndices(sa.getSupportingIndices()); + if (it->isNullMapqSource() && !sa.isNullMapqSource()) { + it->setNullMapqSource(false); + } } } + return consensusOverhangsTmp; } - return consensusOverhangsTmp; -} -bool -Breakpoint::matchDetector(const shared_ptr &longAlignment, - const shared_ptr &shortAlignment) const { - if (longAlignment->isOverhangEncounteredM() != - shortAlignment->isOverhangEncounteredM()) { - return false; - } - auto mismatches = 0; - char cLong{}; - char cShort{}; - auto shortS = 
shortAlignment->getOverhangLength(); - auto longStart = longAlignment->getOverhangStartIndex(); - auto shortStart = shortAlignment->getOverhangStartIndex(); - const auto pointerToLongSeq = &longAlignment->getSamLine(); - const auto pointerToShortSeq = &shortAlignment->getSamLine(); - if (!longAlignment->isOverhangEncounteredM() && - !shortAlignment->isOverhangEncounteredM()) { - auto lenDiff = longAlignment->getOverhangLength() - shortS; - for (int i = shortS - 1; i >= 0; --i) { - cLong = (*pointerToLongSeq)[longStart + i + lenDiff]; - if (cLong == 'N') - continue; - cShort = (*pointerToShortSeq)[shortStart + i]; - if (cShort == 'N') - continue; - if (cLong != cShort) { - ++mismatches; - if (mismatches > PERMISSIBLEMISMATCHES) - return false; - } + bool + Breakpoint::matchDetector(const std::shared_ptr &longAlignment, + const std::shared_ptr &shortAlignment) const { + if (longAlignment->isOverhangEncounteredM() != + shortAlignment->isOverhangEncounteredM()) { + return false; } - return true; - } else if (longAlignment->isOverhangEncounteredM() && - shortAlignment->isOverhangEncounteredM()) { - for (auto i = 0; i < shortS; ++i) { - cLong = (*pointerToLongSeq)[longStart + i]; - if (cLong == 'N') - continue; - cShort = (*pointerToShortSeq)[shortStart + i]; - if (cShort == 'N') - continue; - if (cLong != cShort) { - ++mismatches; - if (mismatches > PERMISSIBLEMISMATCHES) - return false; + auto mismatches = 0; + char cLong{}; + char cShort{}; + ChrSize shortS = shortAlignment->getOverhangLength(); + ChrSize longStart = longAlignment->getOverhangStartIndex(); + ChrSize shortStart = shortAlignment->getOverhangStartIndex(); + const auto pointerToLongSeq = &longAlignment->getSamLine(); + const auto pointerToShortSeq = &shortAlignment->getSamLine(); + if (!longAlignment->isOverhangEncounteredM() && + !shortAlignment->isOverhangEncounteredM()) { + ChrDistance lenDiff = longAlignment->getOverhangLength() - shortS; + for (ChrSize i = shortS - 1; i >= 0; --i) { + cLong = 
(*pointerToLongSeq)[static_cast(longStart + i + lenDiff)]; + if (cLong == 'N') + continue; + cShort = (*pointerToShortSeq)[static_cast(shortStart + i)]; + if (cShort == 'N') + continue; + if (cLong != cShort) { + ++mismatches; + if (mismatches > PERMISSIBLE_MISMATCHES) + return false; + } + } + return true; + } else if (longAlignment->isOverhangEncounteredM() && + shortAlignment->isOverhangEncounteredM()) { + for (ChrSize i = 0; i < shortS; ++i) { + cLong = (*pointerToLongSeq)[static_cast(longStart + i)]; + if (cLong == 'N') + continue; + cShort = (*pointerToShortSeq)[static_cast(shortStart + i)]; + if (cShort == 'N') + continue; + if (cLong != cShort) { + ++mismatches; + if (mismatches > PERMISSIBLE_MISMATCHES) + return false; + } } + return true; } - return true; + return false; } - return false; -} -void -Breakpoint::detectDoubleSupportSupps() { - vector saHardTmpLowQual; - { - auto i = 0u; - for (; i < supportingHardAlignments.size(); ++i) { - supportingHardAlignments[i]->setChosenBp(pos, i); - } - for (auto hardAlignment : supportingHardAlignments) { - for (const auto &sa : - hardAlignment->generateSuppAlignments(chrIndex, pos)) { - if (!(sa.isInverted() && sa.getPos() == pos && - sa.getChrIndex() == chrIndex)) { - supplementsSecondary.push_back(sa); - if (supplementsSecondary.back().isDistant()) { - if (supplementsSecondary.back().isEncounteredM()) { - if (!(hardAlignment->isNullMapq() || - hardAlignment->isLowMapq())) { - poolLeft.emplace_back( - hardAlignment->getStartPos(), - hardAlignment->getEndPos(), - hardAlignment->getMateChrIndex(), - hardAlignment->getMatePos(), 1, - hardAlignment->isInvertedMate()); + void + Breakpoint::detectDoubleSupportSupps() { + std::vector saHardTmpLowQual; + { + auto i = 0u; // this is used further down in the code ... 
+ for (; i < supportingHardAlignments.size(); ++i) { + supportingHardAlignments[i]->setChosenBp(pos, static_cast(i)); + } + for (auto hardAlignment : supportingHardAlignments) { + for (const auto &sa : hardAlignment->generateSuppAlignments(chrIndex, pos)) { + if (!(sa.isInverted() && sa.getPos() == pos && + sa.getChrIndex() == chrIndex)) { + supplementsSecondary.push_back(sa); + if (supplementsSecondary.back().isDistant()) { + if (supplementsSecondary.back().isEncounteredM()) { + if (!(hardAlignment->isNullMapq() || + hardAlignment->isLowMapq())) { + poolLeft.emplace_back( + hardAlignment->getStartPos(), + hardAlignment->getEndPos(), + hardAlignment->getMateChrIndex(), + hardAlignment->getMatePos(), 1, + hardAlignment->isInvertedMate()); + } else { + poolLowQualLeft.emplace_back( + hardAlignment->getStartPos(), + hardAlignment->getEndPos(), + hardAlignment->getMateChrIndex(), + hardAlignment->getMatePos(), 1, + hardAlignment->isInvertedMate()); + } + } else { - poolLowQualLeft.emplace_back( - hardAlignment->getStartPos(), - hardAlignment->getEndPos(), - hardAlignment->getMateChrIndex(), - hardAlignment->getMatePos(), 1, - hardAlignment->isInvertedMate()); + if (!(hardAlignment->isNullMapq() || + hardAlignment->isLowMapq())) { + poolRight.emplace_back( + hardAlignment->getStartPos(), + hardAlignment->getEndPos(), + hardAlignment->getMateChrIndex(), + hardAlignment->getMatePos(), 1, + hardAlignment->isInvertedMate()); + } else { + poolLowQualRight.emplace_back( + hardAlignment->getStartPos(), + hardAlignment->getEndPos(), + hardAlignment->getMateChrIndex(), + hardAlignment->getMatePos(), 1, + hardAlignment->isInvertedMate()); + } } + } + } + } + } + supportingHardAlignments.clear(); + for (auto j = 0u; j < supportingHardLowMapqAlignments.size(); ++j) { + supportingHardLowMapqAlignments[j]->setChosenBp(pos, static_cast(i + j)); + } - } else { - if (!(hardAlignment->isNullMapq() || - hardAlignment->isLowMapq())) { - poolRight.emplace_back( + for (auto hardAlignment : 
supportingHardLowMapqAlignments) { + for (const auto &sa : hardAlignment->generateSuppAlignments(chrIndex, pos)) { + if (!(sa.isInverted() && sa.getPos() == pos && + sa.getChrIndex() == chrIndex)) { + saHardTmpLowQual.push_back(sa); + if (saHardTmpLowQual.back().isDistant()) { + if (saHardTmpLowQual.back().isEncounteredM()) { + poolLowQualLeft.emplace_back( hardAlignment->getStartPos(), hardAlignment->getEndPos(), hardAlignment->getMateChrIndex(), @@ -508,858 +586,952 @@ Breakpoint::detectDoubleSupportSupps() { } } } - } - supportingHardAlignments.clear(); - for (auto j = 0u; j < supportingHardLowMapqAlignments.size(); ++j) { - supportingHardLowMapqAlignments[j]->setChosenBp(pos, i + j); - } - - for (auto hardAlignment : supportingHardLowMapqAlignments) { - for (const auto &sa : - hardAlignment->generateSuppAlignments(chrIndex, pos)) { - if (!(sa.isInverted() && sa.getPos() == pos && - sa.getChrIndex() == chrIndex)) { - saHardTmpLowQual.push_back(sa); - if (saHardTmpLowQual.back().isDistant()) { - if (saHardTmpLowQual.back().isEncounteredM()) { - poolLowQualLeft.emplace_back( - hardAlignment->getStartPos(), - hardAlignment->getEndPos(), - hardAlignment->getMateChrIndex(), - hardAlignment->getMatePos(), 1, - hardAlignment->isInvertedMate()); - } else { - poolLowQualRight.emplace_back( - hardAlignment->getStartPos(), - hardAlignment->getEndPos(), - hardAlignment->getMateChrIndex(), - hardAlignment->getMatePos(), 1, - hardAlignment->isInvertedMate()); + supportingHardLowMapqAlignments.clear(); + } + { + std::vector lowMapqHardSupportWhitelistIndices {}; + for (auto primarySupptIt = supplementsPrimary.begin(); + primarySupptIt != supplementsPrimary.end(); ++primarySupptIt) { + auto foundMatch = false; + for (auto secondarySuppIt = supplementsSecondary.begin(); + secondarySuppIt != supplementsSecondary.end(); + ++secondarySuppIt) { + if (primarySupptIt->saCloseness(*secondarySuppIt, 100)) { + if (primarySupptIt->isFuzzy() && + !secondarySuppIt->isFuzzy()) { + 
primarySupptIt->removeFuzziness(*secondarySuppIt); + } else if (primarySupptIt->isFuzzy() && + secondarySuppIt->isFuzzy()) { + primarySupptIt->extendSuppAlignment( + secondarySuppIt->getPos(), + secondarySuppIt->getExtendedPos()); } + foundMatch = true; + secondarySuppIt->setToRemove(true); + primarySupptIt->addSecondarySupportIndices( + secondarySuppIt->getSupportingIndicesSecondary()); } } - } - } - supportingHardLowMapqAlignments.clear(); - } - { - vector lowMapqHardSupportWhitelistIndices{}; - for (auto primarySupptIt = supplementsPrimary.begin(); - primarySupptIt != supplementsPrimary.end(); ++primarySupptIt) { - auto foundMatch = false; - for (auto secondarySuppIt = supplementsSecondary.begin(); - secondarySuppIt != supplementsSecondary.end(); - ++secondarySuppIt) { - if (primarySupptIt->saCloseness(*secondarySuppIt, 100)) { - if (primarySupptIt->isFuzzy() && - !secondarySuppIt->isFuzzy()) { - primarySupptIt->removeFuzziness(*secondarySuppIt); - } else if (primarySupptIt->isFuzzy() && - secondarySuppIt->isFuzzy()) { - primarySupptIt->extendSuppAlignment( - secondarySuppIt->getPos(), - secondarySuppIt->getExtendedPos()); + for (auto secondarySuppIt = saHardTmpLowQual.begin(); + secondarySuppIt != saHardTmpLowQual.end(); ++secondarySuppIt) { + // TODO Centralize fuzziness cutoff 100 + if (primarySupptIt->saCloseness(*secondarySuppIt, 100)) { + if (primarySupptIt->isFuzzy() && + !secondarySuppIt->isFuzzy()) { + primarySupptIt->removeFuzziness(*secondarySuppIt); + } else if (primarySupptIt->isFuzzy() && + secondarySuppIt->isFuzzy()) { + primarySupptIt->extendSuppAlignment( + secondarySuppIt->getPos(), + secondarySuppIt->getExtendedPos()); + } + foundMatch = true; + for (auto index : + secondarySuppIt->getSupportingIndicesSecondary()) { + lowMapqHardSupportWhitelistIndices.push_back(index); + } + secondarySuppIt->setToRemove(true); + primarySupptIt->addSecondarySupportIndices( + secondarySuppIt->getSupportingIndicesSecondary()); } - foundMatch = true; - 
secondarySuppIt->setToRemove(true); - primarySupptIt->addSecondarySupportIndices( - secondarySuppIt->getSupportingIndicesSecondary()); } - } - for (auto secondarySuppIt = saHardTmpLowQual.begin(); - secondarySuppIt != saHardTmpLowQual.end(); ++secondarySuppIt) { - if (primarySupptIt->saCloseness(*secondarySuppIt, 100)) { - if (primarySupptIt->isFuzzy() && - !secondarySuppIt->isFuzzy()) { - primarySupptIt->removeFuzziness(*secondarySuppIt); - } else if (primarySupptIt->isFuzzy() && - secondarySuppIt->isFuzzy()) { - primarySupptIt->extendSuppAlignment( - secondarySuppIt->getPos(), - secondarySuppIt->getExtendedPos()); - } - foundMatch = true; - for (auto index : - secondarySuppIt->getSupportingIndicesSecondary()) { - lowMapqHardSupportWhitelistIndices.push_back(index); - } - secondarySuppIt->setToRemove(true); - primarySupptIt->addSecondarySupportIndices( - secondarySuppIt->getSupportingIndicesSecondary()); + if (foundMatch) { + doubleSidedMatches.push_back(*primarySupptIt); + primarySupptIt->setToRemove(true); } } - if (foundMatch) { - doubleSidedMatches.push_back(*primarySupptIt); - primarySupptIt->setToRemove(true); + cleanUpVector(supplementsPrimary); + cleanUpVector(supplementsSecondary); + cleanUpVector(saHardTmpLowQual); + supplementsSecondary.insert(supplementsSecondary.end(), + saHardTmpLowQual.begin(), + saHardTmpLowQual.end()); + sort(lowMapqHardSupportWhitelistIndices.begin(), + lowMapqHardSupportWhitelistIndices.end()); + auto whitelistSize = + distance(lowMapqHardSupportWhitelistIndices.begin(), + unique(lowMapqHardSupportWhitelistIndices.begin(), + lowMapqHardSupportWhitelistIndices.end())); + unpairedBreaksHard += whitelistSize; + lowQualBreaksHard -= whitelistSize; + } + auto maxMapq = 0; + for (const auto &sa : doubleSidedMatches) { + if (sa.getMapq() > maxMapq) { + maxMapq = sa.getMapq(); } } - cleanUpVector(supplementsPrimary); - cleanUpVector(supplementsSecondary); - cleanUpVector(saHardTmpLowQual); - 
supplementsSecondary.insert(supplementsSecondary.end(), - saHardTmpLowQual.begin(), - saHardTmpLowQual.end()); - sort(lowMapqHardSupportWhitelistIndices.begin(), - lowMapqHardSupportWhitelistIndices.end()); - auto whitelistSize = - distance(lowMapqHardSupportWhitelistIndices.begin(), - unique(lowMapqHardSupportWhitelistIndices.begin(), - lowMapqHardSupportWhitelistIndices.end())); - unpairedBreaksHard += whitelistSize; - lowQualBreaksHard -= whitelistSize; - } - auto maxMapq = 0; - for (const auto &sa : doubleSidedMatches) { - if (sa.getMapq() > maxMapq) { - maxMapq = sa.getMapq(); - } - } - for (const auto &sa : supplementsPrimary) { - if (sa.getMapq() > maxMapq) { - maxMapq = sa.getMapq(); + for (const auto &sa : supplementsPrimary) { + if (sa.getMapq() > maxMapq) { + maxMapq = sa.getMapq(); + } } - } - for (auto &sa : supplementsPrimary) { - if (sa.getMapq() > 0) { - if (sa.getMapq() < 13 && sa.getMapq() < maxMapq) { - sa.setToRemove(true); + for (auto &sa : supplementsPrimary) { + if (sa.getMapq() > 0) { + // TODO Centralize the mapq threshold + if (sa.getMapq() < 13 && sa.getMapq() < maxMapq) { + sa.setToRemove(true); + } } } + cleanUpVector(supplementsPrimary); + for (auto &sa : supplementsPrimary) { + sa.finalizeSupportingIndices(); + } + std::vector uniqueDoubleSupportPrimaryIndices{}; + std::vector uniqueDoubleSupportSecondaryIndices{}; + for (auto &sa : doubleSidedMatches) { + sa.finalizeSupportingIndices(); + uniqueDoubleSupportPrimaryIndices.insert( + uniqueDoubleSupportPrimaryIndices.end(), + sa.getSupportingIndices().cbegin(), + sa.getSupportingIndices().cend()); + uniqueDoubleSupportSecondaryIndices.insert( + uniqueDoubleSupportSecondaryIndices.end(), + sa.getSupportingIndicesSecondary().cbegin(), + sa.getSupportingIndicesSecondary().cend()); + } + sort(uniqueDoubleSupportPrimaryIndices.begin(), + uniqueDoubleSupportPrimaryIndices.end()); + sort(uniqueDoubleSupportSecondaryIndices.begin(), + uniqueDoubleSupportSecondaryIndices.end()); + auto 
priCompensation = + distance(uniqueDoubleSupportPrimaryIndices.begin(), + unique(uniqueDoubleSupportPrimaryIndices.begin(), + uniqueDoubleSupportPrimaryIndices.end())); + auto secCompensation = + distance(uniqueDoubleSupportSecondaryIndices.begin(), + unique(uniqueDoubleSupportSecondaryIndices.begin(), + uniqueDoubleSupportSecondaryIndices.end())); + unpairedBreaksSoft -= priCompensation; + unpairedBreaksHard -= secCompensation; + pairedBreaksSoft += priCompensation; + pairedBreaksHard += secCompensation; } - cleanUpVector(supplementsPrimary); - for (auto &sa : supplementsPrimary) { - sa.finalizeSupportingIndices(); - } - vector uniqueDoubleSupportPrimaryIndices{}; - vector uniqueDoubleSupportSecondaryIndices{}; - for (auto &sa : doubleSidedMatches) { - sa.finalizeSupportingIndices(); - uniqueDoubleSupportPrimaryIndices.insert( - uniqueDoubleSupportPrimaryIndices.end(), - sa.getSupportingIndices().cbegin(), - sa.getSupportingIndices().cend()); - uniqueDoubleSupportSecondaryIndices.insert( - uniqueDoubleSupportSecondaryIndices.end(), - sa.getSupportingIndicesSecondary().cbegin(), - sa.getSupportingIndicesSecondary().cend()); - } - sort(uniqueDoubleSupportPrimaryIndices.begin(), - uniqueDoubleSupportPrimaryIndices.end()); - sort(uniqueDoubleSupportSecondaryIndices.begin(), - uniqueDoubleSupportSecondaryIndices.end()); - auto priCompensation = - distance(uniqueDoubleSupportPrimaryIndices.begin(), - unique(uniqueDoubleSupportPrimaryIndices.begin(), - uniqueDoubleSupportPrimaryIndices.end())); - auto secCompensation = - distance(uniqueDoubleSupportSecondaryIndices.begin(), - unique(uniqueDoubleSupportSecondaryIndices.begin(), - uniqueDoubleSupportSecondaryIndices.end())); - unpairedBreaksSoft -= priCompensation; - unpairedBreaksHard -= secCompensation; - pairedBreaksSoft += priCompensation; - pairedBreaksHard += secCompensation; -} -void -Breakpoint::collectMateSupport() { - sort(poolLeft.begin(), poolLeft.end()); - sort(poolRight.begin(), poolRight.end()); - 
compressMatePool(poolLeft); - compressMatePool(poolRight); - auto leftDiscordantsTotal = 0, rightDiscordantsTotal = 0; - for (const auto &mateInfo : poolLeft) { - leftDiscordantsTotal += mateInfo.matePower; - } - for (const auto &mateInfo : poolRight) { - rightDiscordantsTotal += mateInfo.matePower; - } - auto leftSideExpectedErrors = 0.0; - auto rightSideExpectedErrors = 0.0; - if (PROPERPAIRCOMPENSATIONMODE) { - leftSideExpectedErrors = - leftSideDiscordantCandidates * IMPROPERPAIRRATIO; - rightSideExpectedErrors = - rightSideDiscordantCandidates * IMPROPERPAIRRATIO; - leftDiscordantsTotal = - max(0, static_cast( - round(leftDiscordantsTotal - leftSideExpectedErrors))); - rightDiscordantsTotal = - max(0, static_cast( - round(rightDiscordantsTotal - rightSideExpectedErrors))); - leftSideExpectedErrors *= 0.5; - rightSideExpectedErrors *= 0.5; - } - for (auto &sa : doubleSidedMatches) { - if (sa.isDistant()) { - if (sa.isEncounteredM()) { - sa.setExpectedDiscordants(leftDiscordantsTotal); - collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); - } else { - sa.setExpectedDiscordants(rightDiscordantsTotal); - collectMateSupportHelper(sa, poolRight, poolLowQualRight); + void + Breakpoint::collectMateSupport() { + sort(poolLeft.begin(), poolLeft.end()); + sort(poolRight.begin(), poolRight.end()); + compressMatePool(poolLeft); + compressMatePool(poolRight); + auto leftDiscordantsTotal = 0, rightDiscordantsTotal = 0; + for (const auto &mateInfo : poolLeft) { + leftDiscordantsTotal += mateInfo.matePower; + } + for (const auto &mateInfo : poolRight) { + rightDiscordantsTotal += mateInfo.matePower; + } + auto leftSideExpectedErrors = 0.0; + auto rightSideExpectedErrors = 0.0; + if (PROPER_PAIR_COMPENSATION_MODE) { + leftSideExpectedErrors = + leftSideDiscordantCandidates * IMPROPER_PAIR_RATIO; + rightSideExpectedErrors = + rightSideDiscordantCandidates * IMPROPER_PAIR_RATIO; + leftDiscordantsTotal = + std::max(0, static_cast( + round(leftDiscordantsTotal - 
leftSideExpectedErrors))); + rightDiscordantsTotal = + std::max(0, static_cast( + round(rightDiscordantsTotal - rightSideExpectedErrors))); + leftSideExpectedErrors *= 0.5; + rightSideExpectedErrors *= 0.5; + } + for (auto &sa : doubleSidedMatches) { + if (sa.isDistant()) { + if (sa.isEncounteredM()) { + sa.setExpectedDiscordants(leftDiscordantsTotal); + collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); + } else { + sa.setExpectedDiscordants(rightDiscordantsTotal); + collectMateSupportHelper(sa, poolRight, poolLowQualRight); + } } } - } - for (auto &sa : supplementsPrimary) { - if (sa.isDistant()) { - if (sa.isEncounteredM()) { - sa.setExpectedDiscordants(leftDiscordantsTotal); - collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); - } else { - sa.setExpectedDiscordants(rightDiscordantsTotal); - collectMateSupportHelper(sa, poolRight, poolLowQualRight); + for (auto &sa : supplementsPrimary) { + if (sa.isDistant()) { + if (sa.isEncounteredM()) { + sa.setExpectedDiscordants(leftDiscordantsTotal); + collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); + } else { + sa.setExpectedDiscordants(rightDiscordantsTotal); + collectMateSupportHelper(sa, poolRight, poolLowQualRight); + } + } else if (sa.getSupport() < BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); } - } else if (sa.getSupport() < BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); } - } - for (auto &sa : supplementsPrimary) { - if (sa.getMapq() == 0 && sa.getMateSupport() == 0 && - sa.getDistinctReads() < 2 * BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); + for (auto &sa : supplementsPrimary) { + if (sa.getMapq() == 0 && sa.getMateSupport() == 0 && + sa.getDistinctReads() < 2 * BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); + } } - } - vector candidateSupplementsSecondary{}; - sort(supplementsSecondary.begin(), supplementsSecondary.end(), - [](const SuppAlignment &a, const SuppAlignment &b) { - return a.getMapq() < b.getMapq(); - }); - while (!supplementsSecondary.empty()) { - auto foundMatch = false; 
- for (auto &sa : candidateSupplementsSecondary) { - if (sa.saCloseness(supplementsSecondary.back(), 100)) { - if (sa.isFuzzy() && !supplementsSecondary.back().isFuzzy()) { - sa.removeFuzziness(supplementsSecondary.back()); - } else if (sa.isFuzzy() && - supplementsSecondary.back().isFuzzy()) { - sa.extendSuppAlignment( - supplementsSecondary.back().getPos(), - supplementsSecondary.back().getExtendedPos()); - } - if (supplementsSecondary.back().getMapq() > sa.getMapq()) { - sa.setMapq(supplementsSecondary.back().getMapq()); - } - if (sa.isNullMapqSource() && - !supplementsSecondary.back().isNullMapqSource()) { - sa.setNullMapqSource(false); - } - for (auto index : supplementsSecondary.back() - .getSupportingIndicesSecondary()) { - sa.addSecondarySupportIndices(index); + std::vector candidateSupplementsSecondary{}; + sort(supplementsSecondary.begin(), supplementsSecondary.end(), + [](const SuppAlignment &a, const SuppAlignment &b) { + return a.getMapq() < b.getMapq(); + }); + while (!supplementsSecondary.empty()) { + auto foundMatch = false; + for (auto &sa : candidateSupplementsSecondary) { + if (sa.saCloseness(supplementsSecondary.back(), 100)) { + if (sa.isFuzzy() && !supplementsSecondary.back().isFuzzy()) { + sa.removeFuzziness(supplementsSecondary.back()); + } else if (sa.isFuzzy() && + supplementsSecondary.back().isFuzzy()) { + sa.extendSuppAlignment( + supplementsSecondary.back().getPos(), + supplementsSecondary.back().getExtendedPos()); + } + if (supplementsSecondary.back().getMapq() > sa.getMapq()) { + sa.setMapq(supplementsSecondary.back().getMapq()); + } + if (sa.isNullMapqSource() && + !supplementsSecondary.back().isNullMapqSource()) { + sa.setNullMapqSource(false); + } + for (auto index : supplementsSecondary.back() + .getSupportingIndicesSecondary()) { + sa.addSecondarySupportIndices(index); + } + foundMatch = true; + break; } - foundMatch = true; - break; } + if (!foundMatch) { + candidateSupplementsSecondary.push_back( + supplementsSecondary.back()); 
+ } + supplementsSecondary.pop_back(); } - if (!foundMatch) { - candidateSupplementsSecondary.push_back( - supplementsSecondary.back()); + for (auto &sa : candidateSupplementsSecondary) { + sa.finalizeSupportingIndices(); } - supplementsSecondary.pop_back(); - } - for (auto &sa : candidateSupplementsSecondary) { - sa.finalizeSupportingIndices(); - } - vector originIndices{}; - for (auto &sa : candidateSupplementsSecondary) { - if (sa.isDistant()) { - if (sa.isEncounteredM()) { - sa.setExpectedDiscordants(leftDiscordantsTotal); - collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); - } else { - sa.setExpectedDiscordants(rightDiscordantsTotal); - collectMateSupportHelper(sa, poolRight, poolLowQualRight); - } - if (sa.getMateSupport() > 0) { - doubleSidedMatches.push_back(sa); - } else { - if (sa.isLowMapqSource() || sa.isNullMapqSource() || - sa.getMapq() < 13) { - for (auto index : sa.getSupportingIndicesSecondary()) { - originIndices.push_back(index); + std::vector originIndices{}; + for (auto &sa : candidateSupplementsSecondary) { + if (sa.isDistant()) { + if (sa.isEncounteredM()) { + sa.setExpectedDiscordants(leftDiscordantsTotal); + collectMateSupportHelper(sa, poolLeft, poolLowQualLeft); + } else { + sa.setExpectedDiscordants(rightDiscordantsTotal); + collectMateSupportHelper(sa, poolRight, poolLowQualRight); + } + if (sa.getMateSupport() > 0) { + doubleSidedMatches.push_back(sa); + } else { + if (sa.isLowMapqSource() || sa.isNullMapqSource() || + sa.getMapq() < 13) { + for (auto index : sa.getSupportingIndicesSecondary()) { + originIndices.push_back(index); + } } } } } - } - sort(originIndices.begin(), originIndices.end()); - int uniqueCount = unique(originIndices.begin(), originIndices.end()) - - originIndices.begin(); - uniqueCount = min(lowQualBreaksHard, uniqueCount); - lowQualBreaksHard -= uniqueCount; - lowQualBreaksSoft += uniqueCount; + sort(originIndices.begin(), originIndices.end()); + int uniqueCount = unique(originIndices.begin(), 
originIndices.end()) - + originIndices.begin(); + uniqueCount = std::min(lowQualBreaksHard, uniqueCount); + lowQualBreaksHard -= uniqueCount; + lowQualBreaksSoft += uniqueCount; - for (const auto &mateInfo : poolLeft) { - if (!mateInfo.saSupporter && mateInfo.evidenceLevel == 3 && - mateInfo.matePower / (0.0 + leftDiscordantsTotal) >= 0.33 && - (pos - mateInfo.readEndPos) < DEFAULTREADLENGTH / 2) { - supplementsPrimary.emplace_back( - mateInfo.mateChrIndex, mateInfo.mateStartPos, - mateInfo.matePower, leftDiscordantsTotal, true, - mateInfo.inversionSupport > mateInfo.straightSupport, - mateInfo.mateEndPos, false, false, false, -1); + for (const auto &mateInfo : poolLeft) { + if (!mateInfo.saSupporter && mateInfo.evidenceLevel == 3 && + mateInfo.matePower / (0.0 + leftDiscordantsTotal) >= 0.33 && + (pos - mateInfo.readEndPos) < DEFAULT_READ_LENGTH / 2) { + supplementsPrimary.emplace_back(SuppAlignment::create( + mateInfo.mateChrIndex, + mateInfo.mateStartPos, + mateInfo.matePower, + leftDiscordantsTotal, + true, + mateInfo.inversionSupport > mateInfo.straightSupport, + mateInfo.mateEndPos, + false, + false, + false, + -1 /* origin index */)); + } } - } - for (const auto &mateInfo : poolRight) { - if (!mateInfo.saSupporter && mateInfo.evidenceLevel == 3 && - mateInfo.matePower / (0.0 + rightDiscordantsTotal) >= 0.33 && - (mateInfo.readStartPos - pos) < DEFAULTREADLENGTH / 2) { - supplementsPrimary.emplace_back( - mateInfo.mateChrIndex, mateInfo.mateStartPos, - mateInfo.matePower, rightDiscordantsTotal, false, - mateInfo.inversionSupport > mateInfo.straightSupport, - mateInfo.mateEndPos, false, false, false, -1); + for (const auto &mateInfo : poolRight) { + if (!mateInfo.saSupporter && mateInfo.evidenceLevel == 3 && + mateInfo.matePower / (0.0 + rightDiscordantsTotal) >= 0.33 && + (mateInfo.readStartPos - pos) < DEFAULT_READ_LENGTH / 2) { + supplementsPrimary.emplace_back(SuppAlignment::create( + mateInfo.mateChrIndex, + mateInfo.mateStartPos, + mateInfo.matePower, + 
rightDiscordantsTotal, + false, + mateInfo.inversionSupport > mateInfo.straightSupport, + mateInfo.mateEndPos, + false, + false, + false, + -1 /* origin index */)); + } } - } - for (auto &sa : doubleSidedMatches) { - if (sa.isFuzzy() && sa.getMateSupport() == 0) { - sa.setToRemove(true); + for (auto &sa : doubleSidedMatches) { + if (sa.isFuzzy() && sa.getMateSupport() == 0) { + sa.setToRemove(true); + } } - } - cleanUpVector(doubleSidedMatches); + cleanUpVector(doubleSidedMatches); - for (auto &sa : supplementsPrimary) { - if (sa.isFuzzy() && sa.getMateSupport() == 0) { - sa.setToRemove(true); + for (auto &sa : supplementsPrimary) { + if (sa.isFuzzy() && sa.getMateSupport() == 0) { + sa.setToRemove(true); + } } - } - cleanUpVector(supplementsPrimary); - for (auto &sa : doubleSidedMatches) { - for (auto &sa2 : supplementsPrimary) { - if (sa.saCloseness(sa2, 5)) { - sa.mergeSa(sa2); - sa2.setToRemove(true); + cleanUpVector(supplementsPrimary); + for (auto &sa : doubleSidedMatches) { + for (auto &sa2 : supplementsPrimary) { + if (sa.saCloseness(sa2, 5)) { + sa.mergeSa(sa2); + sa2.setToRemove(true); + } } } - } - cleanUpVector(supplementsPrimary); - for (auto &sa : doubleSidedMatches) { - if (!(sa.getSupport() > 0 && sa.getSecondarySupport() > 0) && - sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < - BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); - } else { - if (sa.isDistant() && PROPERPAIRCOMPENSATIONMODE) { - if (sa.isEncounteredM()) { - if (sa.getMateSupport() < leftSideExpectedErrors) { - sa.setProperPairErrorProne(true); + cleanUpVector(supplementsPrimary); + for (auto &sa : doubleSidedMatches) { + if (!(sa.getSupport() > 0 && sa.getSecondarySupport() > 0) && + sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < + BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); + } else { + if (sa.isDistant() && PROPER_PAIR_COMPENSATION_MODE) { + if (sa.isEncounteredM()) { + if (sa.getMateSupport() < leftSideExpectedErrors) { + 
sa.setProperPairErrorProne(true); + } + } else { + if (sa.getMateSupport() < rightSideExpectedErrors) { + sa.setProperPairErrorProne(true); + } } - } else { - if (sa.getMateSupport() < rightSideExpectedErrors) { - sa.setProperPairErrorProne(true); + if (sa.getMateSupport() > sa.getExpectedDiscordants()) { + sa.setExpectedDiscordants(sa.getMateSupport()); } } - if (sa.getMateSupport() > sa.getExpectedDiscordants()) { - sa.setExpectedDiscordants(sa.getMateSupport()); - } } } - } - cleanUpVector(doubleSidedMatches); - for (auto &sa : supplementsPrimary) { - if (!(sa.getSupport() > 0 && sa.getSecondarySupport() > 0) && - sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < - BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); - } else { - if (sa.isDistant() && PROPERPAIRCOMPENSATIONMODE) { - if (sa.isEncounteredM()) { - if (sa.getMateSupport() < leftSideExpectedErrors) { - sa.setProperPairErrorProne(true); + cleanUpVector(doubleSidedMatches); + for (auto &sa : supplementsPrimary) { + if (!(sa.getSupport() > 0 && sa.getSecondarySupport() > 0) && + sa.getSupport() + sa.getSecondarySupport() + sa.getMateSupport() < + BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); + } else { + if (sa.isDistant() && PROPER_PAIR_COMPENSATION_MODE) { + if (sa.isEncounteredM()) { + if (sa.getMateSupport() < leftSideExpectedErrors) { + sa.setProperPairErrorProne(true); + } + } else { + if (sa.getMateSupport() < rightSideExpectedErrors) { + sa.setProperPairErrorProne(true); + } } - } else { - if (sa.getMateSupport() < rightSideExpectedErrors) { - sa.setProperPairErrorProne(true); + if (sa.getMateSupport() > sa.getExpectedDiscordants()) { + sa.setExpectedDiscordants(sa.getMateSupport()); } } - if (sa.getMateSupport() > sa.getExpectedDiscordants()) { - sa.setExpectedDiscordants(sa.getMateSupport()); - } } } + cleanUpVector(supplementsPrimary); } - cleanUpVector(supplementsPrimary); -} -void -Breakpoint::compressMatePool(vector &discordantAlignmentsPool) { - if 
(discordantAlignmentsPool.empty()) - return; - auto lastIndex = 0; - for (auto i = 1; i < static_cast(discordantAlignmentsPool.size()); - ++i) { - if (discordantAlignmentsPool[lastIndex].mateChrIndex != - discordantAlignmentsPool[i].mateChrIndex || // - discordantAlignmentsPool[i].mateStartPos - - discordantAlignmentsPool[lastIndex].mateEndPos > - 3.5 * DEFAULTREADLENGTH) { - lastIndex = i; - } else { - discordantAlignmentsPool[lastIndex].mateEndPos = - max(discordantAlignmentsPool[lastIndex].mateEndPos, - discordantAlignmentsPool[i].mateEndPos); - discordantAlignmentsPool[lastIndex].mateStartPos = - min(discordantAlignmentsPool[lastIndex].mateStartPos, - discordantAlignmentsPool[i].mateStartPos); - ++discordantAlignmentsPool[lastIndex].matePower; - if (discordantAlignmentsPool[i].inverted) { - ++discordantAlignmentsPool[lastIndex].inversionSupport; + void + Breakpoint::compressMatePool(std::vector &discordantAlignmentsPool) { + if (discordantAlignmentsPool.empty()) + return; + unsigned int lastIndex = 0; + for (size_t i = 1; i < discordantAlignmentsPool.size(); ++i) { + if (discordantAlignmentsPool[lastIndex].mateChrIndex != + discordantAlignmentsPool[i].mateChrIndex || + discordantAlignmentsPool[i].mateStartPos - + discordantAlignmentsPool[lastIndex].mateEndPos > + 3.5 * DEFAULT_READ_LENGTH) { + lastIndex = i; } else { - ++discordantAlignmentsPool[lastIndex].straightSupport; - } - if (abs(pos - discordantAlignmentsPool[i].readStartPos) <= - abs(pos - discordantAlignmentsPool[i].readEndPos)) { - // left side - if (abs(pos - discordantAlignmentsPool[lastIndex].readEndPos) > - abs(pos - discordantAlignmentsPool[i].readEndPos)) { - discordantAlignmentsPool[lastIndex].readStartPos = - discordantAlignmentsPool[i].readStartPos; - discordantAlignmentsPool[lastIndex].readEndPos = - discordantAlignmentsPool[i].readEndPos; + discordantAlignmentsPool[lastIndex].mateEndPos = + std::max(discordantAlignmentsPool[lastIndex].mateEndPos, + discordantAlignmentsPool[i].mateEndPos); 
+ discordantAlignmentsPool[lastIndex].mateStartPos = + std::min(discordantAlignmentsPool[lastIndex].mateStartPos, + discordantAlignmentsPool[i].mateStartPos); + ++discordantAlignmentsPool[lastIndex].matePower; + if (discordantAlignmentsPool[i].inverted) { + ++discordantAlignmentsPool[lastIndex].inversionSupport; + } else { + ++discordantAlignmentsPool[lastIndex].straightSupport; } - } else { - // right side - if (abs(pos - - discordantAlignmentsPool[lastIndex].readStartPos) > - abs(pos - discordantAlignmentsPool[i].readStartPos)) { - discordantAlignmentsPool[lastIndex].readStartPos = - discordantAlignmentsPool[i].readStartPos; - discordantAlignmentsPool[lastIndex].readEndPos = - discordantAlignmentsPool[i].readEndPos; + if (abs(static_cast(pos) - static_cast(discordantAlignmentsPool[i].readStartPos)) <= + abs(static_cast(pos) - static_cast(discordantAlignmentsPool[i].readEndPos))) { + // left side + if (abs(static_cast(pos) - static_cast(discordantAlignmentsPool[lastIndex].readEndPos)) > + abs(static_cast(pos) - static_cast(discordantAlignmentsPool[i].readEndPos))) { + discordantAlignmentsPool[lastIndex].readStartPos = + discordantAlignmentsPool[i].readStartPos; + discordantAlignmentsPool[lastIndex].readEndPos = + discordantAlignmentsPool[i].readEndPos; + } + } else { + // right side + if (abs(static_cast(pos) - static_cast(discordantAlignmentsPool[lastIndex].readStartPos)) > + abs(static_cast(pos) - static_cast(discordantAlignmentsPool[i].readStartPos))) { + discordantAlignmentsPool[lastIndex].readStartPos = + discordantAlignmentsPool[i].readStartPos; + discordantAlignmentsPool[lastIndex].readEndPos = + discordantAlignmentsPool[i].readEndPos; + } } + if ((discordantAlignmentsPool[lastIndex].source == 0 && + discordantAlignmentsPool[i].source == 1) || + (discordantAlignmentsPool[lastIndex].source == 1 && + discordantAlignmentsPool[i].source == 0)) { + discordantAlignmentsPool[lastIndex].evidenceLevel = 2; + discordantAlignmentsPool[lastIndex].source = 2; + } + if 
(discordantAlignmentsPool[lastIndex].evidenceLevel != 3 && + discordantAlignmentsPool[i].evidenceLevel == 3) { + discordantAlignmentsPool[lastIndex].evidenceLevel = 3; + discordantAlignmentsPool[lastIndex].source = 2; + } + discordantAlignmentsPool[i].toRemove = true; } - if ((discordantAlignmentsPool[lastIndex].source == 0 && - discordantAlignmentsPool[i].source == 1) || - (discordantAlignmentsPool[lastIndex].source == 1 && - discordantAlignmentsPool[i].source == 0)) { - discordantAlignmentsPool[lastIndex].evidenceLevel = 2; - discordantAlignmentsPool[lastIndex].source = 2; - } - if (discordantAlignmentsPool[lastIndex].evidenceLevel != 3 && - discordantAlignmentsPool[i].evidenceLevel == 3) { - discordantAlignmentsPool[lastIndex].evidenceLevel = 3; - discordantAlignmentsPool[lastIndex].source = 2; - } - discordantAlignmentsPool[i].toRemove = true; } - } - for (auto &cluster : discordantAlignmentsPool) { - if (cluster.evidenceLevel == 1 && - cluster.matePower < BPSUPPORTTHRESHOLD) { - cluster.toRemove = true; + for (auto &cluster : discordantAlignmentsPool) { + if (cluster.evidenceLevel == 1 && + cluster.matePower < BP_SUPPORT_THRESHOLD) { + cluster.toRemove = true; + } } + cleanUpVector(discordantAlignmentsPool); } - cleanUpVector(discordantAlignmentsPool); -} -void -Breakpoint::fillMatePool( - const deque &discordantAlignmentsPool, - const deque &discordantLowQualAlignmentsPool, - const deque &discordantAlignmentCandidatesPool) { - poolLeft.reserve(discordantAlignmentsPool.size()); - poolRight.reserve(discordantAlignmentsPool.size()); - { - auto i = 0u; - for (; i < discordantAlignmentsPool.size(); ++i) { - if (discordantAlignmentsPool[i].readStartPos >= pos) { - break; - } else { - if (discordantAlignmentsPool[i].readEndPos <= pos) { - poolLeft.push_back(discordantAlignmentsPool[i]); + void + Breakpoint::fillMatePool( + const std::deque &discordantAlignmentsPool, + const std::deque &discordantLowQualAlignmentsPool, + const std::deque 
&discordantAlignmentCandidatesPool) { + poolLeft.reserve(discordantAlignmentsPool.size()); + poolRight.reserve(discordantAlignmentsPool.size()); + { + auto i = 0u; + for (; i < discordantAlignmentsPool.size(); ++i) { + if (discordantAlignmentsPool[i].readStartPos >= pos) { + break; } else { - poolLeft.push_back(discordantAlignmentsPool[i]); - poolRight.push_back(discordantAlignmentsPool[i]); + if (discordantAlignmentsPool[i].readEndPos <= pos) { + poolLeft.push_back(discordantAlignmentsPool[i]); + } else { + poolLeft.push_back(discordantAlignmentsPool[i]); + poolRight.push_back(discordantAlignmentsPool[i]); + } } } + for (; i < discordantAlignmentsPool.size(); ++i) { + poolRight.push_back(discordantAlignmentsPool[i]); + } } - for (; i < discordantAlignmentsPool.size(); ++i) { - poolRight.push_back(discordantAlignmentsPool[i]); - } - } - if (PROPERPAIRCOMPENSATIONMODE) { - auto i = 0u; - for (; i < discordantAlignmentCandidatesPool.size(); ++i) { - if (discordantAlignmentCandidatesPool[i].readStartPos >= pos) { - break; - } else { - if (discordantAlignmentCandidatesPool[i].readEndPos <= pos) { - ++leftSideDiscordantCandidates; + if (PROPER_PAIR_COMPENSATION_MODE) { + auto i = 0u; + for (; i < discordantAlignmentCandidatesPool.size(); ++i) { + if (discordantAlignmentCandidatesPool[i].readStartPos >= pos) { + break; } else { - ++leftSideDiscordantCandidates; - ++rightSideDiscordantCandidates; + if (discordantAlignmentCandidatesPool[i].readEndPos <= pos) { + ++leftSideDiscordantCandidates; + } else { + ++leftSideDiscordantCandidates; + ++rightSideDiscordantCandidates; + } } } - } - for (; i < discordantAlignmentCandidatesPool.size(); ++i) { - ++rightSideDiscordantCandidates; - } - } - poolLowQualLeft.reserve(discordantLowQualAlignmentsPool.size()); - poolLowQualRight.reserve(discordantLowQualAlignmentsPool.size()); - { - auto i = 0u; - for (; i < discordantLowQualAlignmentsPool.size(); ++i) { - if (discordantLowQualAlignmentsPool[i].readStartPos < - pos - 
DISCORDANTLOWQUALLEFTRANGE) { - continue; + for (; i < discordantAlignmentCandidatesPool.size(); ++i) { + ++rightSideDiscordantCandidates; } - if (discordantLowQualAlignmentsPool[i].readStartPos >= pos) { - break; - } else { - if (discordantLowQualAlignmentsPool[i].readEndPos <= pos) { - poolLowQualLeft.push_back( - discordantLowQualAlignmentsPool[i]); + } + poolLowQualLeft.reserve(discordantLowQualAlignmentsPool.size()); + poolLowQualRight.reserve(discordantLowQualAlignmentsPool.size()); + { + auto i = 0u; + for (; i < discordantLowQualAlignmentsPool.size(); ++i) { + if (discordantLowQualAlignmentsPool[i].readStartPos < + pos - DISCORDANT_LOW_QUAL_LEFT_RANGE) { + continue; + } + if (discordantLowQualAlignmentsPool[i].readStartPos >= pos) { + break; } else { - poolLowQualLeft.push_back( - discordantLowQualAlignmentsPool[i]); - poolLowQualRight.push_back( - discordantLowQualAlignmentsPool[i]); + if (discordantLowQualAlignmentsPool[i].readEndPos <= pos) { + poolLowQualLeft.push_back( + discordantLowQualAlignmentsPool[i]); + } else { + poolLowQualLeft.push_back( + discordantLowQualAlignmentsPool[i]); + poolLowQualRight.push_back( + discordantLowQualAlignmentsPool[i]); + } } } - } - for (; i < discordantLowQualAlignmentsPool.size(); ++i) { - if (discordantLowQualAlignmentsPool[i].readStartPos > - pos + DISCORDANTLOWQUALRIGHTRANGE) { - break; + for (; i < discordantLowQualAlignmentsPool.size(); ++i) { + if (discordantLowQualAlignmentsPool[i].readStartPos > + pos + DISCORDANT_LOW_QUAL_RIGHT_RANGE) { + break; + } + poolLowQualRight.push_back(discordantLowQualAlignmentsPool[i]); } - poolLowQualRight.push_back(discordantLowQualAlignmentsPool[i]); } } -} -void -Breakpoint::collectMateSupportHelper( - SuppAlignment &sa, vector &discordantAlignmentsPool, - vector &discordantLowQualAlignmentsPool) { - auto maxEvidenceLevel = 0; - for (auto &mateInfo : discordantAlignmentsPool) { - if (mateInfo.suppAlignmentFuzzyMatch(sa)) { - if (!mateInfo.saSupporter) { - mateSupport += 
mateInfo.matePower; - mateInfo.saSupporter = true; - } - sa.incrementMateSupport(mateInfo.matePower); - if (sa.isFuzzy()) { - sa.extendSuppAlignment(mateInfo.mateStartPos, - mateInfo.mateEndPos); - } - if (mateInfo.evidenceLevel > maxEvidenceLevel) { - maxEvidenceLevel = mateInfo.evidenceLevel; - } - } - } - int lowQualSupports{0}; - for (auto &mateInfo : discordantLowQualAlignmentsPool) { - if (mateInfo.suppAlignmentFuzzyMatch(sa)) { - if (!mateInfo.saSupporter) { - mateSupport += 1; - mateInfo.saSupporter = true; - } - sa.incrementMateSupport(1); - ++lowQualSupports; - auto bpPosMatch = false; - for (const auto bpPos : mateInfo.bpLocs) { - if (bpPos == pos) { - bpPosMatch = true; - break; + void + Breakpoint::collectMateSupportHelper( + SuppAlignment &sa, std::vector &discordantAlignmentsPool, + std::vector &discordantLowQualAlignmentsPool) { + auto maxEvidenceLevel = 0; + for (auto &mateInfo : discordantAlignmentsPool) { + if (mateInfo.suppAlignmentFuzzyMatch(sa)) { + if (!mateInfo.saSupporter) { + mateSupport += mateInfo.matePower; + mateInfo.saSupporter = true; + } + sa.incrementMateSupport(mateInfo.matePower); + if (sa.isFuzzy()) { + sa.extendSuppAlignment(mateInfo.mateStartPos, + mateInfo.mateEndPos); } - } - if (!bpPosMatch) { if (mateInfo.evidenceLevel > maxEvidenceLevel) { maxEvidenceLevel = mateInfo.evidenceLevel; } } } - } - auto lowQualDiscordantSupports = - lowQualSupports + - min(lowQualSupports, - static_cast(discordantLowQualAlignmentsPool.size()) - - lowQualSupports); - sa.setExpectedDiscordants(sa.getExpectedDiscordants() + - lowQualDiscordantSupports); - if (sa.getMateSupport() == 0) { - if (sa.getSecondarySupport() == 0 && - sa.getSupport() < BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); - } else { - sa.setSuspicious(true); - } - } else { - if (maxEvidenceLevel < 3) { - sa.setSemiSuspicious(true); - if (!((0.0 + sa.getMateSupport()) / (sa.getExpectedDiscordants()) > - 0.33)) { - if (sa.getSecondarySupport() == 0 && - sa.getSupport() < 
BPSUPPORTTHRESHOLD) { - sa.setToRemove(true); + int lowQualSupports{0}; + for (auto &mateInfo : discordantLowQualAlignmentsPool) { + if (mateInfo.suppAlignmentFuzzyMatch(sa)) { + if (!mateInfo.saSupporter) { + mateSupport += 1; + mateInfo.saSupporter = true; + } + sa.incrementMateSupport(1); + ++lowQualSupports; + auto bpPosMatch = false; + for (const ChrSize bpPos : mateInfo.bpLocs) { + if (bpPos == pos) { + bpPosMatch = true; + break; + } } + if (!bpPosMatch) { + if (mateInfo.evidenceLevel > maxEvidenceLevel) { + maxEvidenceLevel = mateInfo.evidenceLevel; + } + } + } + } + auto lowQualDiscordantSupports = + lowQualSupports + + std::min(lowQualSupports, + static_cast(discordantLowQualAlignmentsPool.size()) - + lowQualSupports); + sa.setExpectedDiscordants(sa.getExpectedDiscordants() + + lowQualDiscordantSupports); + if (sa.getMateSupport() == 0) { + if (sa.getSecondarySupport() == 0 && + sa.getSupport() < BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); + } else { + sa.setSuspicious(true); } } else { - sa.setSemiSuspicious(false); + if (maxEvidenceLevel < 3) { + sa.setSemiSuspicious(true); + if (!((0.0 + sa.getMateSupport()) / (sa.getExpectedDiscordants()) > + 0.33)) { + if (sa.getSecondarySupport() == 0 && + sa.getSupport() < BP_SUPPORT_THRESHOLD) { + sa.setToRemove(true); + } + } + } else { + sa.setSemiSuspicious(false); + } } } -} -Breakpoint::Breakpoint(const string &bpIn, bool ignoreOverhang) - : covFinalized{true}, missingInfoBp{false}, chrIndex{0}, pos{0}, - normalSpans{0}, lowQualSpansSoft{0}, lowQualSpansHard{0}, - unpairedBreaksSoft{0}, unpairedBreaksHard{0}, breaksShortIndel{0}, - lowQualBreaksSoft{0}, lowQualBreaksHard{0}, repetitiveOverhangBreaks{0}, - pairedBreaksSoft{0}, pairedBreaksHard{0}, mateSupport{0}, leftCoverage{0}, - rightCoverage{0}, hitsInMref{0}, germline{false} { - auto index = 0; - vector bpChunkPositions{}; - bpChunkPositions.reserve(7); - for (auto it = bpIn.cbegin(); it != bpIn.cend(); ++it) { - if (*it == '\t') { - 
bpChunkPositions.push_back(index); - } - ++index; - } - chrIndex = ChrConverter::readChromosomeIndex(bpIn.cbegin(), '\t'); - for (auto i = bpChunkPositions[0] + 1; i < bpChunkPositions[1]; ++i) { - pos = pos * 10 + (bpIn[i] - '0'); - } - auto mode = 0; - for (auto i = bpChunkPositions[2] + 1; i < bpChunkPositions[3]; ++i) { - if (bpIn[i] == ',') { - ++mode; - } else { - switch (mode) { - case 0: - pairedBreaksSoft = 10 * pairedBreaksSoft + (bpIn[i] - '0'); - break; - case 1: - pairedBreaksHard = 10 * pairedBreaksHard + (bpIn[i] - '0'); - break; - case 2: - mateSupport = 10 * mateSupport + (bpIn[i] - '0'); - break; - case 3: - unpairedBreaksSoft = 10 * unpairedBreaksSoft + (bpIn[i] - '0'); - break; - case 4: - unpairedBreaksHard = 10 * unpairedBreaksHard + (bpIn[i] - '0'); - break; - case 5: - breaksShortIndel = 10 * breaksShortIndel + (bpIn[i] - '0'); - break; - case 6: - normalSpans = 10 * normalSpans + (bpIn[i] - '0'); - break; - case 7: - lowQualSpansSoft = 10 * lowQualSpansSoft + (bpIn[i] - '0'); - break; - case 8: - lowQualSpansHard = 10 * lowQualSpansHard + (bpIn[i] - '0'); - break; - case 9: - lowQualBreaksSoft = 10 * lowQualBreaksSoft + (bpIn[i] - '0'); - break; - case 10: - lowQualBreaksHard = 10 * lowQualBreaksHard + (bpIn[i] - '0'); - break; - case 11: - repetitiveOverhangBreaks = - 10 * repetitiveOverhangBreaks + (bpIn[i] - '0'); - break; - default: - break; + Breakpoint::Breakpoint(ChrIndex chrIndexIn, + ChrSize posIn) + : covFinalized{false}, + missingInfoBp{false}, + chrIndex{chrIndexIn}, + pos{posIn}, + normalSpans{0}, + lowQualSpansSoft{0}, + lowQualSpansHard{0}, + unpairedBreaksSoft{0}, + unpairedBreaksHard{0}, + breaksShortIndel{0}, + lowQualBreaksSoft{0}, + lowQualBreaksHard{0}, + repetitiveOverhangBreaks{0}, + pairedBreaksSoft{0}, + pairedBreaksHard{0}, + leftSideDiscordantCandidates{0}, + rightSideDiscordantCandidates{0}, + mateSupport{0}, + leftCoverage{0}, + rightCoverage{0}, + totalLowMapqHardClips{0}, + hitsInMref{-1}, + germline{false}, + 
poolLeft{}, + poolRight{}, + poolLowQualLeft{}, + poolLowQualRight{} {} + + /** + * @param bpIn a line from the breakpoint tumorGz file + * @param ignoreOverhang whether to ignore overhangs + */ + Breakpoint Breakpoint::parse(const std::string &bpIn, bool ignoreOverhang) { + Breakpoint result = Breakpoint(0, 0); + result.covFinalized = true; + result.hitsInMref = 0; + + unsigned int index = 0; + + // Collect ends of the columns in BED file. The end of the last column won't be contained. + std::vector columnSeparatorPos {}; + columnSeparatorPos.reserve(7); + for (auto it = bpIn.cbegin(); it != bpIn.cend(); ++it) { + if (*it == '\t') { + columnSeparatorPos.push_back(index); } + ++index; + } + const unsigned int startStart = columnSeparatorPos[0] + 1, + startEnd = columnSeparatorPos[1], +// endStart = columnSeparatorPos[1] + 1, +// endEnd = columnSeparatorPos[2], + typeCountsStart = columnSeparatorPos[2] + 1, + typeCountsEnd = columnSeparatorPos[3], + leftRightCovStart = columnSeparatorPos[3] + 1, + leftRightCovEnd = columnSeparatorPos[4], + supportStart = columnSeparatorPos[4] + 1, + supportEnd = columnSeparatorPos[5], + supplementsPrimaryStart = columnSeparatorPos[5] + 1, + supplementsPrimaryEnd = columnSeparatorPos[6], + overhangsStart = columnSeparatorPos[6] + 1; +// overhangsEnd = columnSeparatorPos[7], + + // Column 1: chrIndex. 
+ const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + try { + result.chrIndex = chrConverter.parseChrAndReturnIndex(bpIn.cbegin(), bpIn.cend(), '\t'); + } catch (DomainError &e) { + e << + error_info_string("from = " + + std::string(bpIn.cbegin(), bpIn.cend())); + throw e; } - } - mode = 0; - for (auto i = bpChunkPositions[3] + 1; i < bpChunkPositions[4]; ++i) { - if (bpIn[i] == ',') { - ++mode; - } else { - switch (mode) { - case 0: - leftCoverage = 10 * leftCoverage + (bpIn[i] - '0'); - break; - case 1: - rightCoverage = 10 * rightCoverage + (bpIn[i] - '0'); - break; - default: - break; - } + + // Column 2: start position + for (auto i = startStart; i < startEnd; ++i) { + // TODO Centralize this parsing code into global.h as inline function. + result.pos = result.pos * 10 + ChrSize(bpIn[i] - '0'); } - } - auto shortClipTotal = normalSpans - min(leftCoverage, rightCoverage); - if (shortClipTotal > 0) { - normalSpans -= shortClipTotal; - if (pairedBreaksSoft > 0) { - pairedBreaksSoft += shortClipTotal; - } else { - unpairedBreaksSoft += shortClipTotal; + // Column 3: end position (not parsed) + + // Now parse column 4, which contain a list of counts for different categories in the order: + enum Mode { + pairedBreaksSoft = 0, + pairedBreaksHard = 1, + mateSupport = 2, + unpairedBreaksSoft = 3, + unpairedBreaksHard = 4, + breaksShortIndel = 5, + normalSpans = 6, + lowQualSpansSoft = 7, + lowQualSpansHard = 8, + lowQualBreaksSoft = 9, + lowQualBreaksHard = 10, + repetitiveOverhangBreaks = 11 + }; + auto next = [](Mode& mode) { + return static_cast(static_cast(mode) + 1); + }; + + Mode mode = Mode::pairedBreaksSoft; + for (auto i = typeCountsStart; i < typeCountsEnd; ++i) { + if (bpIn[i] == ',') { + mode = next(mode); + } else { + switch (mode) { + case Mode::pairedBreaksSoft: + result.pairedBreaksSoft = + 10 * result.pairedBreaksSoft + (bpIn[i] - '0'); + break; + case Mode::pairedBreaksHard: + result.pairedBreaksHard = + 10 * 
result.pairedBreaksHard + (bpIn[i] - '0'); + break; + case Mode::mateSupport: + result.mateSupport = + 10 * result.mateSupport + (bpIn[i] - '0'); + break; + case Mode::unpairedBreaksSoft: + result.unpairedBreaksSoft = + 10 * result.unpairedBreaksSoft + (bpIn[i] - '0'); + break; + case Mode::unpairedBreaksHard: + result.unpairedBreaksHard = + 10 * result.unpairedBreaksHard + (bpIn[i] - '0'); + break; + case Mode::breaksShortIndel: + result.breaksShortIndel = + 10 * result.breaksShortIndel + (bpIn[i] - '0'); + break; + case Mode::normalSpans: + result.normalSpans = + 10 * result.normalSpans + (bpIn[i] - '0'); + break; + case Mode::lowQualSpansSoft: + result.lowQualSpansSoft = + 10 * result.lowQualSpansSoft + (bpIn[i] - '0'); + break; + case Mode::lowQualSpansHard: + result.lowQualSpansHard = + 10 * result.lowQualSpansHard + (bpIn[i] - '0'); + break; + case Mode::lowQualBreaksSoft: + result.lowQualBreaksSoft = + 10 * result.lowQualBreaksSoft + (bpIn[i] - '0'); + break; + case Mode::lowQualBreaksHard: + result.lowQualBreaksHard = + 10 * result.lowQualBreaksHard + (bpIn[i] - '0'); + break; + case Mode::repetitiveOverhangBreaks: + result.repetitiveOverhangBreaks = + 10 * result.repetitiveOverhangBreaks + (bpIn[i] - '0'); + break; + default: + break; + } + } } - } - if (bpIn[bpChunkPositions[4] + 1] == '#') { - missingInfoBp = true; - } else { - if (bpIn[bpChunkPositions[4] + 1] != '.') { - string saStr{}; - for (auto i = bpChunkPositions[4] + 1; i < bpChunkPositions[5]; - ++i) { - if (bpIn[i] == ';') { - doubleSidedMatches.emplace_back(saStr); - saStr.clear(); - } else { - saStr.push_back(bpIn[i]); + // Parse column 5, which contains the left and right coverage. 
+ unsigned int side = 0; + for (auto i = leftRightCovStart; i < leftRightCovEnd; ++i) { + if (bpIn[i] == ',') { + ++side; + } else { + switch (side) { + case 0: + result.leftCoverage = + 10 * result.leftCoverage + (bpIn[i] - '0'); + break; + case 1: + result.rightCoverage = + 10 * result.rightCoverage + (bpIn[i] - '0'); + break; + default: + break; } } - SuppAlignment saTmp{saStr}; - if (saTmp.getChrIndex() < 1002) { - doubleSidedMatches.push_back(saTmp); + } + + // Some calculations. + auto shortClipTotal = result.normalSpans - std::min(result.leftCoverage, result.rightCoverage); + if (shortClipTotal > 0) { + result.normalSpans -= shortClipTotal; + if (result.pairedBreaksSoft > 0) { + result.pairedBreaksSoft += shortClipTotal; + } else { + result.unpairedBreaksSoft += shortClipTotal; } } - if (bpIn[bpChunkPositions[5] + 1] != '.') { - string saStr{}; - for (auto i = bpChunkPositions[5] + 1; i < bpChunkPositions[6]; - ++i) { - if (bpIn[i] == ';') { - supplementsPrimary.emplace_back(saStr); - saStr.clear(); - } else { - saStr.push_back(bpIn[i]); + + // Parse column 6 and 7, which contains the overhang information. 
+ if (bpIn[supportStart] == '#') { + result.missingInfoBp = true; + } else { + if (bpIn[supportStart] != '.') { + std::string saStr{}; + for (auto i = supportStart; i < supportEnd; ++i) { + if (bpIn[i] == ';') { + result.doubleSidedMatches.emplace_back(SuppAlignment::parseSaSupport(saStr)); + saStr.clear(); + } else { + saStr.push_back(bpIn[i]); + } + } + SuppAlignment saTmp = SuppAlignment::parseSaSupport(saStr); + + if (!chrConverter.isTechnical(saTmp.getChrIndex())) { + result.doubleSidedMatches.push_back(saTmp); } } - SuppAlignment saTmp{saStr}; - if (saTmp.getChrIndex() < 1002) { - supplementsPrimary.push_back(saTmp); + + // Column 7: supplementsPrimary + if (bpIn[supplementsPrimaryStart] != '.') { + std::string saStr{}; + for (auto i = supplementsPrimaryStart; i < supplementsPrimaryEnd; + ++i) { + if (bpIn[i] == ';') { + result.supplementsPrimary.emplace_back(SuppAlignment::parseSaSupport(saStr)); + saStr.clear(); + } else { + saStr.push_back(bpIn[i]); + } + } + SuppAlignment saTmp = SuppAlignment::parseSaSupport(saStr); + if (!chrConverter.isTechnical(saTmp.getChrIndex())) { + result.supplementsPrimary.push_back(saTmp); + } } - } - cleanUpVector(supplementsPrimary); - saHomologyClashSolver(); - if (!ignoreOverhang && bpIn[bpChunkPositions[6] + 1] != '.') { - string overhang{}; - for (auto i = bpChunkPositions[6] + 1; - i < static_cast(bpIn.length()); ++i) { - if (bpIn[i] == ';') { - consensusOverhangs.emplace_back(overhang); - overhang.clear(); - } else { - overhang.push_back(bpIn[i]); + result.cleanUpVector(result.supplementsPrimary); + + // Some calculations. 
+ result.saHomologyClashSolver(); + + // Column 8: significantOverhangs + if (!ignoreOverhang && bpIn[overhangsStart] != '.') { + std::string overhang{}; + for (auto i = overhangsStart; + i < static_cast(bpIn.length()); + ++i) { + if (bpIn[i] == ';') { + result.consensusOverhangs.emplace_back(overhang); + overhang.clear(); + } else { + overhang.push_back(bpIn[i]); + } } + result.consensusOverhangs.emplace_back(overhang); } - consensusOverhangs.emplace_back(overhang); } + return result; } -} -void -Breakpoint::saHomologyClashSolver() { - for (auto i = 0u; i < doubleSidedMatches.size(); ++i) { - if (!doubleSidedMatches[i].isDistant() || - doubleSidedMatches[i].getMateSupport() == 0) { - continue; - } - bool anyMatch{false}; - bool semiSuspiciousRescue{false}; - for (auto j = 0u; j < doubleSidedMatches.size(); ++j) { - if (j == i) { + + + void + Breakpoint::saHomologyClashSolver() { + for (auto i = 0u; i < doubleSidedMatches.size(); ++i) { + if (!doubleSidedMatches[i].isDistant() || + doubleSidedMatches[i].getMateSupport() == 0) { continue; } - if (doubleSidedMatches[i].saDistHomologyRescueCloseness( - doubleSidedMatches[j], 200000)) { - if (!semiSuspiciousRescue && - doubleSidedMatches[i].isSemiSuspicious() && - !doubleSidedMatches[j].isSemiSuspicious()) { - semiSuspiciousRescue = true; + bool anyMatch{false}; + bool semiSuspiciousRescue{false}; + for (auto j = 0u; j < doubleSidedMatches.size(); ++j) { + if (j == i) { + continue; } - anyMatch = true; - break; - } - } - if (!anyMatch) { - for (auto j = 0u; j < supplementsPrimary.size(); ++j) { if (doubleSidedMatches[i].saDistHomologyRescueCloseness( - supplementsPrimary[j], 200000)) { + // TODO Centralize + doubleSidedMatches[j], 200000)) { if (!semiSuspiciousRescue && doubleSidedMatches[i].isSemiSuspicious() && - !supplementsPrimary[j].isSemiSuspicious()) { + !doubleSidedMatches[j].isSemiSuspicious()) { semiSuspiciousRescue = true; } anyMatch = true; break; } } - } - if (anyMatch) { - 
doubleSidedMatches[i].padMateSupportHomologyRescue(); - if (semiSuspiciousRescue) { - doubleSidedMatches[i].setSemiSuspicious(false); + if (!anyMatch) { + for (auto j = 0u; j < supplementsPrimary.size(); ++j) { + if (doubleSidedMatches[i].saDistHomologyRescueCloseness( + // TODO Centralize + supplementsPrimary[j], 200000)) { + if (!semiSuspiciousRescue && + doubleSidedMatches[i].isSemiSuspicious() && + !supplementsPrimary[j].isSemiSuspicious()) { + semiSuspiciousRescue = true; + } + anyMatch = true; + break; + } + } + } + if (anyMatch) { + doubleSidedMatches[i].padMateSupportHomologyRescue(); + if (semiSuspiciousRescue) { + doubleSidedMatches[i].setSemiSuspicious(false); + } } } - } - for (auto i = 0u; i < supplementsPrimary.size(); ++i) { - if (!supplementsPrimary[i].isDistant() || - supplementsPrimary[i].getMateSupport() == 0) { - continue; - } - bool anyMatch{false}; - bool semiSuspiciousRescue{false}; - for (auto j = 0u; j < supplementsPrimary.size(); ++j) { - if (j == i) { + for (auto i = 0u; i < supplementsPrimary.size(); ++i) { + if (!supplementsPrimary[i].isDistant() || + supplementsPrimary[i].getMateSupport() == 0) { continue; } - if (supplementsPrimary[i].saDistHomologyRescueCloseness( - supplementsPrimary[j], 100000)) { - if (!semiSuspiciousRescue && - supplementsPrimary[i].isSemiSuspicious() && - !supplementsPrimary[j].isSemiSuspicious()) { - semiSuspiciousRescue = true; + bool anyMatch{false}; + bool semiSuspiciousRescue{false}; + for (auto j = 0u; j < supplementsPrimary.size(); ++j) { + if (j == i) { + continue; } - anyMatch = true; - break; - } - } - if (!anyMatch) { - for (auto j = 0u; j < doubleSidedMatches.size(); ++j) { if (supplementsPrimary[i].saDistHomologyRescueCloseness( - doubleSidedMatches[j], 100000)) { + // TODO Centralize this fuzziness cutoff + supplementsPrimary[j], 100000)) { if (!semiSuspiciousRescue && supplementsPrimary[i].isSemiSuspicious() && - !doubleSidedMatches[j].isSemiSuspicious()) { + 
!supplementsPrimary[j].isSemiSuspicious()) { semiSuspiciousRescue = true; } anyMatch = true; break; } } - } - if (anyMatch) { - supplementsPrimary[i].padMateSupportHomologyRescue(); - if (semiSuspiciousRescue) { - supplementsPrimary[i].setSemiSuspicious(false); + if (!anyMatch) { + for (auto j = 0u; j < doubleSidedMatches.size(); ++j) { + if (supplementsPrimary[i].saDistHomologyRescueCloseness( + // TODO Centralize + doubleSidedMatches[j], 100000)) { + if (!semiSuspiciousRescue && + supplementsPrimary[i].isSemiSuspicious() && + !doubleSidedMatches[j].isSemiSuspicious()) { + semiSuspiciousRescue = true; + } + anyMatch = true; + break; + } + } + } + if (anyMatch) { + supplementsPrimary[i].padMateSupportHomologyRescue(); + if (semiSuspiciousRescue) { + supplementsPrimary[i].setSemiSuspicious(false); + } } } } -} -SuppAlignment * -Breakpoint::searchFuzzySa(const SuppAlignment &fuzzySa) { - SuppAlignment *match = nullptr; - for (auto &saDouble : doubleSidedMatches) { - if (saDouble.saCloseness(fuzzySa, 1)) { - match = &saDouble; - return match; + SuppAlignment * + Breakpoint::searchFuzzySa(const SuppAlignment &fuzzySa) { + SuppAlignment *match = nullptr; + for (auto &saDouble : doubleSidedMatches) { + if (saDouble.saCloseness(fuzzySa, 1)) { + match = &saDouble; + return match; + } } - } - for (auto &saSingle : supplementsPrimary) { - if (saSingle.saCloseness(fuzzySa, 1)) { - match = &saSingle; - return match; + for (auto &saSingle : supplementsPrimary) { + if (saSingle.saCloseness(fuzzySa, 1)) { + match = &saSingle; + return match; + } } + return nullptr; } - return nullptr; -} } // namespace sophia - -/* namespace sophia */ diff --git a/src/BreakpointReduced.cpp b/src/BreakpointReduced.cpp index a0dc308..e69d7dd 100644 --- a/src/BreakpointReduced.cpp +++ b/src/BreakpointReduced.cpp @@ -22,185 +22,199 @@ * LICENSE: GPL */ +#include "global.h" #include "Breakpoint.h" -#include "ChrConverter.h" -#include "strtk.hpp" +#include "GlobalAppConfig.h" +#include "strtk-wrap.h" 
#include #include #include namespace sophia { -using namespace std; - -boost::format BreakpointReduced::doubleFormatter{"%.3f"}; -int BreakpointReduced::DEFAULTREADLENGTH{}; -double BreakpointReduced::CLONALITYSTRICTLOWTHRESHOLD{}; -double BreakpointReduced::ARTIFACTFREQHIGHTHRESHOLD{}; -string BreakpointReduced::PIDSINMREFSTR{}; - -sophia::BreakpointReduced::BreakpointReduced(const Breakpoint &tmpBp, - int lineIndexIn, - bool hasOverhangIn) - : hasOverhang{hasOverhangIn}, toRemove{false}, lineIndex{lineIndexIn}, - chrIndex{tmpBp.getChrIndex()}, pos{tmpBp.getPos()}, - normalSpans{tmpBp.getNormalSpans()}, - lowQualSpansSoft{tmpBp.getLowQualBreaksSoft()}, - lowQualSpansHard{tmpBp.getLowQualSpansHard()}, - unpairedBreaksSoft{tmpBp.getUnpairedBreaksSoft()}, - unpairedBreaksHard{tmpBp.getUnpairedBreaksHard()}, - breaksShortIndel{tmpBp.getBreaksShortIndel()}, - lowQualBreaksSoft{tmpBp.getLowQualBreaksSoft()}, - lowQualBreaksHard{tmpBp.getLowQualBreaksHard()}, - repetitiveOverhangBreaks{tmpBp.getRepetitiveOverhangBreaks()}, - pairedBreaksSoft{tmpBp.getPairedBreaksSoft()}, - pairedBreaksHard{tmpBp.getPairedBreaksHard()}, - mateSupport{tmpBp.getMateSupport()}, - leftCoverage{tmpBp.getLeftCoverage()}, - rightCoverage{tmpBp.getRightCoverage()}, mrefHits{MrefMatch{ - -1, - -1, - 10000, - {}, - }}, - germlineInfo{GermlineMatch{ - 0.0, - 0.0, - {}, - }}, - suppAlignments{} { - for (const auto &sa : tmpBp.getDoubleSidedMatches()) { - if (sa.getChrIndex() < 1002) { - suppAlignments.emplace_back(sa); - } - } - for (const auto &sa : tmpBp.getSupplementsPrimary()) { - if (sa.getChrIndex() < 1002) { - suppAlignments.emplace_back(sa); + boost::format BreakpointReduced::doubleFormatter{"%.3f"}; + + ChrSize BreakpointReduced::DEFAULT_READ_LENGTH{}; + + double BreakpointReduced::CLONALITY_STRICT_LOW_THRESHOLD{}; + + double BreakpointReduced::ARTIFACT_FREQ_HIGH_THRESHOLD{}; + + std::string BreakpointReduced::PIDS_IN_MREF_STR{}; + + sophia::BreakpointReduced::BreakpointReduced(const 
Breakpoint &tmpBp, + int lineIndexIn, + bool hasOverhangIn) + : hasOverhang{hasOverhangIn}, + toRemove{false}, + lineIndex{lineIndexIn}, + chrIndex{tmpBp.getChrIndex()}, + pos{tmpBp.getPos()}, + normalSpans{tmpBp.getNormalSpans()}, + lowQualSpansSoft{tmpBp.getLowQualBreaksSoft()}, + lowQualSpansHard{tmpBp.getLowQualSpansHard()}, + unpairedBreaksSoft{tmpBp.getUnpairedBreaksSoft()}, + unpairedBreaksHard{tmpBp.getUnpairedBreaksHard()}, + breaksShortIndel{tmpBp.getBreaksShortIndel()}, + lowQualBreaksSoft{tmpBp.getLowQualBreaksSoft()}, + lowQualBreaksHard{tmpBp.getLowQualBreaksHard()}, + repetitiveOverhangBreaks{tmpBp.getRepetitiveOverhangBreaks()}, + pairedBreaksSoft{tmpBp.getPairedBreaksSoft()}, + pairedBreaksHard{tmpBp.getPairedBreaksHard()}, + mateSupport{tmpBp.getMateSupport()}, + leftCoverage{tmpBp.getLeftCoverage()}, + rightCoverage{tmpBp.getRightCoverage()}, + mrefHits{MrefMatch{-1, -1, 10000, {}, }}, + germlineInfo{GermlineMatch{0.0, 0.0, {}, }}, + suppAlignments{} { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + for (const auto &sa : tmpBp.getDoubleSidedMatches()) { + if (!chrConverter.isTechnical(sa.getChrIndex())) { + suppAlignments.emplace_back(sa); + } } - } - complexRearrangementMateRatioRescue(true); - complexRearrangementMateRatioRescue(false); -} - -void -BreakpointReduced::complexRearrangementMateRatioRescue(bool encounteredM) { - auto candidateCount = 0; - auto cumulativeMateSupport = 0.0; - auto maxExpectedDiscordants = 0; - for (const auto &sa : suppAlignments) { - if (sa.isDistant() && sa.isEncounteredM() == encounteredM && - !sa.isSuspicious() && sa.getMateSupport() > 4) { - ++candidateCount; - if (candidateCount == 3) { - return; + for (const auto &sa : tmpBp.getSupplementsPrimary()) { + if (!chrConverter.isTechnical(sa.getChrIndex())) { + suppAlignments.emplace_back(sa); } - cumulativeMateSupport += sa.getMateSupport(); - maxExpectedDiscordants = - max(maxExpectedDiscordants, 
sa.getExpectedDiscordants()); } + complexRearrangementMateRatioRescue(true); + complexRearrangementMateRatioRescue(false); } - if (candidateCount == 2 && - cumulativeMateSupport / maxExpectedDiscordants > 0.7) { - for (auto &sa : suppAlignments) { + + void + BreakpointReduced::complexRearrangementMateRatioRescue(bool encounteredM) { + auto candidateCount = 0; + auto cumulativeMateSupport = 0.0; + auto maxExpectedDiscordants = 0; + for (const auto &sa : suppAlignments) { if (sa.isDistant() && sa.isEncounteredM() == encounteredM && !sa.isSuspicious() && sa.getMateSupport() > 4) { - sa.setExpectedDiscordants(sa.getMateSupport()); + ++candidateCount; + if (candidateCount == 3) { + return; + } + cumulativeMateSupport += sa.getMateSupport(); + maxExpectedDiscordants = + std::max(maxExpectedDiscordants, sa.getExpectedDiscordants()); } } - } -} - -sophia::BreakpointReduced::BreakpointReduced( - const SuppAlignmentAnno &sa, const BreakpointReduced &emittingBp, - bool fuzzySecondary) - : hasOverhang{false}, toRemove{false}, lineIndex{-1}, - chrIndex{sa.getChrIndex()}, pos{!fuzzySecondary ? 
sa.getPos() - : sa.getExtendedPos()}, - normalSpans{}, lowQualSpansSoft{}, lowQualSpansHard{}, - unpairedBreaksSoft{}, unpairedBreaksHard{}, breaksShortIndel{}, - lowQualBreaksSoft{}, lowQualBreaksHard{}, repetitiveOverhangBreaks{}, - pairedBreaksSoft{}, pairedBreaksHard{}, mateSupport{}, leftCoverage{}, - rightCoverage{}, mrefHits{MrefMatch{-1, -1, 10000, {}}}, - germlineInfo{GermlineMatch{0.0, 0.0, {}}}, suppAlignments{} { - addDummySa(sa, emittingBp); -} - -void -sophia::BreakpointReduced::addDummySa(const SuppAlignmentAnno &sa, - const BreakpointReduced &emittingBp) { - suppAlignments.emplace_back(emittingBp.getChrIndex(), emittingBp.getPos(), - sa); -} - -const SuppAlignmentAnno & -sophia::BreakpointReduced::getDummySa() { - return suppAlignments.back(); -} - -SuppAlignmentAnno * -BreakpointReduced::searchFuzzySa(const SuppAlignmentAnno &fuzzySa) { - SuppAlignmentAnno *match = nullptr; - for (auto &sa : suppAlignments) { - if (sa.saClosenessDirectional(fuzzySa, DEFAULTREADLENGTH * 0.2)) { - match = &sa; - return match; + if (candidateCount == 2 && + cumulativeMateSupport / maxExpectedDiscordants > 0.7) { + for (auto &sa : suppAlignments) { + if (sa.isDistant() && sa.isEncounteredM() == encounteredM && + !sa.isSuspicious() && sa.getMateSupport() > 4) { + sa.setExpectedDiscordants(sa.getMateSupport()); + } + } } } - return nullptr; -} -bool -BreakpointReduced::testOverhangBasedCandidacy() const { - if (pairedBreaksSoft > 0) { - return false; + sophia::BreakpointReduced::BreakpointReduced( + const SuppAlignmentAnno &sa, + const BreakpointReduced &emittingBp, + bool fuzzySecondary) + : hasOverhang{false}, + toRemove{false}, + lineIndex{-1}, + chrIndex{sa.getChrIndex()}, + pos{!fuzzySecondary ? 
sa.getPos(): sa.getExtendedPos()}, + normalSpans{}, + lowQualSpansSoft{}, + lowQualSpansHard{}, + unpairedBreaksSoft{}, + unpairedBreaksHard{}, + breaksShortIndel{}, + lowQualBreaksSoft{}, + lowQualBreaksHard{}, + repetitiveOverhangBreaks{}, + pairedBreaksSoft{}, + pairedBreaksHard{}, + mateSupport{}, + leftCoverage{}, + rightCoverage{}, + mrefHits{MrefMatch{-1, -1, 10000, {}}}, + germlineInfo{GermlineMatch{0.0, 0.0, {}}}, + suppAlignments{} { + addDummySa(sa, emittingBp); } - if (breaksShortIndel > 0) { - return false; + + void + sophia::BreakpointReduced::addDummySa(const SuppAlignmentAnno &sa, + const BreakpointReduced &emittingBp) { + suppAlignments.emplace_back(emittingBp.getChrIndex(), emittingBp.getPos(), + sa); } - if (unpairedBreaksSoft < 5) { - return false; + + const SuppAlignmentAnno & + sophia::BreakpointReduced::getDummySa() { + return suppAlignments.back(); } - if (((0.0 + unpairedBreaksSoft) / normalSpans) < - CLONALITYSTRICTLOWTHRESHOLD) { - return false; + + SuppAlignmentAnno * + BreakpointReduced::searchFuzzySa(const SuppAlignmentAnno &fuzzySa) { + SuppAlignmentAnno *match = nullptr; + for (auto &sa : suppAlignments) { + if (sa.saClosenessDirectional(fuzzySa, DEFAULT_READ_LENGTH * 0.2)) { + match = &sa; + return match; + } + } + return nullptr; + } + + bool + BreakpointReduced::testOverhangBasedCandidacy() const { + if (pairedBreaksSoft > 0) { + return false; + } + if (breaksShortIndel > 0) { + return false; + } + if (unpairedBreaksSoft < 5) { + return false; + } + if (((0.0 + unpairedBreaksSoft) / normalSpans) < + CLONALITY_STRICT_LOW_THRESHOLD) { + return false; + } + auto artifactTotal = + 0.0 + lowQualSpansSoft + lowQualBreaksSoft + repetitiveOverhangBreaks; + if ((artifactTotal / (unpairedBreaksSoft + artifactTotal)) > + ARTIFACT_FREQ_HIGH_THRESHOLD) { + return false; + } + return true; } - auto artifactTotal = - 0.0 + lowQualSpansSoft + lowQualBreaksSoft + repetitiveOverhangBreaks; - if ((artifactTotal / (unpairedBreaksSoft + 
artifactTotal)) > - ARTIFACTFREQHIGHTHRESHOLD) { - return false; + + std::string + BreakpointReduced::printOverhang(double germlineClonality, + int numHits, + const std::string &overhang) const { + std::string res{"##"}; + res.append(GlobalAppConfig::getInstance().getChrConverter().indexToChrName(chrIndex)).append("\t"); + res.append(strtk::type_to_string(pos - 1)).append("\t"); + res.append(strtk::type_to_string(pos)).append("\t"); + if (germlineClonality > 0.1) { + res.append("GERMLINE("); + } else { + res.append("SOMATIC("); + } + res.append(strtk::type_to_string(numHits)) + .append("/") + .append(PIDS_IN_MREF_STR) + .append("):"); + res.append(boost::str(doubleFormatter % germlineClonality)).append("\t"); + res.append(overhang).append("\n"); + return res; } - return true; -} - -string -BreakpointReduced::printOverhang(double germlineClonality, int numHits, - const string &overhang) const { - string res{"##"}; - res.append(ChrConverter::indexToChr[chrIndex]).append("\t"); - res.append(strtk::type_to_string(pos - 1)).append("\t"); - res.append(strtk::type_to_string(pos)).append("\t"); - if (germlineClonality > 0.1) { - res.append("GERMLINE("); - } else { - res.append("SOMATIC("); + + void + BreakpointReduced::removeMarkedFuzzies() { + suppAlignments.erase( + remove_if(suppAlignments.begin(), suppAlignments.end(), + [](const SuppAlignmentAnno &sa) { return sa.isToRemove(); }), + suppAlignments.end()); } - res.append(strtk::type_to_string(numHits)) - .append("/") - .append(PIDSINMREFSTR) - .append("):"); - res.append(boost::str(doubleFormatter % germlineClonality)).append("\t"); - res.append(overhang).append("\n"); - return res; -} - -void -BreakpointReduced::removeMarkedFuzzies() { - suppAlignments.erase( - remove_if(suppAlignments.begin(), suppAlignments.end(), - [](const SuppAlignmentAnno &sa) { return sa.isToRemove(); }), - suppAlignments.end()); -} } /* namespace sophia */ diff --git a/src/ChosenBp.cpp b/src/ChosenBp.cpp index ac519d1..5cafb64 100644 --- 
a/src/ChosenBp.cpp +++ b/src/ChosenBp.cpp @@ -27,39 +27,37 @@ namespace sophia { -using namespace std; + int ChosenBp::BP_SUPPORT_THRESHOLD{}; -int ChosenBp::BPSUPPORTTHRESHOLD{}; - -void -ChosenBp::addChildNode(int indexIn) { - childrenNodes.push_back(indexIn); -} + void + ChosenBp::addChildNode(int indexIn) { + childrenNodes.push_back(indexIn); + } -void -ChosenBp::addSupplementaryAlignments( - const vector &suppAlignments) { - for (const auto &sa : suppAlignments) { - auto it = find_if(supplementaryAlignments.begin(), - supplementaryAlignments.end(), - [&](const SuppAlignment &suppAlignment) { - return suppAlignment.saCloseness(sa, 5); - }); - if (it == supplementaryAlignments.end()) { - supplementaryAlignments.push_back(sa); - } else { - if (it->isFuzzy() && !sa.isFuzzy()) { - it->removeFuzziness(sa); - } else if (it->isFuzzy() && sa.isFuzzy()) { - it->extendSuppAlignment(sa.getPos(), sa.getExtendedPos()); - } - // it->addSupportingIndices(sa.getSupportingIndices()); - if (sa.getMapq() > it->getMapq()) { - it->setMapq(sa.getMapq()); + void + ChosenBp::addSupplementaryAlignments( + const std::vector &suppAlignments) { + for (const auto &sa : suppAlignments) { + auto it = find_if(supplementaryAlignments.begin(), + supplementaryAlignments.end(), + [&](const SuppAlignment &suppAlignment) { + return suppAlignment.saCloseness(sa, 5); + }); + if (it == supplementaryAlignments.end()) { + supplementaryAlignments.push_back(sa); + } else { + if (it->isFuzzy() && !sa.isFuzzy()) { + it->removeFuzziness(sa); + } else if (it->isFuzzy() && sa.isFuzzy()) { + it->extendSuppAlignment(sa.getPos(), sa.getExtendedPos()); + } + // it->addSupportingIndices(sa.getSupportingIndices()); + if (sa.getMapq() > it->getMapq()) { + it->setMapq(sa.getMapq()); + } + it->incrementDistinctReads(); } - it->incrementDistinctReads(); } } -} } // namespace sophia diff --git a/src/ChrCategory.cpp b/src/ChrCategory.cpp new file mode 100644 index 0000000..2b5bbd6 --- /dev/null +++ b/src/ChrCategory.cpp 
@@ -0,0 +1,93 @@ +#include "global.h" +#include "ChrCategory.h" +#include +#include +#include +#include +#include +#include +#include + +namespace sophia { + + const boost::unordered::unordered_map ChrCategory::categories = { + {"AUTOSOME", ChrCategory("AUTOSOME", 0)}, + {"X", ChrCategory("X", 1)}, + {"Y", ChrCategory("Y", 2)}, + {"EXTRACHROMOSOMAL", ChrCategory("EXTRACHROMOSOMAL", 3)}, + {"UNASSIGNED", ChrCategory("UNASSIGNED", 4)}, + {"ALT", ChrCategory("ALT", 5)}, + {"HLA", ChrCategory("HLA", 6)}, + {"VIRUS", ChrCategory("VIRUS", 7)}, + {"DECOY", ChrCategory("DECOY", 8)}, + {"TECHNICAL", ChrCategory("TECHNICAL", 9)} + }; + + const ChrCategory& ChrCategory::AUTOSOME = ChrCategory::categories.at("AUTOSOME"); + const ChrCategory& ChrCategory::X = ChrCategory::categories.at("X"); + const ChrCategory& ChrCategory::Y = ChrCategory::categories.at("Y"); + const ChrCategory& ChrCategory::EXTRACHROMOSOMAL = ChrCategory::categories.at("EXTRACHROMOSOMAL"); + const ChrCategory& ChrCategory::UNASSIGNED = ChrCategory::categories.at("UNASSIGNED"); + const ChrCategory& ChrCategory::ALT = ChrCategory::categories.at("ALT"); + const ChrCategory& ChrCategory::HLA = ChrCategory::categories.at("HLA"); + const ChrCategory& ChrCategory::VIRUS = ChrCategory::categories.at("VIRUS"); + const ChrCategory& ChrCategory::DECOY = ChrCategory::categories.at("DECOY"); + const ChrCategory& ChrCategory::TECHNICAL = ChrCategory::categories.at("TECHNICAL"); + + + const std::vector ChrCategory::sorted_categories = { + ChrCategory::AUTOSOME, + ChrCategory::X, + ChrCategory::Y, + ChrCategory::EXTRACHROMOSOMAL, + ChrCategory::UNASSIGNED, + ChrCategory::ALT, + ChrCategory::HLA, + ChrCategory::VIRUS, + ChrCategory::DECOY, + ChrCategory::TECHNICAL + }; + + ChrCategory::ChrCategory(const std::string &in, size_type index) + : category_name { in }, + category_index { index } {} + + ChrCategory::~ChrCategory() {} + + const ChrCategory& ChrCategory::from_string(const std::string &in) { + std::string normalizedIn 
= boost::algorithm::to_upper_copy(in); + if (categories.find(normalizedIn) == categories.end()) { + throw_with_trace(std::invalid_argument("Unknown chromosome category: '" + in + "'")); + } + return categories.at(normalizedIn); + } + + std::string ChrCategory::getName() const { + return category_name; + } + + ChrCategory::size_type ChrCategory::numCategories() { + return categories.size(); + } + + const std::vector& ChrCategory::getCategories() { + return sorted_categories; + } + + bool ChrCategory::operator==(const ChrCategory &other) const { + return category_index == other.category_index; + } + + bool ChrCategory::operator!=(const ChrCategory &other) const { + return category_index != other.category_index; + } + + bool ChrCategory::operator<(const ChrCategory &other) const { + return category_index < other.category_index; + } + + bool ChrCategory::operator>(const ChrCategory &other) const { + return category_index > other.category_index; + } + +} // namespace sophia diff --git a/src/ChrConverter.cpp b/src/ChrConverter.cpp index a42e50b..262c35d 100644 --- a/src/ChrConverter.cpp +++ b/src/ChrConverter.cpp @@ -1,11 +1,5 @@ /* - * ChrConverter.cpp - * - * Created on: 28 Dec 2017 - * Author: Umut H. Toprak, DKFZ Heidelberg (Divisions of Theoretical - * Bioinformatics, Bioinformatics and Omics Data Analytics and currently - * Neuroblastoma Genomics) Copyright (C) 2018 Umut H. Toprak, Matthias - * Schlesner, Roland Eils and DKFZ Heidelberg + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,290 +13,28 @@ * * You should have received a copy of the GNU General Public License * along with this program. If not, see . 
- * LICENSE: GPL + * LICENSE: GPL */ -#include +#include "ChrConverter.h" +#include +#include +#include namespace sophia { -using namespace std; + ChrConverter::ChrConverter(const std::string &assemblyNameIn) : + assemblyName(assemblyNameIn) {} -const array ChrConverter::indexToChr{ - "0", "1", "2", "3", "4", - "5", "6", "7", "8", "9", - "10", "11", "12", "13", "14", - "15", "16", "17", "18", "19", - "20", "21", "22", "23", "24", - "25", "26", "27", "28", "29", - "30", "31", "32", "33", "34", - "35", "36", "37", "38", "39", - "X", "Y", "42", "43", "44", - "45", "46", "47", "48", "49", - "50", "51", "52", "53", "54", - "55", "56", "57", "58", "59", - "60", "61", "62", "63", "64", - "65", "66", "67", "68", "69", - "70", "71", "72", "73", "74", - "75", "76", "77", "78", "79", - "80", "81", "82", "83", "84", - "85", "86", "87", "88", "89", - "90", "91", "92", "93", "94", - "95", "96", "97", "98", "99", - "100", "101", "102", "103", "104", - "105", "106", "107", "108", "109", - "110", "111", "112", "113", "114", - "115", "116", "117", "118", "119", - "120", "121", "122", "123", "124", - "125", "126", "127", "128", "129", - "130", "131", "132", "133", "134", - "135", "136", "137", "138", "139", - "140", "141", "142", "143", "144", - "145", "146", "147", "148", "149", - "150", "151", "152", "153", "154", - "155", "156", "157", "158", "159", - "160", "161", "162", "163", "164", - "165", "166", "167", "168", "169", - "170", "171", "172", "173", "174", - "175", "176", "177", "178", "179", - "180", "181", "182", "183", "184", - "185", "186", "187", "188", "189", - "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1", - "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1", - "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1", - "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1", - "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1", - "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", 
"GL000219.1", - "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1", - "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1", - "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1", - "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1", - "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1", - "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1", - "250", "251", "252", "253", "254", - "255", "256", "257", "258", "259", - "260", "261", "262", "263", "264", - "265", "266", "267", "268", "269", - "270", "271", "272", "273", "274", - "275", "276", "277", "278", "279", - "280", "281", "282", "283", "284", - "285", "286", "287", "288", "289", - "290", "291", "292", "293", "294", - "295", "296", "297", "298", "299", - "300", "301", "302", "303", "304", - "305", "306", "307", "308", "309", - "310", "311", "312", "313", "314", - "315", "316", "317", "318", "319", - "320", "321", "322", "323", "324", - "325", "326", "327", "328", "329", - "330", "331", "332", "333", "334", - "335", "336", "337", "338", "339", - "340", "341", "342", "343", "344", - "345", "346", "347", "348", "349", - "350", "351", "352", "353", "354", - "355", "356", "357", "358", "359", - "360", "361", "362", "363", "364", - "365", "366", "367", "368", "369", - "370", "371", "372", "373", "374", - "375", "376", "377", "378", "379", - "380", "381", "382", "383", "384", - "385", "386", "387", "388", "389", - "390", "391", "392", "393", "394", - "395", "396", "397", "398", "399", - "400", "401", "402", "403", "404", - "405", "406", "407", "408", "409", - "410", "411", "412", "413", "414", - "415", "416", "417", "418", "419", - "420", "421", "422", "423", "424", - "425", "426", "427", "428", "429", - "430", "431", "432", "433", "434", - "435", "436", "437", "438", "439", - "440", "441", "442", "443", "444", - "445", "446", "447", "448", "449", - "450", "451", "452", "453", "454", - "455", "456", "457", "458", "459", 
- "460", "461", "462", "463", "464", - "465", "466", "467", "468", "469", - "470", "471", "472", "473", "474", - "475", "476", "477", "478", "479", - "480", "481", "482", "483", "484", - "485", "486", "487", "488", "489", - "490", "491", "492", "493", "494", - "495", "496", "497", "498", "499", - "500", "501", "502", "503", "504", - "505", "506", "507", "508", "509", - "510", "511", "512", "513", "514", - "515", "516", "517", "518", "519", - "520", "521", "522", "523", "524", - "525", "526", "527", "528", "529", - "530", "531", "532", "533", "534", - "535", "536", "537", "538", "539", - "540", "541", "542", "543", "544", - "545", "546", "547", "548", "549", - "550", "551", "552", "553", "554", - "555", "556", "557", "558", "559", - "560", "561", "562", "563", "564", - "565", "566", "567", "568", "569", - "570", "571", "572", "573", "574", - "575", "576", "577", "578", "579", - "580", "581", "582", "583", "584", - "585", "586", "587", "588", "589", - "590", "591", "592", "593", "594", - "595", "596", "597", "598", "599", - "600", "601", "602", "603", "604", - "605", "606", "607", "608", "609", - "610", "611", "612", "613", "614", - "615", "616", "617", "618", "619", - "620", "621", "622", "623", "624", - "625", "626", "627", "628", "629", - "630", "631", "632", "633", "634", - "635", "636", "637", "638", "639", - "640", "641", "642", "643", "644", - "645", "646", "647", "648", "649", - "650", "651", "652", "653", "654", - "655", "656", "657", "658", "659", - "660", "661", "662", "663", "664", - "665", "666", "667", "668", "669", - "670", "671", "672", "673", "674", - "675", "676", "677", "678", "679", - "680", "681", "682", "683", "684", - "685", "686", "687", "688", "689", - "690", "691", "692", "693", "694", - "695", "696", "697", "698", "699", - "700", "701", "702", "703", "704", - "705", "706", "707", "708", "709", - "710", "711", "712", "713", "714", - "715", "716", "717", "718", "719", - "720", "721", "722", "723", "724", - "725", "726", "727", "728", "729", - 
"730", "731", "732", "733", "734", - "735", "736", "737", "738", "739", - "740", "741", "742", "743", "744", - "745", "746", "747", "748", "749", - "750", "751", "752", "753", "754", - "755", "756", "757", "758", "759", - "760", "761", "762", "763", "764", - "765", "766", "767", "768", "769", - "770", "771", "772", "773", "774", - "775", "776", "777", "778", "779", - "780", "781", "782", "783", "784", - "785", "786", "787", "788", "789", - "790", "791", "792", "793", "794", - "795", "796", "797", "798", "799", - "800", "801", "802", "803", "804", - "805", "806", "807", "808", "809", - "810", "811", "812", "813", "814", - "815", "816", "817", "818", "819", - "820", "821", "822", "823", "824", - "825", "826", "827", "828", "829", - "830", "831", "832", "833", "834", - "835", "836", "837", "838", "839", - "840", "841", "842", "843", "844", - "845", "846", "847", "848", "849", - "850", "851", "852", "853", "854", - "855", "856", "857", "858", "859", - "860", "861", "862", "863", "864", - "865", "866", "867", "868", "869", - "870", "871", "872", "873", "874", - "875", "876", "877", "878", "879", - "880", "881", "882", "883", "884", - "885", "886", "887", "888", "889", - "890", "891", "892", "893", "894", - "895", "896", "897", "898", "899", - "900", "901", "902", "903", "904", - "905", "906", "907", "908", "909", - "910", "911", "912", "913", "914", - "915", "916", "917", "918", "919", - "920", "921", "922", "923", "924", - "925", "926", "927", "928", "929", - "930", "931", "932", "933", "934", - "935", "936", "937", "938", "939", - "940", "941", "942", "943", "944", - "945", "946", "947", "948", "949", - "950", "951", "952", "953", "954", - "955", "956", "957", "958", "959", - "960", "961", "962", "963", "964", - "965", "966", "967", "968", "969", - "970", "971", "972", "973", "974", - "975", "976", "977", "978", "979", - "980", "981", "982", "983", "984", - "985", "986", "987", "988", "989", - "990", "991", "992", "993", "994", - "995", "996", "997", "998", "hs37d5", 
- "NC_007605", "MT", "phiX174", "INVALID"}; + ChrConverter::~ChrConverter() {} -const array ChrConverter::indexToChrCompressedMref{ - "1", "2", "3", "4", "5", - "6", "7", "8", "9", "10", - "11", "12", "13", "14", "15", - "16", "17", "18", "19", "20", - "21", "22", "X", "Y", "GL000191.1", - "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1", - "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1", - "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1", - "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1", - "GL000212.1", "GL000213.1", "GL000214.1", "GL000215.1", "GL000216.1", - "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1", - "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1", - "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1", - "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1", - "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1", - "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1", - "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"}; + std::string ChrConverter::getAssemblyName() const { + return assemblyName; + } -const array ChrConverter::indexConverter{ - -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 20, 21, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, 22, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, 
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, - 80, 81, 82, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, 
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 83, 84, -2, -2, -2}; + bool ChrConverter::isInBlockedRegion(ChrIndex chrIndex [[gnu::unused]], + ChrSize position [[gnu::unused]]) const { + return false; + } } /* namespace sophia */ diff --git a/src/ChrInfo.cpp b/src/ChrInfo.cpp new file mode 100644 index 0000000..9b3f74a --- /dev/null +++ b/src/ChrInfo.cpp @@ -0,0 +1,111 @@ +#include "global.h" +#include "ChrInfo.h" +#include "rapidcsv.h" +#include +#include +#include +#include +#include + + +namespace sophia { + + ChrInfo::ChrInfo(ChrName _name, + ChrSize _size, + bool _compressedMref, + ChrCategory _category) + : name {_name}, + size {_size}, + compressedMref 
{_compressedMref}, + category {_category} { + if (name.size() == 0) { + throw std::invalid_argument("ChrInfo: name cannot be empty"); + } + if (size <= 0) { + throw std::invalid_argument("ChrInfo: size must be larger than zero for '" + name + "'"); + } + } + + ChrName ChrInfo::getName() const { + return name; + } + + ChrSize ChrInfo::getSize() const { + return size; + } + + bool ChrInfo::isCompressedMref() const { + return compressedMref; + } + + ChrCategory ChrInfo::getCategory() const { + return category; + } + + // Parses a boolean token case-insensitively (true/t/1/yes/y or false/f/0/no/n); throws std::invalid_argument for anything else. + bool to_boolean (const std::string& str) { + static const boost::unordered::unordered_set trues { + "true", + "t", + "1", + "yes", + "y" + }; + static const boost::unordered::unordered_set falses { + "false", + "f", + "0", + "no", + "n" + }; + std::string lowercaseStr = str; + boost::algorithm::to_lower(lowercaseStr); + bool result; + if (trues.contains(lowercaseStr)) { + result = true; + } else if (falses.contains(lowercaseStr)) { + result = false; + } else { + throw_with_trace(std::invalid_argument("Could not parse boolean from '" + str + "'")); + } + return result; + } + + std::vector read_chr_info(std::istream &in) { + rapidcsv::Document doc(in, + rapidcsv::LabelParams(0, -1), + rapidcsv::SeparatorParams('\t')); + + // Used for checking uniqueness of chromosome names + boost::unordered::unordered_set names; + names.reserve(doc.GetRowCount()); + + // Convert the file into a vector of ChrInfo.
+ std::vector chr_info; + chr_info.reserve(doc.GetRowCount()); + for (size_t i = 0; i < doc.GetRowCount(); ++i) { + ChrName name = doc.GetCell("chromosome", i); + if (!names.insert(name).second) { // insert() reports false when the name was already seen in an earlier row + throw_with_trace(std::invalid_argument( + "Chromosome name '" + name + "' is not unique.")); + } + ChrSize size = doc.GetCell("size", i); + std::string category_string = doc.GetCell("category", i); + ChrCategory category = ChrCategory::from_string(category_string); + bool compressedMref = to_boolean(doc.GetCell("compressedMref", i)); + chr_info.emplace_back(ChrInfo(name, size, compressedMref, category)); + } + + return chr_info; + } + + + std::vector read_chr_info(const std::string &filename) { + std::ifstream in(filename); + if (!in) { + throw_with_trace(std::invalid_argument("Cannot open file '" + filename + "'")); + } + return read_chr_info(in); + } + +} // namespace sophia diff --git a/src/ChrInfoTable.cpp b/src/ChrInfoTable.cpp new file mode 100644 index 0000000..ac4276e --- /dev/null +++ b/src/ChrInfoTable.cpp @@ -0,0 +1,96 @@ +#include "ChrInfoTable.h" +#include +#include +#include + + +namespace sophia { + + /** Just a helper for the constructor. */ + boost::unordered::unordered_map> + ChrInfoTable::buildChrInfosByCategory(const std::vector &chr_info) { + // There must be a vector of `ChrInfo` for each existing category. + boost::unordered::unordered_map> result; + result.reserve(ChrCategory::numCategories()); + for (auto category : ChrCategory::getCategories()) { + result.try_emplace(category, std::vector()); + } + // Elements are added one by one to their corresponding category vector. + for (const auto &info : chr_info) { + result.at(info.getCategory()).push_back(info); + } + return result; + } + + /** Create the map of `ChrName` to `ChrInfo` and while doing that check for duplicate + * chromosome names.
*/ + boost::unordered::unordered_map + ChrInfoTable::buildChrInfosByName(const std::vector &chr_info) { + boost::unordered::unordered_map result; + result.reserve(chr_info.size()); + for (const auto &info : chr_info) { + if (result.contains(info.getName())) { + throw_with_trace(std::invalid_argument("Duplicate chromosome name '" + + info.getName() + "'")); + } + result.try_emplace(info.getName(), info); + } + return result; + } + + ChrInfoTable::ChrInfoTable(const std::vector &chr_infos) + : chrInfos { chr_infos }, + chrInfosByCategory { buildChrInfosByCategory(chr_infos) }, + chrInfosByName { buildChrInfosByName(chr_infos) } { + + } + + ChrIndex ChrInfoTable::nChromosomes() const { + return ChrIndex(chrInfos.size()); + } + + const std::vector& ChrInfoTable::getChrInfos() const { + return chrInfos; + } + + const std::vector &ChrInfoTable::getChrInfos(ChrCategory category) const { + return chrInfosByCategory.at(category); + } + + ChrInfoTable::ChrNames ChrInfoTable::getNames() const { + ChrNames result; + std::transform(chrInfos.begin(), + chrInfos.end(), + std::back_inserter(result), + [](const ChrInfo &info) { return info.getName(); }); + return result; + } + + ChrInfoTable::ChrNames ChrInfoTable::getNames(ChrCategory category) const { + ChrNames result; + std::transform(chrInfosByCategory.at(category).begin(), + chrInfosByCategory.at(category).end(), + std::back_inserter(result), + [](const ChrInfo &info) { return info.getName(); }); + return result; + } + + ChrInfoTable::ChrSizes ChrInfoTable::getSizes() const { + ChrSizes result; + std::transform(chrInfos.begin(), + chrInfos.end(), + std::back_inserter(result), + [](const ChrInfo &info) { return info.getSize(); }); + return result; + } + + ChrInfoTable::ChrSizes ChrInfoTable::getSizes(ChrCategory category) const { + ChrSizes result; + std::transform(chrInfosByCategory.at(category).begin(), + chrInfosByCategory.at(category).end(), + std::back_inserter(result), + [](const ChrInfo &info) { return info.getSize(); 
}); + return result; + } + +} // namespace sophia diff --git a/src/DeFuzzier.cpp b/src/DeFuzzier.cpp index 42f833c..4b55c48 100644 --- a/src/DeFuzzier.cpp +++ b/src/DeFuzzier.cpp @@ -25,209 +25,233 @@ namespace sophia { - using namespace std; + DeFuzzier::DeFuzzier(ChrSize maxDistanceIn, bool mrefModeIn) + : MAX_DISTANCE { maxDistanceIn }, + MREF_MODE { mrefModeIn } {} -DeFuzzier::DeFuzzier(int maxDistanceIn, bool mrefModeIn) : - MAXDISTANCE { maxDistanceIn }, - MREFMODE { mrefModeIn } { -} + void DeFuzzier::deFuzzyDb(std::vector& bps) const { + for (auto it = bps.begin(); it != bps.end(); ++it) { + for (auto &sa : it->getSupplementsPtr()) { + if (sa->isFuzzy()) { + auto saTmp = sa; + processFuzzySa(bps, it, saTmp); + } + } + it->removeMarkedFuzzies(); + } + for (auto &bp : bps) { + bp.removeMarkedFuzzies(); + } + } -void DeFuzzier::deFuzzyDb(vector& bps) const { - for (auto it = bps.begin(); it != bps.end(); ++it) { - for (auto &sa : it->getSupplementsPtr()) { - if (sa->isFuzzy()) { - auto saTmp = sa; - processFuzzySa(bps, it, saTmp); - } - } - it->removeMarkedFuzzies(); - } - for (auto &bp : bps) { - bp.removeMarkedFuzzies(); - } -} + void DeFuzzier::processFuzzySa(std::vector& bps, + std::vector::iterator startingIt, + SuppAlignmentAnno* startingSa) const { + auto consensusSa = startingSa; + std::vector processedSas { startingSa }; + if (!startingSa->isEncounteredM()) { + dbSweep(bps, startingIt, 1, consensusSa, processedSas); + dbSweep(bps, startingIt, -1, consensusSa, processedSas); + } else { + dbSweep(bps, startingIt, -1, consensusSa, processedSas); + dbSweep(bps, startingIt, 1, consensusSa, processedSas); + } + selectBestSa(processedSas, consensusSa); + } -void DeFuzzier::processFuzzySa(vector& bps, vector::iterator startingIt, SuppAlignmentAnno* startingSa) const { - auto consensusSa = startingSa; - vector processedSas { startingSa }; - if (!startingSa->isEncounteredM()) { - dbSweep(bps, startingIt, 1, consensusSa, processedSas); - dbSweep(bps, startingIt, 
-1, consensusSa, processedSas); - } else { - dbSweep(bps, startingIt, -1, consensusSa, processedSas); - dbSweep(bps, startingIt, 1, consensusSa, processedSas); - } - selectBestSa(processedSas, consensusSa); -} - -void DeFuzzier::dbSweep(vector& bps, vector::iterator startingIt, int increment, SuppAlignmentAnno* consensusSa, vector& processedSas) const { - auto it = startingIt; - if (it == bps.begin() || it == bps.end()) { - return; - } - advance(it, increment); - while (it != bps.begin() && it != bps.end()) { - auto res = it->searchFuzzySa(*consensusSa); - if (!res && abs(startingIt->getPos() - it->getPos()) > MAXDISTANCE) { - break; - } else { - if (res) { - processedSas.push_back(res); - if (res->isFuzzy()) { - consensusSa->extendSuppAlignment(min(res->getPos(), consensusSa->getPos()), max(res->getExtendedPos(), consensusSa->getExtendedPos())); - } - } - } - advance(it, increment); - } -} + void DeFuzzier::dbSweep(std::vector& bps, + std::vector::iterator startingIt, + int increment, + SuppAlignmentAnno* consensusSa, + std::vector& processedSas) const { + auto it = startingIt; + if (it == bps.begin() || it == bps.end()) { + return; + } + advance(it, increment); + while (it != bps.begin() && it != bps.end()) { + auto res = it->searchFuzzySa(*consensusSa); + if (!res && ChrSize(abs(static_cast(startingIt->getPos()) - static_cast(it->getPos()))) > MAX_DISTANCE) { + break; + } else { + if (res) { + processedSas.push_back(res); + if (res->isFuzzy()) { + consensusSa->extendSuppAlignment(std::min(res->getPos(), consensusSa->getPos()), + std::max(res->getExtendedPos(), consensusSa->getExtendedPos())); + } + } + } + advance(it, increment); + } + } -void DeFuzzier::selectBestSa(vector& processedSas, SuppAlignmentAnno* consensusSa) const { - auto maxMateScore = -1; - auto maxExpectedDiscordants = -1; - auto index = 0; - vector nonFuzzyIndices { }; - for (auto &sa : processedSas) { - if (sa->getMateSupport() > maxMateScore) { - maxMateScore = sa->getMateSupport(); - } - if 
(sa->getExpectedDiscordants() > maxExpectedDiscordants) { - maxExpectedDiscordants = sa->getExpectedDiscordants(); - } - if (!sa->isFuzzy()) { - nonFuzzyIndices.push_back(index); - } - sa->setToRemove(true); - ++index; - } - SuppAlignmentAnno* selectedSa = nullptr; - if (!nonFuzzyIndices.empty()) { - auto bestElement = max_element(nonFuzzyIndices.begin(), nonFuzzyIndices.end(), // - [&](int a, int b) {return processedSas[a]->getSupport()+processedSas[a]->getSecondarySupport() < processedSas[b]->getSupport()+processedSas[b]->getSecondarySupport();}); - selectedSa = processedSas[*bestElement]; - } else { - auto bestElement = max_element(processedSas.begin(), processedSas.end(), // - [&](SuppAlignmentAnno* a, SuppAlignmentAnno* b) {return a->getMateSupport() < b->getMateSupport();}); - selectedSa = *bestElement; - selectedSa->extendSuppAlignment(consensusSa->getPos(), consensusSa->getExtendedPos()); - } - selectedSa->setToRemove(false); - selectedSa->setMateSupport(maxMateScore); - selectedSa->setExpectedDiscordants(maxExpectedDiscordants); -} + void DeFuzzier::selectBestSa(std::vector& processedSas, + SuppAlignmentAnno* consensusSa) const { + auto maxMateScore = -1; + auto maxExpectedDiscordants = -1; + auto index = 0; + std::vector nonFuzzyIndices { }; + for (auto &sa : processedSas) { + if (sa->getMateSupport() > maxMateScore) { + maxMateScore = sa->getMateSupport(); + } + if (sa->getExpectedDiscordants() > maxExpectedDiscordants) { + maxExpectedDiscordants = sa->getExpectedDiscordants(); + } + if (!sa->isFuzzy()) { + nonFuzzyIndices.push_back(index); + } + sa->setToRemove(true); + ++index; + } + SuppAlignmentAnno* selectedSa = nullptr; + if (!nonFuzzyIndices.empty()) { + auto bestElement = max_element( + nonFuzzyIndices.begin(), + nonFuzzyIndices.end(), // + [&](unsigned int a, unsigned int b) { + return (processedSas[a]->getSupport() + processedSas[a]->getSecondarySupport()) + < (processedSas[b]->getSupport() + processedSas[b]->getSecondarySupport()); + }); + 
selectedSa = processedSas[(size_t) *bestElement]; + } else { + auto bestElement = max_element(processedSas.begin(), processedSas.end(), + [&](SuppAlignmentAnno* a, SuppAlignmentAnno* b) {return a->getMateSupport() < b->getMateSupport();}); + selectedSa = *bestElement; + selectedSa->extendSuppAlignment(consensusSa->getPos(), consensusSa->getExtendedPos()); + } + selectedSa->setToRemove(false); + selectedSa->setMateSupport(maxMateScore); + selectedSa->setExpectedDiscordants(maxExpectedDiscordants); + } -void DeFuzzier::deFuzzyDb(vector& bps) const { - for (auto it = bps.begin(); it != bps.end(); ++it) { - if (it->getPos() == -1) { - continue; - } - for (auto &sa : it->getSupplementsPtr()) { - if (sa->isToRemove()) { - continue; - } - if (sa->isFuzzy() || sa->isStrictFuzzy()) { - auto saTmp = sa; - processFuzzySa(bps, it, saTmp); - } - } - it->removeMarkedFuzzies(); - if (it->getValidityScore() == 0 && it->getSuppAlignments().empty()) { - it->setAsInvalid(); - } - } - for (auto &bp : bps) { - if (bp.getValidityScore() == -1 || bp.getPos() == -1) { - bp.setAsInvalid(); - } - } -} + void DeFuzzier::deFuzzyDb(std::vector& bps) const { + for (auto it = bps.begin(); it != bps.end(); ++it) { + if (!it->isValid()) { + continue; + } + for (auto &sa : it->getSupplementsPtr()) { + if (sa->isToRemove()) { + continue; + } + if (sa->isFuzzy() || sa->isStrictFuzzy()) { + auto saTmp = sa; + processFuzzySa(bps, it, saTmp); + } + } + it->removeMarkedFuzzies(); + if (it->getValidityScore() == 0 && it->getSuppAlignments().empty()) { + it->setAsInvalid(); + } + } + for (MrefEntry &bp : bps) { + if (bp.getValidityScore() == -1 || !bp.isValid()) { + bp.setAsInvalid(); + } + } + } -void DeFuzzier::processFuzzySa(vector& bps, vector::iterator startingIt, SuppAlignmentAnno* startingSa) const { - auto consensusSa = startingSa; - unordered_set fileIndices { }; - for (auto fileIndex : startingIt->getFileIndices()) { - fileIndices.insert(fileIndex); - } - vector processedSas { startingSa }; - if 
(!startingSa->isEncounteredM()) { - dbSweep(bps, startingIt, fileIndices, 1, consensusSa, processedSas); - dbSweep(bps, startingIt, fileIndices, -1, consensusSa, processedSas); - } else { - dbSweep(bps, startingIt, fileIndices, -1, consensusSa, processedSas); - dbSweep(bps, startingIt, fileIndices, 1, consensusSa, processedSas); - } - selectBestSa(processedSas, consensusSa, fileIndices); -} + void DeFuzzier::processFuzzySa(std::vector& bps, + std::vector::iterator startingIt, + SuppAlignmentAnno* startingSa) const { + SuppAlignmentAnno* consensusSa = startingSa; + std::unordered_set fileIndices { }; + for (auto fileIndex : startingIt->getFileIndices()) { + fileIndices.insert(fileIndex); + } + std::vector processedSas { startingSa }; + if (!startingSa->isEncounteredM()) { + dbSweep(bps, startingIt, fileIndices, 1, consensusSa, processedSas); + dbSweep(bps, startingIt, fileIndices, -1, consensusSa, processedSas); + } else { + dbSweep(bps, startingIt, fileIndices, -1, consensusSa, processedSas); + dbSweep(bps, startingIt, fileIndices, 1, consensusSa, processedSas); + } + selectBestSa(processedSas, consensusSa, fileIndices); + } -void DeFuzzier::dbSweep(vector& bps, vector::iterator startingIt, unordered_set& fileIndices, int increment, SuppAlignmentAnno* consensusSa, vector& processedSas) const { - auto it = startingIt; - if (it == bps.begin() || it == bps.end()) { - return; - } - advance(it, increment); - while (it != bps.begin() && it != bps.end()) { - if (it->getPos() != -1) { - auto res = it->searchFuzzySa(*consensusSa); - if (!res && abs(startingIt->getPos() - it->getPos()) > MAXDISTANCE) { - break; - } else { - if (res) { - for (auto fileIndex : it->getFileIndices()) { - fileIndices.insert(fileIndex); - } - processedSas.push_back(res); - if (res->isFuzzy()) { - consensusSa->extendSuppAlignment(min(res->getPos(), consensusSa->getPos()), max(res->getExtendedPos(), consensusSa->getExtendedPos())); - } - } - } - } - advance(it, increment); - } -} + void 
DeFuzzier::dbSweep(std::vector& bps, + std::vector::iterator startingIt, + std::unordered_set& fileIndices, + int increment, + SuppAlignmentAnno* consensusSa, + std::vector& processedSas) const { + auto it = startingIt; + if (it == bps.begin() || it == bps.end()) { + return; + } + advance(it, increment); + while (it != bps.begin() && it != bps.end()) { + if (it->isValid()) { + auto res = it->searchFuzzySa(*consensusSa); + if (!res && abs(static_cast(startingIt->getPos()) - static_cast(it->getPos())) > static_cast(MAX_DISTANCE)) { + break; + } else { + if (res) { + for (auto fileIndex : it->getFileIndices()) { + fileIndices.insert(fileIndex); + } + processedSas.push_back(res); + if (res->isFuzzy()) { + consensusSa->extendSuppAlignment(std::min(res->getPos(), consensusSa->getPos()), + std::max(res->getExtendedPos(), consensusSa->getExtendedPos())); + } + } + } + } + advance(it, increment); + } + } -void DeFuzzier::selectBestSa(vector& processedSas, SuppAlignmentAnno* consensusSa, const unordered_set& fileIndices) const { - auto maxMateScore = -1; - auto maxExpectedDiscordants = -1; - auto index = 0; - vector nonFuzzyIndices { }; - for (auto &sa : processedSas) { - if (sa->getMateSupport() > maxMateScore) { - maxMateScore = sa->getMateSupport(); - } - if (sa->getExpectedDiscordants() > maxExpectedDiscordants) { - maxExpectedDiscordants = sa->getExpectedDiscordants(); - } - if (!sa->isFuzzy()) { - nonFuzzyIndices.push_back(index); - } - sa->setToRemove(true); - ++index; - } - SuppAlignmentAnno* selectedSa = nullptr; - if (!nonFuzzyIndices.empty()) { - auto bestElement = max_element(nonFuzzyIndices.begin(), nonFuzzyIndices.end(), // - [&](int a, int b) {return processedSas[a]->getSupportingIndices().size() < processedSas[b]->getSupportingIndices().size();}); - selectedSa = processedSas[*bestElement]; - } else { - auto bestElement = max_element(processedSas.begin(), processedSas.end(), // - [&](SuppAlignmentAnno* a, SuppAlignmentAnno* b) {return 
a->getSupportingIndices().size() < b->getSupportingIndices().size();}); - if ((*bestElement)->getSupport() + (*bestElement)->getSecondarySupport() == 0) { - if (consensusSa->isEncounteredM()) { - selectedSa = processedSas.back(); - } else { - selectedSa = processedSas[0]; - } - } else { - selectedSa = *bestElement; - } - selectedSa->extendSuppAlignment(consensusSa->getPos(), consensusSa->getExtendedPos()); - } - selectedSa->setToRemove(false); - selectedSa->setMateSupport(maxMateScore); - selectedSa->setExpectedDiscordants(maxExpectedDiscordants); - selectedSa->mrefSaConsensus(fileIndices); -} + void DeFuzzier::selectBestSa(std::vector& processedSas, + SuppAlignmentAnno* consensusSa, + const std::unordered_set& fileIndices) const { + auto maxMateScore = -1; + auto maxExpectedDiscordants = -1; + auto index = 0; + std::vector nonFuzzyIndices { }; + for (auto &sa : processedSas) { + if (sa->getMateSupport() > maxMateScore) { + maxMateScore = sa->getMateSupport(); + } + if (sa->getExpectedDiscordants() > maxExpectedDiscordants) { + maxExpectedDiscordants = sa->getExpectedDiscordants(); + } + if (!sa->isFuzzy()) { + nonFuzzyIndices.push_back(index); + } + sa->setToRemove(true); + ++index; + } + SuppAlignmentAnno* selectedSa = nullptr; + if (!nonFuzzyIndices.empty()) { + auto bestElement = max_element( + nonFuzzyIndices.begin(), + nonFuzzyIndices.end(), + [&](unsigned int a, unsigned int b) { + return processedSas[a]->getSupportingIndices().size() < processedSas[b]->getSupportingIndices().size(); + }); + selectedSa = processedSas[static_cast(*bestElement)]; + } else { + auto bestElement = max_element(processedSas.begin(), processedSas.end(), // + [&](SuppAlignmentAnno* a, SuppAlignmentAnno* b) {return a->getSupportingIndices().size() < b->getSupportingIndices().size();}); + if ((*bestElement)->getSupport() + (*bestElement)->getSecondarySupport() == 0) { + if (consensusSa->isEncounteredM()) { + selectedSa = processedSas.back(); + } else { + selectedSa = processedSas[0]; + 
} + } else { + selectedSa = *bestElement; + } + selectedSa->extendSuppAlignment(consensusSa->getPos(), consensusSa->getExtendedPos()); + } + selectedSa->setToRemove(false); + selectedSa->setMateSupport(maxMateScore); + selectedSa->setExpectedDiscordants(maxExpectedDiscordants); + selectedSa->mrefSaConsensus(fileIndices); + } } diff --git a/src/GenericChrConverter.cpp b/src/GenericChrConverter.cpp new file mode 100644 index 0000000..4066d78 --- /dev/null +++ b/src/GenericChrConverter.cpp @@ -0,0 +1,309 @@ +/* + * Author: Philip R. Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#include "GenericChrConverter.h" +#include "global.h" +#include "ChrInfo.h" +#include "ChrInfoTable.h" +#include "ChrCategory.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace sophia { + + /* This makes a search structure to map chromosome names to index positions in an array. + This is useful mostly during the parsing of chromosome names from the input. In most + downstream code just the indices are used, but the parsing has to be done for every input + line. 
+ + unordered_map is quite fast and O(1), but we might consider making a tuned structure here, + that takes into account the hit-probability of reads against contigs, based on the + assumption of a uniform hit-probability proportional to chromosome size. */ + GenericChrConverter::ChrToIndexMap + GenericChrConverter::buildAllChromosomeLookup(const ChrInfoTable::ChrNames &chromosomes) { + ChrToIndexMap mapping; + mapping.reserve(chromosomes.size()); + for (ChrIndex i = 0; i < (ChrIndex) chromosomes.size(); ++i) { + mapping[chromosomes[static_cast(i)]] = i; + } + return mapping; + } + + std::vector + GenericChrConverter::buildCompressedMrefToAllMapping(ChrInfoTable chrInfoIn) { + std::vector mapping; + mapping.reserve((size_t) chrInfoIn.nChromosomes()); + for (ChrIndex idx = 0; idx < (ChrIndex) chrInfoIn.nChromosomes(); ++idx) { + if (chrInfoIn.getChrInfos()[static_cast(idx)].isCompressedMref()) { + mapping.emplace_back(idx); + } + } + return mapping; + } + + std::vector> + GenericChrConverter::buildAllToCompressedMrefMapping(ChrInfoTable chrInfoIn) { + std::vector> mapping; + mapping.reserve((size_t) chrInfoIn.nChromosomes()); + CompressedMrefIndex compressedMrefIndex = 0; + for (ChrIndex idx = 0; idx < chrInfoIn.nChromosomes(); ++idx) { + std::optional compressedMrefIndexO = std::nullopt; + if (chrInfoIn.getChrInfos()[static_cast(idx)].isCompressedMref()) { + compressedMrefIndexO = std::optional(compressedMrefIndex); + ++compressedMrefIndex; + } + mapping.emplace_back(compressedMrefIndexO); + } + return mapping; + } + + GenericChrConverter::GenericChrConverter( + std::string assemblyNameIn, + ChrInfoTable chrInfoTableIn) + : ChrConverter(assemblyNameIn), + chrInfoTable { chrInfoTableIn }, + allChromosomeLookup { buildAllChromosomeLookup(chrInfoTableIn.getNames()) }, + compressedToAllMapping { buildCompressedMrefToAllMapping(chrInfoTableIn) }, + allToCompressedMapping { buildAllToCompressedMrefMapping(chrInfoTableIn) } {} + + /** Number of all chromosomes. 
*/ + ChrIndex GenericChrConverter::nChromosomes() const { + return chrInfoTable.nChromosomes(); + } + + /** Map an index position to a chromosome name. */ + ChrName GenericChrConverter::indexToChrName(ChrIndex index) const { + return chrInfoTable.getChrInfos().at(static_cast(index)).getName(); + } + + /** Map a chromosome name to an index position. */ + ChrIndex + GenericChrConverter::chrNameToIndex(ChrName chrName) const { + ChrIndex result; + try { + result = allChromosomeLookup.at(chrName); + } catch (std::out_of_range &e) { + throw_with_trace(DomainError("Chromosome name not found: '" + chrName + "'")); + } + return result; + } + + + /** chr1-chr22 */ + bool GenericChrConverter::isAutosome(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::AUTOSOME; + } + + /** chrX */ + bool GenericChrConverter::isX(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::X; + } + + /** chrY */ + bool GenericChrConverter::isY(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::Y; + } + + /** chrX, chrY */ + bool GenericChrConverter::isGonosome(ChrIndex index) const { + return isX(index) || isY(index); + } + + /** phix index. */ + bool GenericChrConverter::isTechnical(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::TECHNICAL; + } + + /** NC_007605, EBV. */ + bool GenericChrConverter::isVirus(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::VIRUS; + } + + /** Mitochondrial chromosome index. */ + bool GenericChrConverter::isExtrachromosomal(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::EXTRACHROMOSOMAL; + } + + /** Decoy sequence index. 
*/ + bool GenericChrConverter::isDecoy(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::DECOY; + } + + /** ALT sequence index. */ + bool GenericChrConverter::isALT(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::ALT; + } + + /** HLA sequence index. */ + bool GenericChrConverter::isHLA(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::HLA; + } + + /** Unassigned (unplaced, random, unlocalized) sequence index. */ + bool GenericChrConverter::isUnassigned(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].getCategory() == ChrCategory::UNASSIGNED; + } + + bool GenericChrConverter::isCompressedMref(ChrIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(index)].isCompressedMref(); + } + + /** Number of compressedMref chromosomes. */ + CompressedMrefIndex GenericChrConverter::nChromosomesCompressedMref() const { + return CompressedMrefIndex(compressedToAllMapping.size()); + } + + /** Map the compressed mref index to the uncompressed mref index. */ + ChrIndex GenericChrConverter::compressedMrefIndexToIndex(CompressedMrefIndex compressedMrefIndex) const { + if (compressedMrefIndex >= nChromosomesCompressedMref()) { + throw_with_trace(std::logic_error("Compressed mref index out of range.")); + } + ChrIndex result = compressedToAllMapping[static_cast(compressedMrefIndex)]; + // The following is just a crude logic test. It will fail, if there is something wrong + // with the index space mapping and it is rather a guard against programming errors. + // If global and compressed mref indices are properly type-checked (instead of + // using/typedef declarations, which are not type-checked!), then this should be removed. + // TODO Remove when switching to typed ChrIndex and CompressedMrefIndex. 
+ if (!chrInfoTable.getChrInfos()[static_cast(result)].isCompressedMref()) + throw_with_trace(DomainError( + "Compressed mref index does not map back to a compressed mref chromosome.")); + return result; + } + + /** Map an index from the global index-space to the compressed mref index-space. */ + CompressedMrefIndex + GenericChrConverter::indexToCompressedMrefIndex(ChrIndex index) const { + if (allToCompressedMapping.at(static_cast(index)) == std::nullopt) { + throw_with_trace(std::logic_error( + "Index does not map to a compressed mref chromosome.")); + } + return allToCompressedMapping.at(static_cast(index)).value(); + } + + /** Map compressed mref index to chromosome size. */ + ChrSize GenericChrConverter::chrSizeCompressedMref(CompressedMrefIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(compressedMrefIndexToIndex(index))].getSize(); + } + + /** Map an compressed mref index to a chromosome name. */ + ChrName GenericChrConverter::compressedMrefIndexToChrName(CompressedMrefIndex index) const { + return chrInfoTable.getChrInfos()[static_cast(compressedMrefIndexToIndex(index))].getName(); + } + + + /** Parse chromosome index. It takes a position in a character stream, and translates the + following character(s) into index positions (using ChrConverter::indexToChrName). + If the name cannot be parsed, throws a domain_error exception. + + This method parses up to the first occurrence of the `stopChar1`. Then within the identified + start and end positions, parses up to the last occurrence of `stopChar2`. This allows to + parse a chromosome name "HLA-DRB1*13:01:01" from a string + "HLA-DRB1*13:01:01:2914|(4,0,0?/0)" by first separating out the `|` separator, and then + finding the last `:` separator before position. 
+ */ + ChrName GenericChrConverter::parseChrBreakPoint(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharsExt) { + if (stopCharsExt.empty()) { + throw_with_trace(std::invalid_argument("stopCharsExt must not be empty.")); + } + + // First find the outer separator (one of `stopCharsExt`). + auto isStopCharExt = [stopCharsExt](char c) { + return stopCharsExt.find(c) != std::string::npos; + }; + auto endMatchIt = std::find_if(startIt, endIt, isStopCharExt); + + // Then find the inner separator (`stopChar`) by searching backwards from the outer + // separator. + auto isStopChar = [stopChar](char c) { return c == stopChar; }; + auto reverseStartIt = std::reverse_iterator(endMatchIt); + auto reverseEndMatchIt = std::find_if(reverseStartIt, + std::reverse_iterator(startIt), + isStopChar); + // The reverseEndMatchIt now points onto the stopChar. We need to reverse it again (base()) + // which will *include* the stopChar in the result, which we don't want. Therefore, we + // increment the reverseEndMatchIt once. + ++reverseEndMatchIt; + + // Finally, prepare and return the result. + ChrName chrName; + chrName.reserve(50); // Should be sufficient for most chromosome names. + std::copy(startIt, + reverseEndMatchIt.base(), // back-convert reverse_iterator to normal iterator. + std::back_inserter(chrName)); + return chrName; + } + + /** Parse the chromosome index just by finding the `stopChar`. Everything between the `startIt`, + and the first occurrence of the `stopChar` is returned as chromosome name. */ + ChrName GenericChrConverter::parseChrSimple(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar) { + auto isStopChar = [stopChar](char c) { return c == stopChar; }; + auto endMatchIt = std::find_if(startIt, endIt, isStopChar); + + // Prepare and return the result. + std::string chrName; + chrName.reserve(50); // Should be sufficient for most chromosome names. 
+ std::copy(startIt, + endMatchIt, + std::back_inserter(chrName)); + return chrName; + } + + ChrName GenericChrConverter::parseChr(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharsExt) { + if (stopCharsExt.empty()) { + return parseChrSimple(startIt, endIt, stopChar); + } else { + return parseChrBreakPoint(startIt, endIt, stopChar, stopCharsExt); + } + } + + ChrIndex GenericChrConverter::parseChrAndReturnIndex(std::string::const_iterator startIt, + std::string::const_iterator endIt, + char stopChar, + const std::string &stopCharsExt) const { + ChrName chrName = parseChr(startIt, endIt, stopChar, stopCharsExt); + + // Map to ChrIndex and return it, of if the chromosome is not registered, give a helpful + // error message, that shows the parsed name and from what input it was parsed. + try { + return allChromosomeLookup.at(chrName); + } catch (std::out_of_range& e) { + throw_with_trace(DomainError( + "Chromosome name '" + chrName + "' not found for assembly '" + + getAssemblyName() + "'.")); + } + // Just to get rid of a warning. 
+ return std::numeric_limits::max(); + } + +} /* namespace sophia */ diff --git a/src/GermlineMatch.cpp b/src/GermlineMatch.cpp index 9ad359f..d750087 100644 --- a/src/GermlineMatch.cpp +++ b/src/GermlineMatch.cpp @@ -26,17 +26,15 @@ namespace sophia { -using namespace std; - -GermlineMatch::GermlineMatch( - double clonalityIn, double conservativeClonalityIn, - const vector> &suppMatchesIn) - : clonality{clonalityIn}, conservativeClonality{conservativeClonalityIn}, - suppMatches{}, clonalities{} { - for (const auto &saPair : suppMatchesIn) { - suppMatches.push_back(saPair.first); - clonalities.push_back(saPair.second); + GermlineMatch::GermlineMatch( + double clonalityIn, double conservativeClonalityIn, + const std::vector> &suppMatchesIn) + : clonality{clonalityIn}, conservativeClonality{conservativeClonalityIn}, + suppMatches{}, clonalities{} { + for (const auto &saPair : suppMatchesIn) { + suppMatches.push_back(saPair.first); + clonalities.push_back(saPair.second); + } } -} } /* namespace sophia */ diff --git a/src/GlobalAppConfig.cpp b/src/GlobalAppConfig.cpp new file mode 100644 index 0000000..037ff5d --- /dev/null +++ b/src/GlobalAppConfig.cpp @@ -0,0 +1,58 @@ +/* + * GlobalAppConfig.cpp + * + * Author: Philip R. Kensche Copyright (C) 2023 DKFZ Heidelberg + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * LICENSE: GPL + */ + +#include "GlobalAppConfig.h" +#include +#include +#include + + +namespace sophia { + + GlobalAppConfig* GlobalAppConfig::instance_ = nullptr; + std::mutex GlobalAppConfig::mutex_ = std::mutex(); + + const ChrConverter &GlobalAppConfig::getChrConverter() const { + return *chrConverter; + } + + GlobalAppConfig::GlobalAppConfig(std::unique_ptr chrConverter): + chrConverter(move(chrConverter)) {} + + GlobalAppConfig::~GlobalAppConfig() {} + + GlobalAppConfig &GlobalAppConfig::init(std::unique_ptr chrConverter) + { + std::lock_guard lock(mutex_); + if (GlobalAppConfig::instance_ == nullptr) { + GlobalAppConfig::instance_ = new GlobalAppConfig(move(chrConverter)); + } else { + throw_with_trace(std::logic_error("GlobalAppConfig already initialized")); + } + return *GlobalAppConfig::instance_; + } + + const GlobalAppConfig &GlobalAppConfig::getInstance() { + if (GlobalAppConfig::instance_ == nullptr) + throw_with_trace(std::logic_error("GlobalAppConfig not initialized")); + return *GlobalAppConfig::instance_; + } + +} \ No newline at end of file diff --git a/src/HelperFunctions.cpp b/src/HelperFunctions.cpp index 260a144..cb8e1e5 100644 --- a/src/HelperFunctions.cpp +++ b/src/HelperFunctions.cpp @@ -21,21 +21,21 @@ */ #include "HelperFunctions.h" +#include namespace sophia { -using namespace std; + std::istream & + error_terminating_getline(std::istream &is, + std::string &str) { + getline(is, str); -istream & -error_terminating_getline(istream &is, string &str) { - getline(is, str); + if (is.bad()) { + perror("Error reading line from file"); + exit(EXITCODE_IOERROR); + } - if (is.bad()) { - perror("Error reading line from file"); - exit(EXITCODE_IOERROR); + return is; } - return is; -} - } /* namespace sophia */ diff --git a/src/Hg37ChrConverter.cpp b/src/Hg37ChrConverter.cpp new file mode 100644 index 0000000..9f94e98 --- /dev/null +++ b/src/Hg37ChrConverter.cpp @@ -0,0 +1,699 @@ +/* + * Author: Philip R. 
Kensche, DKFZ Heidelberg (Omics IT and Data Management Core Facility) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * LICENSE: GPL + */ + +#include +#include +#include +#include + +#include "Hg37ChrConverter.h" +#include "global.h" +#include "IndexRange.h" + + +namespace sophia { + + namespace hg37 { + + static const std::vector indexToChrName { + "0", "1", "2", "3", "4", + "5", "6", "7", "8", "9", + "10", "11", "12", "13", "14", + "15", "16", "17", "18", "19", + "20", "21", "22", "23", "24", + "25", "26", "27", "28", "29", + "30", "31", "32", "33", "34", + "35", "36", "37", "38", "39", + "X", "Y", "42", "43", "44", + "45", "46", "47", "48", "49", + "50", "51", "52", "53", "54", + "55", "56", "57", "58", "59", + "60", "61", "62", "63", "64", + "65", "66", "67", "68", "69", + "70", "71", "72", "73", "74", + "75", "76", "77", "78", "79", + "80", "81", "82", "83", "84", + "85", "86", "87", "88", "89", + "90", "91", "92", "93", "94", + "95", "96", "97", "98", "99", + "100", "101", "102", "103", "104", + "105", "106", "107", "108", "109", + "110", "111", "112", "113", "114", + "115", "116", "117", "118", "119", + "120", "121", "122", "123", "124", + "125", "126", "127", "128", "129", + "130", "131", "132", "133", "134", + "135", "136", "137", "138", "139", + "140", "141", "142", "143", "144", + "145", "146", "147", "148", "149", + "150", "151", "152", "153", "154", + "155", "156", "157", "158", 
"159", + "160", "161", "162", "163", "164", + "165", "166", "167", "168", "169", + "170", "171", "172", "173", "174", + "175", "176", "177", "178", "179", + "180", "181", "182", "183", "184", + "185", "186", "187", "188", "189", + "190", "GL000191.1", "GL000192.1", "GL000193.1", "GL000194.1", + "GL000195.1", "GL000196.1", "GL000197.1", "GL000198.1", "GL000199.1", + "GL000200.1", "GL000201.1", "GL000202.1", "GL000203.1", "GL000204.1", + "GL000205.1", "GL000206.1", "GL000207.1", "GL000208.1", "GL000209.1", + "GL000210.1", "GL000211.1", "GL000212.1", "GL000213.1", "GL000214.1", + "GL000215.1", "GL000216.1", "GL000217.1", "GL000218.1", "GL000219.1", + "GL000220.1", "GL000221.1", "GL000222.1", "GL000223.1", "GL000224.1", + "GL000225.1", "GL000226.1", "GL000227.1", "GL000228.1", "GL000229.1", + "GL000230.1", "GL000231.1", "GL000232.1", "GL000233.1", "GL000234.1", + "GL000235.1", "GL000236.1", "GL000237.1", "GL000238.1", "GL000239.1", + "GL000240.1", "GL000241.1", "GL000242.1", "GL000243.1", "GL000244.1", + "GL000245.1", "GL000246.1", "GL000247.1", "GL000248.1", "GL000249.1", + "250", "251", "252", "253", "254", + "255", "256", "257", "258", "259", + "260", "261", "262", "263", "264", + "265", "266", "267", "268", "269", + "270", "271", "272", "273", "274", + "275", "276", "277", "278", "279", + "280", "281", "282", "283", "284", + "285", "286", "287", "288", "289", + "290", "291", "292", "293", "294", + "295", "296", "297", "298", "299", + "300", "301", "302", "303", "304", + "305", "306", "307", "308", "309", + "310", "311", "312", "313", "314", + "315", "316", "317", "318", "319", + "320", "321", "322", "323", "324", + "325", "326", "327", "328", "329", + "330", "331", "332", "333", "334", + "335", "336", "337", "338", "339", + "340", "341", "342", "343", "344", + "345", "346", "347", "348", "349", + "350", "351", "352", "353", "354", + "355", "356", "357", "358", "359", + "360", "361", "362", "363", "364", + "365", "366", "367", "368", "369", + "370", "371", "372", 
"373", "374", + "375", "376", "377", "378", "379", + "380", "381", "382", "383", "384", + "385", "386", "387", "388", "389", + "390", "391", "392", "393", "394", + "395", "396", "397", "398", "399", + "400", "401", "402", "403", "404", + "405", "406", "407", "408", "409", + "410", "411", "412", "413", "414", + "415", "416", "417", "418", "419", + "420", "421", "422", "423", "424", + "425", "426", "427", "428", "429", + "430", "431", "432", "433", "434", + "435", "436", "437", "438", "439", + "440", "441", "442", "443", "444", + "445", "446", "447", "448", "449", + "450", "451", "452", "453", "454", + "455", "456", "457", "458", "459", + "460", "461", "462", "463", "464", + "465", "466", "467", "468", "469", + "470", "471", "472", "473", "474", + "475", "476", "477", "478", "479", + "480", "481", "482", "483", "484", + "485", "486", "487", "488", "489", + "490", "491", "492", "493", "494", + "495", "496", "497", "498", "499", + "500", "501", "502", "503", "504", + "505", "506", "507", "508", "509", + "510", "511", "512", "513", "514", + "515", "516", "517", "518", "519", + "520", "521", "522", "523", "524", + "525", "526", "527", "528", "529", + "530", "531", "532", "533", "534", + "535", "536", "537", "538", "539", + "540", "541", "542", "543", "544", + "545", "546", "547", "548", "549", + "550", "551", "552", "553", "554", + "555", "556", "557", "558", "559", + "560", "561", "562", "563", "564", + "565", "566", "567", "568", "569", + "570", "571", "572", "573", "574", + "575", "576", "577", "578", "579", + "580", "581", "582", "583", "584", + "585", "586", "587", "588", "589", + "590", "591", "592", "593", "594", + "595", "596", "597", "598", "599", + "600", "601", "602", "603", "604", + "605", "606", "607", "608", "609", + "610", "611", "612", "613", "614", + "615", "616", "617", "618", "619", + "620", "621", "622", "623", "624", + "625", "626", "627", "628", "629", + "630", "631", "632", "633", "634", + "635", "636", "637", "638", "639", + "640", "641", "642", 
"643", "644", + "645", "646", "647", "648", "649", + "650", "651", "652", "653", "654", + "655", "656", "657", "658", "659", + "660", "661", "662", "663", "664", + "665", "666", "667", "668", "669", + "670", "671", "672", "673", "674", + "675", "676", "677", "678", "679", + "680", "681", "682", "683", "684", + "685", "686", "687", "688", "689", + "690", "691", "692", "693", "694", + "695", "696", "697", "698", "699", + "700", "701", "702", "703", "704", + "705", "706", "707", "708", "709", + "710", "711", "712", "713", "714", + "715", "716", "717", "718", "719", + "720", "721", "722", "723", "724", + "725", "726", "727", "728", "729", + "730", "731", "732", "733", "734", + "735", "736", "737", "738", "739", + "740", "741", "742", "743", "744", + "745", "746", "747", "748", "749", + "750", "751", "752", "753", "754", + "755", "756", "757", "758", "759", + "760", "761", "762", "763", "764", + "765", "766", "767", "768", "769", + "770", "771", "772", "773", "774", + "775", "776", "777", "778", "779", + "780", "781", "782", "783", "784", + "785", "786", "787", "788", "789", + "790", "791", "792", "793", "794", + "795", "796", "797", "798", "799", + "800", "801", "802", "803", "804", + "805", "806", "807", "808", "809", + "810", "811", "812", "813", "814", + "815", "816", "817", "818", "819", + "820", "821", "822", "823", "824", + "825", "826", "827", "828", "829", + "830", "831", "832", "833", "834", + "835", "836", "837", "838", "839", + "840", "841", "842", "843", "844", + "845", "846", "847", "848", "849", + "850", "851", "852", "853", "854", + "855", "856", "857", "858", "859", + "860", "861", "862", "863", "864", + "865", "866", "867", "868", "869", + "870", "871", "872", "873", "874", + "875", "876", "877", "878", "879", + "880", "881", "882", "883", "884", + "885", "886", "887", "888", "889", + "890", "891", "892", "893", "894", + "895", "896", "897", "898", "899", + "900", "901", "902", "903", "904", + "905", "906", "907", "908", "909", + "910", "911", "912", 
"913", "914", + "915", "916", "917", "918", "919", + "920", "921", "922", "923", "924", + "925", "926", "927", "928", "929", + "930", "931", "932", "933", "934", + "935", "936", "937", "938", "939", + "940", "941", "942", "943", "944", + "945", "946", "947", "948", "949", + "950", "951", "952", "953", "954", + "955", "956", "957", "958", "959", + "960", "961", "962", "963", "964", + "965", "966", "967", "968", "969", + "970", "971", "972", "973", "974", + "975", "976", "977", "978", "979", + "980", "981", "982", "983", "984", + "985", "986", "987", "988", "989", + "990", "991", "992", "993", "994", + "995", "996", "997", "998", "hs37d5", + "NC_007605", "MT", "phiX174", "INVALID"}; + + static const ChrIndex ZERO = 0; + static const ChrIndex xIndex = 40; + static const ChrIndex yIndex = 41; + static const ChrIndex decoyIndex = 999; + static const ChrIndex virusIndex = 1000; + static const ChrIndex mtIndex = 1001; + static const ChrIndex phixIndex = 1002; + static const ChrIndex INVALID = 1003; + + static const IndexRange automoseRange = {1, 23}; + static const IndexRange unassignedRange = {191, 250}; + static const IndexRange decoyRange = {decoyIndex, decoyIndex + 1}; + static const IndexRange virusRange = {virusIndex, virusIndex + 1}; + static const IndexRange extrachromosomalRange = {mtIndex, mtIndex + 1}; + static const IndexRange technicalRange = {phixIndex, phixIndex + 1}; + + /* 85 compressed mref chromosomes */ + static const std::vector compressedMrefIndexToChrName { + "1", "2", "3", "4", "5", + "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", + "21", "22", "X", "Y", "GL000191.1", + "GL000192.1", "GL000193.1", "GL000194.1", "GL000195.1", "GL000196.1", + "GL000197.1", "GL000198.1", "GL000199.1", "GL000200.1", "GL000201.1", + "GL000202.1", "GL000203.1", "GL000204.1", "GL000205.1", "GL000206.1", + "GL000207.1", "GL000208.1", "GL000209.1", "GL000210.1", "GL000211.1", + "GL000212.1", "GL000213.1", "GL000214.1", 
"GL000215.1", "GL000216.1", + "GL000217.1", "GL000218.1", "GL000219.1", "GL000220.1", "GL000221.1", + "GL000222.1", "GL000223.1", "GL000224.1", "GL000225.1", "GL000226.1", + "GL000227.1", "GL000228.1", "GL000229.1", "GL000230.1", "GL000231.1", + "GL000232.1", "GL000233.1", "GL000234.1", "GL000235.1", "GL000236.1", + "GL000237.1", "GL000238.1", "GL000239.1", "GL000240.1", "GL000241.1", + "GL000242.1", "GL000243.1", "GL000244.1", "GL000245.1", "GL000246.1", + "GL000247.1", "GL000248.1", "GL000249.1", "hs37d5", "NC_007605"}; + + /* 85 compressed mref chromosomes. These are the chromosome sizes + 1. Also, it is unclear, + why some chromosome sizes differ from the 1K genomes reference, e.g. Chromosome 1 is + 249904550 in there, but significantly smaller here. + Note that the hardcoded data used to be in MasterMrefProcessor. */ + static const std::vector chrSizesCompressedMref { + 249250622, 243199374, 198022431, 191154277, 180915261, 171115068, + 159138664, 146364023, 141213432, 135534748, 135006517, 133851896, + 115169879, 107349541, 102531393, 90354754, 81195211, 78077249, + 59128984, 63025521, 48129896, 51304567, 155270561, 59373567, + 106434, 547497, 189790, 191470, 182897, 38915, + 37176, 90086, 169875, 187036, 36149, 40104, + 37499, 81311, 174589, 41002, 4263, 92690, + 159170, 27683, 166567, 186859, 164240, 137719, + 172546, 172295, 172150, 161148, 179199, 161803, + 155398, 186862, 180456, 179694, 211174, 15009, + 128375, 129121, 19914, 43692, 27387, 40653, + 45942, 40532, 34475, 41935, 45868, 39940, + 33825, 41934, 42153, 43524, 43342, 39930, + 36652, 38155, 36423, 39787, 38503, 35477944, + 171824}; + + // Used to be -2, but in the mref space 1003 is INVALID, + // and -2 has the disadvantage that it cannot be represented as an unsigned integer. + // By moving this to INVALID (1003), we can make CompressedMrefIndex an unsigned integer, + // and can switch -- for compile-time checks -- the signedness of ChrIndex and + // CompressedMrefIndex. 
This gives us a poor-man's type checking, and we can postpone + // a bigger (more time-consuming) refactoring. + // + // Note that NA is only used when mapping from ChrIndex to CompressedMrefIndex, to indicate + // that chromosome is actually not among the compressed master ref chromosomes. + static const CompressedMrefIndex NA = 1003; + + // This used to be `indexConverter`. + static const std::vector indexToCompressedMrefIndex { + NA, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, 22, 23, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 83, 84, NA, NA, NA}; + + } /* namespace hg37 */ + + bool Hg37ChrConverter::isValid(ChrIndex index) { + return index != hg37::INVALID && index != hg37::ZERO && ( + _isAutosome(index) || + _isX(index) || + _isY(index) || + _isTechnical(index) || + _isVirus(index) || + _isExtrachromosomal(index) || + _isDecoy(index) || + _isUnassigned(index) /* || // There are no HLA and ALT contigs in hg37. The ranges are empty. + _isHLA(index) || + _isALT(index) */ + ); + } + + void Hg37ChrConverter::assertValid(ChrIndex index) { + #ifndef NDEBUG + if (!isValid(index)) { + throw_with_trace(std::runtime_error("Invalid chromosome index: " + + std::to_string(index))); + } + #endif + } + + bool Hg37ChrConverter::isValid(CompressedMrefIndex index) { + return index != hg37::NA; + } + + void Hg37ChrConverter::assertValid(CompressedMrefIndex index) { + #ifndef NDEBUG + if (!isValid(index)) { + throw_with_trace(std::runtime_error("Invalid compressed mref index: " + + std::to_string(index))); + } + #endif + } + + std::vector Hg37ChrConverter::_buildCompressedMrefIndexToIndex( + CompressedMrefIndex nCompressed, + const std::vector &indexToCompressedMrefIndex) { + + // This is now the only place, where invalid values are assigned, ... 
+ std::vector result (static_cast(nCompressed), hg37::NA); + for (ChrIndex globalIndex = 0; + globalIndex < ChrIndex(indexToCompressedMrefIndex.size()); + ++globalIndex) { + + CompressedMrefIndex compressedMrefIndex = + indexToCompressedMrefIndex[static_cast(globalIndex)]; + + if (isValid(compressedMrefIndex)) { + unsigned int cIdx = static_cast(compressedMrefIndex); + if (isValid(result[cIdx])) { + throw_with_trace(std::runtime_error( + "Compressed mref index " + std::to_string(compressedMrefIndex) + + " is already assigned to " + + std::to_string(result[cIdx]) + + " and cannot be assigned to " + std::to_string(globalIndex))); + } + result[cIdx] = globalIndex; + } + } + + // ... but before we continue, we ensure there are no gaps. There must be an index + // in the global index space for all compressed mref indices/chromosomes. + for (auto it = result.cbegin(); it != result.cend(); ++it) { + assertValid(*it); + } + + return result; + } + + Hg37ChrConverter::Hg37ChrConverter(const std::vector &indexToChrName, + const std::vector &compressedMrefIndexToChrName, + const std::vector &chrSizesCompressedMref, + const std::vector &indexToCompressedMrefIndex) : + ChrConverter("classic_hg37"), + _indexToChrName {indexToChrName}, + _compressedMrefIndexToChrName {compressedMrefIndexToChrName}, + _chrSizesCompressedMref {chrSizesCompressedMref}, + _indexToCompressedMrefIndex {indexToCompressedMrefIndex}, + _compressedMrefIndexToIndex {_buildCompressedMrefIndexToIndex( + compressedMrefIndexToChrName.size(), + indexToCompressedMrefIndex)}{ + if (indexToChrName.size() != indexToCompressedMrefIndex.size()) + throw_with_trace(std::invalid_argument( + "indexToChrName and indexToCompressedMrefIndex must have the same size. 
" + "Found sizes: indexToChrName=" + std::to_string(indexToChrName.size()) + + ", indexToCompressedMrefIndex=" + std::to_string(indexToCompressedMrefIndex.size()))); + if (compressedMrefIndexToChrName.size() != chrSizesCompressedMref.size()) + throw_with_trace(std::invalid_argument( + "compressedMrefIndexToChrName and chrSizesCompressedMref must have the same size. " + "Found sizes: compressedMrefIndexToChrName=" + std::to_string(compressedMrefIndexToChrName.size()) + + ", chrSizesCompressedMref=" + std::to_string(chrSizesCompressedMref.size()))); + } + + Hg37ChrConverter::Hg37ChrConverter() + : Hg37ChrConverter(hg37::indexToChrName, + hg37::compressedMrefIndexToChrName, + hg37::chrSizesCompressedMref, + hg37::indexToCompressedMrefIndex) {} + + ChrIndex Hg37ChrConverter::nChromosomes() const { + return ChrIndex(_indexToChrName.size()); + } + + CompressedMrefIndex Hg37ChrConverter::nChromosomesCompressedMref() const { + return CompressedMrefIndex(_compressedMrefIndexToChrName.size()); + } + + /** Map an index position to a chromosome name. */ + ChrName Hg37ChrConverter::indexToChrName(ChrIndex index) const { +// assertValid(index); + return _indexToChrName[static_cast(index)]; + } + + /** chr1-chr22, ... 
*/ /* _isAutosome() is true when index lies in the left-inclusive, right-exclusive range hg37::automoseRange; isAutosome() is the const member wrapper that forwards to it. isValid(ChrIndex) is built from these underscore-prefixed predicates. */ + bool Hg37ChrConverter::_isAutosome(ChrIndex index) { + return hg37::automoseRange.contains(index); + } + bool Hg37ChrConverter::isAutosome(ChrIndex index) const { + return _isAutosome(index); + } + + /** chrX: _isX() is true when index equals hg37::xIndex; isX() forwards to it. */ + bool Hg37ChrConverter::_isX(ChrIndex index) { + return index == hg37::xIndex; + } + bool Hg37ChrConverter::isX(ChrIndex index) const { + return _isX(index); + } + + /** chrY: _isY() is true when index equals hg37::yIndex; isY() forwards to it. */ + bool Hg37ChrConverter::_isY(ChrIndex index) { + return index == hg37::yIndex; + } + bool Hg37ChrConverter::isY(ChrIndex index) const { + return _isY(index); + } + + /** chrX, chrY: _isGonosome() is true when either _isX() or _isY() holds; isGonosome() forwards to it. */ + bool Hg37ChrConverter::_isGonosome(ChrIndex index) { + return _isX(index) || _isY(index); + } + bool Hg37ChrConverter::isGonosome(ChrIndex index) const { + return _isGonosome(index); + } + + + /** phiX174: _isTechnical() is true when index lies in hg37::technicalRange, which spans only hg37::phixIndex; isTechnical() forwards to it. */ + bool Hg37ChrConverter::_isTechnical(ChrIndex index) { + return hg37::technicalRange.contains(index); + } + bool Hg37ChrConverter::isTechnical(ChrIndex index) const { + return _isTechnical(index); + } + + /** NC_007605: _isVirus() is true when index lies in hg37::virusRange, which spans only hg37::virusIndex; isVirus() forwards to it. */ + bool Hg37ChrConverter::_isVirus(ChrIndex index) { + return hg37::virusRange.contains(index); + } + bool Hg37ChrConverter::isVirus(ChrIndex index) const { + return _isVirus(index); + } + + /** Mitochondrial chromosome (MT): _isExtrachromosomal() is true when index lies in hg37::extrachromosomalRange, which spans only hg37::mtIndex; isExtrachromosomal() forwards to it. */ + bool Hg37ChrConverter::_isExtrachromosomal(ChrIndex index) { + return hg37::extrachromosomalRange.contains(index); + } + bool Hg37ChrConverter::isExtrachromosomal(ChrIndex index) const { + return _isExtrachromosomal(index); + } + + /** Decoy sequence index (hs37d5): _isDecoy() is true when index lies in hg37::decoyRange, which spans only hg37::decoyIndex; isDecoy() forwards to it. 
*/ + bool Hg37ChrConverter::_isDecoy(ChrIndex index) { + return hg37::decoyRange.contains(index); + } + bool Hg37ChrConverter::isDecoy(ChrIndex index) const { + return _isDecoy(index); + } + /** Unassigned contigs (GL*): _isUnassigned() is true when index lies in the left-inclusive, right-exclusive range hg37::unassignedRange; isUnassigned() forwards to it. */ + bool Hg37ChrConverter::_isUnassigned(ChrIndex index) { + return hg37::unassignedRange.contains(index); + } + bool Hg37ChrConverter::isUnassigned(ChrIndex index) const { + return _isUnassigned(index); + } + /** There are no ALT contigs in hg37, so _isALT() returns false for every index; isALT() forwards to it. */ + bool Hg37ChrConverter::_isALT(ChrIndex index [[gnu::unused]]) { + return false; + } + bool Hg37ChrConverter::isALT(ChrIndex index [[gnu::unused]]) const { + return _isALT(index); + } + /** There are no HLA contigs in hg37, so _isHLA() returns false for every index; isHLA() forwards to it. */ + bool Hg37ChrConverter::_isHLA(ChrIndex index [[gnu::unused]]) { + return false; + } + bool Hg37ChrConverter::isHLA(ChrIndex index [[gnu::unused]]) const { + return _isHLA(index); + } + + + /* Compressed Master Ref chromosomes are 1-22, X, Y, GL* (unassigned), hs37d5 (decoys), and + * NC_007605 (virus). Excluded are MT and phix. Used to be index <= 1000 (virus). + * Returns true when the lookup table maps the index to anything other than hg37::NA. + * The table is accessed with .at(), so an index outside the table is reported by the + * container's range check instead of being read out of bounds. */ + bool Hg37ChrConverter::isCompressedMref(ChrIndex index) const { +// assertValid(index); + return isValid(_indexToCompressedMrefIndex.at(static_cast(index))); + } + + /** Map a compressed mref index to a chromosome name (range-checked via .at()). */ + ChrName + Hg37ChrConverter::compressedMrefIndexToChrName(CompressedMrefIndex index) const { +// assertValid(index); + return _compressedMrefIndexToChrName.at(static_cast(index)); + } + + /** Map an index from the global index-space to the compressed mref index-space. + * The result is hg37::NA for chromosomes that are not among the compressed mref + * chromosomes; the validity check on the result is currently commented out. */ + CompressedMrefIndex + Hg37ChrConverter::indexToCompressedMrefIndex(ChrIndex index) const { +// assertValid(index); + CompressedMrefIndex result = _indexToCompressedMrefIndex.at(static_cast(index)); +// assertValid(result); + return result; + } + /** Map an index from the compressed mref index-space back to the global index-space (range-checked via .at()). */ + ChrIndex + Hg37ChrConverter::compressedMrefIndexToIndex(CompressedMrefIndex index) const { +// assertValid(index); + return _compressedMrefIndexToIndex.at(static_cast(index)); + } + + /** Map compressed mref index to chromosome size. 
*/ /* NOTE(review): this accessor uses unchecked operator[] while the sibling accessors use .at(); an out-of-range index is not detected here. */ + ChrSize + Hg37ChrConverter::chrSizeCompressedMref(CompressedMrefIndex index) const { +// assertValid(index); + return _chrSizesCompressedMref[static_cast(index)]; + } + /** Map a chromosome name to its global index via parseChrAndReturnIndex(), passing ' ' as stopChar. A DomainError raised by the parser is rethrown with the offending name attached as error_info_string. */ + ChrIndex + Hg37ChrConverter::chrNameToIndex(ChrName chrName) const { + ChrIndex result; + try { + result = parseChrAndReturnIndex(chrName.begin(), chrName.end(), ' '); + } catch (DomainError &e) { + throw e << error_info_string("from = " + chrName); + } + return result; + } + /** Returns false only for chromosome index 2 with position / 10000 == 3314, and true everywhere else. NOTE(review): the name suggests the opposite polarity; confirm against the callers before changing either the name or the expression. */ + bool + Hg37ChrConverter::isInBlockedRegion(ChrIndex chrIndex, ChrSize position) const { +// assertValid(chrIndex); + // Legacy condition: "for mate not in range 33140000-33149999 on chromosome 2, do ..." + return !(chrIndex == 2 && (position / 10000 == 3314)); + } + + /* This is parsing code. It takes a position in a character stream, and translates the + following character(s) into index positions (see ChrConverter::indexToChrName). It is slightly + modified from the original implementation by Umut Toprak. + + If the first position is a digit, read up to the next stopChar (or the end iterator) and + interpret the characters read as a decimal number. + + * (\d+)$ -> $1 + + If the first position is *not* a digit, return indices according to the following rules: + + * h -> 999 + * X -> 40 + * Y -> 41 + * MT -> 1001 + * G.(\d+)\. -> $1 (the two leading characters are skipped, e.g. "GL") + * N -> 1000 + * p -> 1002 + + NOTE: Most of the matches are eager prefix matches: the non-digit rules look only at the + leading character(s). They do not check that the stopChar is reached, and the remainder + of the identifier is not validated against any expected pattern. Callers must pass a + non-empty, well-formed identifier. + + All identifiers not matching any of these rules will throw an exception (DomainError). + + IMPORTANT: The hg37 parser here ignores the stopCharExt, but instead remains with the legacy + behavior. 
+ */ + ChrIndex Hg37ChrConverter::parseChrAndReturnIndex( + std::string::const_iterator start, + std::string::const_iterator end, + char stopChar, + const std::string &stopCharExt[[gnu::unused]] // Attribute to remove the warning + ) const { + ChrIndex chrIndex {0}; + /* if (start == end) { + throw_with_trace(DomainError("Chromosome identifier is empty.")); + } else */ if (isdigit(*start)) { + for (auto chr_cit = start; chr_cit != end && *chr_cit != stopChar; ++chr_cit) { + chrIndex = chrIndex * 10 + ChrIndex(*chr_cit - '0'); + } + } else { + switch (*start) { + case 'h': + chrIndex = hg37::decoyIndex; + break; + case 'X': + chrIndex = hg37::xIndex; + break; + case 'G': // Match GL...... chromosomes. + for (auto cit = next(start, 2); *cit != '.'; ++cit) { + chrIndex = 10 * chrIndex + ChrIndex(*cit - '0'); + } + break; + case 'Y': + chrIndex = hg37::yIndex; + break; + case 'M': // Match "MT" + ++start; + if (start != end && *start == 'T') { + chrIndex = hg37::mtIndex; + } else { + throw_with_trace( + DomainError("Chromosome identifier with invalid prefix 'M" + + std::to_string(*start) + "'.")); + } + break; + case 'N': + chrIndex = hg37::virusIndex; + break; + case 'p': + chrIndex = hg37::phixIndex; + break; + default: + throw_with_trace(DomainError("Chromosome identifier with invalid prefix '" + + std::to_string(*start) + "'.")); + } + } + return chrIndex; + } + +} /* namespace sophia */ diff --git a/src/IndexRange.cpp b/src/IndexRange.cpp new file mode 100644 index 0000000..2f03da3 --- /dev/null +++ b/src/IndexRange.cpp @@ -0,0 +1,29 @@ +#include "IndexRange.h" +#include "global.h" + +namespace sophia { + + IndexRange::IndexRange(ChrIndex start, ChrIndex end) + : start_(start), end_(end) { + if (start > end) { + throw_with_trace(std::invalid_argument("IndexRange: start must be <= end")); + } + } + + ChrIndex IndexRange::start() const { + return start_; + } + ChrIndex IndexRange::end() const { + return end_; + } + + ChrSize IndexRange::width() const { + return 
static_cast(end_ - start_); + } + + bool IndexRange::contains(const ChrIndex &index) const { + // 0-based, left-inclusive, right-exclusive range. + return index >= start_ && index < end_; + } + +} // namespace sophia diff --git a/src/MasterRefProcessor.cpp b/src/MasterRefProcessor.cpp index 37d27c6..6e3a3e5 100644 --- a/src/MasterRefProcessor.cpp +++ b/src/MasterRefProcessor.cpp @@ -22,167 +22,243 @@ * LICENSE: GPL */ -#include "ChrConverter.h" +#include "GlobalAppConfig.h" #include "DeFuzzier.h" #include "HelperFunctions.h" -#include "strtk.hpp" +#include "strtk-wrap.h" #include #include #include +#include #include #include #include namespace sophia { -using namespace std; - -MasterRefProcessor::MasterRefProcessor(const vector &filesIn, - const string &outputRootName, - const string &version, - const int defaultReadLengthIn) - : NUMPIDS{static_cast(filesIn.size())}, - DEFAULTREADLENGTH{defaultReadLengthIn}, mrefDb{} { - const vector CHRSIZES{ - 249250622, 243199374, 198022431, 191154277, 180915261, 171115068, - 159138664, 146364023, 141213432, 135534748, 135006517, 133851896, - 115169879, 107349541, 102531393, 90354754, 81195211, 78077249, - 59128984, 63025521, 48129896, 51304567, 155270561, 59373567, - 106434, 547497, 189790, 191470, 182897, 38915, - 37176, 90086, 169875, 187036, 36149, 40104, - 37499, 81311, 174589, 41002, 4263, 92690, - 159170, 27683, 166567, 186859, 164240, 137719, - 172546, 172295, 172150, 161148, 179199, 161803, - 155398, 186862, 180456, 179694, 211174, 15009, - 128375, 129121, 19914, 43692, 27387, 40653, - 45942, 40532, 34475, 41935, 45868, 39940, - 33825, 41934, 42153, 43524, 43342, 39930, - 36652, 38155, 36423, 39787, 38503, 35477944, - 171824}; - for (auto i = 0; i < 85; ++i) { - // mrefDbPtrs.emplace_back(CHRSIZES[i] + 1, nullptr); - mrefDb.emplace_back(CHRSIZES[i] + 1, MrefEntry{}); - } - vector header{"#chr", "start", "end"}; - for (const auto &gzFile : filesIn) { - int posOnVersion = version.size() - 1; - bool counting{false}; - 
string realPidName; - for (auto rit = gzFile.crbegin(); rit != gzFile.crend(); ++rit) { - if (!counting) { - if (*rit != version[posOnVersion]) { - posOnVersion = version.size() - 1; - } else { - --posOnVersion; - if (posOnVersion == -1) { - ++rit; - counting = true; + /** + * This constructor has a side-effect. It reads from the filesIn and writes breakpoint + * information to + * + * outputRootName + "_" + NUM_PIDS + "_mergedBpCounts.bed" + * + * @param filesIn vector of input gzFile names. + * @param outputRootName base name/path for the output files + * @param version the version is matched in the gzFile name to find the realPidName. + * @param defaultReadLengthIn Value for the default read length used for the DeFuzzier. + */ + MasterRefProcessor::MasterRefProcessor(const std::vector &filesIn, + const std::string &outputRootName, + const std::string &version, + const ChrSize defaultReadLengthIn) + : NUM_PIDS { static_cast(filesIn.size()) }, + DEFAULT_READ_LENGTH{ defaultReadLengthIn }, + mrefDb {} { + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + + // mrefDb holds one inner vector per compressed mref chromosome. Only that many slots are + // reserved in the outer vector, so it is never reallocated while it is filled; each + // inner vector is allocated at its final size when it is emplaced below. The total + // number of entries is computed only to report the expected memory consumption. + // NOTE: This will allocate a lot of memory as the total size of the vectors is the + // genome size (3.7 giga-bases for hg19). + std::vector>::size_type totalSize = 0; + for (CompressedMrefIndex i = 0; i < chrConverter.nChromosomesCompressedMref(); ++i) { + totalSize += static_cast>::size_type>( + chrConverter.chrSizeCompressedMref(i) + 1); + } + std::cerr << "Allocating " + << std::ceil(sizeof(sophia::MrefEntry) * totalSize / 1024.0 / 1024.0 / 1024.0) + << " GB for mrefDb ..." + << std::endl; + // Reserving totalSize here would request one outer slot per base pair, not per chromosome. + mrefDb.reserve(static_cast<decltype(mrefDb)::size_type>( + chrConverter.nChromosomesCompressedMref())); + + // Initialize the mrefDb with default values. 
+ for (CompressedMrefIndex i = 0; i < chrConverter.nChromosomesCompressedMref(); ++i) { + // It is unclear, why here +1 is added to the chromosomes sizes, in particular, as + // the original chromosome size data already had sized incremented by 1 summing up + // to total genome size here of N*2 additional positions, with N being the number of + // compressed master-ref chromosomes. This is just kept for all changes :| + mrefDb.emplace_back(chrConverter.chrSizeCompressedMref(i) + 1, MrefEntry{}); + } + + // Construct the output file header. This collects the `realPidName`s from the gzFile + // and appends them to the header. The `version` is matched in the gzFile name. + std::vector header {"#chr", "start", "end"}; + for (const auto &gzFile : filesIn) { + signed int posOnVersion = version.size() - 1; + bool counting { false }; + std::string realPidName; + for (auto rit = gzFile.crbegin(); rit != gzFile.crend(); ++rit) { + if (!counting) { + // Match the version in the gzFile name. Note that we traverse the gzFile name + // in reverse order (from end), and therefore the matching algorithm is + // formulated in reverse order. + if (*rit != version[static_cast(posOnVersion)]) { + // No match, means continue searching. + posOnVersion = version.size() - 1; + } else { + // Match, means continue matching. + --posOnVersion; + if (posOnVersion == -1) { + // We have a match, therefore continue in the other branch that + // collects the letters for the realPidName. + ++rit; + counting = true; + } } - } - } else { - if (*rit == '/') { - break; } else { - realPidName.push_back(*rit); + if (*rit == '/') { + // We matched a '/' character, i.e. a path separator. Therefore, we are done + // with the realPidName. Stop collecting letters. + break; + } else { + // Everything we see is part of the realPidName. + realPidName.push_back(*rit); + } } } + if (realPidName.size() == 0) { + throw_with_trace(std::runtime_error( + "Could not match realPidName in gzFile '" + gzFile + "'. 
" + "The version value '" + version + "' has to be contained " + "in the gzFile name. Rename the gzFile to match the pattern " + "'.*/$realPidName?$version.+'.")); + } + + reverse(realPidName.begin(), realPidName.end()); + std::cerr << "Matched realPidName '" << realPidName + << "' in gzFile '" << gzFile << "'" << std::endl; + header.push_back(realPidName); } - reverse(realPidName.begin(), realPidName.end()); - header.push_back(realPidName); - } - mergedBpsOutput = make_unique( - outputRootName + "_" + strtk::type_to_string(NUMPIDS) + - "_mergedBpCounts.bed"); - short fileIndex{0}; - for (const auto &gzFile : filesIn) { - chrono::time_point start = - chrono::steady_clock::now(); - auto newBreakpoints = processFile(gzFile, fileIndex); - chrono::time_point end = - chrono::steady_clock::now(); - chrono::seconds diff = - chrono::duration_cast(end - start); - ++fileIndex; - cerr << gzFile << "\t" << diff.count() << "\t" << newBreakpoints << "\t" - << fileIndex << "\t" << 100 * (fileIndex + 0.0) / NUMPIDS << "%\n"; - } - auto defuzzier = DeFuzzier{DEFAULTREADLENGTH * 3, true}; - auto i = 84; - while (!mrefDb.empty()) { - mrefDb.back().erase( - remove_if(mrefDb.back().begin(), mrefDb.back().end(), - [](const MrefEntry &bp) { return bp.getPos() == -1; }), - mrefDb.back().end()); - defuzzier.deFuzzyDb(mrefDb.back()); - mrefDb.back().erase( - remove_if(mrefDb.back().begin(), mrefDb.back().end(), - [](const MrefEntry &bp) { return bp.getPos() == -1; }), - mrefDb.back().end()); - auto chromosome = ChrConverter::indexToChrCompressedMref[i]; - --i; - for (auto &bp : mrefDb.back()) { - if (bp.getPos() != -1 && bp.getValidityScore() != -1) { - // cout << - //bp.printArtifactRatios(chromosome); - *mergedBpsOutput << bp.printBpInfo(chromosome); + + // This reopens the gzFiles from filesIn and processes them. It logs information to the + // standard error output. 
+ short fileIndex{0}; + for (const auto &gzFile : filesIn) { + std::chrono::time_point start = + std::chrono::steady_clock::now(); + // newBreakpoints contains only information for chromosomes from the compressedMref set. + auto newBreakpoints = processFile(gzFile, fileIndex); + std::chrono::time_point end = std::chrono::steady_clock::now(); + std::chrono::seconds diff = std::chrono::duration_cast(end - start); + ++fileIndex; + std::cerr << gzFile << "\t" << diff.count() << "\t" << newBreakpoints << "\t" + << fileIndex << "\t" << 100 * (fileIndex + 0.0) / NUM_PIDS << "%\n"; + } + + // Finally, open the output file, and write the header and the breakpoint information. + mergedBpsOutput = std::make_unique( + outputRootName + "_" + strtk::type_to_string(NUM_PIDS) + + "_mergedBpCounts.bed"); + auto defuzzier = DeFuzzier {DEFAULT_READ_LENGTH * 3, true}; + CompressedMrefIndex compressedMrefChrIndex = chrConverter.nChromosomesCompressedMref() - 1; + while (!mrefDb.empty()) { + // Remove all invalid breakpoints. + mrefDb.back().erase( + remove_if(mrefDb.back().begin(), mrefDb.back().end(), + [](const MrefEntry &bp) { return !bp.isValid(); }), + mrefDb.back().end()); + + // Run the DeFuzzier. + defuzzier.deFuzzyDb(mrefDb.back()); + + // Again, remove all invalid breakpoints. + mrefDb.back().erase( + remove_if(mrefDb.back().begin(), mrefDb.back().end(), + [](const MrefEntry &bp) { return !bp.isValid(); }), + mrefDb.back().end()); + + // Write the breakpoint information. 
+ std::string chromosome = + chrConverter.compressedMrefIndexToChrName(compressedMrefChrIndex); + --compressedMrefChrIndex; + for (auto &bp : mrefDb.back()) { + if (bp.isValid()) { + // std::cout << bp.printArtifactRatios(chromosome); + *mergedBpsOutput << bp.printBpInfo(chromosome); + } } + mrefDb.pop_back(); } - mrefDb.pop_back(); } -} - -unsigned long long -MasterRefProcessor::processFile(const string &gzPath, short fileIndex) { - unsigned long long newBreakpoints{0}; - ifstream refHandle(gzPath, ios_base::in | ios_base::binary); - boost::iostreams::filtering_istream gzStream{}; - gzStream.push(boost::iostreams::gzip_decompressor()); - gzStream.push(refHandle); - string sophiaLine{}; - vector> fileBps{85, vector{}}; - auto lineIndex = 0; - while (error_terminating_getline(gzStream, sophiaLine)) { - if (sophiaLine[0] != '#') { - auto chrIndex = - ChrConverter::indexConverter[ChrConverter::readChromosomeIndex( - sophiaLine.cbegin(), '\t')]; - if (chrIndex < 0) { - continue; + + /** + * Process the file at gzPath. Chromosomes not in the compressedMref set are ignored. + * The file format the one produced by the `sophia` tool. + */ + unsigned long long + MasterRefProcessor::processFile(const std::string &gzPath, short fileIndex) { + std::cerr << "Processing file '" << gzPath << "'" << std::endl; + unsigned long long newBreakpoints{0}; + std::ifstream refHandle(gzPath, std::ios_base::in | std::ios_base::binary); + boost::iostreams::filtering_istream gzStream{}; + gzStream.push(boost::iostreams::gzip_decompressor()); + gzStream.push(refHandle); + std::string sophiaLine{}; + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + CompressedMrefIndex vectorSize = chrConverter.nChromosomesCompressedMref(); + + std::vector> fileBps = std::vector> + { static_cast(vectorSize), std::vector{}}; + auto lineIndex = 0; + + while (error_terminating_getline(gzStream, sophiaLine)) { + // Ignore comment lines. 
+ if (sophiaLine[0] != '#') { + // Parse the chromosome name in the first column of the gzip file. + ChrIndex globalIndex; + try { + globalIndex = chrConverter.parseChrAndReturnIndex( + sophiaLine.cbegin(), sophiaLine.cend(), '\t'); + } catch (const DomainError &e) { + e << error_info_string("file = " + gzPath + ", line = " + sophiaLine); + throw e; + } + + // Ignore chromosomes not in the compressedMref set. + if (chrConverter.isCompressedMref(globalIndex)) { + CompressedMrefIndex compressedMrefIndex = + chrConverter.indexToCompressedMrefIndex(globalIndex); + Breakpoint tmpBp = Breakpoint::parse(sophiaLine, true); + fileBps[static_cast(compressedMrefIndex)].emplace_back( + tmpBp, + lineIndex++, + (sophiaLine.back() != '.' && sophiaLine.back() != '#')); + } } - Breakpoint tmpBp{sophiaLine, true}; - fileBps[chrIndex].emplace_back( - tmpBp, lineIndex++, - (sophiaLine.back() != '.' && sophiaLine.back() != '#')); } - } - auto chrIndex = 0; - for (auto &chromosome : fileBps) { - DeFuzzier deFuzzierControl{DEFAULTREADLENGTH * 6, false}; - deFuzzierControl.deFuzzyDb(chromosome); - for (auto &bp : chromosome) { - if (processBp(bp, chrIndex, fileIndex)) { - ++newBreakpoints; + + ChrIndex chrIndex = 0; + for (auto &chromosome : fileBps) { + DeFuzzier deFuzzierControl {DEFAULT_READ_LENGTH * 6, false}; + deFuzzierControl.deFuzzyDb(chromosome); + for (auto &bp : chromosome) { + if (processBp(bp, chrIndex, fileIndex)) { + ++newBreakpoints; + } } + ++chrIndex; } - ++chrIndex; + return newBreakpoints; } - return newBreakpoints; -} - -bool -MasterRefProcessor::processBp(BreakpointReduced &bp, int chrIndex, - short fileIndex) { - MrefEntry tmpMrefEntry{}; - tmpMrefEntry.addEntry(bp, fileIndex); - auto validitiyInit = - mrefDb[chrIndex][tmpMrefEntry.getPos()].getValidityScore(); - mrefDb[chrIndex][tmpMrefEntry.getPos()].mergeMrefEntries(tmpMrefEntry); - auto validitiyFinal = - mrefDb[chrIndex][tmpMrefEntry.getPos()].getValidityScore(); - if (validitiyFinal > validitiyInit) { - return 
true; + + bool + MasterRefProcessor::processBp(BreakpointReduced &bp, + ChrIndex chrIndex, + short fileIndex) { + MrefEntry tmpMrefEntry{}; + tmpMrefEntry.addEntry(bp, fileIndex); + + unsigned long pos = static_cast(tmpMrefEntry.getPos()); + unsigned int idx = static_cast(chrIndex); + + auto validityInit = mrefDb[idx][pos].getValidityScore(); + mrefDb[idx][pos].mergeMrefEntries(tmpMrefEntry); + auto validityFinal = mrefDb[idx][pos].getValidityScore(); + + return validityFinal > validityInit; } - return false; -} } // namespace sophia diff --git a/src/MateInfo.cpp b/src/MateInfo.cpp new file mode 100644 index 0000000..fdf6d12 --- /dev/null +++ b/src/MateInfo.cpp @@ -0,0 +1,76 @@ +#include "MateInfo.h" + +namespace sophia { + + + bool MateInfo::operator<(const MateInfo &rhs) const { + if (mateChrIndex < rhs.mateChrIndex) + return true; + if (mateChrIndex > rhs.mateChrIndex) + return false; + if (mateStartPos < rhs.mateStartPos) + return true; + return false; + } + + bool MateInfo::suppAlignmentFuzzyMatch(const SuppAlignment &sa) const { + if (mateChrIndex != sa.getChrIndex()) { + return false; + } else { + if (!sa.isFuzzy()) { + return static_cast(sa.getPos()) >= (static_cast(mateStartPos) - static_cast(sa.getMatchFuzziness())) && + static_cast(sa.getPos()) <= (static_cast(mateEndPos) + static_cast(sa.getMatchFuzziness())); + } else { + return (static_cast(mateStartPos) - static_cast(sa.getMatchFuzziness())) <= static_cast(sa.getExtendedPos()) && + static_cast(sa.getPos()) <= (static_cast(mateEndPos) + static_cast(sa.getMatchFuzziness())); + } + } + } + + MateInfo::MateInfo(ChrSize readStartPosIn, + ChrSize readEndPosIn, + ChrIndex mateChrIndexIn, + ChrSize mateStartPosIn, + int sourceType, + bool invertedIn) + : readStartPos{readStartPosIn}, + readEndPos{readEndPosIn}, + mateChrIndex{mateChrIndexIn}, + mateStartPos{mateStartPosIn}, + mateEndPos{mateStartPosIn}, + inverted{invertedIn}, + source{sourceType}, + evidenceLevel{sourceType == 2 ? 
3 : 1}, + matePower{1}, + inversionSupport{invertedIn}, + straightSupport{!invertedIn}, + bpLocs{}, + saSupporter{false}, + toRemove{false} {} + + MateInfo::MateInfo(ChrSize readStartPosIn, + ChrSize readEndPosIn, + ChrIndex mateChrIndexIn, + ChrSize mateStartPosIn, + int sourceType, + bool invertedIn, + const std::vector &bpLocsIn) + : readStartPos{readStartPosIn}, + readEndPos{readEndPosIn}, + mateChrIndex{mateChrIndexIn}, + mateStartPos{mateStartPosIn}, + mateEndPos{mateStartPosIn}, + inverted{invertedIn}, + source{sourceType}, + evidenceLevel{sourceType == 2 ? 3 : 1}, + matePower{1}, + inversionSupport{invertedIn}, + straightSupport{!invertedIn}, + bpLocs{bpLocsIn}, + saSupporter{false}, + toRemove{false} {} + + bool MateInfo::isToRemove() const { return toRemove; } + + +} /* namespace sophia */ \ No newline at end of file diff --git a/src/MrefEntry.cpp b/src/MrefEntry.cpp index 7db1c71..51c52c0 100644 --- a/src/MrefEntry.cpp +++ b/src/MrefEntry.cpp @@ -20,215 +20,250 @@ * LICENSE: GPL */ +#include "global.h" #include "Breakpoint.h" -#include "strtk.hpp" +#include "strtk-wrap.h" +#include "GlobalAppConfig.h" #include #include #include -#include "ChrConverter.h" #include "BreakpointReduced.h" namespace sophia { - using namespace std; - -boost::format MrefEntry::doubleFormatter { "%.5f" }; -int MrefEntry::NUMPIDS { }; -int MrefEntry::DEFAULTREADLENGTH { }; - -MrefEntry::MrefEntry() : - validity { -1 }, - pos { -1 }, - fileIndices { }, - fileIndicesWithArtifactRatios { }, - artifactRatios { }, - suppAlignments { } { - -} - -void MrefEntry::addEntry(BreakpointReduced& tmpBreakpoint, int fileIndex) { - pos = tmpBreakpoint.getPos(); - auto artifactBreakTotal = tmpBreakpoint.getLowQualBreaksSoft() + tmpBreakpoint.getLowQualBreaksHard() + tmpBreakpoint.getRepetitiveOverhangBreaks(); - auto eventTotal = tmpBreakpoint.getPairedBreaksSoft() + tmpBreakpoint.getPairedBreaksHard() + tmpBreakpoint.getUnpairedBreaksSoft() + tmpBreakpoint.getUnpairedBreaksHard() + 
tmpBreakpoint.getBreaksShortIndel(); - auto breakTotal = eventTotal + artifactBreakTotal; - if (breakTotal < 200) { - for (auto saPtr : tmpBreakpoint.getSupplementsPtr()) { - if (saPtr->isSuspicious() || saPtr->isToRemove() || (saPtr->getChrIndex() != 1001 && ChrConverter::indexConverter[saPtr->getChrIndex()] < 0)) { - continue; - } - auto qualCheck = false; - auto splitTotal = saPtr->getSupport() + saPtr->getSecondarySupport(); - if (saPtr->isDistant()) { - auto clonalityCondition = (((0.0 + saPtr->getMateSupport()) / saPtr->getExpectedDiscordants()) >= 0.5); - if (!clonalityCondition) { - continue; - } - qualCheck = (splitTotal > 4 && saPtr->getMateSupport() > 2); - if (!qualCheck) { - qualCheck = (saPtr->getMateSupport() > 4); - } - } else { - qualCheck = (splitTotal > 4) || (splitTotal > 2 && saPtr->getSupport() > 0 && saPtr->getSecondarySupport() > 0); - } - if (qualCheck) { - if (!saMatcher(saPtr)) { - auto saTmp = *saPtr; - saTmp.mrefSaTransform(fileIndex); - suppAlignments.push_back(saTmp); - } - } - } - } - auto covValidity = (tmpBreakpoint.getBreaksShortIndel() > 2 || breakTotal > 9); - if (!covValidity) { - if (breakTotal > 4) { - auto clonality = ((breakTotal + 0.0) / (tmpBreakpoint.getNormalSpans() + breakTotal)); - if (clonality > 0.3) { - covValidity = true; - } else if (clonality > 0.1) { - if (tmpBreakpoint.hasOverhang) { - covValidity = true; - } - } - } - } - if (eventTotal + artifactBreakTotal > 0) { - if (covValidity) { - auto eventTotalStrict = tmpBreakpoint.getPairedBreaksSoft() + tmpBreakpoint.getUnpairedBreaksSoft() + tmpBreakpoint.getPairedBreaksHard(); - auto artifactTotalRelaxed = tmpBreakpoint.getLowQualBreaksSoft() + tmpBreakpoint.getLowQualSpansSoft() + tmpBreakpoint.getRepetitiveOverhangBreaks(); - if ((eventTotalStrict + artifactTotalRelaxed) > 0) { - artifactRatios.push_back((0.0 + artifactTotalRelaxed) / (eventTotalStrict + artifactTotalRelaxed)); - fileIndicesWithArtifactRatios.push_back(fileIndex); - } - } - } - if (covValidity) 
{ - fileIndices.push_back(fileIndex); - validity = 1; - } else if (!suppAlignments.empty()) { - fileIndices.push_back(fileIndex); - validity = 0; - } -} - -void MrefEntry::mergeMrefEntries(MrefEntry& entry2) { - pos = entry2.getPos(); - for (auto artifactRatio : entry2.getArtifactRatios()) { - artifactRatios.push_back(artifactRatio); - } - for (auto fileIndex : entry2.getFileIndicesWithArtifactRatios()) { - fileIndicesWithArtifactRatios.push_back(fileIndex); - } - for (auto fileIndex : entry2.getFileIndices()) { - fileIndices.push_back(fileIndex); - } - for (auto saPtr : entry2.getSupplementsPtr()) { - if (!saMatcher(saPtr)) { - suppAlignments.push_back(*saPtr); - } - } - validity = max(validity, entry2.getValidityScore()); -} - -string MrefEntry::printBpInfo(const string& chromosome) { - finalizeFileIndices(); - vector outputFields { }; - outputFields.emplace_back(chromosome); - outputFields.emplace_back(strtk::type_to_string(pos)); - outputFields.emplace_back(strtk::type_to_string(pos + 1)); - outputFields.emplace_back(strtk::type_to_string(fileIndices.size())); - outputFields.emplace_back(strtk::type_to_string(fileIndicesWithArtifactRatios.size())); - outputFields.emplace_back(boost::str(doubleFormatter % ((fileIndices.size() + 0.0) / NUMPIDS))); - outputFields.emplace_back(boost::str(doubleFormatter % ((fileIndicesWithArtifactRatios.size() + 0.0) / NUMPIDS))); - if (!artifactRatios.empty()) { - outputFields.emplace_back(boost::str(doubleFormatter % (accumulate(artifactRatios.cbegin(), artifactRatios.cend(), 0.0) / artifactRatios.size()))); - } else { - outputFields.emplace_back("NA"); - } - if (suppAlignments.empty()) { - outputFields.emplace_back("."); - } else { - vector saFields { }; - saFields.reserve(suppAlignments.size()); - for (auto &sa : suppAlignments) { - sa.finalizeSupportingIndices(); - if (suppAlignments.size() < 11 || sa.getSupport() >= 0.2 * fileIndices.size()) { - saFields.emplace_back(sa.print()); - } - } - if (saFields.empty()) { - 
outputFields.emplace_back("."); - } else { - outputFields.emplace_back(boost::join(saFields, ";")); - } - } - vector fileIndicesStr { }; - transform(fileIndices.begin(), fileIndices.end(), back_inserter(fileIndicesStr), [](int fileIndex) {return strtk::type_to_string(fileIndex);}); - outputFields.emplace_back(boost::join(fileIndicesStr, ",")); - return boost::join(outputFields, "\t").append("\n"); -} - -string MrefEntry::printArtifactRatios(const string& chromosome) { - vector outputFields { }; - outputFields.reserve(NUMPIDS + 3); - outputFields.emplace_back(chromosome); - outputFields.emplace_back(strtk::type_to_string(pos)); - outputFields.emplace_back(strtk::type_to_string(pos + 1)); - vector artifactRatiosOutput(NUMPIDS, "."); - for (auto i = 0; i < static_cast(fileIndicesWithArtifactRatios.size()); ++i) { - artifactRatiosOutput[fileIndicesWithArtifactRatios[i]] = boost::str(doubleFormatter % artifactRatios[i]); - } - for (const auto &artifactRatio : artifactRatiosOutput) { - outputFields.push_back(artifactRatio); - } - return boost::join(outputFields, "\t").append("\n"); -} - -SuppAlignmentAnno* MrefEntry::searchFuzzySa(const SuppAlignmentAnno& fuzzySa) { - SuppAlignmentAnno* match = nullptr; - for (auto &sa : suppAlignments) { - if (sa.isToRemove()) { - continue; - } - if (sa.saClosenessDirectional(fuzzySa, 1)) { - match = &sa; - return match; - } - } - return nullptr; -} - -bool MrefEntry::saMatcher(SuppAlignmentAnno* saPtr) { - if (saPtr->isToRemove() || saPtr->isSuspicious() || (saPtr->getExpectedDiscordants() > 0 && !(saPtr->getMateSupport() / (saPtr->getExpectedDiscordants() + 0.0) > 0.1))) { - return true; - } - for (auto &sa : suppAlignments) { - if (sa.saCloseness(*saPtr, 100)) { - if (sa.isFuzzy()) { - if (saPtr->isFuzzy()) { - sa.extendSuppAlignment(saPtr->getPos(), saPtr->getExtendedPos()); - } else { - sa.removeFuzziness(*saPtr); - } - } - sa.addSupportingIndices(saPtr->getSupportingIndices()); - sa.mergeMrefSa(*saPtr); - saPtr->setToRemove(true); 
- return true; - } - } - return false; -} - -void MrefEntry::finalizeFileIndices() { - for (const auto &sa : suppAlignments) { - auto tmpIndices = sa.getSupportingIndices(); - copy(tmpIndices.begin(), tmpIndices.end(), inserter(fileIndices, fileIndices.end())); - } - sort(fileIndices.begin(), fileIndices.end()); - auto last = unique(fileIndices.begin(), fileIndices.end()); - fileIndices.erase(last, fileIndices.end()); -} + boost::format MrefEntry::doubleFormatter { "%.5f" }; + + unsigned int MrefEntry::NUM_PIDS { }; + + ChrSize MrefEntry::DEFAULT_READ_LENGTH { }; + + MrefEntry::MrefEntry() : + validity { -1 }, + pos { std::numeric_limits::max() }, + fileIndices { }, + fileIndicesWithArtifactRatios { }, + artifactRatios { }, + suppAlignments { } { + + } + + void MrefEntry::addEntry(BreakpointReduced& tmpBreakpoint, + int fileIndex) { + pos = tmpBreakpoint.getPos(); + auto artifactBreakTotal = + tmpBreakpoint.getLowQualBreaksSoft() + + tmpBreakpoint.getLowQualBreaksHard() + + tmpBreakpoint.getRepetitiveOverhangBreaks(); + auto eventTotal = + tmpBreakpoint.getPairedBreaksSoft() + + tmpBreakpoint.getPairedBreaksHard() + + tmpBreakpoint.getUnpairedBreaksSoft() + + tmpBreakpoint.getUnpairedBreaksHard() + + tmpBreakpoint.getBreaksShortIndel(); + auto breakTotal = + eventTotal + + artifactBreakTotal; + + if (breakTotal < 200) { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + for (SuppAlignmentAnno *saPtr : tmpBreakpoint.getSupplementsPtr()) { + // Original code from bitbucket repository + // if (saPtr->isSuspicious() + // || saPtr->isToRemove() + // || (saPtr->getChrIndex() != 1001 // i.e. !mitochondrial + // && ChrConverter::indexConverter[saPtr->getChrIndex()] < 0)) // i.e. !compressedMref + // `indexConverter` is now `indexToCompressedMrefIndex` and a mapping from ChrIndex to + // CompressedMrefIndex, that contained `-2` values (now `NA` constant) for chromosomes that were not + // in the compressed mref set. 
+ // Note that the mitochondrial chromosome itself is *not* among the compressed mrefs. So the + // condition is somewhat redundant. + if (saPtr->isSuspicious() + || saPtr->isToRemove() + || (!chrConverter.isExtrachromosomal(saPtr->getChrIndex()) + && !chrConverter.isCompressedMref(saPtr->getChrIndex())) + ) { + continue; + } + auto qualCheck = false; + auto splitTotal = saPtr->getSupport() + saPtr->getSecondarySupport(); + if (saPtr->isDistant()) { + auto clonalityCondition = (((0.0 + saPtr->getMateSupport()) / saPtr->getExpectedDiscordants()) >= 0.5); + if (!clonalityCondition) { + continue; + } + qualCheck = (splitTotal > 4 && saPtr->getMateSupport() > 2); + if (!qualCheck) { + qualCheck = (saPtr->getMateSupport() > 4); + } + } else { + qualCheck = (splitTotal > 4) || (splitTotal > 2 && saPtr->getSupport() > 0 && saPtr->getSecondarySupport() > 0); + } + if (qualCheck) { + if (!saMatcher(saPtr)) { + auto saTmp = *saPtr; + saTmp.mrefSaTransform(fileIndex); + suppAlignments.push_back(saTmp); + } + } + } + } + auto covValidity = (tmpBreakpoint.getBreaksShortIndel() > 2 || breakTotal > 9); + if (!covValidity) { + if (breakTotal > 4) { + auto clonality = ((breakTotal + 0.0) / (tmpBreakpoint.getNormalSpans() + breakTotal)); + if (clonality > 0.3) { + covValidity = true; + } else if (clonality > 0.1) { + if (tmpBreakpoint.hasOverhang) { + covValidity = true; + } + } + } + } + if (eventTotal + artifactBreakTotal > 0) { + if (covValidity) { + auto eventTotalStrict = tmpBreakpoint.getPairedBreaksSoft() + tmpBreakpoint.getUnpairedBreaksSoft() + tmpBreakpoint.getPairedBreaksHard(); + auto artifactTotalRelaxed = tmpBreakpoint.getLowQualBreaksSoft() + tmpBreakpoint.getLowQualSpansSoft() + tmpBreakpoint.getRepetitiveOverhangBreaks(); + if ((eventTotalStrict + artifactTotalRelaxed) > 0) { + artifactRatios.push_back((0.0 + artifactTotalRelaxed) / (eventTotalStrict + artifactTotalRelaxed)); + fileIndicesWithArtifactRatios.push_back(fileIndex); + } + } + } + if (covValidity) { + 
fileIndices.push_back(fileIndex); + validity = 1; + } else if (!suppAlignments.empty()) { + fileIndices.push_back(fileIndex); + validity = 0; + } + } + + void MrefEntry::mergeMrefEntries(MrefEntry& entry2) { + pos = entry2.getPos(); + for (auto artifactRatio : entry2.getArtifactRatios()) { + artifactRatios.push_back(artifactRatio); + } + for (auto fileIndex : entry2.getFileIndicesWithArtifactRatios()) { + fileIndicesWithArtifactRatios.push_back(fileIndex); + } + for (auto fileIndex : entry2.getFileIndices()) { + fileIndices.push_back(fileIndex); + } + for (auto saPtr : entry2.getSupplementsPtr()) { + if (!saMatcher(saPtr)) { + suppAlignments.push_back(*saPtr); + } + } + validity = std::max(validity, entry2.getValidityScore()); + } + + /** This prints the output of the `sophiaMref` tool. */ + std::string MrefEntry::printBpInfo(const std::string& chromosome) { + finalizeFileIndices(); + std::vector outputFields { }; + outputFields.emplace_back(chromosome); + outputFields.emplace_back(strtk::type_to_string(pos)); + outputFields.emplace_back(strtk::type_to_string(pos + 1)); + outputFields.emplace_back(strtk::type_to_string(fileIndices.size())); + outputFields.emplace_back(strtk::type_to_string(fileIndicesWithArtifactRatios.size())); + outputFields.emplace_back(boost::str(doubleFormatter % ((fileIndices.size() + 0.0) / NUM_PIDS))); + outputFields.emplace_back(boost::str(doubleFormatter % ((fileIndicesWithArtifactRatios.size() + 0.0) / NUM_PIDS))); + if (!artifactRatios.empty()) { + outputFields.emplace_back(boost::str(doubleFormatter % (accumulate(artifactRatios.cbegin(), artifactRatios.cend(), 0.0) / artifactRatios.size()))); + } else { + outputFields.emplace_back("NA"); + } + if (suppAlignments.empty()) { + outputFields.emplace_back("."); + } else { + std::vector saFields { }; + saFields.reserve(suppAlignments.size()); + for (auto &sa : suppAlignments) { + sa.finalizeSupportingIndices(); + if (suppAlignments.size() < 11 || sa.getSupport() >= 0.2 * fileIndices.size()) 
{ + saFields.emplace_back(sa.print()); + } + } + if (saFields.empty()) { + outputFields.emplace_back("."); + } else { + outputFields.emplace_back(boost::join(saFields, ";")); + } + } + std::vector fileIndicesStr { }; + transform(fileIndices.begin(), fileIndices.end(), back_inserter(fileIndicesStr), + [](int fileIndex) {return strtk::type_to_string(fileIndex);}); + outputFields.emplace_back(boost::join(fileIndicesStr, ",")); + return boost::join(outputFields, "\t").append("\n"); + } + + // Currently, not used. + std::string MrefEntry::printArtifactRatios(const std::string& chromosome) { + std::vector outputFields { }; + outputFields.reserve(NUM_PIDS + 3u); + outputFields.emplace_back(chromosome); + outputFields.emplace_back(strtk::type_to_string(pos)); + outputFields.emplace_back(strtk::type_to_string(pos + 1)); + std::vector artifactRatiosOutput(NUM_PIDS, "."); + for (size_t i = 0; i < fileIndicesWithArtifactRatios.size(); ++i) { + artifactRatiosOutput[fileIndicesWithArtifactRatios[i]] = + boost::str(doubleFormatter % artifactRatios[i]); + } + for (const auto &artifactRatio : artifactRatiosOutput) { + outputFields.push_back(artifactRatio); + } + return boost::join(outputFields, "\t").append("\n"); + } + + SuppAlignmentAnno* MrefEntry::searchFuzzySa(const SuppAlignmentAnno& fuzzySa) { + SuppAlignmentAnno* match = nullptr; + for (auto &sa : suppAlignments) { + if (sa.isToRemove()) { + continue; + } + if (sa.saClosenessDirectional(fuzzySa, 1)) { + match = &sa; + return match; + } + } + return nullptr; + } + + bool MrefEntry::saMatcher(SuppAlignmentAnno* saPtr) { + if (saPtr->isToRemove() + || saPtr->isSuspicious() + || (saPtr->getExpectedDiscordants() > 0 + && !(saPtr->getMateSupport() / (saPtr->getExpectedDiscordants() + 0.0) > 0.1))) { + return true; + } + for (auto &sa : suppAlignments) { + if (sa.saCloseness(*saPtr, 100)) { + if (sa.isFuzzy()) { + if (saPtr->isFuzzy()) { + sa.extendSuppAlignment(saPtr->getPos(), saPtr->getExtendedPos()); + } else { + 
sa.removeFuzziness(*saPtr); + } + } + sa.addSupportingIndices(saPtr->getSupportingIndices()); + sa.mergeMrefSa(*saPtr); + saPtr->setToRemove(true); + return true; + } + } + return false; + } + + void MrefEntry::finalizeFileIndices() { + for (const auto &sa : suppAlignments) { + auto tmpIndices = sa.getSupportingIndices(); + copy(tmpIndices.begin(), tmpIndices.end(), inserter(fileIndices, fileIndices.end())); + } + sort(fileIndices.begin(), fileIndices.end()); + auto last = unique(fileIndices.begin(), fileIndices.end()); + fileIndices.erase(last, fileIndices.end()); + } } /* namespace sophia */ diff --git a/src/MrefEntryAnno.cpp b/src/MrefEntryAnno.cpp index ed1060f..83ad38f 100644 --- a/src/MrefEntryAnno.cpp +++ b/src/MrefEntryAnno.cpp @@ -21,68 +21,73 @@ */ #include "Breakpoint.h" -#include "strtk.hpp" +#include "strtk-wrap.h" #include #include #include -#include "ChrConverter.h" namespace sophia { - using namespace std; -boost::format MrefEntryAnno::doubleFormatter { "%.5f" }; -int MrefEntryAnno::DEFAULTREADLENGTH { }; -int MrefEntryAnno::PIDSINMREF { }; -MrefEntryAnno::MrefEntryAnno(const string& mrefEntryIn) : - pos { 0 }, - numHits { 0 }, - suppAlignments { } { - auto index = 0; - vector bpChunkPositions { }; - bpChunkPositions.reserve(7); - auto cit = mrefEntryIn.cbegin(); - if (mrefEntryIn.back() != '.') { - while (bpChunkPositions.size() < 8) { - if (*cit == '\t') { - bpChunkPositions.push_back(index); - } - ++index; - ++cit; - } - string saStr { }; - for (auto i = bpChunkPositions[7] + 1; i < static_cast(mrefEntryIn.length()); ++i) { - if (mrefEntryIn[i] == ';') { - suppAlignments.emplace_back(saStr); - saStr.clear(); - } else { - saStr.push_back(mrefEntryIn[i]); - } - } - suppAlignments.emplace_back(saStr); - } else { - while (bpChunkPositions.size() < 5) { - if (*cit == '\t') { - bpChunkPositions.push_back(index); - } - ++index; - ++cit; - } - } + boost::format MrefEntryAnno::doubleFormatter { "%.5f" }; - for (auto i = bpChunkPositions[0] + 1; i < 
bpChunkPositions[1]; ++i) { - pos = pos * 10 + (mrefEntryIn[i] - '0'); - } - for (auto i = bpChunkPositions[2] + 1; i < bpChunkPositions[3]; ++i) { - numHits = numHits * 10 + (mrefEntryIn[i] - '0'); - } - if (mrefEntryIn[0] == 'Y') { - numHits = min(PIDSINMREF, 2 * numHits); - } - for (auto &sa : suppAlignments) { - sa.setSecondarySupport(numHits); - } + ChrSize MrefEntryAnno::DEFAULT_READ_LENGTH { }; -} + int MrefEntryAnno::PIDS_IN_MREF { }; + + MrefEntryAnno::MrefEntryAnno(const std::string& mrefEntryIn) : + pos { 0 }, + numHits { 0 }, + suppAlignments { } { + unsigned int index = 0; + std::vector bpChunkPositions { }; + bpChunkPositions.reserve(7); + auto cit = mrefEntryIn.cbegin(); + if (mrefEntryIn.back() != '.') { + while (bpChunkPositions.size() < 8) { + if (*cit == '\t') { + bpChunkPositions.push_back(index); + } + ++index; + ++cit; + } + try { + std::string saStr { }; + for (auto i = bpChunkPositions[7] + 1; i < mrefEntryIn.length(); ++i) { + if (mrefEntryIn[i] == ';') { + suppAlignments.emplace_back(saStr); + saStr.clear(); + } else { + saStr.push_back(mrefEntryIn[i]); + } + } + suppAlignments.emplace_back(saStr); + } catch (DomainError &e) { + throw e << error_info_string("from = " + mrefEntryIn); + } + } else { + while (bpChunkPositions.size() < 5) { + if (*cit == '\t') { + bpChunkPositions.push_back(index); + } + ++index; + ++cit; + } + } + + for (auto i = bpChunkPositions[0] + 1; i < bpChunkPositions[1]; ++i) { + pos = pos * 10 + ChrSize(mrefEntryIn[i] - '0'); + } + for (auto i = bpChunkPositions[2] + 1; i < bpChunkPositions[3]; ++i) { + numHits = numHits * 10 + (mrefEntryIn[i] - '0'); + } + if (mrefEntryIn[0] == 'Y') { + numHits = std::min(PIDS_IN_MREF, 2 * numHits); + } + for (auto &sa : suppAlignments) { + sa.setSecondarySupport(numHits); + } + + } //SuppAlignmentAnno* MrefEntryAnno::searchFuzzySa(const SuppAlignmentAnno& fuzzySa) { // SuppAlignmentAnno* match = nullptr; diff --git a/src/MrefMatch.cpp b/src/MrefMatch.cpp index ddac1e4..1becf3e 
100644 --- a/src/MrefMatch.cpp +++ b/src/MrefMatch.cpp @@ -27,11 +27,14 @@ namespace sophia { -MrefMatch::MrefMatch(short numHitsIn, short numConsevativeHitsIn, +MrefMatch::MrefMatch(short numHitsIn, + short numConsevativeHitsIn, int offsetDistanceIn, const std::vector &suppMatchesIn) - : numHits{numHitsIn}, numConsevativeHits{numConsevativeHitsIn}, - offsetDistance{offsetDistanceIn}, suppMatches{suppMatchesIn} {} + : numHits{numHitsIn}, + numConsevativeHits{numConsevativeHitsIn}, + offsetDistance{offsetDistanceIn}, + suppMatches{suppMatchesIn} {} } // namespace sophia /* namespace sophia */ diff --git a/src/SamSegmentMapper.cpp b/src/SamSegmentMapper.cpp index 2ed4346..31fcdf8 100644 --- a/src/SamSegmentMapper.cpp +++ b/src/SamSegmentMapper.cpp @@ -26,323 +26,314 @@ #include #include #include -// #include +#include "GlobalAppConfig.h" namespace sophia { -using namespace std; + SamSegmentMapper::SamSegmentMapper(ChrSize defaultReadLengthIn) + : STARTTIME{time(nullptr)}, + PROPER_PAIR_COMPENSATION_MODE{Breakpoint::PROPER_PAIR_COMPENSATION_MODE}, + DISCORDANT_LEFT_RANGE{static_cast(round(defaultReadLengthIn * 3))}, + DISCORDANT_RIGHT_RANGE{static_cast(round(defaultReadLengthIn * 2.51))}, + printedBps{0u}, + chrIndexCurrent{0}, + minPos{std::numeric_limits::max()}, + maxPos{std::numeric_limits::min()}, + breakpointsCurrent{}, + discordantAlignmentsPool{}, + discordantAlignmentCandidatesPool{}, + discordantLowQualAlignmentsPool{} {} -SamSegmentMapper::SamSegmentMapper(int defaultReadLengthIn) - : STARTTIME{time(nullptr)}, - PROPERPARIRCOMPENSATIONMODE{Breakpoint::PROPERPAIRCOMPENSATIONMODE}, - DISCORDANTLEFTRANGE{static_cast(round(defaultReadLengthIn * 3))}, - DISCORDANTRIGHTRANGE{static_cast(round(defaultReadLengthIn * 2.51))}, - printedBps{0u}, chrIndexCurrent{0}, minPos{-1}, maxPos{-1}, - breakpointsCurrent{}, discordantAlignmentsPool{}, - discordantAlignmentCandidatesPool{}, discordantLowQualAlignmentsPool{} {} + void + SamSegmentMapper::parseSamStream() { + const 
ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + while (true) { + auto alignment = std::make_shared(); -void -SamSegmentMapper::parseSamStream() { - while (true) { - auto alignment = make_shared(); - if (alignment->getChrIndex() > 1000) { - continue; - } - if (alignment->isValidLine()) { - if (alignment->getChrIndex() != chrIndexCurrent) { - switchChromosome(*alignment); + // Used to be `alignment->getChrIndex() > 1000`, i.e. excluding MT and phiX and INVALID. + // This is the same as `!chrConverter.isCompressedMref(alignment->getChrIndex())`. + if (!chrConverter.isCompressedMref(alignment->getChrIndex())) { + continue; + } + + if (alignment->isValidLine()) { + if (alignment->getChrIndex() != chrIndexCurrent) { + switchChromosome(*alignment); + } + alignment->continueConstruction(); + printBps(alignment->getStartPos()); + incrementCoverages(*alignment); + assignBps(alignment); + } else { + break; } - alignment->continueConstruction(); - printBps(alignment->getStartPos()); - incrementCoverages(*alignment); - assignBps(alignment); - } else { - break; } + // EOF event for the samtools pipe. printing the end of the very last + // chromosome, + printBps(std::numeric_limits::max()); } - // EOF event for the samtools pipe. 
printing the end of the very last - // chromosome, - printBps(numeric_limits::max()); -} -void -SamSegmentMapper::switchChromosome(const Alignment &alignment) { - // As we entered a new chromosome here, now print the previous chromosome's - // unprinted regions - if (chrIndexCurrent != 0) { - printBps(numeric_limits::max()); - } - chrIndexCurrent = alignment.getChrIndex(); - breakpointsCurrent.clear(); - coverageProfiles.clear(); - discordantAlignmentsPool.clear(); - if (PROPERPARIRCOMPENSATIONMODE) { - discordantAlignmentCandidatesPool.clear(); + void + SamSegmentMapper::switchChromosome(const Alignment &alignment) { + // As we entered a new chromosome here, now print the previous chromosome's + // unprinted regions + if (chrIndexCurrent != 0) { + printBps(std::numeric_limits::max()); + } + chrIndexCurrent = alignment.getChrIndex(); + breakpointsCurrent.clear(); + coverageProfiles.clear(); + discordantAlignmentsPool.clear(); + if (PROPER_PAIR_COMPENSATION_MODE) { + discordantAlignmentCandidatesPool.clear(); + } + discordantLowQualAlignmentsPool.clear(); + minPos = std::numeric_limits::max(); + maxPos = std::numeric_limits::min(); } - discordantLowQualAlignmentsPool.clear(); - minPos = -1; - maxPos = -1; -} -void -SamSegmentMapper::printBps(int alignmentStart) { - for (auto &bp : breakpointsCurrent) { - if (!bp.second.isCovFinalized() && ((bp.first) + 1 < alignmentStart)) { - auto posDiff = bp.first - minPos; - if (bp.first != minPos) { - bp.second.setLeftCoverage( - coverageProfiles[bp.first - minPos - 1].getCoverage()); - } else { - bp.second.setLeftCoverage(0); + /** Does a lot of stuff, and -- just by the way -- also prints the results to stdout :(. 
+ **/ + void + SamSegmentMapper::printBps(ChrSize alignmentStart) { + for (auto &bp : breakpointsCurrent) { + if (!bp.second.isCovFinalized() && ((bp.first) + 1 < alignmentStart)) { + unsigned long posDiff = static_cast(bp.first - minPos); + if (bp.first != minPos) { + bp.second.setLeftCoverage( + coverageProfiles[static_cast(bp.first - minPos - 1)].getCoverage()); + } else { + bp.second.setLeftCoverage(0); + } + bp.second.setRightCoverage( + coverageProfiles[posDiff].getCoverage()); + bp.second.setNormalSpans( + coverageProfiles[posDiff].getNormalSpans()); + bp.second.setLowQualSpansSoft( + coverageProfiles[posDiff].getLowQualSpansSoft()); + bp.second.setLowQualSpansHard( + coverageProfiles[posDiff].getLowQualSpansHard()); + bp.second.setUnpairedBreaksSoft( + coverageProfiles[posDiff].getNormalBpsSoft()); + bp.second.setUnpairedBreaksHard( + coverageProfiles[posDiff].getNormalBpsHard()); + bp.second.setBreaksShortIndel( + coverageProfiles[posDiff].getNormalBpsShortIndel()); + bp.second.setLowQualBreaksSoft( + coverageProfiles[posDiff].getLowQualBpsSoft()); + bp.second.setLowQualBreaksHard( + coverageProfiles[posDiff].getLowQualBpsHard()); + bp.second.setCovFinalized(true); } - bp.second.setRightCoverage(coverageProfiles[posDiff].getCoverage()); - bp.second.setNormalSpans( - coverageProfiles[posDiff].getNormalSpans()); - bp.second.setLowQualSpansSoft( - coverageProfiles[posDiff].getLowQualSpansSoft()); - bp.second.setLowQualSpansHard( - coverageProfiles[posDiff].getLowQualSpansHard()); - bp.second.setUnpairedBreaksSoft( - coverageProfiles[posDiff].getNormalBpsSoft()); - bp.second.setUnpairedBreaksHard( - coverageProfiles[posDiff].getNormalBpsHard()); - bp.second.setBreaksShortIndel( - coverageProfiles[posDiff].getNormalBpsShortIndel()); - bp.second.setLowQualBreaksSoft( - coverageProfiles[posDiff].getLowQualBpsSoft()); - bp.second.setLowQualBreaksHard( - coverageProfiles[posDiff].getLowQualBpsHard()); - bp.second.setCovFinalized(true); } - } - if (minPos != -1) { - 
while (minPos + 2 + DISCORDANTLEFTRANGE < alignmentStart) { - if (minPos != maxPos) { - coverageProfiles.pop_front(); - ++minPos; + if (minPos != std::numeric_limits::max()) { + while (minPos + 2 + DISCORDANT_LEFT_RANGE < alignmentStart) { + if (minPos != maxPos) { + coverageProfiles.pop_front(); + ++minPos; + } else { + coverageProfiles.clear(); + minPos = std::numeric_limits::max(); + maxPos = std::numeric_limits::min(); + break; + } + } + } + for (auto bpIt = breakpointsCurrent.begin(); + bpIt != breakpointsCurrent.end();) { + if ((bpIt->first) + DISCORDANT_RIGHT_RANGE < alignmentStart) { + if (bpIt->second.finalizeBreakpoint( // Side effect: prints the breakpoint! + discordantAlignmentsPool, discordantLowQualAlignmentsPool, + discordantAlignmentCandidatesPool)) { + ++printedBps; + } + bpIt = breakpointsCurrent.erase(bpIt); } else { - coverageProfiles.clear(); - minPos = -1; - maxPos = -1; break; } } - } - for (auto bpIt = breakpointsCurrent.begin(); - bpIt != breakpointsCurrent.end();) { - if ((bpIt->first) + DISCORDANTRIGHTRANGE < alignmentStart) { - if (bpIt->second.finalizeBreakpoint( - discordantAlignmentsPool, discordantLowQualAlignmentsPool, - discordantAlignmentCandidatesPool)) { - ++printedBps; + while (!discordantAlignmentsPool.empty() && + (discordantAlignmentsPool.front().readStartPos + + DISCORDANT_LEFT_RANGE + DISCORDANT_RIGHT_RANGE < + alignmentStart)) { + discordantAlignmentsPool.pop_front(); + } + if (PROPER_PAIR_COMPENSATION_MODE) { + while (!discordantAlignmentCandidatesPool.empty() && + (discordantAlignmentCandidatesPool.front().readStartPos + + DISCORDANT_LEFT_RANGE + DISCORDANT_RIGHT_RANGE < + alignmentStart)) { + discordantAlignmentCandidatesPool.pop_front(); } - bpIt = breakpointsCurrent.erase(bpIt); - } else { - break; } - } - while (!discordantAlignmentsPool.empty() && - (discordantAlignmentsPool.front().readStartPos + - DISCORDANTLEFTRANGE + DISCORDANTRIGHTRANGE < - alignmentStart)) { - discordantAlignmentsPool.pop_front(); - } - if 
(PROPERPARIRCOMPENSATIONMODE) { - while (!discordantAlignmentCandidatesPool.empty() && - (discordantAlignmentCandidatesPool.front().readStartPos + - DISCORDANTLEFTRANGE + DISCORDANTRIGHTRANGE < + while (!discordantLowQualAlignmentsPool.empty() && + (discordantLowQualAlignmentsPool.front().readStartPos + + DISCORDANT_LEFT_RANGE + DISCORDANT_RIGHT_RANGE < alignmentStart)) { - discordantAlignmentCandidatesPool.pop_front(); + discordantLowQualAlignmentsPool.pop_front(); } } - while (!discordantLowQualAlignmentsPool.empty() && - (discordantLowQualAlignmentsPool.front().readStartPos + - DISCORDANTLEFTRANGE + DISCORDANTRIGHTRANGE < - alignmentStart)) { - discordantLowQualAlignmentsPool.pop_front(); - } -} -void -SamSegmentMapper::incrementCoverages(const Alignment &alignment) { - if (minPos == -1) { - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { - coverageProfiles.emplace_back(); - } - minPos = alignment.getStartPos(); - maxPos = alignment.getEndPos() - 1; - } else { - while (alignment.getStartPos() > maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; - } - } - switch (alignment.getReadType()) { - case 0: - case 3: - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { - if (i > maxPos) { + void + SamSegmentMapper::incrementCoverages(const Alignment &alignment) { + if (minPos == std::numeric_limits::max()) { + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { coverageProfiles.emplace_back(); - ++maxPos; } - coverageProfiles[i - minPos].incrementCoverage(); - coverageProfiles[i - minPos].incrementNormalSpans(); - } - if (PROPERPARIRCOMPENSATIONMODE) { - discordantAlignmentCandidatesPool.emplace_back( - alignment.getStartPos(), alignment.getEndPos(), -1, -1, -1, - false); - } - break; - case 1: - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { - if (i > maxPos) { + minPos = alignment.getStartPos(); + maxPos = alignment.getEndPos() - 1; + } else { + while 
(alignment.getStartPos() > maxPos) { coverageProfiles.emplace_back(); ++maxPos; } - coverageProfiles[i - minPos].incrementCoverage(); - coverageProfiles[i - minPos].incrementNormalSpans(); } - break; - case 4: - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { - if (i > maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + switch (alignment.getReadType()) { + case 0: + case 3: + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { + if (i > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } + coverageProfiles[static_cast(i - minPos)].incrementCoverage(); + coverageProfiles[static_cast(i - minPos)].incrementNormalSpans(); } - coverageProfiles[i - minPos].incrementCoverage(); - coverageProfiles[i - minPos].incrementNormalSpans(); - } - if (alignment.getMateChrIndex() < 1002 && - !(alignment.getMateChrIndex() == 2 && - (alignment.getMatePos() / 10000 == 3314))) { - if (PROPERPARIRCOMPENSATIONMODE) { + if (PROPER_PAIR_COMPENSATION_MODE) { discordantAlignmentCandidatesPool.emplace_back( - alignment.getStartPos(), alignment.getEndPos(), - alignment.getMateChrIndex(), alignment.getMatePos(), 2, - alignment.isInvertedMate()); - } - if (!alignment.isNullMapq()) { - discordantAlignmentsPool.emplace_back( - alignment.getStartPos(), alignment.getEndPos(), - alignment.getMateChrIndex(), alignment.getMatePos(), 2, - alignment.isInvertedMate()); - } else { - discordantLowQualAlignmentsPool.emplace_back( - alignment.getStartPos(), alignment.getEndPos(), - alignment.getMateChrIndex(), alignment.getMatePos(), 2, - alignment.isInvertedMate()); + alignment.getStartPos(), + alignment.getEndPos(), + std::numeric_limits::max(), // mateChrIndexIn + std::numeric_limits::max(), // mateStartPosIn + -1, // sourceType + false); } - } - break; - case 2: - if (!(alignment.isLowMapq() || alignment.isNullMapq())) { - for (auto i = 
alignment.getStartPos(); i < alignment.getEndPos(); - ++i) { + break; + case 1: + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { if (i > maxPos) { coverageProfiles.emplace_back(); ++maxPos; } + coverageProfiles[static_cast(i - minPos)].incrementCoverage(); + coverageProfiles[static_cast(i - minPos)].incrementNormalSpans(); } - } else { - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); - ++i) { + break; + case 4: + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { if (i > maxPos) { coverageProfiles.emplace_back(); ++maxPos; } - coverageProfiles[i - minPos].incrementLowQualSpansHard(); + coverageProfiles[static_cast(i - minPos)].incrementCoverage(); + coverageProfiles[static_cast(i - minPos)].incrementNormalSpans(); } - } - break; - case 5: - for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { - if (i > maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; - } - coverageProfiles[i - minPos].incrementLowQualSpansSoft(); - } - if (!alignment.isSupplementary() && - alignment.getMateChrIndex() < 1002 && alignment.isDistantMate()) { - if (!(alignment.getMateChrIndex() == 2 && - (alignment.getMatePos() / 10000 == 3314))) { - discordantLowQualAlignmentsPool.emplace_back( - alignment.getStartPos(), alignment.getEndPos(), - alignment.getMateChrIndex(), alignment.getMatePos(), 2, - alignment.isInvertedMate(), alignment.getReadBreakpoints()); - } - } - break; - default: - break; - } - switch (alignment.getReadType()) { - case 1: - for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { - auto bpPos = alignment.getReadBreakpoints()[j]; - if (bpPos > maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; + if (!chrConverter.isTechnical(alignment.getMateChrIndex()) + && !chrConverter.isInBlockedRegion(alignment.getMateChrIndex(), + alignment.getMatePos())) { + + if (PROPER_PAIR_COMPENSATION_MODE) { + discordantAlignmentCandidatesPool.emplace_back( + alignment.getStartPos(), + 
alignment.getEndPos(), + alignment.getMateChrIndex(), + alignment.getMatePos(), + 2, // TODO is this a chromosome index + alignment.isInvertedMate()); + } + if (!alignment.isNullMapq()) { + discordantAlignmentsPool.emplace_back( + alignment.getStartPos(), + alignment.getEndPos(), + alignment.getMateChrIndex(), + alignment.getMatePos(), + 2, // TODO is this a chromosome index + alignment.isInvertedMate()); + } else { + discordantLowQualAlignmentsPool.emplace_back( + alignment.getStartPos(), alignment.getEndPos(), + alignment.getMateChrIndex(), alignment.getMatePos(), 2, + alignment.isInvertedMate()); + } } - switch (alignment.getReadBreakpointTypes()[j]) { - case 'S': - if (bpPos == alignment.getStartPos()) { - coverageProfiles[bpPos - minPos].decrementNormalSpans(); + break; + case 2: + if (!(alignment.isLowMapq() || alignment.isNullMapq())) { + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { + if (i > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } } - coverageProfiles[bpPos - minPos].incrementNormalBpsSoft(); - break; - case 'I': - coverageProfiles[bpPos - minPos].incrementNormalBpsShortIndel(); - break; - case 'D': - coverageProfiles[bpPos - minPos].incrementNormalBpsShortIndel(); - for (auto k = 0; k != alignment.getReadBreakpointsSizes()[j]; - ++k) { - coverageProfiles[bpPos - minPos + k].decrementNormalSpans(); + } else { + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { + if (i > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } + coverageProfiles[static_cast(i - minPos)].incrementLowQualSpansHard(); } - break; - default: - break; } - } - break; - case 3: - for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { - auto bpPos = alignment.getReadBreakpoints()[j]; - if (bpPos > maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; + break; + case 5: + for (auto i = alignment.getStartPos(); i < alignment.getEndPos(); ++i) { + if (i > maxPos) { + 
coverageProfiles.emplace_back(); + ++maxPos; + } + coverageProfiles[static_cast(i - minPos)].incrementLowQualSpansSoft(); } - switch (alignment.getReadBreakpointTypes()[j]) { - case 'I': - coverageProfiles[bpPos - minPos].incrementNormalBpsShortIndel(); - break; - case 'D': - coverageProfiles[bpPos - minPos].incrementNormalBpsShortIndel(); - for (auto k = 0; k != alignment.getReadBreakpointsSizes()[j]; - ++k) { - coverageProfiles[bpPos - minPos + k].decrementNormalSpans(); + if (!alignment.isSupplementary() && + !chrConverter.isTechnical(alignment.getMateChrIndex()) && + alignment.isDistantMate()) { + if (!chrConverter.isInBlockedRegion(alignment.getMateChrIndex(), + alignment.getMatePos())) { + discordantLowQualAlignmentsPool.emplace_back( + alignment.getStartPos(), + alignment.getEndPos(), + alignment.getMateChrIndex(), + alignment.getMatePos(), + 2, // TODO Is this a chromosome index? + alignment.isInvertedMate(), + alignment.getReadBreakpoints()); } - break; - default: - break; } + break; + default: + break; } - break; - case 2: - if (!(alignment.isLowMapq() || alignment.isNullMapq())) { + + switch (alignment.getReadType()) { + case 1: for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { - auto bpPos = alignment.getReadBreakpoints()[j]; + ChrSize bpPos = alignment.getReadBreakpoints()[j]; if (bpPos > maxPos) { coverageProfiles.emplace_back(); ++maxPos; } - if (alignment.getReadBreakpointTypes()[j] == 'H') { - coverageProfiles[bpPos - minPos].incrementNormalBpsHard(); + switch (alignment.getReadBreakpointTypes()[j]) { + case 'S': + if (bpPos == alignment.getStartPos()) { + coverageProfiles[static_cast(bpPos - minPos)].decrementNormalSpans(); + } + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsSoft(); + break; + case 'I': + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsShortIndel(); + break; + case 'D': + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsShortIndel(); + for (signed int k = 0; k != 
alignment.getReadBreakpointsSizes()[j]; ++k) { + coverageProfiles[static_cast(bpPos - minPos + k)].decrementNormalSpans(); + } + break; + default: + break; } } - } else { + break; + case 3: for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { auto bpPos = alignment.getReadBreakpoints()[j]; if (bpPos > maxPos) { @@ -350,132 +341,164 @@ SamSegmentMapper::incrementCoverages(const Alignment &alignment) { ++maxPos; } switch (alignment.getReadBreakpointTypes()[j]) { - case 'S': - if (bpPos == alignment.getStartPos()) { - coverageProfiles[bpPos - minPos] - .decrementLowQualSpansHard(); + case 'I': + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsShortIndel(); + break; + case 'D': + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsShortIndel(); + for (signed int k = 0; k != alignment.getReadBreakpointsSizes()[j]; ++k) { + coverageProfiles[static_cast(bpPos - minPos + k)].decrementNormalSpans(); } - coverageProfiles[bpPos - minPos].incrementLowQualBpsHard(); break; + default: + break; + } + } + break; + case 2: + if (!(alignment.isLowMapq() || alignment.isNullMapq())) { + for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { + auto bpPos = alignment.getReadBreakpoints()[j]; + if (bpPos > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } + if (alignment.getReadBreakpointTypes()[j] == 'H') { + coverageProfiles[static_cast(bpPos - minPos)].incrementNormalBpsHard(); + } + } + } else { + for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { + auto bpPos = alignment.getReadBreakpoints()[j]; + if (bpPos > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } + switch (alignment.getReadBreakpointTypes()[j]) { + case 'S': + if (bpPos == alignment.getStartPos()) { + coverageProfiles[static_cast(bpPos - minPos)] + .decrementLowQualSpansHard(); + } + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsHard(); + break; + case 'H': + if (bpPos == alignment.getStartPos()) { + 
coverageProfiles[static_cast(bpPos - minPos)] + .decrementLowQualSpansHard(); + } + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsHard(); + break; + case 'I': + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsHard(); + break; + case 'D': + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsHard(); + for (signed int k = 0; k != alignment.getReadBreakpointsSizes()[j]; ++k) { + coverageProfiles[static_cast(bpPos - minPos + k)] + .decrementLowQualSpansHard(); + } + break; + default: + break; + } + } + } + break; + case 5: + for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { + auto bpPos = alignment.getReadBreakpoints()[j]; + if (bpPos > maxPos) { + coverageProfiles.emplace_back(); + ++maxPos; + } + switch (alignment.getReadBreakpointTypes()[j]) { + case 'S': case 'H': if (bpPos == alignment.getStartPos()) { - coverageProfiles[bpPos - minPos] - .decrementLowQualSpansHard(); + coverageProfiles[static_cast(bpPos - minPos)] + .decrementLowQualSpansSoft(); } - coverageProfiles[bpPos - minPos].incrementLowQualBpsHard(); + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsSoft(); break; case 'I': - coverageProfiles[bpPos - minPos].incrementLowQualBpsHard(); + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsSoft(); break; case 'D': - coverageProfiles[bpPos - minPos].incrementLowQualBpsHard(); - for (auto k = 0; - k != alignment.getReadBreakpointsSizes()[j]; ++k) { - coverageProfiles[bpPos - minPos + k] - .decrementLowQualSpansHard(); + coverageProfiles[static_cast(bpPos - minPos)].incrementLowQualBpsSoft(); + for (signed int k = 0; k != alignment.getReadBreakpointsSizes()[j]; ++k) { + coverageProfiles[static_cast(bpPos - minPos + k)] + .decrementLowQualSpansSoft(); } break; default: break; } } + break; + default: + break; } - break; - case 5: - for (auto j = 0u; j < alignment.getReadBreakpoints().size(); ++j) { - auto bpPos = alignment.getReadBreakpoints()[j]; - if (bpPos > 
maxPos) { - coverageProfiles.emplace_back(); - ++maxPos; - } - switch (alignment.getReadBreakpointTypes()[j]) { - case 'S': - case 'H': - if (bpPos == alignment.getStartPos()) { - coverageProfiles[bpPos - minPos] - .decrementLowQualSpansSoft(); - } - coverageProfiles[bpPos - minPos].incrementLowQualBpsSoft(); - break; - case 'I': - coverageProfiles[bpPos - minPos].incrementLowQualBpsSoft(); - break; - case 'D': - coverageProfiles[bpPos - minPos].incrementLowQualBpsSoft(); - for (auto k = 0; k != alignment.getReadBreakpointsSizes()[j]; - ++k) { - coverageProfiles[bpPos - minPos + k] - .decrementLowQualSpansSoft(); - } - break; - default: - break; - } - } - break; - default: - break; } -} -void -SamSegmentMapper::assignBps(shared_ptr &alignment) { - switch (alignment->getReadType()) { - case 1: - for (auto i = 0u; i < alignment->getReadBreakpoints().size(); ++i) { - if (alignment->getReadBreakpointTypes()[i] == 'S') { - auto bpLoc = alignment->getReadBreakpoints()[i]; - auto it = breakpointsCurrent.find(bpLoc); - if (it == breakpointsCurrent.end()) { - auto newIt = breakpointsCurrent.emplace( - piecewise_construct, forward_as_tuple(bpLoc), - forward_as_tuple(chrIndexCurrent, bpLoc)); - newIt.first->second.addSoftAlignment(alignment); - } else { - it->second.addSoftAlignment(alignment); + void + SamSegmentMapper::assignBps(std::shared_ptr &alignment) { + switch (alignment->getReadType()) { + case 1: + for (auto i = 0u; i < alignment->getReadBreakpoints().size(); ++i) { + if (alignment->getReadBreakpointTypes()[i] == 'S') { + ChrSize bpLoc = alignment->getReadBreakpoints()[i]; + auto it = breakpointsCurrent.find(bpLoc); + if (it == breakpointsCurrent.end()) { + auto newIt = breakpointsCurrent.emplace( + std::piecewise_construct, std::forward_as_tuple(bpLoc), + std::forward_as_tuple(chrIndexCurrent, bpLoc)); + newIt.first->second.addSoftAlignment(alignment); + } else { + it->second.addSoftAlignment(alignment); + } } } - } - break; - case 2: - for (auto i = 0u; i < 
alignment->getReadBreakpoints().size(); ++i) { - if (alignment->getReadBreakpointTypes()[i] == 'H') { - auto bpLoc = alignment->getReadBreakpoints()[i]; - auto it = breakpointsCurrent.find(bpLoc); - if (it == breakpointsCurrent.end()) { - auto newIt = breakpointsCurrent.emplace( - piecewise_construct, forward_as_tuple(bpLoc), - forward_as_tuple(chrIndexCurrent, bpLoc)); - newIt.first->second.addHardAlignment(alignment); - } else { - it->second.addHardAlignment(alignment); + break; + case 2: + for (auto i = 0u; i < alignment->getReadBreakpoints().size(); ++i) { + if (alignment->getReadBreakpointTypes()[i] == 'H') { + auto bpLoc = alignment->getReadBreakpoints()[i]; + auto it = breakpointsCurrent.find(bpLoc); + if (it == breakpointsCurrent.end()) { + auto newIt = breakpointsCurrent.emplace( + std::piecewise_construct, std::forward_as_tuple(bpLoc), + std::forward_as_tuple(chrIndexCurrent, bpLoc)); + newIt.first->second.addHardAlignment(alignment); + } else { + it->second.addHardAlignment(alignment); + } } } + break; + default: + break; } - break; - default: - break; } -} -// void SamSegmentMapper::printMetadata(int ISIZESIGMALEVEL) { -// auto elapsedTime = div(difftime(time(nullptr), STARTTIME), 60); -// *metaOutputHandle << "#Using soft/hard clip length threshold " << -//Alignment::CLIPPEDNUCLEOTIDECOUNTTHRESHOLD << endl; *metaOutputHandle << -//"#Using low quality clipped overhang length threshold " << -//Alignment::LOWQUALCLIPTHRESHOLD << endl; *metaOutputHandle << "#Using Base -//Quality Threshold " << Alignment::BASEQUALITYTHRESHOLD << endl; -// *metaOutputHandle << "#Using Base Quality Threshold Low " << -//Alignment::BASEQUALITYTHRESHOLDLOW << endl; *metaOutputHandle << "#Using -//sigmas = " << ISIZESIGMALEVEL << " away from the median insert size for -//'distant' classification" << endl; *metaOutputHandle << "#Using minimum isize -//for 'distant' classification = " << Alignment::ISIZEMAX << " bps" << endl; -// *metaOutputHandle << "#Using minimum reads 
supporting a breakpoint " << -//Breakpoint::BPSUPPORTTHRESHOLD << endl; *metaOutputHandle << "#Using minimum -//reads supporting a discordant mate contig " << Breakpoint::BPSUPPORTTHRESHOLD -//<< endl; *metaOutputHandle << "#Using (-F 0x600 -f 0x001)" << endl; -// *metaOutputHandle << "#done\t" << printedBps << " lines printed in " << -//elapsedTime.quot << " minutes, " << elapsedTime.rem << " seconds" << endl; -// } + // void SamSegmentMapper::printMetadata(int ISIZESIGMALEVEL) { + // auto elapsedTime = div(difftime(time(nullptr), STARTTIME), 60); + // *metaOutputHandle << "#Using soft/hard clip length threshold " << + //Alignment::CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD << std::endl; *metaOutputHandle << + //"#Using low quality clipped overhang length threshold " << + //Alignment::LOW_QUAL_CLIP_THRESHOLD << std::endl; *metaOutputHandle << "#Using Base + //Quality Threshold " << Alignment::BASE_QUALITY_THRESHOLD << std::endl; + // *metaOutputHandle << "#Using Base Quality Threshold Low " << + //Alignment::BASE_QUALITY_THRESHOLD_LOW << std::endl; *metaOutputHandle << "#Using + //sigmas = " << ISIZESIGMALEVEL << " away from the median insert size for + //'distant' classification" << std::endl; *metaOutputHandle << "#Using minimum isize + //for 'distant' classification = " << Alignment::ISIZEMAX << " bps" << std::endl; + // *metaOutputHandle << "#Using minimum reads supporting a breakpoint " << + //Breakpoint::BP_SUPPORT_THRESHOLD << std::endl; *metaOutputHandle << "#Using minimum + //reads supporting a discordant mate contig " << Breakpoint::BP_SUPPORT_THRESHOLD + //<< std::endl; *metaOutputHandle << "#Using (-F 0x600 -f 0x001)" << std::endl; + // *metaOutputHandle << "#done\t" << printedBps << " lines printed in " << + //elapsedTime.quot << " minutes, " << elapsedTime.rem << " seconds" << std::endl; + // } } /* namespace sophia */ diff --git a/src/Sdust.cpp b/src/Sdust.cpp index e00c31b..d488834 100644 --- a/src/Sdust.cpp +++ b/src/Sdust.cpp @@ -27,122 +27,146 @@ 
namespace sophia { -using namespace std; + Sdust::Sdust(const std::vector &overhangIn) + : res{}, + P{}, + w{}, + L{0}, + rW{0}, + rV{0}, + cW{std::vector(WINDOW_SIZE, 0)}, + cV{std::vector(WINDOW_SIZE, 0)} { -Sdust::Sdust(const vector &overhangIn) - : res{}, P{}, w{}, L{0}, rW{0}, rV{0}, cW{vector(WINDOWSIZE, 0)}, - cV{vector(WINDOWSIZE, 0)} { - auto wStart = 0; - for (auto wFinish = 2; wFinish < static_cast(overhangIn.size()); - ++wFinish) { - wStart = max(wFinish - WINDOWSIZE + 1, 0); - saveMaskedRegions(wStart); - auto t = triplet(overhangIn, wFinish - 2); - shiftWindow(t); - if ((rW * 10) > (L * SCORETHRESHOLD)) { - findPerfectRegions(wStart, rV, cV); + auto wStart = 0; + for (auto wFinish = 2; wFinish < static_cast(overhangIn.size()); ++wFinish) { + wStart = std::max(wFinish - WINDOW_SIZE + 1, 0); + saveMaskedRegions(wStart); + auto t = triplet(overhangIn, wFinish - 2); + shiftWindow(t); + if ((rW * 10) > (L * SCORE_THRESHOLD)) { + findPerfectRegions(wStart, rV, cV); + } + } + wStart = std::max(0, static_cast(overhangIn.size()) - WINDOW_SIZE + 1); + while (!P.empty()) { + saveMaskedRegions(wStart); + ++wStart; } } - wStart = max(0, static_cast(overhangIn.size()) - WINDOWSIZE + 1); - while (!P.empty()) { - saveMaskedRegions(wStart); - ++wStart; - } -} -void -Sdust::saveMaskedRegions(int wStart) { - if (!P.empty() && P.rbegin()->startIndex < wStart) { - if (!res.empty()) { - auto interval = res.back(); - if (P.rbegin()->startIndex <= (interval.endIndex + 1)) { - res[res.size() - 1].endIndex = - max(P.rbegin()->endIndex, interval.endIndex); + void + Sdust::saveMaskedRegions(int wStart) { + if (!P.empty() && P.rbegin()->startIndex < wStart) { + if (!res.empty()) { + auto interval = res.back(); + if (P.rbegin()->startIndex <= (interval.endIndex + 1)) { + res[res.size() - 1].endIndex = + std::max(P.rbegin()->endIndex, interval.endIndex); + } else { + res.push_back(PerfectInterval{P.rbegin()->startIndex, + P.rbegin()->endIndex, 0.0}); + } } else { 
res.push_back(PerfectInterval{P.rbegin()->startIndex, P.rbegin()->endIndex, 0.0}); } - } else { - res.push_back(PerfectInterval{P.rbegin()->startIndex, - P.rbegin()->endIndex, 0.0}); - } - for (;;) { - if (!P.empty() && P.rbegin()->startIndex < wStart) { - P.erase(prev(P.end())); - } else { - break; + for (;;) { + if (!P.empty() && P.rbegin()->startIndex < wStart) { + P.erase(prev(P.end())); + } else { + break; + } } } } -} -void -Sdust::findPerfectRegions(int wStart, int r, vector c) { - auto maxScore = 0.0; - for (auto i = static_cast(w.size()) - L - 1; i >= 0; --i) { - auto t = w[i]; - addTripletInfo(r, c, t); - auto newScore = r / (static_cast(w.size()) - i - 1.0); - if ((newScore * 10) > SCORETHRESHOLD) { - auto cit = P.cbegin(); - while (cit != P.cend()) { - if (cit->startIndex < i + wStart) { - break; - } - maxScore = max(maxScore, cit->score); - ++cit; + void + Sdust::findPerfectRegions(int wStart, int r, std::vector c) { + auto maxScore = 0.0; + for (auto i = static_cast(w.size()) - L - 1; i >= 0; --i) { + if (i < 0) { + throw_with_trace(std::logic_error("Sdust::findPerfectRegions index variable i < 0")); } - if (newScore >= maxScore) { - P.emplace_hint(cit, PerfectInterval{i + wStart, - static_cast(w.size()) + - 1 + wStart, - newScore}); + auto t = w[static_cast(i)]; + addTripletInfo(r, c, t); + auto newScore = r / (static_cast(w.size()) - i - 1.0); + if ((newScore * 10) > SCORE_THRESHOLD) { + auto cit = P.cbegin(); + while (cit != P.cend()) { + if (cit->startIndex < i + wStart) { + break; + } + maxScore = std::max(maxScore, cit->score); + ++cit; + } + if (newScore >= maxScore) { + P.emplace_hint(cit, PerfectInterval{i + wStart, + static_cast(w.size()) + + 1 + wStart, + newScore}); + } } } } -} -void -Sdust::shiftWindow(int t) { - if (w.size() >= WINDOWSIZE - 2) { - auto s = w.front(); - w.pop_front(); - removeTripletInfo(rW, cW, s); - if (L > static_cast(w.size())) { - --L; - removeTripletInfo(rV, cV, s); + void + Sdust::shiftWindow(int t) { + if (t < 0) 
{ + throw_with_trace(std::logic_error("Sdust::shiftWindow index variable t < 0")); + } + if (w.size() >= WINDOW_SIZE - 2) { + auto s = w.front(); + w.pop_front(); + removeTripletInfo(rW, cW, s); + if (L > static_cast(w.size())) { + --L; + removeTripletInfo(rV, cV, s); + } + } + w.push_back(t); + ++L; + addTripletInfo(rW, cW, t); + addTripletInfo(rV, cV, t); + if ((cV[static_cast(t)] * 10) > (SCORE_THRESHOLD * 2)) { + int s{0}; + do { + int idx = static_cast(w.size()) - static_cast(L); + if (idx < 0) { + throw_with_trace(std::logic_error("Sdust::shiftWindow index (w.size() - L) < 0")); + } + s = w[static_cast(idx)]; + removeTripletInfo(rV, cV, s); + --L; + } while (s != t); } } - w.push_back(t); - ++L; - addTripletInfo(rW, cW, t); - addTripletInfo(rV, cV, t); - if ((cV[t] * 10) > (SCORETHRESHOLD * 2)) { - int s{0}; - do { - s = w[w.size() - L]; - removeTripletInfo(rV, cV, s); - --L; - } while (s != t); - } -} -void -Sdust::addTripletInfo(int &r, vector &c, int t) { - r += c[t]; - ++c[t]; -} + void + Sdust::addTripletInfo(int &r, std::vector &c, int t) { + if (t < 0) { + throw_with_trace(std::logic_error("Sdust::addTripletInfo index variable t < 0")); + } + r += c[static_cast(t)]; + ++c[static_cast(t)]; + } -void -Sdust::removeTripletInfo(int &r, vector &c, int t) { - --c[t]; - r -= c[t]; -} + void + Sdust::removeTripletInfo(int &r, std::vector &c, int t) { + if (t < 0) { + throw_with_trace(std::logic_error("Sdust::removeTripletInfo index variable t < 0")); + } + --c[static_cast(t)]; + r -= c[static_cast(t)]; + } -int -Sdust::triplet(const vector &overhangIn, int indexPos) { - return 16 * overhangIn[indexPos] + 4 * overhangIn[indexPos + 1] + - overhangIn[indexPos + 2]; -} + int + Sdust::triplet(const std::vector &overhangIn, int indexPos) { + if (indexPos < 0) { + throw_with_trace(std::logic_error("Sdust::triplet index variable indexPos < 0")); + } + return 16 * overhangIn[static_cast(indexPos)] + + 4 * overhangIn[static_cast(indexPos) + 1] + + 
overhangIn[static_cast(indexPos) + 2]; + } } /* namespace sophia */ diff --git a/src/SuppAlignment.cpp b/src/SuppAlignment.cpp index 694a640..19eb1bf 100644 --- a/src/SuppAlignment.cpp +++ b/src/SuppAlignment.cpp @@ -20,332 +20,504 @@ * LICENSE: GPL */ +#include "global.h" +#include "strtk-wrap.h" #include "SuppAlignment.h" +#include #include #include -#include "strtk.hpp" -#include "ChrConverter.h" -//#include + namespace sophia { - using namespace std; - -double SuppAlignment::ISIZEMAX { }; - -int SuppAlignment::DEFAULTREADLENGTH { }; - -SuppAlignment::SuppAlignment(string::const_iterator saCbegin, string::const_iterator saCend, bool primaryIn, bool lowMapqSourceIn, bool nullMapqSourceIn, bool alignmentOnForwardStrand, bool bpEncounteredM, int originIndexIn, int bpChrIndex, int bpPos) : - matchFuzziness { 5 * DEFAULTREADLENGTH }, - chrIndex { 0 }, - pos { 0 }, - extendedPos { 0 }, - mapq { 0 }, - supportingIndices { }, - supportingIndicesSecondary { }, - distinctReads { 1 }, - support { 0 }, - secondarySupport { 0 }, - mateSupport { 0 }, - expectedDiscordants { 0 }, - encounteredM { bpEncounteredM }, - toRemove { false }, - inverted { false }, - fuzzy { false }, - strictFuzzy { false }, - distant { false }, - lowMapqSource { lowMapqSourceIn }, - nullMapqSource { nullMapqSourceIn }, - suspicious { false }, - semiSuspicious { false }, - properPairErrorProne { false }, - primary { primaryIn } { - if (primary) { - supportingIndices.push_back(originIndexIn); - } else { - supportingIndicesSecondary.push_back(originIndexIn); - } -// //"SA:Z:10,24753146,+,68S33M,48,1;X,135742083,-,47S22M32S,0,0;8,72637925,-,29S19M53S,0,0;" -// //"10,24753146,+,68S33M,48,1" -// for (auto cigarString_cit = saCbegin; cigarString_cit != saCend; ++cigarString_cit) { -// cerr << *cigarString_cit; -// } -// cerr << endl; - vector fieldBegins = { saCbegin }; - vector fieldEnds; - for (auto it = saCbegin; it != saCend; ++it) { - if (*it == ',') { - fieldEnds.push_back(it); - 
fieldBegins.push_back(it + 1); - } - } - fieldEnds.push_back(saCend); - chrIndex = ChrConverter::readChromosomeIndex(fieldBegins[0], ','); - if (chrIndex > 1001) { - return; - } - for (auto it = fieldBegins[1]; it != fieldEnds[1]; ++it) { - pos = 10 * pos + (*it - '0'); - } -//cerr << "guessSupplementOffset" << endl; - vector cigarChunks; - auto cigarEncounteredM = false; - auto cumulativeNucleotideCount = 0, currentNucleotideCount = 0, chunkIndex = 0, bestChunkIndex = 0, indelAdjustment = 0; - auto largestClip = 0; - auto leftClipAdjustment = 0; - for (auto cigarString_cit = fieldBegins[3]; cigarString_cit != fieldEnds[3]; ++cigarString_cit) { - if (isdigit(*cigarString_cit)) { - currentNucleotideCount = currentNucleotideCount * 10 + (*cigarString_cit - '0'); - } else { - switch (*cigarString_cit) { - case 'M': - cigarEncounteredM = true; - cumulativeNucleotideCount += currentNucleotideCount; - break; - case 'S': - if (!cigarEncounteredM) { - leftClipAdjustment = currentNucleotideCount; - } - cigarChunks.emplace_back(*cigarString_cit, cigarEncounteredM, cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, currentNucleotideCount); - if (largestClip < currentNucleotideCount) { - largestClip = currentNucleotideCount; - bestChunkIndex = chunkIndex; - } - ++chunkIndex; - cumulativeNucleotideCount += currentNucleotideCount; - break; - case 'H': - cigarChunks.emplace_back(*cigarString_cit, cigarEncounteredM, cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, currentNucleotideCount); - if (largestClip < currentNucleotideCount) { - largestClip = currentNucleotideCount; - bestChunkIndex = chunkIndex; - } - ++chunkIndex; - break; - case 'I': - indelAdjustment -= currentNucleotideCount; - cumulativeNucleotideCount += currentNucleotideCount; - break; - case 'D': - indelAdjustment += currentNucleotideCount; - break; - default: - break; - } - currentNucleotideCount = 0; - } - } - if (cigarChunks[bestChunkIndex].encounteredM) { - pos += 
cigarChunks[bestChunkIndex].startPosOnRead; - } - extendedPos = pos; - //cerr << "done" << endl; - for (auto it = fieldBegins[4]; it != fieldEnds[4]; ++it) { - mapq = 10 * mapq + (*it - '0'); - } - if (alignmentOnForwardStrand) { - inverted = ('+' != *fieldBegins[2]); - } else { - inverted = ('-' != *fieldBegins[2]); - } - distant = (bpChrIndex != chrIndex || (abs(bpPos - pos) > ISIZEMAX)); - if (bpChrIndex == chrIndex) { - matchFuzziness = min(abs(bpPos - pos), matchFuzziness); - } - strictFuzzy = fuzzy || (support + secondarySupport) < 3; -} - -void SuppAlignment::finalizeSupportingIndices() { - sort(supportingIndices.begin(), supportingIndices.end()); - sort(supportingIndicesSecondary.begin(), supportingIndicesSecondary.end()); - supportingIndices.erase(unique(supportingIndices.begin(), supportingIndices.end()), supportingIndices.end()); - supportingIndicesSecondary.erase(unique(supportingIndicesSecondary.begin(), supportingIndicesSecondary.end()), supportingIndicesSecondary.end()); - support = static_cast(supportingIndices.size()); - secondarySupport = static_cast(supportingIndicesSecondary.size()); -} - -SuppAlignment::SuppAlignment(int chrIndexIn, int posIn, int mateSupportIn, int expectedDiscordantsIn, bool encounteredMIn, bool invertedIn, int extendedPosIn, bool primaryIn, bool lowMapqSourceIn, bool nullMapqSourceIn, int originIndexIn) : - matchFuzziness { 5 * DEFAULTREADLENGTH }, - chrIndex { chrIndexIn }, - pos { posIn }, - extendedPos { extendedPosIn }, - mapq { 0 }, - supportingIndices { }, - supportingIndicesSecondary { }, - distinctReads { 1 }, - support { 0 }, - secondarySupport { 0 }, - mateSupport { mateSupportIn }, - expectedDiscordants { expectedDiscordantsIn }, - encounteredM { encounteredMIn }, - toRemove { false }, - inverted { invertedIn }, - fuzzy { extendedPosIn != posIn }, - strictFuzzy { false }, - distant { true }, - lowMapqSource { lowMapqSourceIn }, - nullMapqSource { nullMapqSourceIn }, - suspicious { false }, - semiSuspicious { false 
}, - properPairErrorProne { false }, - primary { primaryIn } { - if (originIndexIn != -1) { - if (primary) { - supportingIndices.push_back(originIndexIn); - } else { - supportingIndicesSecondary.push_back(originIndexIn); - } - } else { - distinctReads = 0; - } - strictFuzzy = fuzzy || (support + secondarySupport) < 3; -} - -string SuppAlignment::print() const { - string outStr; - outStr.reserve(36); - string invStr { }; - if (inverted) { - invStr.append("_INV"); - } - if (encounteredM) { - outStr.append("|"); - } else { - invStr.append("|"); - } - if (!fuzzy) { - outStr.append(ChrConverter::indexToChr[chrIndex]).append(":").append(strtk::type_to_string(pos)); - } else { - outStr.append(ChrConverter::indexToChr[chrIndex]).append(":").append(strtk::type_to_string(pos)).append("-").append(strtk::type_to_string(extendedPos)); - } - outStr.append(invStr).append("(").append(strtk::type_to_string(support)).append(",").append(strtk::type_to_string(secondarySupport)).append(","); - if (!suspicious) { - outStr.append(strtk::type_to_string(mateSupport)); - if (semiSuspicious || nullMapqSource) { - outStr.append("?"); - } - } else { - outStr.append("!"); - } - outStr.append("/").append(strtk::type_to_string(expectedDiscordants)).append(")"); - if (properPairErrorProne) { - outStr.append("#"); - } - return outStr; -} - -SuppAlignment::SuppAlignment(const string& saIn) : - matchFuzziness { 5 * DEFAULTREADLENGTH }, - chrIndex { 0 }, - pos { 0 }, - extendedPos { 0 }, - mapq { 0 }, - distinctReads { 1 }, - support { 0 }, - secondarySupport { 0 }, - mateSupport { 0 }, - expectedDiscordants { 0 }, - encounteredM { saIn[0] == '|' }, - toRemove { false }, - inverted { false }, - fuzzy { false }, - strictFuzzy { false }, - distant { false }, - lowMapqSource { false }, - nullMapqSource { false }, - suspicious { false }, - semiSuspicious { false }, - properPairErrorProne { saIn.back() == '#' }, - primary { true } { - auto index = 0; - if (encounteredM) { - ++index; - } - chrIndex = 
ChrConverter::readChromosomeIndex(next(saIn.cbegin(), index), ':'); - if (chrIndex > 1001) { - return; - } - while (saIn[index] != ':') { - ++index; - } - ++index; - for (; saIn[index] != '('; ++index) { - if (saIn[index] == '-') { - fuzzy = true; - } else if (saIn[index] == '_') { - inverted = true; - while (saIn[index] != '(') { - ++index; - } - break; - } else if (saIn[index] != '|') { - if (!fuzzy) { - pos = 10 * pos + (saIn[index] - '0'); - } else { - extendedPos = 10 * extendedPos + (saIn[index] - '0'); - } - } - } - if (!fuzzy) { - extendedPos = pos; - } - ++index; - for (; saIn[index] != ','; ++index) { - support = 10 * support + (saIn[index] - '0'); - } - ++index; - for (; saIn[index] != ','; ++index) { - secondarySupport = 10 * secondarySupport + (saIn[index] - '0'); - } - ++index; - if (saIn[index] == '!') { - suspicious = true; - index += 2; - } else { - for (; saIn[index] != '/'; ++index) { - if (saIn[index] == '?') { - semiSuspicious = true; - } else { - mateSupport = 10 * mateSupport + (saIn[index] - '0'); - } - } - ++index; - } - for (; saIn[index] != ')'; ++index) { - expectedDiscordants = 10 * expectedDiscordants + (saIn[index] - '0'); - } - distant = expectedDiscordants > 0 || suspicious; - strictFuzzy = fuzzy || (support + secondarySupport) < 3; -} - -bool SuppAlignment::saCloseness(const SuppAlignment& rhs, int fuzziness) const { - if (inverted == rhs.isInverted() && chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) { - if (strictFuzzy || rhs.isStrictFuzzy()) { - fuzziness = 2.5 * DEFAULTREADLENGTH; - return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness); - } else { - return abs(pos - rhs.getPos()) <= fuzziness; - } - } else { - return false; - } -} - - -bool SuppAlignment::saDistHomologyRescueCloseness(const SuppAlignment& rhs, int fuzziness) const { - if (!distant || !rhs.isDistant()) { - return false; - } - if (chrIndex == rhs.getChrIndex() && 
encounteredM == rhs.isEncounteredM()) { - if (strictFuzzy || rhs.isStrictFuzzy()) { - return (rhs.getPos() - fuzziness) <= (extendedPos + fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + fuzziness); - } else { - return abs(pos - rhs.getPos()) <= fuzziness; - } - } else { - return false; - } -} + double SuppAlignment::ISIZEMAX { }; + + ChrSize SuppAlignment::DEFAULT_READ_LENGTH { }; + + + // Default constructor. + SuppAlignment::SuppAlignment() : + matchFuzziness { 5 * DEFAULT_READ_LENGTH }, + chrIndex { 0 }, + pos { 0 }, + extendedPos { 0 }, + mapq { 0 }, + supportingIndices { }, + supportingIndicesSecondary { }, + distinctReads { 1 }, + support { 0 }, + secondarySupport { 0 }, + mateSupport { 0 }, + expectedDiscordants { 0 }, + encounteredM { false }, + toRemove { false }, + inverted { false }, + fuzzy { false }, + strictFuzzy { false }, + distant { false }, + lowMapqSource { false }, + nullMapqSource { false }, + suspicious { false }, + semiSuspicious { false }, + properPairErrorProne { false }, + primary { true } {} + + + SuppAlignment + SuppAlignment::create(ChrIndex chrIndexIn, + ChrSize posIn, + int mateSupportIn, + int expectedDiscordantsIn, + bool encounteredMIn, + bool invertedIn, + ChrSize extendedPosIn, + bool primaryIn, + bool lowMapqSourceIn, + bool nullMapqSourceIn, + int originIndexIn) { + SuppAlignment result = SuppAlignment(); + + result.chrIndex = chrIndexIn; + result.pos = posIn; + result.extendedPos = extendedPosIn; + result.mateSupport = mateSupportIn; + result.expectedDiscordants = expectedDiscordantsIn; + result.encounteredM = encounteredMIn; + result.inverted = invertedIn; + result.fuzzy = extendedPosIn != posIn; + result.distant = true; + result.lowMapqSource = lowMapqSourceIn; + result.nullMapqSource = nullMapqSourceIn; + result.primary = primaryIn; + + if (originIndexIn != -1) { + if (result.primary) { + result.supportingIndices.push_back(originIndexIn); + } else { + result.supportingIndicesSecondary.push_back(originIndexIn); + 
} + } else { + result.distinctReads = 0; + } + result.strictFuzzy = result.fuzzy || (result.support + result.secondarySupport) < 3; + + return result; + } + + /** Parse the supplementary alignment information from an SA:Z: tag according to the a + * SAM specification, such as + * + * SA:Z:10,24753146,+,68S33M,48,1;X,135742083,-,47S22M32S,0,0;8,72637925,-,29S19M53S,0,0" + */ + SuppAlignment SuppAlignment::parseSamSaTag(std::string::const_iterator saCbegin, + std::string::const_iterator saCend, + bool primaryIn, + bool lowMapqSourceIn, + bool nullMapqSourceIn, + bool alignmentOnForwardStrand, + bool bpEncounteredM, + int originIndexIn, + ChrIndex bpChrIndex, + int bpPos) { + + SuppAlignment result = SuppAlignment(); + result.encounteredM = bpEncounteredM; + result.lowMapqSource = lowMapqSourceIn; + result.nullMapqSource = nullMapqSourceIn; + result.primary = primaryIn; + + if (result.primary) { + result.supportingIndices.push_back(originIndexIn); + } else { + result.supportingIndicesSecondary.push_back(originIndexIn); + } + // //"SA:Z:10,24753146,+,68S33M,48,1;X,135742083,-,47S22M32S,0,0;8,72637925,-,29S19M53S,0,0;" + // //"10,24753146,+,68S33M,48,1" + // for (auto cigarString_cit = saCbegin; cigarString_cit != saCend; ++cigarString_cit) { + // std::cerr << *cigarString_cit; + // } + // std::cerr << std::endl; + + // Split the SA tag (from a SAM file) into fields. From the SAM specification: + // + // SA:Z:(rname ,pos ,strand ,CIGAR ,mapQ ,NM ;)+ Other canonical alignments in a chimeric alignment, for- + // matted as a semicolon-delimited list. Each element in the list represents a part of the chimeric align- + // ment. Conventionally, at a supplementary line, the first element points to the primary line. Strand is + // either ‘+’ or ‘-’, indicating forward/reverse strand, corresponding to FLAG bit 0x10. Pos is a 1-based + // coordinate. + // + // NOTE: This parser does *NOT* cover the case with multiple semicolon-separated alignments. 
+ static const unsigned int + RNAME = 0, + POS = 1, + STRAND = 2, + CIGAR = 3, + MAPQ = 4, + NM [[gnu::unused]] = 5; // not used in this parser and only provided for documentation + + std::vector fieldBegins = { saCbegin }; + std::vector fieldEnds; + for (auto it = saCbegin; it != saCend; ++it) { + if (*it == ',') { + fieldEnds.push_back(it); + fieldBegins.push_back(it + 1); + } + } + fieldEnds.push_back(saCend); + + // Update `chrIndex` field. + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + try { + result.chrIndex = chrConverter.parseChrAndReturnIndex( + fieldBegins[RNAME], + fieldEnds[RNAME], + ','); + } catch (DomainError& e) { + e << + error_info_string("from = " + std::string(fieldBegins[RNAME], fieldEnds[RNAME])); + throw e; + } + + // If the chromosome is to be ignored, don't update any of the other fields. + if (chrConverter.isTechnical(result.chrIndex)) { + return result; + } + // else + + // Update `pos` field. + for (auto it = fieldBegins[POS]; it != fieldEnds[POS]; ++it) { + result.pos = 10 * result.pos + ChrPosition(*it - '0'); + } + + // Update `mapq` field. + for (auto it = fieldBegins[MAPQ]; it != fieldEnds[MAPQ]; ++it) { + result.mapq = 10 * result.mapq + static_cast(*it - '0'); + } + + // Update `inverted` field + if (alignmentOnForwardStrand) { + result.inverted = ('+' != *fieldBegins[STRAND]); + } else { + result.inverted = ('-' != *fieldBegins[STRAND]); + } + + // Update `strictFuzzy` field. + result.strictFuzzy = result.fuzzy || (result.support + result.secondarySupport) < 3; + + // Now, parse the CIGAR string and identify soft/hard-clipped segments. 
+ std::vector cigarChunks; + auto cigarEncounteredM = false; + unsigned int chunkIndex = 0, + largestClipIndex = 0; + auto cumulativeNucleotideCount = 0, + currentNucleotideCount = 0, + indelAdjustment = 0; + auto largestClipSize = 0; + auto leftClipAdjustment = 0; + for (auto cigarString_cit = fieldBegins[CIGAR]; + cigarString_cit != fieldEnds[CIGAR]; + ++cigarString_cit) { + if (isdigit(*cigarString_cit)) { + currentNucleotideCount = currentNucleotideCount * 10 + (*cigarString_cit - '0'); + } else { + switch (*cigarString_cit) { + case 'M': + cigarEncounteredM = true; + cumulativeNucleotideCount += currentNucleotideCount; + break; + case 'S': + if (!cigarEncounteredM) { + leftClipAdjustment = currentNucleotideCount; + } + cigarChunks.emplace_back( + *cigarString_cit, + cigarEncounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount); + if (largestClipSize < currentNucleotideCount) { + largestClipSize = currentNucleotideCount; + largestClipIndex = chunkIndex; + } + ++chunkIndex; + cumulativeNucleotideCount += currentNucleotideCount; + break; + case 'H': + cigarChunks.emplace_back( + *cigarString_cit, + cigarEncounteredM, + cumulativeNucleotideCount + indelAdjustment - leftClipAdjustment, + currentNucleotideCount); + if (largestClipSize < currentNucleotideCount) { + largestClipSize = currentNucleotideCount; + largestClipIndex = chunkIndex; + } + ++chunkIndex; + break; + case 'I': + indelAdjustment -= currentNucleotideCount; + cumulativeNucleotideCount += currentNucleotideCount; + break; + case 'D': + indelAdjustment += currentNucleotideCount; + break; + default: + break; + } + currentNucleotideCount = 0; + } + } + + // NOTE: If there are no supplementary alignments with no soft/hard-clipped segments, then + // `cigarChunks` will be empty. In this case, we still return the SuppAlignment, even + // if it is located on, e.g. a decoy chromosome. + if (cigarChunks.size() != 0) { + // We found soft/hard-clipped segments. 
Update `pos` and `extendedPos` fields. + if (cigarChunks[largestClipIndex].encounteredM) { + result.pos += cigarChunks[largestClipIndex].startPosOnRead; + } + result.extendedPos = result.pos; + + result.distant = (bpChrIndex != result.chrIndex || (abs(static_cast(bpPos) - static_cast(result.pos)) > ISIZEMAX)); + if (bpChrIndex == result.chrIndex) { + result.matchFuzziness = std::min((ChrSize) abs(static_cast(bpPos) - static_cast(result.pos)), result.matchFuzziness); + } + } + + return result; + } + + static const std::string STOP_CHARS = "|(\t"; + inline bool isStopChar(char c) { + return STOP_CHARS.find(c) != std::string::npos; + }; + + /** The syntax parsed here is the same as the one generated by `SuppAlignment::print()`. + * TODO FIX: This parser parses the same syntax as `SuppAlignmentAnno::parseSaSupport(const std::string&)`. + */ + SuppAlignment SuppAlignment::parseSaSupport(const std::string& saIn) { + try { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + + SuppAlignment result = SuppAlignment(); + + // If the last character is a `#` then properPairErrorProne is true. + result.properPairErrorProne = saIn.back() == '#'; + + unsigned int index = 0; + + // If the string starts with a `|` then encounteredM is true. + result.encounteredM = saIn.at(0) == '|'; + if (result.encounteredM) { + ++index; + } + + // Parse chromosome name. The chromosome name will be separated from the position information + // by a colon ':' character, but as the chromosome name itself may also contain colons, we + // need to anchor first character after the position which is either a `|` or a `(`, and then + // track back to the *last* colon. 
+ try { + result.chrIndex = chrConverter.parseChrAndReturnIndex( + next(saIn.cbegin(), index), + saIn.cend(), + ':', + STOP_CHARS); + } catch (DomainError& e) { + e << + error_info_string("from = " + std::string(next(saIn.cbegin(), index), saIn.cend())); + throw e; + } + + // If this is an ignored chromosome, don't bother parsing the rest. + if (chrConverter.isTechnical(result.chrIndex)) { + return result; + } + + // else, skip forward to the first colon ':' character. This ':' will be in column 6 or 7, + // dependent on the support information there. + while (saIn.at(index) != ':') { + ++index; + } + ++index; + + // ... and continue to parse the breakpoint specification, which gives information about the + // position, and whether the breakpoint is fuzzy or inverted. + for (; saIn.at(index) != '('; ++index) { + if (saIn.at(index) == '-') { + result.fuzzy = true; + } else if (saIn.at(index) == '_') { + result.inverted = true; + while (saIn.at(index) != '(') { + ++index; + } + break; + } else if (saIn.at(index) != '|') { + if (!result.fuzzy) { + result.pos = 10 * result.pos + static_cast(saIn.at(index) - '0'); + } else { + result.extendedPos = 10 * result.extendedPos + static_cast(saIn.at(index) - '0'); + } + } + } + + if (!result.fuzzy) { + result.extendedPos = result.pos; + } + ++index; + + for (; saIn.at(index) != ','; ++index) { + result.support = 10 * result.support + (saIn.at(index) - '0'); + } + ++index; + + for (; saIn.at(index) != ','; ++index) { + result.secondarySupport = 10 * result.secondarySupport + (saIn.at(index) - '0'); + } + + ++index; + if (saIn.at(index) == '!') { + result.suspicious = true; + index += 2; + } else { + for (; saIn.at(index) != '/'; ++index) { + if (saIn.at(index) == '?') { + result.semiSuspicious = true; + } else { + result.mateSupport = 10 * result.mateSupport + (saIn.at(index) - '0'); + } + } + ++index; + } + for (; saIn.at(index) != ')'; ++index) { + result.expectedDiscordants = 10 * result.expectedDiscordants + (saIn.at(index) 
- '0'); + } + + result.distant = result.expectedDiscordants > 0 || result.suspicious; + bool strictFuzzyCandidate = (result.support + result.secondarySupport) < 3; + result.strictFuzzy = result.fuzzy || strictFuzzyCandidate; + + return result; + } catch (std::out_of_range& e) { + throw boost::enable_error_info(e) << + error_info_string("from = " + saIn); + } + } + + void SuppAlignment::finalizeSupportingIndices() { + sort(supportingIndices.begin(), supportingIndices.end()); + sort(supportingIndicesSecondary.begin(), supportingIndicesSecondary.end()); + supportingIndices.erase(unique(supportingIndices.begin(), supportingIndices.end()), supportingIndices.end()); + supportingIndicesSecondary.erase(unique(supportingIndicesSecondary.begin(), + supportingIndicesSecondary.end()), + supportingIndicesSecondary.end()); + support = static_cast(supportingIndices.size()); + secondarySupport = static_cast(supportingIndicesSecondary.size()); + } + + + /** The syntax is as follows: + * + * spec ::= encounteredM position inverted notEncounteredM + * '(' support ',' secondarySupport ',' mateInfo '/' expectedDiscordants ')' + * properPairErrorProne + * encounteredM ::= | ('|' iff sa.encounteredM == true) + * position ::= chrName ':' position2 + * position2 ::= pos | pos '-' extendedPos + * pos := [0-9]+ + * extendedPos := [0-9]+ + * inverted ::= | ('_INV' iff sa.inverted == true) + * notEncounteredM ::= | ('|' iff sa.encounteredM == false) + * support ::= [0-9]+ + * secondarySupport ::= [0-9]+ + * mateInfo ::= (`!` iff sa.suspicious == true) | mateSupport + * mateSupport ::= [0-9]+ mateAddition + * mateAddition ::= | (`?` iff sa.semiSuspicious == true or sa.nullMapqSource == true) + * properPairErrorProne ::= | (`#` iff sa.properPairErrorProne == true) + * + * here refers to the empty string "" + **/ + std::string SuppAlignment::print() const { + std::string outStr; + outStr.reserve(36); + + std::string invStr { }; + if (inverted) { + invStr.append("_INV"); + } + + // If the 
encounteredM attribute is set then the string starts with a `|`, otherwise, the + // inv-string starts with a `|`. + if (encounteredM) { + outStr.append("|"); + } else { + invStr.append("|"); + } + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (!fuzzy) { + outStr. + append(chrConverter.indexToChrName(chrIndex)). + append(":"). + append(strtk::type_to_string(pos)); + } else { + outStr. + append(chrConverter.indexToChrName(chrIndex)). + append(":"). + append(strtk::type_to_string(pos)). + append("-"). + append(strtk::type_to_string(extendedPos)); + } + outStr. + append(invStr). + append("("). + append(strtk::type_to_string(support)). + append(","). + append(strtk::type_to_string(secondarySupport)). + append(","); + if (!suspicious) { + outStr.append(strtk::type_to_string(mateSupport)); + if (semiSuspicious || nullMapqSource) { + outStr.append("?"); + } + } else { + outStr.append("!"); + } + outStr.append("/").append(strtk::type_to_string(expectedDiscordants)).append(")"); + if (properPairErrorProne) { + outStr.append("#"); + } + return outStr; + } + + bool SuppAlignment::saCloseness(const SuppAlignment& rhs, int fuzziness) const { + if (inverted == rhs.isInverted() && chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) { + if (strictFuzzy || rhs.isStrictFuzzy()) { + fuzziness = 2.5 * DEFAULT_READ_LENGTH; + return (static_cast(rhs.getPos()) - fuzziness) <= (static_cast(extendedPos) + fuzziness) && + (static_cast(pos) - fuzziness) <= (static_cast(rhs.getExtendedPos()) + fuzziness); + } else { + return abs(static_cast(pos) - static_cast(rhs.getPos())) <= fuzziness; + } + } else { + return false; + } + } + + + bool SuppAlignment::saDistHomologyRescueCloseness(const SuppAlignment& rhs, int fuzziness) const { + if (!distant || !rhs.isDistant()) { + return false; + } + if (chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) { + if (strictFuzzy || rhs.isStrictFuzzy()) { + return 
(static_cast(rhs.getPos()) - fuzziness) <= (static_cast(extendedPos) + fuzziness) && + (static_cast(pos) - fuzziness) <= (static_cast(rhs.getExtendedPos()) + fuzziness); + } else { + return abs(static_cast(pos) - static_cast(rhs.getPos())) <= fuzziness; + } + } else { + return false; + } + } }/* namespace sophia */ diff --git a/src/SuppAlignmentAnno.cpp b/src/SuppAlignmentAnno.cpp index f823061..9be3fc7 100644 --- a/src/SuppAlignmentAnno.cpp +++ b/src/SuppAlignmentAnno.cpp @@ -22,277 +22,363 @@ * LICENSE: GPL */ +#include "global.h" +#include "strtk-wrap.h" +#include "GlobalAppConfig.h" #include "SuppAlignmentAnno.h" -#include "ChrConverter.h" -#include "strtk.hpp" + #include #include #include -// #include +#include + namespace sophia { -using namespace std; - -double SuppAlignmentAnno::ISIZEMAX{}; -int SuppAlignmentAnno::DEFAULTREADLENGTH{}; - -SuppAlignmentAnno::SuppAlignmentAnno(const string &saStrIn) - : chrIndex{0}, pos{0}, extendedPos{0}, support{0}, secondarySupport{0}, - mateSupport{0}, expectedDiscordants{0}, encounteredM{saStrIn[0] == '|'}, - toRemove{false}, inverted{false}, fuzzy{false}, strictFuzzy{false}, - strictFuzzyCandidate{false}, distant{false}, suspicious{false}, - semiSuspicious{false}, properPairErrorProne{saStrIn.back() == '#'}, - supportingIndices{} { - auto index = 0; - if (encounteredM) { - ++index; - } - chrIndex = - ChrConverter::readChromosomeIndex(next(saStrIn.cbegin(), index), ':'); - if (chrIndex > 1001) { - return; - } - while (saStrIn[index] != ':') { - ++index; - } - ++index; - for (; saStrIn[index] != '('; ++index) { - if (saStrIn[index] == '-') { - fuzzy = true; - } else if (saStrIn[index] == '_') { - inverted = true; - while (saStrIn[index] != '(') { + double SuppAlignmentAnno::ISIZEMAX{}; + + ChrSize SuppAlignmentAnno::DEFAULT_READ_LENGTH{}; + + const std::string SuppAlignmentAnno::STOP_CHARS = "|(\t"; + bool SuppAlignmentAnno::isStopChar(char c) { + return STOP_CHARS.find(c) != std::string::npos; + }; + + /** The syntax 
parsed here is the same as the one generated by `SuppAlignment::print()`. + * TODO FIX: This parser parses the same syntax as `SuppAlignment::parseSaSupport(const std::string&)`. + */ + SuppAlignmentAnno::SuppAlignmentAnno(const std::string &saStrIn) + : chrIndex{0}, + pos{0}, + extendedPos{0}, + support{0}, + secondarySupport{0}, + mateSupport{0}, + expectedDiscordants{0}, + encounteredM { saStrIn[0] == '|' }, + toRemove{false}, + inverted{false}, + fuzzy{false}, + strictFuzzy{false}, + strictFuzzyCandidate{false}, + distant{false}, + suspicious{false}, + semiSuspicious{false}, + // If the last character is a `#` then properPairErrorProne is true. + properPairErrorProne { saStrIn.back() == '#' }, + supportingIndices{} { + + + try { + unsigned int index = 0; + if (encounteredM) { // skip the first '|' + ++index; + } + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + + // Parse chromosome name. The chromosome name will be separated from the position information + // by a colon ':' character, but as the chromosome name itself may also contain colons, we + // need to anchor first character after the position which is either a `|` or a `(`, and then + // track back to the *last* colon. 
+ try { + chrIndex = chrConverter.parseChrAndReturnIndex( + next(saStrIn.cbegin(), index), + saStrIn.cend(), + ':', + STOP_CHARS); + } catch (const DomainError &e) { + throw e << error_info_string( + "from = " + std::string(next(saStrIn.cbegin(), index), saStrIn.cend())); + } + + if (chrConverter.isTechnical(chrIndex)) { + return; + } + + while (saStrIn.at(index) != ':') { ++index; } - break; - } else if (saStrIn[index] != '|') { + ++index; + + for (; saStrIn.at(index) != '('; ++index) { + if (saStrIn.at(index) == '-') { + fuzzy = true; + } else if (saStrIn.at(index) == '_') { + inverted = true; + while (saStrIn.at(index) != '(') { + ++index; + } + break; + } else if (saStrIn.at(index) != '|') { + if (!fuzzy) { + pos = 10 * pos + static_cast(saStrIn.at(index) - '0'); + } else { + extendedPos = 10 * extendedPos + static_cast(saStrIn.at(index) - '0'); + } + } + } + if (!fuzzy) { - pos = 10 * pos + (saStrIn[index] - '0'); + extendedPos = pos; + } + ++index; + + for (; saStrIn.at(index) != ','; ++index) { + support = 10 * support + (saStrIn.at(index) - '0'); + } + ++index; + + for (; saStrIn.at(index) != ','; ++index) { + secondarySupport = 10 * secondarySupport + (saStrIn.at(index) - '0'); + } + ++index; + + if (saStrIn.at(index) == '!') { + suspicious = true; + index += 2; } else { - extendedPos = 10 * extendedPos + (saStrIn[index] - '0'); + for (; saStrIn.at(index) != '/'; ++index) { + if (saStrIn.at(index) == '?') { + semiSuspicious = true; + } else { + mateSupport = 10 * mateSupport + (saStrIn.at(index) - '0'); + } + } + ++index; + } + + for (; saStrIn.at(index) != ')'; ++index) { + expectedDiscordants = 10 * expectedDiscordants + (saStrIn.at(index) - '0'); + } + + // The following is different in SuppAlignment::parseSaSupport(const std::string &) + // start of difference + if (support + secondarySupport == 0) { + fuzzy = true; } + // end of difference + + distant = expectedDiscordants > 0 || suspicious; + + // This is almost the same as in 
SuppAlignment::parseSaSupport(const std::string &), + // but in here, strictFuzzyCandidate is a member variable, while in + // SuppAlignment::parseSaSupport(const std::string &), it is just a local variable. + strictFuzzyCandidate = (support + secondarySupport) < 3; + strictFuzzy = fuzzy || strictFuzzyCandidate; + + } catch (const std::out_of_range &e) { + throw_with_trace(boost::enable_error_info(e) << error_info_string("from = " + saStrIn)); } } - if (!fuzzy) { - extendedPos = pos; - } - ++index; - for (; saStrIn[index] != ','; ++index) { - support = 10 * support + (saStrIn[index] - '0'); - } - ++index; - for (; saStrIn[index] != ','; ++index) { - secondarySupport = 10 * secondarySupport + (saStrIn[index] - '0'); + + SuppAlignmentAnno::SuppAlignmentAnno(const SuppAlignment &saIn) + : chrIndex{saIn.getChrIndex()}, + pos{saIn.getPos()}, + extendedPos{saIn.getExtendedPos()}, + support{saIn.getSupport()}, + secondarySupport{saIn.getSecondarySupport()}, + mateSupport{saIn.getMateSupport()}, + expectedDiscordants{saIn.getExpectedDiscordants()}, + encounteredM{saIn.isEncounteredM()}, + toRemove{false}, + inverted{saIn.isInverted()}, + fuzzy{saIn.isFuzzy()}, + strictFuzzy{false}, + strictFuzzyCandidate{false}, + distant{false}, + suspicious{saIn.isSuspicious()}, + semiSuspicious{saIn.isSemiSuspicious()}, + properPairErrorProne{saIn.isProperPairErrorProne()}, + supportingIndices{saIn.getSupportingIndices()} { + distant = expectedDiscordants > 0 || suspicious; + if (support + secondarySupport == 0) { + fuzzy = true; + } + strictFuzzyCandidate = (support + secondarySupport) < 3; + strictFuzzy = fuzzy || strictFuzzyCandidate; } - ++index; - if (saStrIn[index] == '!') { - suspicious = true; - index += 2; - } else { - for (; saStrIn[index] != '/'; ++index) { - if (saStrIn[index] == '?') { - semiSuspicious = true; - } else { - mateSupport = 10 * mateSupport + (saStrIn[index] - '0'); + + SuppAlignmentAnno::SuppAlignmentAnno(const SuppAlignmentAnno &saAnnoIn) + : 
chrIndex{saAnnoIn.getChrIndex()}, + pos{saAnnoIn.getPos()}, + extendedPos{saAnnoIn.getExtendedPos()}, + support{saAnnoIn.getSupport()}, + secondarySupport{saAnnoIn.getSecondarySupport()}, + mateSupport{saAnnoIn.getMateSupport()}, + expectedDiscordants{saAnnoIn.getExpectedDiscordants()}, + encounteredM{saAnnoIn.isEncounteredM()}, + toRemove{false}, + inverted{saAnnoIn.isInverted()}, + fuzzy{saAnnoIn.isFuzzy()}, + strictFuzzy{saAnnoIn.isStrictFuzzy()}, + strictFuzzyCandidate{saAnnoIn.isStrictFuzzyCandidate()}, + distant{saAnnoIn.isDistant()}, + suspicious{saAnnoIn.isSuspicious()}, + semiSuspicious{saAnnoIn.isSemiSuspicious()}, + properPairErrorProne{saAnnoIn.isProperPairErrorProne()}, + supportingIndices{saAnnoIn.getSupportingIndices()} {} + + SuppAlignmentAnno::SuppAlignmentAnno(ChrIndex emittingBpChrIndex, + ChrSize emittingBpPos, + const SuppAlignmentAnno &saAnnoIn) + : chrIndex{emittingBpChrIndex}, + pos{saAnnoIn.isDistant() + ? static_cast( + std::max(1, + static_cast( + round(emittingBpPos - static_cast(1.5 * DEFAULT_READ_LENGTH))) + ) + ) + : emittingBpPos}, + extendedPos{ + saAnnoIn.isDistant() + ? 
emittingBpPos + static_cast(1.5 * DEFAULT_READ_LENGTH) + : emittingBpPos}, + support{0}, + secondarySupport{0}, + mateSupport{0}, + expectedDiscordants{0}, + encounteredM{true}, + toRemove{true}, + inverted{saAnnoIn.isInverted()}, + fuzzy{saAnnoIn.isDistant()}, + strictFuzzy{saAnnoIn.isDistant()}, + strictFuzzyCandidate{true}, + distant{saAnnoIn.isDistant()}, + suspicious{saAnnoIn.isSuspicious()}, + semiSuspicious{saAnnoIn.isSemiSuspicious()}, + properPairErrorProne{saAnnoIn.isProperPairErrorProne()}, + supportingIndices{} {} + + std::string + SuppAlignmentAnno::print() const { + std::string outStr; + outStr.reserve(36); + std::string invStr{}; + if (inverted) { + invStr.append("_INV"); + } + if (encounteredM) { + outStr.append("|"); + } else { + invStr.append("|"); + } + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (!fuzzy || pos == extendedPos) { + outStr.append(chrConverter.indexToChrName(chrIndex)) + .append(":") + .append(strtk::type_to_string(pos)); + } else { + outStr.append(chrConverter.indexToChrName(chrIndex)) + .append(":") + .append(strtk::type_to_string(pos)) + .append("-") + .append(strtk::type_to_string(extendedPos)); + } + outStr.append(invStr) + .append("(") + .append(strtk::type_to_string(support)) + .append(",") + .append(strtk::type_to_string(secondarySupport)) + .append(","); + if (!suspicious) { + outStr.append(strtk::type_to_string(mateSupport)); + if (semiSuspicious) { + outStr.append("?"); } + } else { + outStr.append("!"); } - ++index; - } - for (; saStrIn[index] != ')'; ++index) { - expectedDiscordants = 10 * expectedDiscordants + (saStrIn[index] - '0'); - } - if (support + secondarySupport == 0) { - fuzzy = true; - } - distant = expectedDiscordants > 0 || suspicious; - strictFuzzyCandidate = (support + secondarySupport) < 3; - strictFuzzy = fuzzy || strictFuzzyCandidate; -} - -SuppAlignmentAnno::SuppAlignmentAnno(const SuppAlignment &saIn) - : chrIndex{saIn.getChrIndex()}, pos{saIn.getPos()}, 
- extendedPos{saIn.getExtendedPos()}, support{saIn.getSupport()}, - secondarySupport{saIn.getSecondarySupport()}, - mateSupport{saIn.getMateSupport()}, - expectedDiscordants{saIn.getExpectedDiscordants()}, - encounteredM{saIn.isEncounteredM()}, toRemove{false}, - inverted{saIn.isInverted()}, fuzzy{saIn.isFuzzy()}, strictFuzzy{false}, - strictFuzzyCandidate{false}, distant{false}, - suspicious{saIn.isSuspicious()}, semiSuspicious{saIn.isSemiSuspicious()}, - properPairErrorProne{saIn.isProperPairErrorProne()}, - supportingIndices{saIn.getSupportingIndices()} { - distant = expectedDiscordants > 0 || suspicious; - if (support + secondarySupport == 0) { - fuzzy = true; - } - strictFuzzyCandidate = (support + secondarySupport) < 3; - strictFuzzy = fuzzy || strictFuzzyCandidate; -} - -SuppAlignmentAnno::SuppAlignmentAnno(const SuppAlignmentAnno &saAnnoIn) - : chrIndex{saAnnoIn.getChrIndex()}, pos{saAnnoIn.getPos()}, - extendedPos{saAnnoIn.getExtendedPos()}, support{saAnnoIn.getSupport()}, - secondarySupport{saAnnoIn.getSecondarySupport()}, - mateSupport{saAnnoIn.getMateSupport()}, - expectedDiscordants{saAnnoIn.getExpectedDiscordants()}, - encounteredM{saAnnoIn.isEncounteredM()}, toRemove{false}, - inverted{saAnnoIn.isInverted()}, fuzzy{saAnnoIn.isFuzzy()}, - strictFuzzy{saAnnoIn.isStrictFuzzy()}, - strictFuzzyCandidate{saAnnoIn.isStrictFuzzyCandidate()}, - distant{saAnnoIn.isDistant()}, suspicious{saAnnoIn.isSuspicious()}, - semiSuspicious{saAnnoIn.isSemiSuspicious()}, - properPairErrorProne{saAnnoIn.isProperPairErrorProne()}, - supportingIndices{saAnnoIn.getSupportingIndices()} {} - -SuppAlignmentAnno::SuppAlignmentAnno(int emittingBpChrIndex, int emittingBpPos, - const SuppAlignmentAnno &saAnnoIn) - : chrIndex{emittingBpChrIndex}, - pos{saAnnoIn.isDistant() - ? max(1, static_cast( - round(emittingBpPos - 1.5 * DEFAULTREADLENGTH))) - : emittingBpPos}, - extendedPos{ - saAnnoIn.isDistant() - ? 
static_cast(emittingBpPos + 1.5 * DEFAULTREADLENGTH) - : emittingBpPos}, - support{0}, secondarySupport{0}, mateSupport{0}, expectedDiscordants{0}, - encounteredM{true}, toRemove{true}, inverted{saAnnoIn.isInverted()}, - fuzzy{saAnnoIn.isDistant()}, strictFuzzy{saAnnoIn.isDistant()}, - strictFuzzyCandidate{true}, distant{saAnnoIn.isDistant()}, - suspicious{saAnnoIn.isSuspicious()}, - semiSuspicious{saAnnoIn.isSemiSuspicious()}, - properPairErrorProne{saAnnoIn.isProperPairErrorProne()}, - supportingIndices{} {} - -string -SuppAlignmentAnno::print() const { - string outStr; - outStr.reserve(36); - string invStr{}; - if (inverted) { - invStr.append("_INV"); - } - if (encounteredM) { - outStr.append("|"); - } else { - invStr.append("|"); - } - if (!fuzzy || pos == extendedPos) { - outStr.append(ChrConverter::indexToChr[chrIndex]) - .append(":") - .append(strtk::type_to_string(pos)); - } else { - outStr.append(ChrConverter::indexToChr[chrIndex]) - .append(":") - .append(strtk::type_to_string(pos)) - .append("-") - .append(strtk::type_to_string(extendedPos)); - } - outStr.append(invStr) - .append("(") - .append(strtk::type_to_string(support)) - .append(",") - .append(strtk::type_to_string(secondarySupport)) - .append(","); - if (!suspicious) { - outStr.append(strtk::type_to_string(mateSupport)); - if (semiSuspicious) { - outStr.append("?"); + outStr.append("/") + .append(strtk::type_to_string(expectedDiscordants)) + .append(")"); + if (properPairErrorProne) { + outStr.append("#"); } - } else { - outStr.append("!"); - } - outStr.append("/") - .append(strtk::type_to_string(expectedDiscordants)) - .append(")"); - if (properPairErrorProne) { - outStr.append("#"); + return outStr; } - return outStr; -} - -bool -SuppAlignmentAnno::saCloseness(const SuppAlignmentAnno &rhs, - int fuzziness) const { - // inverted == rhs.isInverted() && //test - if (chrIndex == rhs.getChrIndex()) { - if (strictFuzzy || rhs.isStrictFuzzy()) { - if (pos <= rhs.getExtendedPos() && rhs.getPos() <= 
extendedPos) { - return true; - } - fuzziness = 2.5 * DEFAULTREADLENGTH; - if (rhs.getPos() >= extendedPos) { - return (rhs.getPos() - extendedPos) <= fuzziness; + + bool + SuppAlignmentAnno::saCloseness(const SuppAlignmentAnno &rhs, + int fuzziness) const { + // inverted == rhs.isInverted() && //test + if (chrIndex == rhs.getChrIndex()) { + if (strictFuzzy || rhs.isStrictFuzzy()) { + if (pos <= rhs.getExtendedPos() && rhs.getPos() <= extendedPos) { + return true; + } + fuzziness = 2.5 * DEFAULT_READ_LENGTH; + if (rhs.getPos() >= extendedPos) { + return (static_cast(rhs.getPos()) - static_cast(extendedPos)) <= fuzziness; + } + return (static_cast(pos) - static_cast(rhs.getExtendedPos())) <= fuzziness; + // return (rhs.getPos() - fuzziness) <= (extendedPos + + //fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + + //fuzziness); + } else { + return abs(static_cast(pos) - static_cast(rhs.getPos())) <= fuzziness; } - return (pos - rhs.getExtendedPos()) <= fuzziness; - // return (rhs.getPos() - fuzziness) <= (extendedPos + - //fuzziness) && (pos - fuzziness) <= (rhs.getExtendedPos() + - //fuzziness); } else { - return abs(pos - rhs.getPos()) <= fuzziness; + return false; } - } else { - return false; } -} - -bool -SuppAlignmentAnno::saClosenessDirectional(const SuppAlignmentAnno &rhs, - int fuzziness) const { - // inverted == rhs.isInverted() && //test - if (chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) { - if (strictFuzzy || rhs.isStrictFuzzy()) { - if (pos <= rhs.getExtendedPos() && rhs.getPos() <= extendedPos) { - return true; - } - fuzziness = 2.5 * DEFAULTREADLENGTH; - if (rhs.getPos() >= extendedPos) { - return (rhs.getPos() - extendedPos) <= fuzziness; + + bool + SuppAlignmentAnno::saClosenessDirectional(const SuppAlignmentAnno &rhs, + int fuzziness) const { + // inverted == rhs.isInverted() && //test + if (chrIndex == rhs.getChrIndex() && encounteredM == rhs.isEncounteredM()) { + if (strictFuzzy || rhs.isStrictFuzzy()) { + if (pos 
<= rhs.getExtendedPos() && rhs.getPos() <= extendedPos) { + return true; + } + fuzziness = 2.5 * DEFAULT_READ_LENGTH; + if (rhs.getPos() >= extendedPos) { + return (static_cast(rhs.getPos()) - static_cast(extendedPos)) <= fuzziness; + } + return (static_cast(pos) - static_cast(rhs.getExtendedPos())) <= fuzziness; + } else { + return abs(static_cast(pos) - static_cast(rhs.getPos())) <= fuzziness; } - return (pos - rhs.getExtendedPos()) <= fuzziness; } else { - return abs(pos - rhs.getPos()) <= fuzziness; + return false; } - } else { - return false; } -} - -void -SuppAlignmentAnno::mergeMrefSa(const SuppAlignmentAnno &mrefSa) { - support = max(support, mrefSa.getSupport()); - secondarySupport = max(secondarySupport, mrefSa.getSecondarySupport()); - for (auto index : mrefSa.getSupportingIndices()) { - supportingIndices.push_back(index); - } - sort(supportingIndices.begin(), supportingIndices.end()); - supportingIndices.erase( - unique(supportingIndices.begin(), supportingIndices.end()), - supportingIndices.end()); - if (mrefSa.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { - if ((0.0 + mrefSa.getMateSupport()) / mrefSa.getExpectedDiscordants() > - (0.0 + mateSupport) / expectedDiscordants) { + + void + SuppAlignmentAnno::mergeMrefSa(const SuppAlignmentAnno &mrefSa) { + support = std::max(support, mrefSa.getSupport()); + secondarySupport = std::max(secondarySupport, mrefSa.getSecondarySupport()); + for (auto index : mrefSa.getSupportingIndices()) { + supportingIndices.push_back(index); + } + sort(supportingIndices.begin(), supportingIndices.end()); + supportingIndices.erase( + unique(supportingIndices.begin(), supportingIndices.end()), + supportingIndices.end()); + if (mrefSa.getExpectedDiscordants() > 0 && expectedDiscordants > 0) { + if ((0.0 + mrefSa.getMateSupport()) / mrefSa.getExpectedDiscordants() > + (0.0 + mateSupport) / expectedDiscordants) { + mateSupport = mrefSa.getMateSupport(); + expectedDiscordants = mrefSa.getExpectedDiscordants(); + } + } 
else if (mrefSa.getExpectedDiscordants() > 0) { mateSupport = mrefSa.getMateSupport(); expectedDiscordants = mrefSa.getExpectedDiscordants(); } - } else if (mrefSa.getExpectedDiscordants() > 0) { - mateSupport = mrefSa.getMateSupport(); - expectedDiscordants = mrefSa.getExpectedDiscordants(); + if (!mrefSa.isSemiSuspicious() && semiSuspicious) { + semiSuspicious = false; + } } - if (!mrefSa.isSemiSuspicious() && semiSuspicious) { - semiSuspicious = false; + + void + SuppAlignmentAnno::finalizeSupportingIndices() { + sort(supportingIndices.begin(), supportingIndices.end()); + supportingIndices.erase( + unique(supportingIndices.begin(), supportingIndices.end()), + supportingIndices.end()); + support = static_cast(supportingIndices.size()); + secondarySupport = 0; } -} - -void -SuppAlignmentAnno::finalizeSupportingIndices() { - sort(supportingIndices.begin(), supportingIndices.end()); - supportingIndices.erase( - unique(supportingIndices.begin(), supportingIndices.end()), - supportingIndices.end()); - support = static_cast(supportingIndices.size()); - secondarySupport = 0; -} } /* namespace sophia */ diff --git a/src/SvEvent.cpp b/src/SvEvent.cpp index 68cae30..80021e9 100644 --- a/src/SvEvent.cpp +++ b/src/SvEvent.cpp @@ -1,6 +1,4 @@ /* - * PairInfo.cpp - * * Created on: 24 Oct 2016 * Author: Umut H. 
Toprak, DKFZ Heidelberg (Divisions of Theoretical * Bioinformatics, Bioinformatics and Omics Data Analytics and currently @@ -22,1806 +20,1896 @@ * LICENSE: GPL */ -#include "ChrConverter.h" -#include "strtk.hpp" -#include +#include "GlobalAppConfig.h" +#include "strtk-wrap.h" +#include "SvEvent.h" +#include namespace sophia { -using namespace std; - -int SvEvent::GERMLINEOFFSETTHRESHOLD{}; -double SvEvent::RELAXEDBPFREQTHRESHOLD{}; -double SvEvent::BPFREQTHRESHOLD{}; -double SvEvent::ARTIFACTFREQLOWTHRESHOLD{}; -double SvEvent::ARTIFACTFREQHIGHTHRESHOLD{}; -double SvEvent::CLONALITYLOWTHRESHOLD{}; -double SvEvent::CLONALITYSTRICTLOWTHRESHOLD{}; -double SvEvent::CLONALITYHIGHTHRESHOLD{}; -int SvEvent::HALFDEFAULTREADLENGTH{}; -int SvEvent::GERMLINEDBLIMIT{}; -string SvEvent::PIDSINMREFSTR{}; -boost::format SvEvent::doubleFormatter{"%.3f"}; -bool SvEvent::ABRIDGEDOUTPUT{true}; -bool SvEvent::NOCONTROLMODE{false}; -bool SvEvent::DEBUGMODE{false}; -const vector SvEvent::EVENTTYPES{"UNKNOWN", "DEL", "DUP", - "TRA", "INV", "CONTAMINATION"}; - -SvEvent::SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In, - const SuppAlignmentAnno &sa1In, const SuppAlignmentAnno &sa2In, - const vector> &overhangDb) - : toRemove{false}, contaminationCandidate{0}, - chrIndex1{bp1In.getChrIndex()}, pos1{bp1In.getPos()}, - chrIndex2{bp2In.getChrIndex()}, pos2{bp2In.getPos()}, - lineIndex1{bp1In.getLineIndex()}, - lineIndex2{bp2In.getLineIndex()}, eventType{0}, eventSize{0}, - inverted{sa1In.isInverted() || sa2In.isInverted()}, distant{false}, - overhang1Compensation{false}, overhang2Compensation{false}, - overhang1Index{-1}, overhang2Index{-1}, overhang1lengthRatio{0}, - overhang2lengthRatio{0}, inputScore{2}, eventScore{0}, - totalEvidence1{sa1In.getSupport() + sa1In.getSecondarySupport() + - sa1In.getMateSupport()}, - span1{bp1In.getNormalSpans()}, - totalEvidence2{sa2In.getSupport() + sa2In.getSecondarySupport() + - sa2In.getMateSupport()}, - 
span2{bp2In.getNormalSpans()}, evidenceLevel1{0}, - evidenceLevel2{0}, mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, - mrefHits1Conservative{true}, - mrefHits2{bp2In.getMrefHits().getNumConsevativeHits()}, - mrefHits2Conservative{true}, germline{false}, - germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, - germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > - 0.15}, - germlineClonality2{bp2In.getGermlineInfo().getConservativeClonality()}, - germlineStatus2{bp2In.getGermlineInfo().getConservativeClonality() > - 0.15}, - selectedSa1{sa1In}, selectedSa2{sa2In}, - mateRatio1{sa1In.getExpectedDiscordants() > 0 - ? sa1In.getMateSupport() / - (0.0 + sa1In.getExpectedDiscordants()) - : 1.0}, - mateRatio2{sa2In.getExpectedDiscordants() > 0 - ? sa2In.getMateSupport() / - (0.0 + sa2In.getExpectedDiscordants()) - : 1.0}, - suspicious{0}, semiSuspicious{sa1In.isSemiSuspicious() || - sa2In.isSemiSuspicious()} { - // auto messageMode = selectedSa1.getChrIndex() == 11 && - //(selectedSa1.getPos() == 2261373 && selectedSa2.getPos() == 2148480); - auto posDifferential = pos1 - pos2; - determineEventTypeAndSize(posDifferential, selectedSa2.isEncounteredM()); - if (chrIndex1 != chrIndex2) { - distant = true; - } else if (selectedSa1.getExpectedDiscordants() > 0 || - selectedSa2.getExpectedDiscordants() > 0) { - distant = true; - } else if (eventSize > 1500) { - distant = true; - } - if (distant && chrIndex1 == chrIndex2 && - (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate() || - selectedSa2.isFuzzy() || selectedSa2.isStrictFuzzyCandidate())) { - if (eventSize < 5000) { - distant = false; + int SvEvent::GERMLINE_OFFSET_THRESHOLD{}; + + double SvEvent::RELAXED_BP_FREQ_THRESHOLD{}; + + double SvEvent::BP_FREQ_THRESHOLD{}; + + double SvEvent::ARTIFACT_FREQ_LOW_THRESHOLD{}; + + double SvEvent::ARTIFACT_FREQ_HIGH_THRESHOLD{}; + + double SvEvent::CLONALITY_LOW_THRESHOLD{}; + + double SvEvent::CLONALITY_STRICT_LOW_THRESHOLD{}; + + 
double SvEvent::CLONALITY_HIGH_THRESHOLD{}; + + int SvEvent::HALF_DEFAULT_READ_LENGTH{}; + + int SvEvent::GERMLINE_DB_LIMIT{}; + + std::string SvEvent::PIDS_IN_MREF_STR{}; + + boost::format SvEvent::doubleFormatter{"%.3f"}; + + bool SvEvent::ABRIDGED_OUTPUT{true}; + + bool SvEvent::NO_CONTROL_MODE{false}; + + bool SvEvent::DEBUG_MODE{false}; + + const std::vector SvEvent::EVENTTYPES{"UNKNOWN", "DEL", "DUP", + "TRA", "INV", "CONTAMINATION"}; + + SvEvent::SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In, + const SuppAlignmentAnno &sa1In, const SuppAlignmentAnno &sa2In, + const std::vector> &overhangDb) + : toRemove{false}, + contaminationCandidate{0}, + chrIndex1{bp1In.getChrIndex()}, + pos1{bp1In.getPos()}, + chrIndex2{bp2In.getChrIndex()}, + pos2{bp2In.getPos()}, + lineIndex1{bp1In.getLineIndex()}, + lineIndex2{bp2In.getLineIndex()}, + eventType{0}, + eventSize{0}, + inverted{sa1In.isInverted() || sa2In.isInverted()}, + distant{false}, + overhang1Compensation{false}, + overhang2Compensation{false}, + overhang1Index{-1}, + overhang2Index{-1}, + overhang1lengthRatio{0}, + overhang2lengthRatio{0}, + inputScore{2}, + eventScore{0}, + totalEvidence1{sa1In.getSupport() + sa1In.getSecondarySupport() + + sa1In.getMateSupport()}, + span1{bp1In.getNormalSpans()}, + totalEvidence2{sa2In.getSupport() + sa2In.getSecondarySupport() + + sa2In.getMateSupport()}, + span2{bp2In.getNormalSpans()}, + evidenceLevel1{0}, + evidenceLevel2{0}, + mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, + mrefHits1Conservative{true}, + mrefHits2{bp2In.getMrefHits().getNumConsevativeHits()}, + mrefHits2Conservative{true}, + germline{false}, + germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, + germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > 0.15}, + germlineClonality2{bp2In.getGermlineInfo().getConservativeClonality()}, + germlineStatus2{bp2In.getGermlineInfo().getConservativeClonality() > 0.15}, + selectedSa1{sa1In}, + 
selectedSa2{sa2In}, + mateRatio1{sa1In.getExpectedDiscordants() > 0 + ? sa1In.getMateSupport() / + (0.0 + sa1In.getExpectedDiscordants()) + : 1.0}, + mateRatio2{sa2In.getExpectedDiscordants() > 0 + ? sa2In.getMateSupport() / + (0.0 + sa2In.getExpectedDiscordants()) + : 1.0}, + suspicious{0}, semiSuspicious{sa1In.isSemiSuspicious() || + sa2In.isSemiSuspicious()} { + + determineEventTypeAndSize(pos1, pos2, selectedSa2.isEncounteredM()); + if (chrIndex1 != chrIndex2) { + distant = true; + } else if (selectedSa1.getExpectedDiscordants() > 0 || + selectedSa2.getExpectedDiscordants() > 0) { + distant = true; + } else if (eventSize > 1500) { + distant = true; + } + if (distant && chrIndex1 == chrIndex2 && + (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate() || + selectedSa2.isFuzzy() || selectedSa2.isStrictFuzzyCandidate())) { + if (eventSize < 5000) { + distant = false; + } } - } - auto clonalityRes1 = assessSvClonality( - bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + - selectedSa1.getMateSupport()); - artifactRatio1 = clonalityRes1.first; - clonalityRatio1 = clonalityRes1.second; - clonalityStatus1 = - assessBreakpointClonalityStatus(clonalityRatio1, bp1In, bp2In); - auto clonalityRes2 = assessSvClonality( - bp2In, selectedSa2.getSupport() + selectedSa2.getSecondarySupport() + - selectedSa2.getMateSupport()); - artifactRatio2 = clonalityRes2.first; - clonalityRatio2 = clonalityRes2.second; - clonalityStatus2 = - assessBreakpointClonalityStatus(clonalityRatio2, bp1In, bp2In); - - auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); - overhang1Index = res1.second; - overhang1Compensation = (clonalityStatus1 != EXTREME_SUBCLONAL) && - selectedSa1.isDistant() && res1.first; - auto res2 = assessOverhangQualityCompensation(lineIndex2, overhangDb); - overhang2Index = res2.second; - overhang2Compensation = (clonalityStatus2 != EXTREME_SUBCLONAL) && - selectedSa2.isDistant() && res2.first; - - auto doubleSemiSuspicious = - 
(selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()); - germlineClonality1 = - determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); - germlineStatus1 = germlineClonality1 > 0.15; - germlineClonality2 = - determineGermlineClonalityBp(bp2In, selectedSa2, germlineClonality2); - germlineStatus2 = germlineClonality2 > 0.15; - - auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && - !selectedSa2.isProperPairErrorProne() && - ChrConverter::indexConverter[chrIndex1] < 23 && - ChrConverter::indexConverter[chrIndex2] < 23; - auto splitSupportThreshold1 = - (strictNonDecoy && !selectedSa1.isSemiSuspicious() && - (mateRatio1 >= 0.6)) - ? 0 - : 2; - auto splitSupportThreshold2 = - (strictNonDecoy && !selectedSa2.isSemiSuspicious() && - (mateRatio2 >= 0.6)) - ? 0 - : 2; - - if (selectedSa1.getSupport() > splitSupportThreshold1) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSupport() > 0 && - selectedSa1.getSecondarySupport() > splitSupportThreshold1) { + auto clonalityRes1 = assessSvClonality( + bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + + selectedSa1.getMateSupport()); + artifactRatio1 = clonalityRes1.first; + clonalityRatio1 = clonalityRes1.second; + clonalityStatus1 = + assessBreakpointClonalityStatus(clonalityRatio1, bp1In, bp2In); + auto clonalityRes2 = assessSvClonality( + bp2In, selectedSa2.getSupport() + selectedSa2.getSecondarySupport() + + selectedSa2.getMateSupport()); + artifactRatio2 = clonalityRes2.first; + clonalityRatio2 = clonalityRes2.second; + clonalityStatus2 = + assessBreakpointClonalityStatus(clonalityRatio2, bp1In, bp2In); + + auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); + overhang1Index = res1.second; + overhang1Compensation = (clonalityStatus1 != EXTREME_SUBCLONAL) && + selectedSa1.isDistant() && res1.first; + auto res2 = assessOverhangQualityCompensation(lineIndex2, overhangDb); + overhang2Index = res2.second; + overhang2Compensation = 
(clonalityStatus2 != EXTREME_SUBCLONAL) && + selectedSa2.isDistant() && res2.first; + + auto doubleSemiSuspicious = + (selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()); + germlineClonality1 = + determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); + germlineStatus1 = germlineClonality1 > 0.15; + germlineClonality2 = + determineGermlineClonalityBp(bp2In, selectedSa2, germlineClonality2); + germlineStatus2 = germlineClonality2 > 0.15; + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && + !selectedSa2.isProperPairErrorProne() && + // Used to be indexConverter[chrIndex] < 23, with no check for whether + // the values was valid (i.e. != -2). Note that the index check is + // on the CompressedIndexChr range. 23 is the Y chromosome, while 22 + // is the X chromosome in this range. Therefore, here we need to check + // for !isCompressedMref, autosomes or X chromosome. + (!chrConverter.isCompressedMref(chrIndex1) || + chrConverter.isAutosome(chrIndex1) || + chrConverter.isX(chrIndex1)) && + (!chrConverter.isCompressedMref(chrIndex2) || + chrConverter.isAutosome(chrIndex2) || + chrConverter.isX(chrIndex2)); + auto splitSupportThreshold1 = + (strictNonDecoy && !selectedSa1.isSemiSuspicious() && + (mateRatio1 >= 0.6)) + ? 0 + : 2; + auto splitSupportThreshold2 = + (strictNonDecoy && !selectedSa2.isSemiSuspicious() && + (mateRatio2 >= 0.6)) + ? 
0 + : 2; + + if (selectedSa1.getSupport() > splitSupportThreshold1) { ++evidenceLevel1; + } else { + if (strictNonDecoy && selectedSa1.getSupport() > 0 && + selectedSa1.getSecondarySupport() > splitSupportThreshold1) { + ++evidenceLevel1; + } } - } - if (selectedSa1.getSecondarySupport() > splitSupportThreshold1) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && - selectedSa1.getSupport() > splitSupportThreshold1) { + if (selectedSa1.getSecondarySupport() > splitSupportThreshold1) { ++evidenceLevel1; + } else { + if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && + selectedSa1.getSupport() > splitSupportThreshold1) { + ++evidenceLevel1; + } } - } - if (selectedSa1.isDistant()) { - auto mateQuality = mateQualityConditions(selectedSa1); - if (selectedSa1.getMateSupport() > mateQuality.first && - mateRatio1 >= mateQuality.second) { - ++evidenceLevel1; - if (evidenceLevel1 < 3 && - (overhang1Compensation || - (!semiSuspicious && overhang2Compensation)) && - !doubleSemiSuspicious && strictNonDecoy) { - if ((selectedSa1.getMateSupport() > 2) || - (selectedSa1.getMateSupport() < 3 && - selectedSa1.getExpectedDiscordants() == - selectedSa1.getMateSupport())) { - ++evidenceLevel1; + if (selectedSa1.isDistant()) { + auto mateQuality = mateQualityConditions(selectedSa1); + if (selectedSa1.getMateSupport() > mateQuality.first && + mateRatio1 >= mateQuality.second) { + ++evidenceLevel1; + if (evidenceLevel1 < 3 && + (overhang1Compensation || + (!semiSuspicious && overhang2Compensation)) && + !doubleSemiSuspicious && strictNonDecoy) { + if ((selectedSa1.getMateSupport() > 2) || + (selectedSa1.getMateSupport() < 3 && + selectedSa1.getExpectedDiscordants() == + selectedSa1.getMateSupport())) { + ++evidenceLevel1; + } } } } - } - if (selectedSa2.getSupport() > splitSupportThreshold2) { - ++evidenceLevel2; - } else { - if (strictNonDecoy && selectedSa2.getSupport() > 0 && - selectedSa2.getSecondarySupport() > 
splitSupportThreshold2) { + if (selectedSa2.getSupport() > splitSupportThreshold2) { ++evidenceLevel2; + } else { + if (strictNonDecoy && selectedSa2.getSupport() > 0 && + selectedSa2.getSecondarySupport() > splitSupportThreshold2) { + ++evidenceLevel2; + } } - } - if (selectedSa2.getSecondarySupport() > splitSupportThreshold2) { - ++evidenceLevel2; - } else { - if (strictNonDecoy && selectedSa2.getSecondarySupport() > 0 && - selectedSa2.getSupport() > splitSupportThreshold2) { + if (selectedSa2.getSecondarySupport() > splitSupportThreshold2) { ++evidenceLevel2; + } else { + if (strictNonDecoy && selectedSa2.getSecondarySupport() > 0 && + selectedSa2.getSupport() > splitSupportThreshold2) { + ++evidenceLevel2; + } } - } - if (selectedSa2.isDistant()) { - auto mateQuality = mateQualityConditions(selectedSa2); - if (selectedSa2.getMateSupport() > mateQuality.first && - mateRatio2 >= mateQuality.second) { - ++evidenceLevel2; - if (evidenceLevel2 < 3 && - ((!semiSuspicious && overhang1Compensation) || - overhang2Compensation) && - !doubleSemiSuspicious && strictNonDecoy) { - if ((selectedSa2.getMateSupport() > 2) || - (selectedSa2.getMateSupport() < 3 && - selectedSa2.getExpectedDiscordants() == - selectedSa2.getMateSupport())) { - ++evidenceLevel2; + if (selectedSa2.isDistant()) { + auto mateQuality = mateQualityConditions(selectedSa2); + if (selectedSa2.getMateSupport() > mateQuality.first && + mateRatio2 >= mateQuality.second) { + ++evidenceLevel2; + if (evidenceLevel2 < 3 && + ((!semiSuspicious && overhang1Compensation) || + overhang2Compensation) && + !doubleSemiSuspicious && strictNonDecoy) { + if ((selectedSa2.getMateSupport() > 2) || + (selectedSa2.getMateSupport() < 3 && + selectedSa2.getExpectedDiscordants() == + selectedSa2.getMateSupport())) { + ++evidenceLevel2; + } } } } - } - auto mrefHits1Tmp = - processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); - mrefHits1 = mrefHits1Tmp.second; - mrefHits1Conservative = mrefHits1Tmp.first; - auto 
mrefHits2Tmp = - processMrefHits(bp2In.getMrefHits(), selectedSa2, evidenceLevel2); - mrefHits2 = mrefHits2Tmp.second; - mrefHits2Conservative = mrefHits2Tmp.first; - if (!germlineStatus1 && germlineClonality1 > 0 && - mrefHits1 > GERMLINEDBLIMIT) { - germlineStatus1 = true; - } - if (!germlineStatus2 && germlineClonality2 > 0 && - mrefHits2 > GERMLINEDBLIMIT) { - germlineStatus2 = true; - } - germline = - (germlineStatus1 || germlineStatus2) && - !((selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) > - 200 && - (selectedSa1.getSupport() + selectedSa2.getSecondarySupport()) > 200); - - assessSvArtifactStatus(bp1In, bp2In); - if (!selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()) { - // auto messageMode = selectedSa1.getChrIndex() == 21 && - //selectedSa1.getPos() == 48119076; - auto score1 = filterMatch(bp1In, bp2In); - auto score2 = filterMatchUnknown(bp1In); - // if (messageMode) cerr << score1 << " " << score2 << - //"\n"; - if (score2 == 0 && score1 != 0) { - suspicious = score2; - semiSuspicious = false; + auto mrefHits1Tmp = + processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); + mrefHits1 = mrefHits1Tmp.second; + mrefHits1Conservative = mrefHits1Tmp.first; + auto mrefHits2Tmp = + processMrefHits(bp2In.getMrefHits(), selectedSa2, evidenceLevel2); + mrefHits2 = mrefHits2Tmp.second; + mrefHits2Conservative = mrefHits2Tmp.first; + if (!germlineStatus1 && germlineClonality1 > 0 && + mrefHits1 > GERMLINE_DB_LIMIT) { + germlineStatus1 = true; + } + if (!germlineStatus2 && germlineClonality2 > 0 && + mrefHits2 > GERMLINE_DB_LIMIT) { + germlineStatus2 = true; + } + germline = + (germlineStatus1 || germlineStatus2) && + !((selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) > 200 && + (selectedSa1.getSupport() + selectedSa2.getSecondarySupport()) > 200); + + assessSvArtifactStatus(bp1In, bp2In); + if (!selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()) { + auto score1 = filterMatch(bp1In, bp2In); + 
auto score2 = filterMatchUnknown(bp1In); + if (score2 == 0 && score1 != 0) { + suspicious = score2; + semiSuspicious = false; + } else { + suspicious = score1; + } } else { - suspicious = score1; + suspicious = filterMatch(bp1In, bp2In); + } + eventScore = assessEventScore(false, inputScore); + if (suspicious == 0 && eventScore > 2) { + assessContamination(overhangDb); } - } else { - suspicious = filterMatch(bp1In, bp2In); - } - eventScore = assessEventScore(false, inputScore); - if (suspicious == 0 && eventScore > 2) { - assessContamination(overhangDb); - } -} - -SvEvent::SvEvent(const BreakpointReduced &bp1In, const BreakpointReduced &bp2In, - const SuppAlignmentAnno &sa1In, - const vector> &overhangDb, - const SuppAlignmentAnno &dummySaIn) - : toRemove{false}, contaminationCandidate{0}, - chrIndex1{bp1In.getChrIndex()}, pos1{bp1In.getPos()}, - chrIndex2{bp2In.getChrIndex()}, pos2{bp2In.getPos()}, - lineIndex1{bp1In.getLineIndex()}, lineIndex2{bp2In.getLineIndex()}, - eventType{0}, eventSize{0}, inverted{sa1In.isInverted()}, distant{false}, - overhang1Compensation{false}, overhang2Compensation{false}, - overhang1Index{-1}, overhang2Index{-1}, overhang1lengthRatio{0}, - overhang2lengthRatio{0}, inputScore{1}, eventScore{0}, - totalEvidence1{sa1In.getSupport() + sa1In.getSecondarySupport() + - sa1In.getMateSupport()}, - span1{bp1In.getNormalSpans()}, - totalEvidence2{bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + - bp2In.getUnpairedBreaksSoft() + - bp2In.getUnpairedBreaksHard() + - bp2In.getBreaksShortIndel() + bp2In.getMateSupport()}, - span2{bp2In.getNormalSpans()}, evidenceLevel1{0}, - evidenceLevel2{0}, mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, - mrefHits1Conservative{true}, - mrefHits2{bp2In.getMrefHits().getNumConsevativeHits()}, - mrefHits2Conservative{true}, germline{false}, - germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, - germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > - 0.15}, - 
germlineClonality2{bp2In.getGermlineInfo().getConservativeClonality()}, - germlineStatus2{bp2In.getGermlineInfo().getConservativeClonality() > - 0.15}, - selectedSa1{sa1In}, selectedSa2{dummySaIn}, - mateRatio1{sa1In.getExpectedDiscordants() > 0 - ? sa1In.getMateSupport() / - (0.0 + sa1In.getExpectedDiscordants()) - : 1.0}, - mateRatio2{1.0}, suspicious{0}, semiSuspicious{sa1In.isSemiSuspicious()} { - - auto posDifferential = pos1 - pos2; - determineEventTypeAndSize( - posDifferential, (bp2In.getLeftCoverage() > bp2In.getRightCoverage())); - if (chrIndex1 != chrIndex2) { - distant = true; - } else if (selectedSa1.getExpectedDiscordants() > 0) { - distant = true; - } else if (eventSize > 1500) { - distant = true; } - if (distant && chrIndex1 == chrIndex2 && - (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate())) { - if (eventSize < 5000) { + + SvEvent::SvEvent(const BreakpointReduced &bp1In, + const BreakpointReduced &bp2In, + const SuppAlignmentAnno &sa1In, + const std::vector> &overhangDb, + const SuppAlignmentAnno &dummySaIn) + : toRemove{false}, + contaminationCandidate{0}, + chrIndex1{bp1In.getChrIndex()}, + pos1{bp1In.getPos()}, + chrIndex2{bp2In.getChrIndex()}, + pos2{bp2In.getPos()}, + lineIndex1{bp1In.getLineIndex()}, + lineIndex2{bp2In.getLineIndex()}, + eventType{0}, + eventSize{0}, + inverted{sa1In.isInverted()}, + distant{false}, + overhang1Compensation{false}, + overhang2Compensation{false}, + overhang1Index{-1}, + overhang2Index{-1}, + overhang1lengthRatio{0}, + overhang2lengthRatio{0}, + inputScore{1}, + eventScore{0}, + totalEvidence1{sa1In.getSupport() + sa1In.getSecondarySupport() + sa1In.getMateSupport()}, + span1{bp1In.getNormalSpans()}, + totalEvidence2{bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + + bp2In.getUnpairedBreaksSoft() + + bp2In.getUnpairedBreaksHard() + + bp2In.getBreaksShortIndel() + bp2In.getMateSupport()}, + span2{bp2In.getNormalSpans()}, + evidenceLevel1{0}, + evidenceLevel2{0}, + 
mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, + mrefHits1Conservative{true}, + mrefHits2{bp2In.getMrefHits().getNumConsevativeHits()}, + mrefHits2Conservative{true}, + germline{false}, + germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, + germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > 0.15}, + germlineClonality2{bp2In.getGermlineInfo().getConservativeClonality()}, + germlineStatus2{bp2In.getGermlineInfo().getConservativeClonality() > 0.15}, + selectedSa1{sa1In}, selectedSa2{dummySaIn}, + mateRatio1{sa1In.getExpectedDiscordants() > 0 + ? sa1In.getMateSupport() / + (0.0 + sa1In.getExpectedDiscordants()) + : 1.0}, + mateRatio2{1.0}, + suspicious{0}, + semiSuspicious{sa1In.isSemiSuspicious()} { + + determineEventTypeAndSize(pos1, pos2, (bp2In.getLeftCoverage() > bp2In.getRightCoverage())); + if (chrIndex1 != chrIndex2) { + distant = true; + } else if (selectedSa1.getExpectedDiscordants() > 0) { + distant = true; + } else if (eventSize > 1500) { + distant = true; + } + if (distant && chrIndex1 == chrIndex2 && + (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate())) { + if (eventSize < 5000) { + distant = false; + } + } + if (distant && (eventSize > 0) && (eventSize < 1000)) { distant = false; } - } - if (distant && (eventSize > 0) && (eventSize < 1000)) { - distant = false; - } - auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); - overhang1Index = res1.second; - auto res2 = assessOverhangQualityCompensation(lineIndex2, overhangDb); - overhang2Index = res2.second; - - auto mateEvidence1 = false; - auto mateEvidence2 = false; - germlineClonality1 = - determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); - germlineStatus1 = germlineClonality1 > 0.15; - - auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && - ChrConverter::indexConverter[chrIndex1] < 23 && - ChrConverter::indexConverter[chrIndex2] < 23; - auto splitSupportThreshold = - (strictNonDecoy && 
!selectedSa2.isSemiSuspicious() && - (mateRatio1 >= 0.66)) - ? 0 - : 2; - - if (selectedSa1.getSupport() > splitSupportThreshold) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSupport() > 0 && - selectedSa1.getSecondarySupport() > splitSupportThreshold) { + auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); + overhang1Index = res1.second; + auto res2 = assessOverhangQualityCompensation(lineIndex2, overhangDb); + overhang2Index = res2.second; + + auto mateEvidence1 = false; + auto mateEvidence2 = false; + germlineClonality1 = + determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); + germlineStatus1 = germlineClonality1 > 0.15; + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && + // Used to be indexConverter[chrIndex] < 23, with no check for whether + // the values was valid (i.e. != -2). + (!chrConverter.isCompressedMref(chrIndex1) || + chrConverter.isAutosome(chrIndex1) || + chrConverter.isX(chrIndex1)) && + (!chrConverter.isCompressedMref(chrIndex2) || + chrConverter.isAutosome(chrIndex2) || + chrConverter.isX(chrIndex2)); + auto splitSupportThreshold = + (strictNonDecoy && !selectedSa2.isSemiSuspicious() && + (mateRatio1 >= 0.66)) + ? 
0 + : 2; + + if (selectedSa1.getSupport() > splitSupportThreshold) { ++evidenceLevel1; + } else { + if (strictNonDecoy && selectedSa1.getSupport() > 0 && + selectedSa1.getSecondarySupport() > splitSupportThreshold) { + ++evidenceLevel1; + } } - } - if (selectedSa1.getSecondarySupport() > splitSupportThreshold) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && - selectedSa1.getSupport() > splitSupportThreshold) { + if (selectedSa1.getSecondarySupport() > splitSupportThreshold) { ++evidenceLevel1; - } - } - if (selectedSa1.isDistant()) { - if (!selectedSa1.isStrictFuzzyCandidate() || - (selectedSa1.isStrictFuzzyCandidate() && - selectedSa1.getMateSupport() > 4)) { - auto mateQualityCriteria = selectedSa1.isProperPairErrorProne() || - selectedSa1.isSemiSuspicious() || - selectedSa1.isStrictFuzzyCandidate(); - if (mateRatio1 >= 0.4 && - (!mateQualityCriteria || - (mateQualityCriteria && selectedSa1.getMateSupport() > 4))) { + } else { + if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && + selectedSa1.getSupport() > splitSupportThreshold) { ++evidenceLevel1; - mateEvidence1 = true; } } - } + if (selectedSa1.isDistant()) { + if (!selectedSa1.isStrictFuzzyCandidate() || + (selectedSa1.isStrictFuzzyCandidate() && + selectedSa1.getMateSupport() > 4)) { + auto mateQualityCriteria = selectedSa1.isProperPairErrorProne() || + selectedSa1.isSemiSuspicious() || + selectedSa1.isStrictFuzzyCandidate(); + if (mateRatio1 >= 0.4 && + (!mateQualityCriteria || + (mateQualityCriteria && selectedSa1.getMateSupport() > 4))) { + ++evidenceLevel1; + mateEvidence1 = true; + } + } + } - if (bp2In.getPairedBreaksSoft() + bp2In.getUnpairedBreaksSoft() > 0) { - ++evidenceLevel2; - } - if (bp2In.getPairedBreaksHard() + bp2In.getUnpairedBreaksHard() > 0) { - ++evidenceLevel2; - } - if (selectedSa1.isDistant()) { - if (bp2In.getMateSupport() > 4) { + if (bp2In.getPairedBreaksSoft() + bp2In.getUnpairedBreaksSoft() > 0) { ++evidenceLevel2; - 
mateEvidence2 = true; } - } - auto mrefHits1Tmp = - processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); - mrefHits1 = mrefHits1Tmp.second; - mrefHits1Conservative = mrefHits1Tmp.first; - auto mrefHits2Tmp = - processMrefHits(bp2In.getMrefHits(), selectedSa2, evidenceLevel2); - mrefHits2 = mrefHits2Tmp.second; - mrefHits2Conservative = mrefHits2Tmp.first; - if (!germlineStatus1 && germlineClonality1 > 0 && - mrefHits1 > GERMLINEDBLIMIT) { - germlineStatus1 = true; - } - if (!germlineStatus2 && germlineClonality2 > 0 && - mrefHits2 > GERMLINEDBLIMIT) { - germlineStatus2 = true; - } - germline = (germlineStatus1 || germlineStatus2) && // - !((selectedSa1.getSupport() + - selectedSa1.getSecondarySupport()) > 200 && - (bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + - bp2In.getUnpairedBreaksSoft() + - bp2In.getUnpairedBreaksHard()) > 200); - - if (!distant) { - if (mateEvidence1) { - --evidenceLevel1; - totalEvidence1 -= selectedSa1.getMateSupport(); - } - if (mateEvidence2) { - --evidenceLevel2; - totalEvidence2 -= bp2In.getMateSupport(); + if (bp2In.getPairedBreaksHard() + bp2In.getUnpairedBreaksHard() > 0) { + ++evidenceLevel2; } - } - auto clonalityRes1 = assessSvClonality( - bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + - selectedSa1.getMateSupport()); - artifactRatio1 = clonalityRes1.first; - clonalityRatio1 = clonalityRes1.second; - clonalityStatus1 = - assessBreakpointClonalityStatusSingle(clonalityRatio1, bp1In, bp2In); - auto clonalityRes2 = assessSvClonality( - bp2In, bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + - bp2In.getUnpairedBreaksSoft()); - artifactRatio2 = clonalityRes2.first; - clonalityRatio2 = clonalityRes2.second; - clonalityStatus2 = - assessBreakpointClonalityStatusSingle(clonalityRatio2, bp1In, bp2In); - - assessSvArtifactStatus(bp1In, bp2In); - suspicious = filterMatchSingle(bp1In, bp2In); - auto hardClipSuspiciousCall = - selectedSa1.getSupport() == 0 && - 
selectedSa1.getSecondarySupport() > 0 && - (selectedSa1.getMateSupport() <= selectedSa1.getSecondarySupport() + 4); - eventScore = assessEventScore(hardClipSuspiciousCall, inputScore); - if (suspicious == 0 && eventScore > 2) { - assessContamination(overhangDb); - } -} - -SvEvent::SvEvent(const BreakpointReduced &bp1In, const SuppAlignmentAnno &sa1In, - GermlineMatch germlineInfo2, MrefMatch hitsInMref2In, - const vector> &overhangDb, - const SuppAlignmentAnno &dummySaIn) - : toRemove{false}, contaminationCandidate{0}, - chrIndex1{bp1In.getChrIndex()}, pos1{bp1In.getPos()}, - chrIndex2{sa1In.getChrIndex()}, pos2{sa1In.getPos()}, - lineIndex1{bp1In.getLineIndex()}, lineIndex2{-1}, eventType{0}, - eventSize{0}, inverted{sa1In.isInverted()}, distant{false}, - overhang1Compensation{false}, overhang2Compensation{false}, - overhang1Index{-1}, overhang2Index{-1}, overhang1lengthRatio{0}, - overhang2lengthRatio{0}, inputScore{0}, eventScore{0}, - totalEvidence1{sa1In.getSupport() + sa1In.getSecondarySupport() + - sa1In.getMateSupport()}, - span1{bp1In.getNormalSpans()}, totalEvidence2{0}, evidenceLevel1{0}, - evidenceLevel2{0}, mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, - mrefHits1Conservative{true}, - mrefHits2{hitsInMref2In.getNumConsevativeHits()}, - mrefHits2Conservative{true}, germline{false}, - germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, - germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > - 0.15}, - germlineClonality2{germlineInfo2.getClonality()}, - germlineStatus2{germlineInfo2.getClonality() > 0.15}, selectedSa1{sa1In}, - selectedSa2{dummySaIn}, - mateRatio1{sa1In.getExpectedDiscordants() > 0 - ? 
sa1In.getMateSupport() / - (0.0 + sa1In.getExpectedDiscordants()) - : 1.0}, - mateRatio2{1.0}, suspicious{0}, semiSuspicious{sa1In.isSemiSuspicious()} { - auto truePos2 = pos2; - if (chrIndex1 == chrIndex2) { - if (abs(pos1 - pos2) > abs(pos1 - sa1In.getExtendedPos())) { - truePos2 = sa1In.getExtendedPos(); + if (selectedSa1.isDistant()) { + if (bp2In.getMateSupport() > 4) { + ++evidenceLevel2; + mateEvidence2 = true; + } + } + auto mrefHits1Tmp = + processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); + mrefHits1 = mrefHits1Tmp.second; + mrefHits1Conservative = mrefHits1Tmp.first; + auto mrefHits2Tmp = + processMrefHits(bp2In.getMrefHits(), selectedSa2, evidenceLevel2); + mrefHits2 = mrefHits2Tmp.second; + mrefHits2Conservative = mrefHits2Tmp.first; + if (!germlineStatus1 && germlineClonality1 > 0 && + mrefHits1 > GERMLINE_DB_LIMIT) { + germlineStatus1 = true; + } + if (!germlineStatus2 && germlineClonality2 > 0 && + mrefHits2 > GERMLINE_DB_LIMIT) { + germlineStatus2 = true; + } + germline = (germlineStatus1 || germlineStatus2) && // + !((selectedSa1.getSupport() + + selectedSa1.getSecondarySupport()) > 200 && + (bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + + bp2In.getUnpairedBreaksSoft() + + bp2In.getUnpairedBreaksHard()) > 200); + + if (!distant) { + if (mateEvidence1) { + --evidenceLevel1; + totalEvidence1 -= selectedSa1.getMateSupport(); + } + if (mateEvidence2) { + --evidenceLevel2; + totalEvidence2 -= bp2In.getMateSupport(); + } + } + auto clonalityRes1 = assessSvClonality( + bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + + selectedSa1.getMateSupport()); + artifactRatio1 = clonalityRes1.first; + clonalityRatio1 = clonalityRes1.second; + clonalityStatus1 = + assessBreakpointClonalityStatusSingle(clonalityRatio1, bp1In, bp2In); + auto clonalityRes2 = assessSvClonality( + bp2In, bp2In.getPairedBreaksSoft() + bp2In.getPairedBreaksHard() + + bp2In.getUnpairedBreaksSoft()); + artifactRatio2 = clonalityRes2.first; 
+ clonalityRatio2 = clonalityRes2.second; + clonalityStatus2 = + assessBreakpointClonalityStatusSingle(clonalityRatio2, bp1In, bp2In); + + assessSvArtifactStatus(bp1In, bp2In); + suspicious = filterMatchSingle(bp1In, bp2In); + auto hardClipSuspiciousCall = + selectedSa1.getSupport() == 0 && + selectedSa1.getSecondarySupport() > 0 && + (selectedSa1.getMateSupport() <= selectedSa1.getSecondarySupport() + 4); + eventScore = assessEventScore(hardClipSuspiciousCall, inputScore); + if (suspicious == 0 && eventScore > 2) { + assessContamination(overhangDb); } } - auto posDifferential = pos1 - truePos2; - determineEventTypeAndSize(posDifferential, !selectedSa1.isEncounteredM()); - if (chrIndex1 != chrIndex2) { - distant = true; - } else if (selectedSa1.getExpectedDiscordants() > 0) { - distant = true; - } else if (eventSize > 2500) { - distant = true; - } - if (distant && selectedSa1.isFuzzy() && - bp1In.getChrIndex() == selectedSa1.getChrIndex() && - selectedSa1.isStrictFuzzyCandidate()) { - auto fuzDiff = selectedSa1.getExtendedPos() - selectedSa1.getPos(); - if (max(0, pos1 - fuzDiff) <= selectedSa1.getExtendedPos() && - selectedSa1.getPos() <= (pos1 + fuzDiff)) { - distant = false; - } else if (eventSize < 5000) { + + SvEvent::SvEvent(const BreakpointReduced &bp1In, + const SuppAlignmentAnno &sa1In, + GermlineMatch germlineInfo2, + MrefMatch hitsInMref2In, + const std::vector> &overhangDb, + const SuppAlignmentAnno &dummySaIn) + : toRemove{false}, contaminationCandidate{0}, + chrIndex1{bp1In.getChrIndex()}, + pos1{bp1In.getPos()}, + chrIndex2{sa1In.getChrIndex()}, + pos2{sa1In.getPos()}, + lineIndex1{bp1In.getLineIndex()}, + lineIndex2{-1}, + eventType{0}, + eventSize{0}, + inverted{sa1In.isInverted()}, + distant{false}, + overhang1Compensation{false}, + overhang2Compensation{false}, + overhang1Index{-1}, + overhang2Index{-1}, + overhang1lengthRatio{0}, + overhang2lengthRatio{0}, + inputScore{0}, + eventScore{0}, + totalEvidence1{sa1In.getSupport() + 
sa1In.getSecondarySupport() + + sa1In.getMateSupport()}, + span1{bp1In.getNormalSpans()}, + totalEvidence2{0}, + evidenceLevel1{0}, + evidenceLevel2{0}, + mrefHits1{bp1In.getMrefHits().getNumConsevativeHits()}, + mrefHits1Conservative{true}, + mrefHits2{hitsInMref2In.getNumConsevativeHits()}, + mrefHits2Conservative{true}, + germline{false}, + germlineClonality1{bp1In.getGermlineInfo().getConservativeClonality()}, + germlineStatus1{bp1In.getGermlineInfo().getConservativeClonality() > 0.15}, + germlineClonality2{germlineInfo2.getClonality()}, + germlineStatus2{germlineInfo2.getClonality() > 0.15}, + selectedSa1{sa1In}, + selectedSa2{dummySaIn}, + mateRatio1{sa1In.getExpectedDiscordants() > 0 + ? sa1In.getMateSupport() / + (0.0 + sa1In.getExpectedDiscordants()) + : 1.0}, + mateRatio2{1.0}, + suspicious{0}, + semiSuspicious{sa1In.isSemiSuspicious()} { + auto truePos2 = pos2; + if (chrIndex1 == chrIndex2) { + if (abs(static_cast(pos1) - static_cast(pos2)) > abs(static_cast(pos1) - static_cast(sa1In.getExtendedPos()))) { + truePos2 = sa1In.getExtendedPos(); + } + } + + determineEventTypeAndSize(pos1, truePos2, !selectedSa1.isEncounteredM()); + if (chrIndex1 != chrIndex2) { + distant = true; + } else if (selectedSa1.getExpectedDiscordants() > 0) { + distant = true; + } else if (eventSize > 2500) { + distant = true; + } + if (distant && selectedSa1.isFuzzy() && + bp1In.getChrIndex() == selectedSa1.getChrIndex() && + selectedSa1.isStrictFuzzyCandidate()) { + auto fuzDiff = static_cast(selectedSa1.getExtendedPos()) - static_cast(selectedSa1.getPos()); + if (std::max(0l, static_cast(pos1) - fuzDiff) <= static_cast(selectedSa1.getExtendedPos()) && + static_cast(selectedSa1.getPos()) <= (static_cast(pos1) + fuzDiff)) { + distant = false; + } else if (eventSize < 5000) { + distant = false; + } + } + if (distant && (eventSize > 0) && (eventSize < 2000)) { distant = false; } - } - if (distant && (eventSize > 0) && (eventSize < 2000)) { - distant = false; - } - if (chrIndex1 == 
chrIndex2 && !distant && eventType == 3) { - suspicious = 3; - return; - } - auto clonalityRes1 = assessSvClonality( - bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + - selectedSa1.getMateSupport()); - artifactRatio1 = clonalityRes1.first; - clonalityRatio1 = clonalityRes1.second; - clonalityStatus1 = - assessBreakpointClonalityStatusUnknown(clonalityRatio1, bp1In); - - auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); - overhang1Index = res1.second; - overhang1Compensation = (clonalityStatus1 != EXTREME_SUBCLONAL) && - selectedSa1.isDistant() && res1.first; - - auto mateEvidence1 = false; - auto additionalEvidence1 = false; - germlineClonality1 = - determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); - germlineStatus1 = germlineClonality1 > 0.15; - - auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && - ChrConverter::indexConverter[chrIndex1] < 23 && - ChrConverter::indexConverter[chrIndex2] < 23; - auto splitSupportThreshold = - (strictNonDecoy && (mateRatio1 >= 0.66) ? 
0 : 2); - - if (selectedSa1.getSupport() > splitSupportThreshold) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSupport() > 0 && - selectedSa1.getSecondarySupport() > splitSupportThreshold) { - ++evidenceLevel1; + if (chrIndex1 == chrIndex2 && !distant && eventType == 3) { + suspicious = 3; + return; } - } - if (selectedSa1.getSecondarySupport() > splitSupportThreshold) { - ++evidenceLevel1; - } else { - if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && - selectedSa1.getSupport() > splitSupportThreshold) { + auto clonalityRes1 = assessSvClonality( + bp1In, selectedSa1.getSupport() + selectedSa1.getSecondarySupport() + + selectedSa1.getMateSupport()); + artifactRatio1 = clonalityRes1.first; + clonalityRatio1 = clonalityRes1.second; + clonalityStatus1 = + assessBreakpointClonalityStatusUnknown(clonalityRatio1, bp1In); + + auto res1 = assessOverhangQualityCompensation(lineIndex1, overhangDb); + overhang1Index = res1.second; + overhang1Compensation = (clonalityStatus1 != EXTREME_SUBCLONAL) && + selectedSa1.isDistant() && res1.first; + + auto mateEvidence1 = false; + auto additionalEvidence1 = false; + germlineClonality1 = + determineGermlineClonalityBp(bp1In, selectedSa1, germlineClonality1); + germlineStatus1 = germlineClonality1 > 0.15; + + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + auto strictNonDecoy = !selectedSa1.isProperPairErrorProne() && + // Used to be indexConverter[chrIndex] < 23, with no check for whether + // the values was valid (i.e. != -2). + (!chrConverter.isCompressedMref(chrIndex1) || + chrConverter.isAutosome(chrIndex1) || + chrConverter.isX(chrIndex1)) && + (!chrConverter.isCompressedMref(chrIndex2) || + chrConverter.isAutosome(chrIndex2) || + chrConverter.isX(chrIndex2)); + auto splitSupportThreshold = + (strictNonDecoy && (mateRatio1 >= 0.66) ? 
0 : 2); + + if (selectedSa1.getSupport() > splitSupportThreshold) { ++evidenceLevel1; + } else { + if (strictNonDecoy && selectedSa1.getSupport() > 0 && + selectedSa1.getSecondarySupport() > splitSupportThreshold) { + ++evidenceLevel1; + } } - } - if (selectedSa1.isDistant()) { - if (mateRatio1 >= 0.4) { - if (!(selectedSa1.isStrictFuzzyCandidate() || - selectedSa1.isProperPairErrorProne()) || - ((selectedSa1.isStrictFuzzyCandidate() || - selectedSa1.isProperPairErrorProne()) && - selectedSa1.getMateSupport() > 4)) { + if (selectedSa1.getSecondarySupport() > splitSupportThreshold) { + ++evidenceLevel1; + } else { + if (strictNonDecoy && selectedSa1.getSecondarySupport() > 0 && + selectedSa1.getSupport() > splitSupportThreshold) { ++evidenceLevel1; - mateEvidence1 = true; - if (evidenceLevel1 < 3 && overhang1Compensation && - strictNonDecoy) { - if ((selectedSa1.getMateSupport() > 2) || - (selectedSa1.getMateSupport() < 3 && - selectedSa1.getExpectedDiscordants() == - selectedSa1.getMateSupport())) { - ++evidenceLevel1; - additionalEvidence1 = true; + } + } + if (selectedSa1.isDistant()) { + if (mateRatio1 >= 0.4) { + if (!(selectedSa1.isStrictFuzzyCandidate() || + selectedSa1.isProperPairErrorProne()) || + ((selectedSa1.isStrictFuzzyCandidate() || + selectedSa1.isProperPairErrorProne()) && + selectedSa1.getMateSupport() > 4)) { + ++evidenceLevel1; + mateEvidence1 = true; + if (evidenceLevel1 < 3 && overhang1Compensation && + strictNonDecoy) { + if ((selectedSa1.getMateSupport() > 2) || + (selectedSa1.getMateSupport() < 3 && + selectedSa1.getExpectedDiscordants() == + selectedSa1.getMateSupport())) { + ++evidenceLevel1; + additionalEvidence1 = true; + } } } } } - } - auto mrefHits1Tmp = - processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); - mrefHits1 = mrefHits1Tmp.second; - mrefHits1Conservative = mrefHits1Tmp.first; - auto mrefHits2Tmp = processMrefHits(hitsInMref2In, selectedSa2, 0); - mrefHits2 = mrefHits2Tmp.second; - mrefHits2Conservative = 
mrefHits2Tmp.first; - if (!germlineStatus1 && germlineClonality1 > 0 && - mrefHits1 > GERMLINEDBLIMIT) { - germlineStatus1 = true; - } - if (!germlineStatus2 && germlineClonality2 > 0 && - mrefHits2 > GERMLINEDBLIMIT) { - germlineStatus2 = true; - } + auto mrefHits1Tmp = + processMrefHits(bp1In.getMrefHits(), selectedSa1, evidenceLevel1); + mrefHits1 = mrefHits1Tmp.second; + mrefHits1Conservative = mrefHits1Tmp.first; + auto mrefHits2Tmp = processMrefHits(hitsInMref2In, selectedSa2, 0); + mrefHits2 = mrefHits2Tmp.second; + mrefHits2Conservative = mrefHits2Tmp.first; + if (!germlineStatus1 && germlineClonality1 > 0 && + mrefHits1 > GERMLINE_DB_LIMIT) { + germlineStatus1 = true; + } + if (!germlineStatus2 && germlineClonality2 > 0 && + mrefHits2 > GERMLINE_DB_LIMIT) { + germlineStatus2 = true; + } - germline = - (germlineStatus1 || germlineStatus2) && - !((selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) > 200); - if (!distant) { - if (mateEvidence1) { - --evidenceLevel1; - totalEvidence1 -= selectedSa1.getMateSupport(); + germline = + (germlineStatus1 || germlineStatus2) && + !((selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) > 200); + if (!distant) { + if (mateEvidence1) { + --evidenceLevel1; + totalEvidence1 -= selectedSa1.getMateSupport(); + } + if (additionalEvidence1) { + --evidenceLevel1; + } } - if (additionalEvidence1) { - --evidenceLevel1; + assessSvArtifactStatusUnknown(); + suspicious = filterMatchUnknown(bp1In); + auto hardClipSuspiciousCall = + selectedSa1.getSupport() == 0 && + selectedSa1.getSecondarySupport() > 0 && + (selectedSa1.getMateSupport() <= selectedSa1.getSecondarySupport() + 4); + eventScore = assessEventScore(hardClipSuspiciousCall, inputScore); + if (suspicious == 0 && eventScore > 2) { + assessContamination(overhangDb); } } - assessSvArtifactStatusUnknown(); - suspicious = filterMatchUnknown(bp1In); - auto hardClipSuspiciousCall = - selectedSa1.getSupport() == 0 && - selectedSa1.getSecondarySupport() > 0 && - 
(selectedSa1.getMateSupport() <= selectedSa1.getSecondarySupport() + 4); - eventScore = assessEventScore(hardClipSuspiciousCall, inputScore); - if (suspicious == 0 && eventScore > 2) { - assessContamination(overhangDb); - } -} - -void -SvEvent::determineEventTypeAndSize(int posDifferential, - bool matchEncounteredM) { - // static vector EVENTTYPES { "UNKNOWN", "DEL", "DUP", "TRA", - //"INV", "CONTAMINATION" }; - if (chrIndex1 != chrIndex2) { - eventType = 3; - eventSize = -1; - } else { - eventSize = abs(posDifferential); - if (posDifferential < 0) { - if (inverted) { - eventType = 4; - } else { - if (selectedSa1.isEncounteredM() && !matchEncounteredM) { - eventType = 1; - } else if (!selectedSa1.isEncounteredM() && matchEncounteredM) { - eventType = 2; + + void + SvEvent::determineEventTypeAndSize(ChrPosition pos1In, + ChrPosition pos2In, + bool matchEncounteredM) { + int posDifferential = pos1In - pos2In; + if (chrIndex1 != chrIndex2) { + // interchromosomal + eventType = 3; + eventSize = -1; + } else { + eventSize = abs(posDifferential); + if (posDifferential < 0) { // pos1 < pos2; pos1 left of pos2 + if (inverted) { + // inverted + eventType = 4; } else { - eventType = 3; + // Assuming, selectedSa1 and pos1 belong together, as selectedSa2 and pos2. 
+ if (selectedSa1.isEncounteredM() && !matchEncounteredM) { + eventType = 1; + } else if (!selectedSa1.isEncounteredM() && matchEncounteredM) { + eventType = 2; + } else { + // Both same encounteredM value + eventType = 3; + } } - } - } else if (posDifferential > 0) { - if (inverted) { - eventType = 4; - } else { - if (selectedSa1.isEncounteredM() && !matchEncounteredM) { - eventType = 2; - } else if (!selectedSa1.isEncounteredM() && matchEncounteredM) { - eventType = 1; + } else if (posDifferential > 0) { // pos1 > pos2; pos1 right of pos2 + if (inverted) { + // inverted + eventType = 4; } else { - eventType = 3; + if (selectedSa1.isEncounteredM() && !matchEncounteredM) { + // This is opposite of the posDifferential < 0 case. + eventType = 2; + } else if (!selectedSa1.isEncounteredM() && matchEncounteredM) { + // This is opposite of the posDifferential < 0 case. + eventType = 1; + } else { + // Both same encounteredM value + eventType = 3; + } } + } else { // posDifferential == 0 + eventType = 3; + suspicious = 3; } - } else { - eventType = 3; - suspicious = 3; } } -} - -pair -SvEvent::mateQualityConditions(const SuppAlignmentAnno &sa) { - // auto messageMode = sa.getChrIndex() == 11 && (sa.getPos() == 2261373 || - //sa.getPos() == 2148480); - auto doubleSemiSuspicious = - (selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()) || - (selectedSa1.isSemiSuspicious() && - selectedSa2.isProperPairErrorProne()) || - (selectedSa1.isProperPairErrorProne() && - selectedSa2.isSemiSuspicious()); - auto mateLowQualityCriteriaTier1 = sa.isProperPairErrorProne() || - doubleSemiSuspicious || - (mateRatio1 + mateRatio2) < 1.1; - auto mateLowQualityCriteriaTier2 = - sa.isSemiSuspicious() && sa.isStrictFuzzyCandidate(); - auto mateLowQualityCriteriaTier3 = sa.isSemiSuspicious() || sa.isFuzzy(); - auto mateLowQualityCriteriaTier4 = sa.isStrictFuzzyCandidate(); - if (mateLowQualityCriteriaTier1 && mateLowQualityCriteriaTier2) { - return {9, 0.8}; - } else if 
(mateLowQualityCriteriaTier1 || inputScore == 0) { - return {4, 0.8}; - } else if (mateLowQualityCriteriaTier2) { - return {4, 0.6}; - } else if (mateLowQualityCriteriaTier3) { - return {2, 0.6}; - } else if (mateLowQualityCriteriaTier4) { - return {0, 0.6}; - } else if (sa.getMateSupport() < 10) { - return {0, 0.4}; - } else { - return {0, 0.33}; - } -} - -pair -SvEvent::assessOverhangQualityCompensation( - int lineIndex, const vector> &overhangDb) const { - auto overhangIndex = -1; - auto compensation = false; - pair dummy{lineIndex, ""}; - auto lower = lower_bound(overhangDb.cbegin(), overhangDb.cend(), dummy); - auto it = overhangDb.cend(); - if (lower != overhangDb.cend()) { - if (lower->first == lineIndex) { - it = lower; - } else if (next(lower) != overhangDb.cend() && - next(lower)->first == lineIndex) { - it = next(lower); - } else if (lower != overhangDb.cbegin() && - prev(lower)->first == lineIndex) { - it = prev(lower); + + std::pair + SvEvent::mateQualityConditions(const SuppAlignmentAnno &sa) { + auto doubleSemiSuspicious = + (selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious()) || + (selectedSa1.isSemiSuspicious() && + selectedSa2.isProperPairErrorProne()) || + (selectedSa1.isProperPairErrorProne() && + selectedSa2.isSemiSuspicious()); + auto mateLowQualityCriteriaTier1 = sa.isProperPairErrorProne() || + doubleSemiSuspicious || + (mateRatio1 + mateRatio2) < 1.1; + auto mateLowQualityCriteriaTier2 = sa.isSemiSuspicious() && sa.isStrictFuzzyCandidate(); + auto mateLowQualityCriteriaTier3 = sa.isSemiSuspicious() || sa.isFuzzy(); + auto mateLowQualityCriteriaTier4 = sa.isStrictFuzzyCandidate(); + if (mateLowQualityCriteriaTier1 && mateLowQualityCriteriaTier2) { + return {9, 0.8}; + } else if (mateLowQualityCriteriaTier1 || inputScore == 0) { + return {4, 0.8}; + } else if (mateLowQualityCriteriaTier2) { + return {4, 0.6}; + } else if (mateLowQualityCriteriaTier3) { + return {2, 0.6}; + } else if (mateLowQualityCriteriaTier4) { + return {0, 
0.6}; + } else if (sa.getMateSupport() < 10) { + return {0, 0.4}; + } else { + return {0, 0.33}; } } - if (it != overhangDb.cend()) { - auto overhangLength = 0; - auto maxOverhangLength = 0; - auto counts = 0; - overhangIndex = it - overhangDb.cbegin(); - for (const auto c : it->second) { - switch (c) { - case '(': - maxOverhangLength = max(maxOverhangLength, overhangLength); - overhangLength = 0; - break; - case ':': - overhangLength = 0; - ++counts; - break; - default: - ++overhangLength; - break; + + std::pair + SvEvent::assessOverhangQualityCompensation( + int lineIndex, + const std::vector> &overhangDb) const { + auto overhangIndex = -1; + auto compensation = false; + std::pair dummy{lineIndex, ""}; + auto lower = lower_bound(overhangDb.cbegin(), overhangDb.cend(), dummy); + auto it = overhangDb.cend(); + if (lower != overhangDb.cend()) { + if (lower->first == lineIndex) { + it = lower; + } else if (next(lower) != overhangDb.cend() && + next(lower)->first == lineIndex) { + it = next(lower); + } else if (lower != overhangDb.cbegin() && + prev(lower)->first == lineIndex) { + it = prev(lower); } } - if (counts < 3) { - auto lengthRatio = (0.0 + maxOverhangLength) / - SuppAlignmentAnno::DEFAULTREADLENGTH; - compensation = 0.25 <= lengthRatio && lengthRatio <= 0.8; - } - } - return {compensation, overhangIndex}; -} - -pair -SvEvent::processMrefHits(const MrefMatch &hitsInMref, - const SuppAlignmentAnno &sa, - int evidenceLevelIn) const { - auto initScore = hitsInMref.getNumConsevativeHits(); - auto distantHit = false; - auto saMatch = false; - short maxScore{0}; - short hits{initScore}; - for (const auto &saRef : hitsInMref.getSuppMatches()) { - if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULTREADLENGTH * 6)) { - saMatch = true; - auto score = saRef.getSecondarySupport(); - if (score > maxScore) { - maxScore = score; - if (maxScore > initScore) { - hits = maxScore; - distantHit = true; + if (it != overhangDb.cend()) { + auto overhangLength = 0; + auto 
maxOverhangLength = 0; + auto counts = 0; + overhangIndex = it - overhangDb.cbegin(); + for (const auto c : it->second) { + switch (c) { + case '(': + maxOverhangLength = std::max(maxOverhangLength, overhangLength); + overhangLength = 0; + break; + case ':': + overhangLength = 0; + ++counts; + break; + default: + ++overhangLength; + break; } } + if (counts < 3) { + auto lengthRatio = (0.0 + maxOverhangLength) / + SuppAlignmentAnno::DEFAULT_READ_LENGTH; + compensation = 0.25 <= lengthRatio && lengthRatio <= 0.8; + } } + return {compensation, overhangIndex}; } - if (!distantHit && - (selectedSa1.isStrictFuzzy() || selectedSa2.isStrictFuzzy()) && - evidenceLevelIn < 3) { - hits = hitsInMref.getNumHits(); - } - return {!saMatch, hits}; -} - -double -SvEvent::determineGermlineClonalityBp(const BreakpointReduced &bp1, - const SuppAlignmentAnno &sa, - double clonalityInit) const { - auto maxClonality = clonalityInit; - if (sa.isDistant()) { - auto i = 0; - for (const auto &saRef : bp1.getGermlineInfo().getSuppMatches()) { - if (saRef.saCloseness(sa, - SuppAlignmentAnno::DEFAULTREADLENGTH * 6)) { - auto clonality = bp1.getGermlineInfo().getClonalities()[i]; - if (clonality > maxClonality) { - maxClonality = clonality; + + std::pair + SvEvent::processMrefHits(const MrefMatch &hitsInMref, + const SuppAlignmentAnno &sa, + int evidenceLevelIn) const { + auto initScore = hitsInMref.getNumConsevativeHits(); + auto distantHit = false; + auto saMatch = false; + short maxScore{0}; + short hits{initScore}; + for (const auto &saRef : hitsInMref.getSuppMatches()) { + if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6)) { + saMatch = true; + auto score = saRef.getSecondarySupport(); + if (score > maxScore) { + maxScore = score; + if (maxScore > initScore) { + hits = maxScore; + distantHit = true; + } } - break; } - ++i; } + if (!distantHit && + (selectedSa1.isStrictFuzzy() || selectedSa2.isStrictFuzzy()) && + evidenceLevelIn < 3) { + hits = hitsInMref.getNumHits(); + 
} + return {!saMatch, hits}; } - return maxClonality; -} -int -SvEvent::filterMatch(const BreakpointReduced &bp1, - const BreakpointReduced &bp2) { - if (suspicious != 0) { - return suspicious; - } - if (selectedSa1.isSuspicious() || selectedSa2.isSuspicious() || - (selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious() && - !(evidenceLevel1 == 3 && evidenceLevel2 == 3))) { - return 1; - } - if ((selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) || - (selectedSa2.getExpectedDiscordants() > 0 && mateRatio2 < 0.1)) { - return 2; - } - if (mrefHits1 > GERMLINEDBLIMIT) { - if (selectedSa1.isSemiSuspicious() && evidenceLevel1 < 3) { - if (!(selectedSa1.getSupport() > 9 || - selectedSa1.getSecondarySupport() > 9 || - selectedSa1.getMateSupport() > 9 || overhang1Compensation)) { - return 4; + double + SvEvent::determineGermlineClonalityBp(const BreakpointReduced &bp1, + const SuppAlignmentAnno &sa, + double clonalityInit) const { + auto maxClonality = clonalityInit; + if (sa.isDistant()) { + unsigned int i = 0; + for (const auto &saRef : bp1.getGermlineInfo().getSuppMatches()) { + if (saRef.saCloseness(sa, SuppAlignmentAnno::DEFAULT_READ_LENGTH * 6)) { + auto clonality = bp1.getGermlineInfo().getClonalities()[i]; + if (clonality > maxClonality) { + maxClonality = clonality; + } + break; + } + ++i; } } + return maxClonality; } - if (mrefHits2 > GERMLINEDBLIMIT) { - if (selectedSa2.isSemiSuspicious() && evidenceLevel2 < 3) { - if (!(selectedSa2.getSupport() > 9 || - selectedSa2.getSecondarySupport() > 9 || - selectedSa1.getMateSupport() > 9 || overhang2Compensation)) { - return 5; + + int + SvEvent::filterMatch(const BreakpointReduced &bp1, + const BreakpointReduced &bp2) { + if (suspicious != 0) { + return suspicious; + } + if (selectedSa1.isSuspicious() || selectedSa2.isSuspicious() || + (selectedSa1.isSemiSuspicious() && selectedSa2.isSemiSuspicious() && + !(evidenceLevel1 == 3 && evidenceLevel2 == 3))) { + return 1; + } + if 
((selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) || + (selectedSa2.getExpectedDiscordants() > 0 && mateRatio2 < 0.1)) { + return 2; + } + if (mrefHits1 > GERMLINE_DB_LIMIT) { + if (selectedSa1.isSemiSuspicious() && evidenceLevel1 < 3) { + if (!(selectedSa1.getSupport() > 9 || + selectedSa1.getSecondarySupport() > 9 || + selectedSa1.getMateSupport() > 9 || overhang1Compensation)) { + return 4; + } } } - } - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - if (clonalityRatio1 < CLONALITYSTRICTLOWTHRESHOLD && - clonalityRatio2 < CLONALITYSTRICTLOWTHRESHOLD) { - return 4; - } else if ((!germlineStatus1 || !germlineStatus2) && // - (mrefHits1 > GERMLINEDBLIMIT || - mrefHits2 > GERMLINEDBLIMIT) && - (eventSize > 0) && (eventSize < HALFDEFAULTREADLENGTH)) { - return 5; - } - } - if (!distant) { - if (semiSuspicious) { - return 13; + if (mrefHits2 > GERMLINE_DB_LIMIT) { + if (selectedSa2.isSemiSuspicious() && evidenceLevel2 < 3) { + if (!(selectedSa2.getSupport() > 9 || + selectedSa2.getSecondarySupport() > 9 || + selectedSa1.getMateSupport() > 9 || overhang2Compensation)) { + return 5; + } + } } - if (selectedSa1.isStrictFuzzy() || selectedSa2.isStrictFuzzy()) { - return 13; + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + if (clonalityRatio1 < CLONALITY_STRICT_LOW_THRESHOLD && + clonalityRatio2 < CLONALITY_STRICT_LOW_THRESHOLD) { + return 4; + } else if ((!germlineStatus1 || !germlineStatus2) && // + (mrefHits1 > GERMLINE_DB_LIMIT || + mrefHits2 > GERMLINE_DB_LIMIT) && + (eventSize > 0) && (eventSize < HALF_DEFAULT_READ_LENGTH)) { + return 5; + } } - if (mrefHits1 > GERMLINEDBLIMIT || mrefHits2 > GERMLINEDBLIMIT) { - if (totalEvidence1 < 5 || totalEvidence2 < 5) { + if (!distant) { + if (semiSuspicious) { return 13; } - } - } else { - if (bp1.getChrIndex() != bp2.getChrIndex()) { - if (selectedSa1.getMateSupport() == 0) { - return 6; - } else if (selectedSa2.getMateSupport() == 0) { - return 7; - } - } - auto 
threshold = - (semiSuspicious || selectedSa1.isProperPairErrorProne() || - selectedSa2.isProperPairErrorProne()) - ? 5 - : 3; - if (selectedSa1.getSupport() < threshold && - selectedSa1.getSecondarySupport() < threshold && - selectedSa2.getMateSupport() < threshold && - selectedSa2.getSupport() < threshold && - selectedSa2.getSecondarySupport() < threshold && - selectedSa2.getMateSupport() < threshold) { - return 8; - } - if (mateRatio1 < 0.4 && mateRatio2 < 0.4) { - return 11; - } - if (mateRatio1 < 0.25 || mateRatio2 < 0.25) { - return 12; - } - if (selectedSa1.isFuzzy() || selectedSa2.isFuzzy()) { - if (bp1.getChrIndex() == bp2.getChrIndex()) { - if (selectedSa1.isStrictFuzzy() || - selectedSa2.isStrictFuzzy()) { - if (abs(bp1.getPos() - bp2.getPos()) < 5000) { - return 14; - } - } + if (selectedSa1.isStrictFuzzy() || selectedSa2.isStrictFuzzy()) { + return 13; } - if (mrefHits1 > GERMLINEDBLIMIT || mrefHits2 > GERMLINEDBLIMIT) { - return 173; + if (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits2 > GERMLINE_DB_LIMIT) { + if (totalEvidence1 < 5 || totalEvidence2 < 5) { + return 13; + } } - if (selectedSa1.isFuzzy() && selectedSa2.isFuzzy()) { - if (mateRatio1 < 0.5 || mateRatio2 < 0.5) { - return 174; + } else { + if (bp1.getChrIndex() != bp2.getChrIndex()) { + if (selectedSa1.getMateSupport() == 0) { + return 6; + } else if (selectedSa2.getMateSupport() == 0) { + return 7; } } - if (selectedSa1.isFuzzy() && selectedSa1.getSupport() == 0 && - selectedSa1.getSecondarySupport() == 0 && - selectedSa2.isSemiSuspicious()) { - return 175; + auto threshold = + (semiSuspicious || selectedSa1.isProperPairErrorProne() || + selectedSa2.isProperPairErrorProne()) + ? 
5 + : 3; + if (selectedSa1.getSupport() < threshold && + selectedSa1.getSecondarySupport() < threshold && + selectedSa2.getMateSupport() < threshold && + selectedSa2.getSupport() < threshold && + selectedSa2.getSecondarySupport() < threshold && + selectedSa2.getMateSupport() < threshold) { + return 8; } - if (selectedSa2.isFuzzy() && selectedSa2.getSupport() == 0 && - selectedSa2.getSecondarySupport() == 0 && - selectedSa1.isSemiSuspicious()) { - return 176; + if (mateRatio1 < 0.4 && mateRatio2 < 0.4) { + return 11; } - } - } - if (bp1.getChrIndex() != bp2.getChrIndex() || - abs(bp1.getPos() - bp2.getPos()) > 150) { - auto eventTotal1 = bp1.getPairedBreaksSoft() + - bp1.getPairedBreaksHard() + - bp1.getUnpairedBreaksSoft(); - auto eventTotal2 = bp2.getPairedBreaksSoft() + - bp2.getPairedBreaksHard() + - bp2.getUnpairedBreaksSoft(); - if (bp1.getBreaksShortIndel() > eventTotal1 || - bp2.getBreaksShortIndel() > eventTotal2) { - return 16; - } - } - if (clonalityStatus1 == EXTREME_SUBCLONAL) { - return 39; - } - if (clonalityStatus2 == EXTREME_SUBCLONAL) { - return 40; - } - if (artifactStatus == ARTIFACT) { - return 41; - } - if (NOCONTROLMODE || germlineStatus1 || germlineStatus2) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - return 42; - } - } - if (mrefHits1 > BPFREQTHRESHOLD) { - if (NOCONTROLMODE || germlineStatus1 || germlineStatus2 || - mrefHits2 > GERMLINEDBLIMIT) { - return 431; - } - if (mrefHits1 > RELAXEDBPFREQTHRESHOLD * 2.5) { - return 431; - } - if (!(mrefHits1Conservative && evidenceLevel1 == 3 && - evidenceLevel2 > 1 && mrefHits2 == 0 && !selectedSa1.isFuzzy() && - !selectedSa2.isFuzzy())) { - if (mrefHits1 > RELAXEDBPFREQTHRESHOLD) { - return 431; + if (mateRatio1 < 0.25 || mateRatio2 < 0.25) { + return 12; } - } - } - if (mrefHits2 > BPFREQTHRESHOLD) { - if (NOCONTROLMODE || germlineStatus1 || germlineStatus2 || - mrefHits1 > GERMLINEDBLIMIT) { - return 432; - } - if (mrefHits2 > RELAXEDBPFREQTHRESHOLD * 2.5) { - return 
432; - } - if (!(mrefHits2Conservative && evidenceLevel1 > 1 && - evidenceLevel2 == 3 && mrefHits1 == 0 && !selectedSa1.isFuzzy() && - !selectedSa2.isFuzzy())) { - if (mrefHits2 > RELAXEDBPFREQTHRESHOLD) { - return 432; + if (selectedSa1.isFuzzy() || selectedSa2.isFuzzy()) { + if (bp1.getChrIndex() == bp2.getChrIndex()) { + if (selectedSa1.isStrictFuzzy() || + selectedSa2.isStrictFuzzy()) { + if (abs(static_cast(bp1.getPos()) - static_cast(bp2.getPos())) < 5000l) { + return 14; + } + } + } + if (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits2 > GERMLINE_DB_LIMIT) { + return 173; + } + if (selectedSa1.isFuzzy() && selectedSa2.isFuzzy()) { + if (mateRatio1 < 0.5 || mateRatio2 < 0.5) { + return 174; + } + } + if (selectedSa1.isFuzzy() && selectedSa1.getSupport() == 0 && + selectedSa1.getSecondarySupport() == 0 && + selectedSa2.isSemiSuspicious()) { + return 175; + } + if (selectedSa2.isFuzzy() && selectedSa2.getSupport() == 0 && + selectedSa2.getSecondarySupport() == 0 && + selectedSa1.isSemiSuspicious()) { + return 176; + } } } - } - return suspicious; -} - -int -SvEvent::filterMatchSingle(const BreakpointReduced &bp1, - const BreakpointReduced &bp2) { - if (suspicious != 0) { - return suspicious; - } - if (selectedSa1.isSuspicious() || semiSuspicious) { - return 1; - } - if (selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) { - return 2; - } - if (mrefHits1 > GERMLINEDBLIMIT && - !(bp1.getChrIndex() == 999 || bp1.getChrIndex() == 1000)) { - return 171; - } - if (mrefHits2 > GERMLINEDBLIMIT && - !(bp2.getChrIndex() == 999 || bp2.getChrIndex() == 1000)) { - return 171; - } - if (!distant) { - if (inverted) { - return 22; - } - if (eventSize > 0 && eventSize < HALFDEFAULTREADLENGTH) { - return 23; - } - if (eventType == 3) { - return 23; - } - if (eventSize > 150) { + if (bp1.getChrIndex() != bp2.getChrIndex() || + abs(static_cast(bp1.getPos()) - static_cast(bp2.getPos())) > 150l) { auto eventTotal1 = bp1.getPairedBreaksSoft() + bp1.getPairedBreaksHard() + 
bp1.getUnpairedBreaksSoft(); auto eventTotal2 = bp2.getPairedBreaksSoft() + bp2.getPairedBreaksHard() + bp2.getUnpairedBreaksSoft(); - if ((bp1.getBreaksShortIndel() / (0.0 + eventTotal1) > 0.5)) { - return 20; - } else if ((bp2.getBreaksShortIndel() / (0.0 + eventTotal2)) > - 0.5) { - return 21; + if (bp1.getBreaksShortIndel() > eventTotal1 || + bp2.getBreaksShortIndel() > eventTotal2) { + return 16; } } - if (selectedSa1.isFuzzy() || selectedSa1.isSemiSuspicious() || - evidenceLevel1 == 1 || - (selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) < - 3) { - return 25; + if (clonalityStatus1 == EXTREME_SUBCLONAL) { + return 39; } - if (mrefHits1 > GERMLINEDBLIMIT || mrefHits2 > GERMLINEDBLIMIT) { - if (totalEvidence1 < 5) { - return 25; - } + if (clonalityStatus2 == EXTREME_SUBCLONAL) { + return 40; } - } else { - if (selectedSa1.getSupport() < 3 && - selectedSa1.getSecondarySupport() < 3 && - selectedSa1.getMateSupport() < 3) { - return 18; + if (artifactStatus == ARTIFACT) { + return 41; } - if (bp1.getChrIndex() != bp2.getChrIndex()) { - if (selectedSa1.getMateSupport() == 0) { - return 18; - } else if (evidenceLevel1 == 1 && bp2.getMateSupport() == 0) { - return 19; + if (NO_CONTROL_MODE || germlineStatus1 || germlineStatus2) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + return 42; } } - if (selectedSa1.getMateSupport() < 3 || mateRatio1 < 0.4) { - return 24; - } - if (selectedSa1.isFuzzy() || (selectedSa1.getSupport() + - selectedSa1.getSecondarySupport()) < 3) { - if (bp1.getChrIndex() == bp2.getChrIndex()) { - if (abs(bp1.getPos() - bp2.getPos()) < 5000) { - return 26; - } - if (inverted && abs(bp1.getPos() - bp2.getPos()) < 10000) { - return 27; - } + if (mrefHits1 > BP_FREQ_THRESHOLD) { + if (NO_CONTROL_MODE || germlineStatus1 || germlineStatus2 || + mrefHits2 > GERMLINE_DB_LIMIT) { + return 431; + } + if (mrefHits1 > RELAXED_BP_FREQ_THRESHOLD * 2.5) { + return 431; } - if (selectedSa1.isFuzzy() && 
selectedSa1.getSupport() == 0 && - selectedSa1.getSecondarySupport() == 0) { - return 271; + if (!(mrefHits1Conservative && evidenceLevel1 == 3 && + evidenceLevel2 > 1 && mrefHits2 == 0 && !selectedSa1.isFuzzy() && + !selectedSa2.isFuzzy())) { + if (mrefHits1 > RELAXED_BP_FREQ_THRESHOLD) { + return 431; + } } } - } - auto eventTotal1 = bp1.getPairedBreaksSoft() + bp1.getPairedBreaksHard() + - bp1.getUnpairedBreaksSoft(); - auto eventTotal2 = bp2.getPairedBreaksSoft() + bp2.getPairedBreaksHard() + - bp2.getUnpairedBreaksSoft(); - if (bp1.getBreaksShortIndel() > eventTotal1 || - bp2.getBreaksShortIndel() > eventTotal2) { - return 28; - } - if (clonalityStatus1 == EXTREME_SUBCLONAL) { - return 39; - } - if (clonalityStatus2 == EXTREME_SUBCLONAL) { - return 40; - } - if (artifactStatus == ARTIFACT) { - return 41; - } - if (NOCONTROLMODE || germlineStatus1 || germlineStatus2) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - return 42; + if (mrefHits2 > BP_FREQ_THRESHOLD) { + if (NO_CONTROL_MODE || germlineStatus1 || germlineStatus2 || + mrefHits1 > GERMLINE_DB_LIMIT) { + return 432; + } + if (mrefHits2 > RELAXED_BP_FREQ_THRESHOLD * 2.5) { + return 432; + } + if (!(mrefHits2Conservative && evidenceLevel1 > 1 && + evidenceLevel2 == 3 && mrefHits1 == 0 && !selectedSa1.isFuzzy() && + !selectedSa2.isFuzzy())) { + if (mrefHits2 > RELAXED_BP_FREQ_THRESHOLD) { + return 432; + } + } } + return suspicious; } - if (mrefHits1 > BPFREQTHRESHOLD) { - if (chrIndex1 != 999 || chrIndex2 == 999 || - mrefHits2 > BPFREQTHRESHOLD) { - return 431; + + int + SvEvent::filterMatchSingle(const BreakpointReduced &bp1, + const BreakpointReduced &bp2) { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (suspicious != 0) { + return suspicious; } - if (mrefHits1 > RELAXEDBPFREQTHRESHOLD || NOCONTROLMODE || - germlineStatus1 || germlineStatus2 || mrefHits2 > GERMLINEDBLIMIT) { - return 431; + if (selectedSa1.isSuspicious() || 
semiSuspicious) { + return 1; } - } - if (mrefHits2 > BPFREQTHRESHOLD) { - if (chrIndex1 == 999 || chrIndex2 != 999 || - mrefHits1 > BPFREQTHRESHOLD) { - return 432; + if (selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) { + return 2; } - if (mrefHits2 > RELAXEDBPFREQTHRESHOLD || NOCONTROLMODE || - germlineStatus1 || germlineStatus2 || mrefHits1 > GERMLINEDBLIMIT) { - return 432; + if (mrefHits1 > GERMLINE_DB_LIMIT && + !(chrConverter.isDecoy(bp1.getChrIndex()) || chrConverter.isVirus(bp1.getChrIndex()))) { + return 171; } - } - return suspicious; -} - -int -SvEvent::filterMatchUnknown(const BreakpointReduced &bp1) { - if (suspicious != 0) { - return suspicious; - } - if (selectedSa1.isSuspicious() || selectedSa1.isSemiSuspicious()) { - return 1; - } - if (selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) { - return 2; - } - auto eventTotal1 = bp1.getPairedBreaksSoft() + bp1.getPairedBreaksHard() + - bp1.getUnpairedBreaksSoft(); - if (eventTotal1 + selectedSa1.getMateSupport() + bp1.getNormalSpans() < - 10) { - return 29; - } - if (mrefHits1 > GERMLINEDBLIMIT && - !(bp1.getChrIndex() == 999 || bp1.getChrIndex() == 1000)) { - return 301; - } - if (mrefHits2 > GERMLINEDBLIMIT && !(selectedSa1.getChrIndex() == 999 || - selectedSa1.getChrIndex() == 1000)) { - return 302; - } - if (!distant) { - if (inverted || totalEvidence1 < 5 || evidenceLevel1 == 1) { - return 30; + if (mrefHits2 > GERMLINE_DB_LIMIT && + !(chrConverter.isDecoy(bp2.getChrIndex()) || chrConverter.isVirus(bp2.getChrIndex()))) { + return 171; } - if (eventSize > 0 && eventSize < HALFDEFAULTREADLENGTH) { - return 31; + if (!distant) { + if (inverted) { + return 22; + } + if (eventSize > 0 && eventSize < HALF_DEFAULT_READ_LENGTH) { + return 23; + } + if (eventType == 3) { + return 23; + } + if (eventSize > 150) { + auto eventTotal1 = bp1.getPairedBreaksSoft() + + bp1.getPairedBreaksHard() + + bp1.getUnpairedBreaksSoft(); + auto eventTotal2 = bp2.getPairedBreaksSoft() + + 
bp2.getPairedBreaksHard() + + bp2.getUnpairedBreaksSoft(); + if ((bp1.getBreaksShortIndel() / (0.0 + eventTotal1) > 0.5)) { + return 20; + } else if ((bp2.getBreaksShortIndel() / (0.0 + eventTotal2)) > + 0.5) { + return 21; + } + } + if (selectedSa1.isFuzzy() || selectedSa1.isSemiSuspicious() || + evidenceLevel1 == 1 || + (selectedSa1.getSupport() + selectedSa1.getSecondarySupport()) < + 3) { + return 25; + } + if (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits2 > GERMLINE_DB_LIMIT) { + if (totalEvidence1 < 5) { + return 25; + } + } + } else { + if (selectedSa1.getSupport() < 3 && + selectedSa1.getSecondarySupport() < 3 && + selectedSa1.getMateSupport() < 3) { + return 18; + } + if (bp1.getChrIndex() != bp2.getChrIndex()) { + if (selectedSa1.getMateSupport() == 0) { + return 18; + } else if (evidenceLevel1 == 1 && bp2.getMateSupport() == 0) { + return 19; + } + } + if (selectedSa1.getMateSupport() < 3 || mateRatio1 < 0.4) { + return 24; + } + if (selectedSa1.isFuzzy() || (selectedSa1.getSupport() + + selectedSa1.getSecondarySupport()) < 3) { + if (bp1.getChrIndex() == bp2.getChrIndex()) { + if (abs(static_cast(bp1.getPos()) - static_cast(bp2.getPos())) < 5000l) { + return 26; + } + if (inverted && abs(static_cast(bp1.getPos()) - static_cast(bp2.getPos())) < 10000l) { + return 27; + } + } + if (selectedSa1.isFuzzy() && selectedSa1.getSupport() == 0 && + selectedSa1.getSecondarySupport() == 0) { + return 271; + } + } } - if (eventType == 3) { - return 31; + auto eventTotal1 = bp1.getPairedBreaksSoft() + bp1.getPairedBreaksHard() + + bp1.getUnpairedBreaksSoft(); + auto eventTotal2 = bp2.getPairedBreaksSoft() + bp2.getPairedBreaksHard() + + bp2.getUnpairedBreaksSoft(); + if (bp1.getBreaksShortIndel() > eventTotal1 || + bp2.getBreaksShortIndel() > eventTotal2) { + return 28; } - if (selectedSa1.isFuzzy() || selectedSa1.isSemiSuspicious() || - selectedSa1.isStrictFuzzyCandidate()) { - return 31; + if (clonalityStatus1 == EXTREME_SUBCLONAL) { + return 39; } - if (mrefHits1 > 
GERMLINEDBLIMIT || mrefHits2 > GERMLINEDBLIMIT) { - if (totalEvidence1 < 5) { - return 31; - } + if (clonalityStatus2 == EXTREME_SUBCLONAL) { + return 40; } - } else { - if (selectedSa1.getSupport() < 3 && - selectedSa1.getSecondarySupport() < 3 && - selectedSa1.getMateSupport() < 3) { - return 18; + if (artifactStatus == ARTIFACT) { + return 41; } - if (selectedSa1.getMateSupport() < 3) { - if ((mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) || - (evidenceLevel1 < 3 && selectedSa1.getSupport() < 5)) { - return 32; + if (NO_CONTROL_MODE || germlineStatus1 || germlineStatus2) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + return 42; } } - if (mateRatio1 < 0.33 || - (evidenceLevel1 != 3 && totalEvidence1 < 5 && mateRatio1 < 0.5)) { - return 33; + if (mrefHits1 > BP_FREQ_THRESHOLD) { + if (!chrConverter.isDecoy(chrIndex1) || chrConverter.isDecoy(chrIndex2) || + mrefHits2 > BP_FREQ_THRESHOLD) { + return 431; + } + if (mrefHits1 > RELAXED_BP_FREQ_THRESHOLD || NO_CONTROL_MODE || + germlineStatus1 || germlineStatus2 || mrefHits2 > GERMLINE_DB_LIMIT) { + return 431; + } } - if (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate()) { - if (selectedSa1.getSupport() == 0 && - selectedSa1.getSecondarySupport() == 0) { - return 36; + if (mrefHits2 > BP_FREQ_THRESHOLD) { + if (chrConverter.isDecoy(chrIndex1) || !chrConverter.isDecoy(chrIndex2) || + mrefHits1 > BP_FREQ_THRESHOLD) { + return 432; + } + if (mrefHits2 > RELAXED_BP_FREQ_THRESHOLD || NO_CONTROL_MODE || + germlineStatus1 || germlineStatus2 || mrefHits1 > GERMLINE_DB_LIMIT) { + return 432; } } + return suspicious; } - if ((bp1.getBreaksShortIndel() > eventTotal1)) { - return 38; - } - if (clonalityStatus1 == EXTREME_SUBCLONAL) { - return 44; - } - if (artifactStatus == ARTIFACT) { - return 45; - } - if (NOCONTROLMODE || (germlineStatus1 || germlineStatus2)) { - if (NOCONTROLMODE) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - return 46; + + int 
+ SvEvent::filterMatchUnknown(const BreakpointReduced &bp1) { + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + if (suspicious != 0) { + return suspicious; + } + if (selectedSa1.isSuspicious() || selectedSa1.isSemiSuspicious()) { + return 1; + } + if (selectedSa1.getExpectedDiscordants() > 0 && mateRatio1 < 0.1) { + return 2; + } + auto eventTotal1 = bp1.getPairedBreaksSoft() + bp1.getPairedBreaksHard() + + bp1.getUnpairedBreaksSoft(); + if (eventTotal1 + selectedSa1.getMateSupport() + bp1.getNormalSpans() < + 10) { + return 29; + } + if (mrefHits1 > GERMLINE_DB_LIMIT && + !(chrConverter.isDecoy(bp1.getChrIndex()) || + chrConverter.isVirus(bp1.getChrIndex()))) { + return 301; + } + if (mrefHits2 > GERMLINE_DB_LIMIT && + !(chrConverter.isDecoy(selectedSa1.getChrIndex()) || + chrConverter.isVirus(selectedSa1.getChrIndex()))) { + return 302; + } + if (!distant) { + if (inverted || totalEvidence1 < 5 || evidenceLevel1 == 1) { + return 30; + } + if (eventSize > 0 && eventSize < HALF_DEFAULT_READ_LENGTH) { + return 31; + } + if (eventType == 3) { + return 31; + } + if (selectedSa1.isFuzzy() || selectedSa1.isSemiSuspicious() || + selectedSa1.isStrictFuzzyCandidate()) { + return 31; + } + if (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits2 > GERMLINE_DB_LIMIT) { + if (totalEvidence1 < 5) { + return 31; + } } } else { - if (mrefHits1 > GERMLINEDBLIMIT || mrefHits2 > GERMLINEDBLIMIT) { - return 47; + if (selectedSa1.getSupport() < 3 && + selectedSa1.getSecondarySupport() < 3 && + selectedSa1.getMateSupport() < 3) { + return 18; + } + if (selectedSa1.getMateSupport() < 3) { + if ((mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) || + (evidenceLevel1 < 3 && selectedSa1.getSupport() < 5)) { + return 32; + } + } + if (mateRatio1 < 0.33 || + (evidenceLevel1 != 3 && totalEvidence1 < 5 && mateRatio1 < 0.5)) { + return 33; + } + if (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate()) { + if (selectedSa1.getSupport() == 0 && 
+ selectedSa1.getSecondarySupport() == 0) { + return 36; + } } } - } - if (mrefHits1 > BPFREQTHRESHOLD) { - if (chrIndex1 != 999 || selectedSa1.getChrIndex() == 999 || - mrefHits2 > BPFREQTHRESHOLD) { - return 471; + if ((bp1.getBreaksShortIndel() > eventTotal1)) { + return 38; } - if (mrefHits1 > 3 * BPFREQTHRESHOLD || NOCONTROLMODE || - germlineStatus1 || germlineStatus2 || mrefHits2 > GERMLINEDBLIMIT) { - return 471; + if (clonalityStatus1 == EXTREME_SUBCLONAL) { + return 44; } - } - if (mrefHits2 > BPFREQTHRESHOLD) { - if (chrIndex2 == 999 || selectedSa1.getChrIndex() != 999 || - mrefHits1 > BPFREQTHRESHOLD) { - return 472; + if (artifactStatus == ARTIFACT) { + return 45; } - if (mrefHits2 > 3 * BPFREQTHRESHOLD || NOCONTROLMODE || - germlineStatus1 || germlineStatus2 || mrefHits1 > GERMLINEDBLIMIT) { - return 472; + if (NO_CONTROL_MODE || (germlineStatus1 || germlineStatus2)) { + if (NO_CONTROL_MODE) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + return 46; + } + } else { + if (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits2 > GERMLINE_DB_LIMIT) { + return 47; + } + } } - } - return suspicious; -} - -pair -SvEvent::assessSvClonality(const BreakpointReduced &bp, - int eventSupportTotal) const { - auto artifactTotal1 = bp.getLowQualSpansSoft() + bp.getLowQualBreaksSoft() + - bp.getRepetitiveOverhangBreaks(); - auto eventTotal1 = eventSupportTotal + bp.getUnpairedBreaksSoft(); - auto artifactRatio = - (artifactTotal1 + 0.0) / (artifactTotal1 + eventTotal1); - auto clonalityRatio = - (eventTotal1 + 0.0) / (eventTotal1 + bp.getNormalSpans()); - return {artifactRatio, clonalityRatio}; -} - -ClonalityStatus -SvEvent::assessBreakpointClonalityStatus(double clonalityRatioIn, - const BreakpointReduced &bp1, - const BreakpointReduced &bp2) const { - if (clonalityRatioIn < CLONALITYLOWTHRESHOLD) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - return EXTREME_SUBCLONAL; + if (mrefHits1 > BP_FREQ_THRESHOLD) { + if 
(!chrConverter.isDecoy(chrIndex1) || chrConverter.isDecoy(selectedSa1.getChrIndex()) || + mrefHits2 > BP_FREQ_THRESHOLD) { + return 471; + } + if (mrefHits1 > 3 * BP_FREQ_THRESHOLD || NO_CONTROL_MODE || + germlineStatus1 || germlineStatus2 || mrefHits2 > GERMLINE_DB_LIMIT) { + return 471; + } } - if (distant) { - if ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.4) || - (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.6)) { - return SUBCLONAL; + if (mrefHits2 > BP_FREQ_THRESHOLD) { + if (chrConverter.isDecoy(chrIndex2) || !chrConverter.isDecoy(selectedSa1.getChrIndex()) || + mrefHits1 > BP_FREQ_THRESHOLD) { + return 472; } - if ((selectedSa2.getMateSupport() > 9 && mateRatio2 >= 0.4) || - (selectedSa2.getMateSupport() > 4 && mateRatio2 >= 0.6)) { - return SUBCLONAL; + if (mrefHits2 > 3 * BP_FREQ_THRESHOLD || NO_CONTROL_MODE || + germlineStatus1 || germlineStatus2 || mrefHits1 > GERMLINE_DB_LIMIT) { + return 472; } } - return EXTREME_SUBCLONAL; - } else if (clonalityRatioIn >= CLONALITYHIGHTHRESHOLD) { - return HOMO; - } else { - return HETERO; + return suspicious; } -} - -ClonalityStatus -SvEvent::assessBreakpointClonalityStatusSingle( - double clonalityRatioIn, const BreakpointReduced &bp1, - const BreakpointReduced &bp2) const { - if (clonalityRatioIn < CLONALITYLOWTHRESHOLD) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { + + std::pair + SvEvent::assessSvClonality(const BreakpointReduced &bp, + int eventSupportTotal) const { + auto artifactTotal1 = bp.getLowQualSpansSoft() + bp.getLowQualBreaksSoft() + + bp.getRepetitiveOverhangBreaks(); + auto eventTotal1 = eventSupportTotal + bp.getUnpairedBreaksSoft(); + auto artifactRatio = + (artifactTotal1 + 0.0) / (artifactTotal1 + eventTotal1); + auto clonalityRatio = + (eventTotal1 + 0.0) / (eventTotal1 + bp.getNormalSpans()); + return {artifactRatio, clonalityRatio}; + } + + ClonalityStatus + SvEvent::assessBreakpointClonalityStatus(double clonalityRatioIn, + const BreakpointReduced 
&bp1[[gnu::unused]], + const BreakpointReduced &bp2[[gnu::unused]]) const { + if (clonalityRatioIn < CLONALITY_LOW_THRESHOLD) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + return EXTREME_SUBCLONAL; + } + if (distant) { + if ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.4) || + (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.6)) { + return SUBCLONAL; + } + if ((selectedSa2.getMateSupport() > 9 && mateRatio2 >= 0.4) || + (selectedSa2.getMateSupport() > 4 && mateRatio2 >= 0.6)) { + return SUBCLONAL; + } + } return EXTREME_SUBCLONAL; - } - if (distant && - ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.5) || - (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.8))) { - return SUBCLONAL; + } else if (clonalityRatioIn >= CLONALITY_HIGH_THRESHOLD) { + return HOMO; } else { - return EXTREME_SUBCLONAL; + return HETERO; } - } else if (clonalityRatioIn >= CLONALITYHIGHTHRESHOLD) { - return HOMO; - } else { - return HETERO; } -} -ClonalityStatus -SvEvent::assessBreakpointClonalityStatusUnknown( - double clonalityRatioIn, const BreakpointReduced &bp1) const { - if (clonalityRatioIn < CLONALITYLOWTHRESHOLD) { - if (mrefHits1 > GERMLINEDBLIMIT) { - return EXTREME_SUBCLONAL; - } - if (distant && - ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.5) || - (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.8))) { - return SUBCLONAL; + ClonalityStatus + SvEvent::assessBreakpointClonalityStatusSingle( + double clonalityRatioIn, + const BreakpointReduced &bp1 [[gnu::unused]], + const BreakpointReduced &bp2 [[gnu::unused]] + ) const { + if (clonalityRatioIn < CLONALITY_LOW_THRESHOLD) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + return EXTREME_SUBCLONAL; + } + if (distant && + ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.5) || + (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.8))) { + return SUBCLONAL; + } else { + return EXTREME_SUBCLONAL; + } + } else if (clonalityRatioIn >= 
CLONALITY_HIGH_THRESHOLD) { + return HOMO; } else { - return EXTREME_SUBCLONAL; + return HETERO; } - } else if (clonalityRatioIn >= CLONALITYHIGHTHRESHOLD) { - return HOMO; - } else { - return HETERO; } -} - -void -SvEvent::assessSvArtifactStatus(const BreakpointReduced &bp1, - const BreakpointReduced &bp2) { - if (artifactRatio1 < ARTIFACTFREQLOWTHRESHOLD && - artifactRatio2 < ARTIFACTFREQLOWTHRESHOLD) { - artifactStatus = CLEAN; - return; - } - if (artifactRatio1 > 0.85 && artifactRatio2 > 0.85) { - artifactStatus = ARTIFACT; - return; + + ClonalityStatus + SvEvent::assessBreakpointClonalityStatusUnknown( + double clonalityRatioIn, + const BreakpointReduced &bp1 [[gnu::unused]] + ) const { + if (clonalityRatioIn < CLONALITY_LOW_THRESHOLD) { + if (mrefHits1 > GERMLINE_DB_LIMIT) { + return EXTREME_SUBCLONAL; + } + if (distant && + ((selectedSa1.getMateSupport() > 9 && mateRatio1 >= 0.5) || + (selectedSa1.getMateSupport() > 4 && mateRatio1 >= 0.8))) { + return SUBCLONAL; + } else { + return EXTREME_SUBCLONAL; + } + } else if (clonalityRatioIn >= CLONALITY_HIGH_THRESHOLD) { + return HOMO; + } else { + return HETERO; + } } - if (artifactRatio1 > 0.85 || artifactRatio2 > 0.85) { - if (!(evidenceLevel1 >= 2 || evidenceLevel2 >= 2)) { + + void + SvEvent::assessSvArtifactStatus(const BreakpointReduced &bp1 [[gnu::unused]], + const BreakpointReduced &bp2 [[gnu::unused]] + ) { + if (artifactRatio1 < ARTIFACT_FREQ_LOW_THRESHOLD && + artifactRatio2 < ARTIFACT_FREQ_LOW_THRESHOLD) { + artifactStatus = CLEAN; + return; + } + if (artifactRatio1 > 0.85 && artifactRatio2 > 0.85) { artifactStatus = ARTIFACT; return; } + if (artifactRatio1 > 0.85 || artifactRatio2 > 0.85) { + if (!(evidenceLevel1 >= 2 || evidenceLevel2 >= 2)) { + artifactStatus = ARTIFACT; + return; + } + } + if (artifactRatio1 > ARTIFACT_FREQ_HIGH_THRESHOLD && + artifactRatio2 > ARTIFACT_FREQ_HIGH_THRESHOLD && + (mrefHits1 > GERMLINE_DB_LIMIT || mrefHits1 > GERMLINE_DB_LIMIT)) { + artifactStatus = ARTIFACT; + 
return; + } + if ((artifactRatio1 > ARTIFACT_FREQ_HIGH_THRESHOLD || + artifactRatio2 > ARTIFACT_FREQ_HIGH_THRESHOLD) && + (clonalityStatus1 == EXTREME_SUBCLONAL || + clonalityStatus2 == EXTREME_SUBCLONAL)) { + artifactStatus = ARTIFACT; + return; + } + artifactStatus = BORDERLINE; } - if (artifactRatio1 > ARTIFACTFREQHIGHTHRESHOLD && - artifactRatio2 > ARTIFACTFREQHIGHTHRESHOLD && - (mrefHits1 > GERMLINEDBLIMIT || mrefHits1 > GERMLINEDBLIMIT)) { - artifactStatus = ARTIFACT; - return; - } - if ((artifactRatio1 > ARTIFACTFREQHIGHTHRESHOLD || - artifactRatio2 > ARTIFACTFREQHIGHTHRESHOLD) && - (clonalityStatus1 == EXTREME_SUBCLONAL || - clonalityStatus2 == EXTREME_SUBCLONAL)) { - artifactStatus = ARTIFACT; - return; - } - artifactStatus = BORDERLINE; -} - -void -SvEvent::assessSvArtifactStatusUnknown() { - if (artifactRatio1 < ARTIFACTFREQLOWTHRESHOLD && - artifactRatio2 < ARTIFACTFREQLOWTHRESHOLD) { - artifactStatus = CLEAN; - return; - } - if (artifactRatio1 > ARTIFACTFREQHIGHTHRESHOLD) { - artifactStatus = ARTIFACT; - return; + + void + SvEvent::assessSvArtifactStatusUnknown() { + if (artifactRatio1 < ARTIFACT_FREQ_LOW_THRESHOLD && + artifactRatio2 < ARTIFACT_FREQ_LOW_THRESHOLD) { + artifactStatus = CLEAN; + return; + } + if (artifactRatio1 > ARTIFACT_FREQ_HIGH_THRESHOLD) { + artifactStatus = ARTIFACT; + return; + } + artifactStatus = BORDERLINE; } - artifactStatus = BORDERLINE; -} - -int -SvEvent::assessEventScore(bool hardClipSuspiciousCall, int inputScoreCategory) { - // inputscore 0: UNKNOWN partner - // inputscore 1: known partner with no SA signal - // inputscore 2: known partner with matching SA signal - if (inputScoreCategory == 0) { - if (distant) { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 30 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; - return 1; - } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - 
selectedSa1.getMateSupport() < 20) { - eventType = 5; - return 1; - } - } else if (mrefHits1 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 50 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; - return 1; + + int + SvEvent::assessEventScore(bool hardClipSuspiciousCall, int inputScoreCategory) { + // inputscore 0: UNKNOWN partner + // inputscore 1: known partner with no SA signal + // inputscore 2: known partner with matching SA signal + if (inputScoreCategory == 0) { + if (distant) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 30 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 1; + } + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 1; + } + } else if (mrefHits1 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 50 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 1; + } + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 1; + } } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 20) { - eventType = 5; + if ((selectedSa1.isFuzzy() || + selectedSa1.isStrictFuzzyCandidate())) { + if (evidenceLevel1 == 3 || + !selectedSa1.isStrictFuzzyCandidate()) { + if ((selectedSa1.getMateSupport() > 9 && + mateRatio1 >= 0.5) // + || (selectedSa1.getMateSupport() > 5 && + (selectedSa1.getExpectedDiscordants() - + selectedSa1.getMateSupport()) < 3)) { + if (mrefHits1 == 0 || mrefHits2 == 0) { + return 4; + } + } + } return 1; } - } - if ((selectedSa1.isFuzzy() || - selectedSa1.isStrictFuzzyCandidate())) { - if (evidenceLevel1 == 3 || + if (!hardClipSuspiciousCall && 
!selectedSa1.isStrictFuzzyCandidate()) { - if ((selectedSa1.getMateSupport() > 9 && - mateRatio1 >= 0.5) // - || (selectedSa1.getMateSupport() > 5 && - (selectedSa1.getExpectedDiscordants() - - selectedSa1.getMateSupport()) < 3)) { - if (mrefHits1 == 0 || mrefHits2 == 0) { - return 4; - } + if (selectedSa1.getSupport() > 4 || evidenceLevel1 == 3 || + selectedSa1.getMateSupport() > 9) { + return 4; } } - return 1; } - if (!hardClipSuspiciousCall && - !selectedSa1.isStrictFuzzyCandidate()) { - if (selectedSa1.getSupport() > 4 || evidenceLevel1 == 3 || - selectedSa1.getMateSupport() > 9) { - return 4; - } - } - } - return 1; - } else if (inputScoreCategory == 2) { - // auto messageMode = selectedSa1.getChrIndex() == 11 && - //(selectedSa1.getPos() == 2261373 && selectedSa2.getPos() == 2148480); - if (!distant) { - if (totalEvidence1 < 5 && totalEvidence2 < 5) { - if (mrefHits1 > GERMLINEDBLIMIT || - mrefHits2 > GERMLINEDBLIMIT) { - return 1; - } - return 3; - } - } else { - if (mrefHits1 > GERMLINEDBLIMIT && mrefHits2 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 30 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; - return 2; - } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 20) { - eventType = 5; - return 2; - } - if (selectedSa2.getSupport() > 30 && - selectedSa2.getSecondarySupport() < 10 && - selectedSa2.getMateSupport() < 10) { - eventType = 5; - return 2; - } - if (selectedSa2.getSupport() > 100 && - selectedSa2.getSecondarySupport() < 10 && - selectedSa2.getMateSupport() < 20) { - eventType = 5; - return 2; - } - } else if (mrefHits1 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 50 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; - return 2; - } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 20) { - 
eventType = 5; - return 2; + return 1; + } else if (inputScoreCategory == 2) { + // auto messageMode = selectedSa1.getChrIndex() == 11 && + //(selectedSa1.getPos() == 2261373 && selectedSa2.getPos() == 2148480); + if (!distant) { + if (totalEvidence1 < 5 && totalEvidence2 < 5) { + if (mrefHits1 > GERMLINE_DB_LIMIT || + mrefHits2 > GERMLINE_DB_LIMIT) { + return 1; + } + return 3; } + } else { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 30 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 2; + } + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 2; + } + if (selectedSa2.getSupport() > 30 && + selectedSa2.getSecondarySupport() < 10 && + selectedSa2.getMateSupport() < 10) { + eventType = 5; + return 2; + } + if (selectedSa2.getSupport() > 100 && + selectedSa2.getSecondarySupport() < 10 && + selectedSa2.getMateSupport() < 20) { + eventType = 5; + return 2; + } + } else if (mrefHits1 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 50 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 2; + } + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 2; + } - } else if (mrefHits2 > GERMLINEDBLIMIT) { - if (selectedSa2.getSupport() > 50 && - selectedSa2.getSecondarySupport() < 10 && - selectedSa2.getMateSupport() < 10) { - eventType = 5; - return 2; - } - if (selectedSa2.getSupport() > 100 && - selectedSa2.getSecondarySupport() < 10 && - selectedSa2.getMateSupport() < 20) { - eventType = 5; - return 2; - } - } - if (semiSuspicious) { - if (selectedSa1.isSemiSuspicious() && evidenceLevel1 < 3) { - return 2; + } else if (mrefHits2 > GERMLINE_DB_LIMIT) { + if (selectedSa2.getSupport() > 50 && + 
selectedSa2.getSecondarySupport() < 10 && + selectedSa2.getMateSupport() < 10) { + eventType = 5; + return 2; + } + if (selectedSa2.getSupport() > 100 && + selectedSa2.getSecondarySupport() < 10 && + selectedSa2.getMateSupport() < 20) { + eventType = 5; + return 2; + } } - if (selectedSa1.isSemiSuspicious() && - (selectedSa1.getSecondarySupport() == 0 || - selectedSa1.getSupport() == 0) && - evidenceLevel2 == 0) { - return 2; + if (semiSuspicious) { + if (selectedSa1.isSemiSuspicious() && evidenceLevel1 < 3) { + return 2; + } + if (selectedSa1.isSemiSuspicious() && + (selectedSa1.getSecondarySupport() == 0 || + selectedSa1.getSupport() == 0) && + evidenceLevel2 == 0) { + return 2; + } + if (selectedSa2.isSemiSuspicious()) { + if (evidenceLevel1 < 3) { + auto oneSidedScore = + assessEventScore(hardClipSuspiciousCall, 0); + if (oneSidedScore > 2) { + semiSuspicious = false; + return oneSidedScore; + } else { + return 2; + } + } + } } - if (selectedSa2.isSemiSuspicious()) { - if (evidenceLevel1 < 3) { - auto oneSidedScore = - assessEventScore(hardClipSuspiciousCall, 0); - if (oneSidedScore > 2) { - semiSuspicious = false; - return oneSidedScore; - } else { + if (selectedSa1.isFuzzy() && selectedSa2.isFuzzy()) { + if (!semiSuspicious && mrefHits1 == 0 && mrefHits2 == 0 && + !(selectedSa1.isProperPairErrorProne() || + selectedSa2.isProperPairErrorProne())) { + if (evidenceLevel1 > 1 || evidenceLevel2 > 1) { + if (mateRatio1 > 0.5 && mateRatio2 > 0.5) { + return 3; + } + } + } + if (evidenceLevel1 < 3 || evidenceLevel2 < 3) { + return 2; + } + } else { + if (selectedSa1.isFuzzy()) { + if (evidenceLevel1 < 2 && evidenceLevel2 < 2) { return 2; } } - } - } - if (selectedSa1.isFuzzy() && selectedSa2.isFuzzy()) { - if (!semiSuspicious && mrefHits1 == 0 && mrefHits2 == 0 && - !(selectedSa1.isProperPairErrorProne() || - selectedSa2.isProperPairErrorProne())) { - if (evidenceLevel1 > 1 || evidenceLevel2 > 1) { - if (mateRatio1 > 0.5 && mateRatio2 > 0.5) { - return 3; + if 
(selectedSa2.isFuzzy()) { + if (evidenceLevel2 < 2 && evidenceLevel1 < 2) { + return 2; } } } - if (evidenceLevel1 < 3 || evidenceLevel2 < 3) { - return 2; - } - } else { - if (selectedSa1.isFuzzy()) { - if (evidenceLevel1 < 2 && evidenceLevel2 < 2) { + if (evidenceLevel1 < 3 && evidenceLevel2 < 3) { + if ((selectedSa1.getMateSupport() < 10 && mateRatio1 < 0.5) || + (selectedSa2.getMateSupport() < 10 && mateRatio2 < 0.5)) { return 2; } - } - if (selectedSa2.isFuzzy()) { - if (evidenceLevel2 < 2 && evidenceLevel1 < 2) { + if (selectedSa1.getMateSupport() < 10 || + selectedSa2.getMateSupport() < 10) { + if ((mateRatio1 + mateRatio2) < 1.1) { + return 2; + } + } + if (!(evidenceLevel1 > 1 || evidenceLevel2 > 1)) { + if (!semiSuspicious && mrefHits1 == 0 && mrefHits2 == 0 && + !(selectedSa1.isProperPairErrorProne() || + selectedSa2.isProperPairErrorProne())) { + if (mateRatio1 >= 0.8 && mateRatio2 >= 0.8) { + if (selectedSa1.getMateSupport() > 4 && + selectedSa2.getMateSupport() > 4) { + if (selectedSa1.getMateSupport() > 9 || + selectedSa2.getMateSupport() > 9) { + return 3; + } + } + } + } return 2; } - } - } - if (evidenceLevel1 < 3 && evidenceLevel2 < 3) { - if ((selectedSa1.getMateSupport() < 10 && mateRatio1 < 0.5) || - (selectedSa2.getMateSupport() < 10 && mateRatio2 < 0.5)) { - return 2; - } - if (selectedSa1.getMateSupport() < 10 || - selectedSa2.getMateSupport() < 10) { - if ((mateRatio1 + mateRatio2) < 1.1) { + if (evidenceLevel2 == 1) { + if ((mateRatio2 >= 0.8 && + selectedSa2.getMateSupport() > 9)) { + auto oneSidedScore = + assessEventScore(hardClipSuspiciousCall, 0); + if (oneSidedScore > 2) { + return oneSidedScore; + } + } return 2; } - } - if (!(evidenceLevel1 > 1 || evidenceLevel2 > 1)) { - if (!semiSuspicious && mrefHits1 == 0 && mrefHits2 == 0 && + if (evidenceLevel1 == 1) { + if (mateRatio1 >= 0.8 && selectedSa1.getMateSupport() > 9 && + mateRatio2 >= 0.6) { + return 5; + } + return 2; + } + if (!semiSuspicious && mrefHits1 < GERMLINE_DB_LIMIT && 
+ mrefHits2 < GERMLINE_DB_LIMIT && !(selectedSa1.isProperPairErrorProne() || selectedSa2.isProperPairErrorProne())) { if (mateRatio1 >= 0.8 && mateRatio2 >= 0.8) { if (selectedSa1.getMateSupport() > 4 && selectedSa2.getMateSupport() > 4) { - if (selectedSa1.getMateSupport() > 9 || - selectedSa2.getMateSupport() > 9) { - return 3; - } + return 5; } } } return 2; } - if (evidenceLevel2 == 1) { - if ((mateRatio2 >= 0.8 && - selectedSa2.getMateSupport() > 9)) { - auto oneSidedScore = - assessEventScore(hardClipSuspiciousCall, 0); - if (oneSidedScore > 2) { - return oneSidedScore; - } + } + return 5; + } else if (inputScoreCategory == 1) { + if (distant) { + if (mrefHits1 > GERMLINE_DB_LIMIT && mrefHits2 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 30 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 1; } - return 2; - } - if (evidenceLevel1 == 1) { - if (mateRatio1 >= 0.8 && selectedSa1.getMateSupport() > 9 && - mateRatio2 >= 0.6) { - return 5; + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 1; } - return 2; - } - if (!semiSuspicious && mrefHits1 < GERMLINEDBLIMIT && - mrefHits2 < GERMLINEDBLIMIT && - !(selectedSa1.isProperPairErrorProne() || - selectedSa2.isProperPairErrorProne())) { - if (mateRatio1 >= 0.8 && mateRatio2 >= 0.8) { - if (selectedSa1.getMateSupport() > 4 && - selectedSa2.getMateSupport() > 4) { - return 5; - } + } else if (mrefHits1 > GERMLINE_DB_LIMIT) { + if (selectedSa1.getSupport() > 50 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 10) { + eventType = 5; + return 1; + } + if (selectedSa1.getSupport() > 100 && + selectedSa1.getSecondarySupport() < 10 && + selectedSa1.getMateSupport() < 20) { + eventType = 5; + return 1; } } - return 2; - } - } - return 5; - } else if (inputScoreCategory == 1) { - if (distant) { - if (mrefHits1 > GERMLINEDBLIMIT 
&& mrefHits2 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 30 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; - return 1; - } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 20) { - eventType = 5; - return 1; + if (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate()) { + if (evidenceLevel1 == 3) { + return 3; + } + return 0; } - } else if (mrefHits1 > GERMLINEDBLIMIT) { - if (selectedSa1.getSupport() > 50 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 10) { - eventType = 5; + } else { + if (totalEvidence1 < 5) { return 1; } - if (selectedSa1.getSupport() > 100 && - selectedSa1.getSecondarySupport() < 10 && - selectedSa1.getMateSupport() < 20) { - eventType = 5; + if (selectedSa1.getSupport() < 2 || + selectedSa1.getSecondarySupport() < 2) { return 1; } } - if (selectedSa1.isFuzzy() || selectedSa1.isStrictFuzzyCandidate()) { + if (!hardClipSuspiciousCall && !selectedSa1.isStrictFuzzyCandidate()) { if (evidenceLevel1 == 3) { return 3; + } else { + return 1; } - return 0; - } - } else { - if (totalEvidence1 < 5) { - return 1; - } - if (selectedSa1.getSupport() < 2 || - selectedSa1.getSecondarySupport() < 2) { - return 1; - } - } - if (!hardClipSuspiciousCall && !selectedSa1.isStrictFuzzyCandidate()) { - if (evidenceLevel1 == 3) { - return 3; - } else { - return 1; } + return 0; } return 0; } - return 0; -} - -void -SvEvent::assessContamination(const vector> &overhangDb) { - if (eventType == 5) { - contaminationCandidate = 2; - return; - } - auto score1 = 0; - auto score2 = 0; - if (inputScore == 2) { - if (overhang2Index != -1) { - auto res = assessContaminationSingleBp(overhang2Index, overhangDb, - selectedSa2); - score2 = res.first; - overhang2lengthRatio = res.second; + + void + SvEvent::assessContamination(const std::vector> &overhangDb) { + if (eventType == 5) { + contaminationCandidate = 2; 
+ return; } - } - if (contaminationCandidate < 2) { - if (overhang1Index != -1) { - auto res = assessContaminationSingleBp(overhang1Index, overhangDb, - selectedSa1); - score1 = res.first; - overhang1lengthRatio = res.second; + auto score1 = 0; + auto score2 = 0; + if (inputScore == 2) { + if (overhang2Index != -1) { + auto res = assessContaminationSingleBp(overhang2Index, overhangDb, + selectedSa2); + score2 = res.first; + overhang2lengthRatio = res.second; + } } - } - if (score1 > 1 || score2 > 1) { - contaminationCandidate = 2; - } else if (score1 == 1 && score2 == 1) { - contaminationCandidate = 1; - } else if (score1 == 1 && score2 == 0) { - contaminationCandidate = 0; - } else if (score1 == 0 && score2 == 1) { - contaminationCandidate = 0; - } -} - -pair -SvEvent::assessContaminationSingleBp( - int overhangIndex, const vector> &overhangDb, - const SuppAlignmentAnno &selectedSa) { - auto overhangLengthMax = 0; - auto overhangLength = 0; - for (auto cit = overhangDb[overhangIndex].second.cbegin(); - cit != overhangDb[overhangIndex].second.cend(); ++cit) { - switch (*cit) { - case '(': - overhangLengthMax = max(overhangLengthMax, overhangLength); - overhangLength = 0; - break; - case ':': - overhangLength = 0; - break; - default: - ++overhangLength; - break; + if (contaminationCandidate < 2) { + if (overhang1Index != -1) { + auto res = assessContaminationSingleBp(overhang1Index, overhangDb, + selectedSa1); + score1 = res.first; + overhang1lengthRatio = res.second; + } + } + if (score1 > 1 || score2 > 1) { + contaminationCandidate = 2; + } else if (score1 == 1 && score2 == 1) { + contaminationCandidate = 1; + } else if (score1 == 1 && score2 == 0) { + contaminationCandidate = 0; + } else if (score1 == 0 && score2 == 1) { + contaminationCandidate = 0; } } - auto maxOverhangLengthRatio = - (overhangLengthMax + 0.0) / SuppAlignmentAnno::DEFAULTREADLENGTH; - if (selectedSa.getSecondarySupport() > 4) { - return {0, maxOverhangLengthRatio}; - } - if 
(maxOverhangLengthRatio > 0.7) { - if (selectedSa.getSecondarySupport() == 0) { - return {3, maxOverhangLengthRatio}; + + std::pair + SvEvent::assessContaminationSingleBp( + int overhangIndex, + const std::vector> &overhangDb, + const SuppAlignmentAnno &selectedSa) { + + if (overhangIndex < 0) { + throw_with_trace(std::invalid_argument( + "SvEvent::assessContaminationSingleBp has overhangIndex < 0")); + } + + auto overhangLengthMax = 0; + auto overhangLength = 0; + for (auto cit = overhangDb[static_cast(overhangIndex)].second.cbegin(); + cit != overhangDb[static_cast(overhangIndex)].second.cend(); ++cit) { + switch (*cit) { + case '(': + overhangLengthMax = std::max(overhangLengthMax, overhangLength); + overhangLength = 0; + break; + case ':': + overhangLength = 0; + break; + default: + ++overhangLength; + break; + } } - if (selectedSa.getSecondarySupport() < 3 || - (inputScore < 2 && selectedSa.getSecondarySupport() < 10)) { - return {2, maxOverhangLengthRatio}; + auto maxOverhangLengthRatio = + (overhangLengthMax + 0.0) / SuppAlignmentAnno::DEFAULT_READ_LENGTH; + if (selectedSa.getSecondarySupport() > 4) { + return {0, maxOverhangLengthRatio}; } - } else if (maxOverhangLengthRatio > 0.6) { - if (selectedSa.getSecondarySupport() < 3) { - return {1, maxOverhangLengthRatio}; + if (maxOverhangLengthRatio > 0.7) { + if (selectedSa.getSecondarySupport() == 0) { + return {3, maxOverhangLengthRatio}; + } + if (selectedSa.getSecondarySupport() < 3 || + (inputScore < 2 && selectedSa.getSecondarySupport() < 10)) { + return {2, maxOverhangLengthRatio}; + } + } else if (maxOverhangLengthRatio > 0.6) { + if (selectedSa.getSecondarySupport() < 3) { + return {1, maxOverhangLengthRatio}; + } } + return {0, maxOverhangLengthRatio}; } - return {0, maxOverhangLengthRatio}; -} - -string -SvEvent::printMatch(const vector> &overhangDb) const { - vector outputFields; - outputFields.reserve(20); - outputFields.emplace_back(ChrConverter::indexToChr[chrIndex1]); - 
outputFields.emplace_back(strtk::type_to_string(pos1 - 1)); - outputFields.emplace_back(strtk::type_to_string(pos1)); - outputFields.emplace_back(ChrConverter::indexToChr[chrIndex2]); - outputFields.emplace_back(strtk::type_to_string(pos2 - 1)); - outputFields.emplace_back( - inputScore > 0 - ? strtk::type_to_string(pos2) - : strtk::type_to_string(selectedSa1.getExtendedPos())); - - if (!germlineStatus1) { + + std::string + SvEvent::printMatch(const std::vector> &overhangDb) const { + std::vector outputFields; + outputFields.reserve(20); + const ChrConverter &chrConverter = GlobalAppConfig::getInstance().getChrConverter(); + + outputFields.emplace_back(chrConverter.indexToChrName(chrIndex1)); + outputFields.emplace_back(strtk::type_to_string(pos1 - 1)); + outputFields.emplace_back(strtk::type_to_string(pos1)); + outputFields.emplace_back(chrConverter.indexToChrName(chrIndex2)); + outputFields.emplace_back(strtk::type_to_string(pos2 - 1)); outputFields.emplace_back( - "SOMATIC(" + strtk::type_to_string(mrefHits1) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality1)); - } else { - if (germline || (mrefHits1 > GERMLINEDBLIMIT)) { + inputScore > 0 + ? 
strtk::type_to_string(pos2) + : strtk::type_to_string(selectedSa1.getExtendedPos())); + + if (!germlineStatus1) { outputFields.emplace_back( - "GERMLINE(" + strtk::type_to_string(mrefHits1) + "/" + - PIDSINMREFSTR + + "SOMATIC(" + strtk::type_to_string(mrefHits1) + "/" + + PIDS_IN_MREF_STR + "):" + boost::str(doubleFormatter % germlineClonality1)); } else { - outputFields.emplace_back( - "RESCUED(" + strtk::type_to_string(mrefHits1) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality1)); + if (germline || (mrefHits1 > GERMLINE_DB_LIMIT)) { + outputFields.emplace_back( + "GERMLINE(" + strtk::type_to_string(mrefHits1) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality1)); + } else { + outputFields.emplace_back( + "RESCUED(" + strtk::type_to_string(mrefHits1) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality1)); + } } - } - if (!germlineStatus2) { - if (inputScore > 0) { - outputFields.emplace_back( - "SOMATIC(" + strtk::type_to_string(mrefHits2) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality2)); + + if (!germlineStatus2) { + if (inputScore > 0) { + outputFields.emplace_back( + "SOMATIC(" + strtk::type_to_string(mrefHits2) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality2)); + } else { + outputFields.emplace_back( + "UNKNOWN(" + strtk::type_to_string(mrefHits2) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality2)); + } + } else { - outputFields.emplace_back( - "UNKNOWN(" + strtk::type_to_string(mrefHits2) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality2)); + if (germline || (mrefHits2 > GERMLINE_DB_LIMIT)) { + outputFields.emplace_back( + "GERMLINE(" + strtk::type_to_string(mrefHits2) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality2)); + } else { + outputFields.emplace_back( + "RESCUED(" + 
strtk::type_to_string(mrefHits2) + "/" + + PIDS_IN_MREF_STR + + "):" + boost::str(doubleFormatter % germlineClonality2)); + } } - } else { - if (germline || (mrefHits2 > GERMLINEDBLIMIT)) { + outputFields.emplace_back(EVENTTYPES[eventType]); + outputFields.emplace_back((suspicious == 0) + ? strtk::type_to_string(eventScore) + : strtk::type_to_string(-suspicious)); + outputFields.emplace_back( + (eventSize > 0) ? strtk::type_to_string(eventSize) : "NA"); + outputFields.emplace_back(inverted ? "INV" : "NORMAL"); + + outputFields.emplace_back(strtk::type_to_string(totalEvidence1)); + outputFields.emplace_back(boost::str( + doubleFormatter % (totalEvidence1 / (totalEvidence1 + span1 + 0.0)))); + if (inputScore > 0) { + outputFields.emplace_back(strtk::type_to_string(totalEvidence2)); outputFields.emplace_back( - "GERMLINE(" + strtk::type_to_string(mrefHits2) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality2)); + (totalEvidence2 == 0 && span2 == 0) + ? "0.000" + : boost::str( + doubleFormatter % + (totalEvidence2 / (totalEvidence2 + span2 + 0.0)))); } else { - outputFields.emplace_back( - "RESCUED(" + strtk::type_to_string(mrefHits2) + "/" + - PIDSINMREFSTR + - "):" + boost::str(doubleFormatter % germlineClonality2)); + outputFields.emplace_back("UNKNOWN"); + outputFields.emplace_back("UNKNOWN"); } - } - outputFields.emplace_back(EVENTTYPES[eventType]); - outputFields.emplace_back((suspicious == 0) - ? strtk::type_to_string(eventScore) - : strtk::type_to_string(-suspicious)); - outputFields.emplace_back( - (eventSize > 0) ? strtk::type_to_string(eventSize) : "NA"); - outputFields.emplace_back(inverted ? 
"INV" : "NORMAL"); - - outputFields.emplace_back(strtk::type_to_string(totalEvidence1)); - outputFields.emplace_back(boost::str( - doubleFormatter % (totalEvidence1 / (totalEvidence1 + span1 + 0.0)))); - if (inputScore > 0) { - outputFields.emplace_back(strtk::type_to_string(totalEvidence2)); + + outputFields.emplace_back(selectedSa1.print()); + outputFields.emplace_back(inputScore == 2 ? selectedSa2.print() : "_"); + outputFields.emplace_back( - (totalEvidence2 == 0 && span2 == 0) - ? "0.000" - : boost::str( - doubleFormatter % - (totalEvidence2 / (totalEvidence2 + span2 + 0.0)))); - } else { - outputFields.emplace_back("UNKNOWN"); - outputFields.emplace_back("UNKNOWN"); - } + overhang1Index != -1 ? overhangDb[static_cast(overhang1Index)].second : "."); + outputFields.emplace_back( + overhang2Index != -1 ? overhangDb[static_cast(overhang2Index)].second : "."); - outputFields.emplace_back(selectedSa1.print()); - outputFields.emplace_back(inputScore == 2 ? selectedSa2.print() : "_"); - - outputFields.emplace_back( - overhang1Index != -1 ? overhangDb[overhang1Index].second : "."); - outputFields.emplace_back( - overhang2Index != -1 ? overhangDb[overhang2Index].second : "."); - - return collapseRange(outputFields, "\t").append("\n"); -} -// vector SvEvent::getKey() const { -// if (!DEBUGMODE && (suspicious != 0 || eventScore == 0)) { -// return {}; -// } -// auto keyScore = (suspicious == 0) ? eventScore : suspicious; -// if (chrIndex1 < chrIndex2 || (chrIndex1 == chrIndex2 && pos1 < pos2)) { -// return {chrIndex1,pos1,chrIndex2,pos2,keyScore}; -// } else { -// return {chrIndex2,pos2,chrIndex1,pos1,keyScore}; -// } -// } - -string -SvEvent::getKey() const { - if (!DEBUGMODE && (suspicious != 0 || eventScore == 0)) { - return {}; + return collapseRange(outputFields, "\t").append("\n"); } - auto keyScore = (suspicious == 0) ? 
eventScore : suspicious; - if (chrIndex1 < chrIndex2 || (chrIndex1 == chrIndex2 && pos1 < pos2)) { - return collapseRange({to_string(chrIndex1), to_string(pos1), - to_string(chrIndex2), to_string(pos2), - to_string(keyScore)}, - "_"); - } else { - return collapseRange({to_string(chrIndex2), to_string(pos2), - to_string(chrIndex1), to_string(pos1), - to_string(keyScore)}, - "_"); + + std::string + SvEvent::getKey() const { + if (!DEBUG_MODE && (suspicious != 0 || eventScore == 0)) { + return {}; + } + auto keyScore = (suspicious == 0) ? eventScore : suspicious; + if (chrIndex1 < chrIndex2 || (chrIndex1 == chrIndex2 && pos1 < pos2)) { + return collapseRange({std::to_string(chrIndex1), std::to_string(pos1), + std::to_string(chrIndex2), std::to_string(pos2), + std::to_string(keyScore)}, + "_"); + } else { + return collapseRange({std::to_string(chrIndex2), std::to_string(pos2), + std::to_string(chrIndex1), std::to_string(pos1), + std::to_string(keyScore)}, + "_"); + } } -} } /* namespace sophia */ diff --git a/src/global.cpp b/src/global.cpp new file mode 100644 index 0000000..f947fa8 --- /dev/null +++ b/src/global.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "global.h" +#include "ChrConverter.h" +#include "ChrInfo.h" +#include "ChrInfoTable.h" +#include "Hg37ChrConverter.h" +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" + + +namespace sophia { + + std::string get_trace(const boost::exception &e) { + std::stringstream ss; + const boost::stacktrace::stacktrace *st = boost::get_error_info(e); + if (st != nullptr) { + ss << *st << std::endl; + } + return ss.str(); + } + + void setApplicationConfig(std::optional assembly_name) { + std::unique_ptr converter; + + if (!assembly_name.has_value() || assembly_name.value() == "classic_hg37") { + converter = std::unique_ptr(new Hg37ChrConverter()); + + } else if (assembly_name.value().size() > 0) { + // Compose the chromosome set name from the 
assembly name. + std::string chromosome_file = "resources/" + assembly_name.value() + ".tsv"; + std::vector chr_info = read_chr_info(chromosome_file); + ChrInfoTable chr_info_table { chr_info }; + converter = std::unique_ptr( + new GenericChrConverter(assembly_name.value(), chr_info_table)); + } else { + throw std::invalid_argument("Empty assembly name."); + } + + // Initialize the global application configuration. + GlobalAppConfig::init(std::move(converter)); + + } + +} // namespace sophia \ No newline at end of file diff --git a/src/sophia.cpp b/src/sophia.cpp new file mode 100644 index 0000000..4fee5fc --- /dev/null +++ b/src/sophia.cpp @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Alignment.h" +#include "SuppAlignment.h" +#include "Breakpoint.h" +#include "SamSegmentMapper.h" +#include "HelperFunctions.h" +#include "Hg37ChrConverter.h" +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" + + +std::pair getIsizeParameters(const std::string &ISIZEFILE); + +int main(int argc, char** argv) { + + using namespace sophia; + + ChrSize defaultReadLength = 0; + + int baseQuality = 23, + baseQualityLow = 12, + clipSize = 10, + lowQualClipSize = 5, + isizeSigmaLevel = 5, + bpSupport = 5; + double properPairPercentage = 100.0; + std::string assemblyName = "classic_hg37"; + + try { + std::ios_base::sync_with_stdio(false); + std::cin.tie(nullptr); + namespace po = boost::program_options; + po::options_description desc("Allowed options for sophia"); + desc.add_options() + ("help", "print help message") + ("assemblyname", + po::value(&assemblyName)->default_value(assemblyName), + ("assembly name (classic_hg37, hg38, ...)")) + ("mergedisizes", + po::value(), + "insertsize distribution file for the merged bam. 
Line 1: median insert size, Line 3: standard deviation of the insert size") + ("medianisize", + po::value(), + "median insert size for the merged bam") + ("stdisizepercentage", + po::value(), + "percentage standard deviation of the insert size for the merged bam") + ("defaultreadlength", + po::value(&defaultReadLength), + "Default read length for the technology used in sequencing 101, 151, etc.") + ("clipsize", + po::value(&clipSize)->default_value(clipSize), + "Minimum length of soft/hard clips in the alignment") + ("basequality", + po::value(&baseQuality)->default_value(baseQuality), + "Minimum median quality of split read overhangs") + ("basequalitylow", + po::value(&baseQualityLow)->default_value(baseQualityLow), + "If 5 consecutive bases in a split read overhang have lower quality than this strict threshold, it will be low-quality clipped") + ("lowqualclipsize", + po::value(&lowQualClipSize)->default_value(lowQualClipSize), + "Maximum length of a low quality split read overhang for discarding") + ("isizesigma", + po::value(&isizeSigmaLevel)->default_value(isizeSigmaLevel), + "The number of SDs a s's mate has to be away to be called as discordant") + ("bpsupport", + po::value(&bpSupport)->default_value(bpSupport), + "Minimum number of reads supporting a discordant contig") + ("properpairpercentage", + po::value(&properPairPercentage)->default_value(properPairPercentage), + "Proper pair ratio as a percentage") + ; + double properPairRatio = properPairPercentage / 100.0; + + po::variables_map inputVariables { }; + po::store(po::parse_command_line(argc, argv, desc), inputVariables); + po::notify(inputVariables); + + if (inputVariables.count("help")) { + std::cout << desc << std::endl; + return 0; + } + + std::optional assemblyNameOpt { }; + if (inputVariables.count("assemblyname")) { + assemblyNameOpt = inputVariables["assemblyname"].as(); + } + setApplicationConfig(assemblyNameOpt); + + if (inputVariables.count("defaultreadlength")) { + defaultReadLength = 
inputVariables["defaultreadlength"].as(); + } else { + std::cerr << "Default read Length not given, exiting. Use --defaultreadlength." << std::endl; + return 1; + } + if (defaultReadLength < 1) { + std::cerr << "Default read length " << std::to_string(defaultReadLength) + << " is invalid." << std::endl; + return 1; + } + + if (inputVariables.count("clipsize")) { + clipSize = inputVariables["clipsize"].as(); + } + + if (inputVariables.count("basequality")) { + baseQuality = inputVariables["basequality"].as(); + } + + if (inputVariables.count("basequalitylow")) { + baseQualityLow = inputVariables["basequalitylow"].as(); + } + + if (inputVariables.count("lowqualclipsize")) { + lowQualClipSize = inputVariables["lowqualclipsize"].as(); + } + + if (inputVariables.count("isizesigma")) { + isizeSigmaLevel = inputVariables["isizesigma"].as(); + } + + if (inputVariables.count("bpsupport")) { + bpSupport = inputVariables["bpsupport"].as(); + } + + if (inputVariables.count("properpairpercentage")) { + properPairRatio = inputVariables["properpairpercentage"].as(); + properPairRatio /= 100; + if (properPairRatio < 0.9) { + Breakpoint::PROPER_PAIR_COMPENSATION_MODE = true; + Breakpoint::IMPROPER_PAIR_RATIO = 0.9 - properPairRatio; + } + } + + std::string mergedIsizeFile; + if (inputVariables.count("mergedisizes")) { + mergedIsizeFile = inputVariables["mergedisizes"].as(); + auto isizeparams = getIsizeParameters(mergedIsizeFile); + Alignment::ISIZEMAX = + std::min(4000.0, + isizeparams.first + isizeSigmaLevel * isizeparams.second); + SuppAlignment::ISIZEMAX = Alignment::ISIZEMAX; + } else { + if (inputVariables.count("medianisize") && inputVariables.count("stdisizepercentage")) { + auto medianIsize = inputVariables["medianisize"].as(); + auto isizeStdPercentage = inputVariables["stdisizepercentage"].as(); + Alignment::ISIZEMAX = + std::min(4000.0, + medianIsize + isizeSigmaLevel * medianIsize * isizeStdPercentage * 0.01); + SuppAlignment::ISIZEMAX = Alignment::ISIZEMAX; + } else { 
+ Alignment::ISIZEMAX = 2000.0; + SuppAlignment::ISIZEMAX = 2000.0; + std::cerr << "No insert size distribution file given (mergedisizes). " + << "Using a dummy default value of 2000, " + << "because the min insert size of a distant event. " + << std::endl; + } + } + + Alignment::CLIPPED_NUCLEOTIDE_COUNT_THRESHOLD = static_cast(clipSize); + Alignment::BASE_QUALITY_THRESHOLD = baseQuality + 33; + Alignment::BASE_QUALITY_THRESHOLD_LOW = baseQualityLow + 33; + Alignment::LOW_QUAL_CLIP_THRESHOLD = (ChrSize) lowQualClipSize; + Breakpoint::BP_SUPPORT_THRESHOLD = bpSupport; + Breakpoint::DEFAULT_READ_LENGTH = defaultReadLength; + Breakpoint::DISCORDANT_LOW_QUAL_LEFT_RANGE = static_cast(round(defaultReadLength * 1.11)); + Breakpoint::DISCORDANT_LOW_QUAL_RIGHT_RANGE = static_cast(round(defaultReadLength * 0.51)); + + SuppAlignment::DEFAULT_READ_LENGTH = defaultReadLength; + ChosenBp::BP_SUPPORT_THRESHOLD = bpSupport; + std::cout << Breakpoint::COLUMN_STR; + SamSegmentMapper segmentRefMaster { defaultReadLength }; + segmentRefMaster.parseSamStream(); + + return 0; + } catch (boost::exception &e) { + std::cerr << "Error: " << boost::diagnostic_information(e) << std::endl; + return 1; + } catch (std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} + +std::pair getIsizeParameters(const std::string &ISIZEFILE) { + std::pair isizeMedianStd { }; + std::ifstream infile { ISIZEFILE }; + std::string line; + auto i = 0; + while (sophia::error_terminating_getline(infile, line)) { + boost::algorithm::trim_right(line); + switch (i) { + case 0: + isizeMedianStd.first = boost::lexical_cast(line); + break; + case 2: + isizeMedianStd.second = boost::lexical_cast(line); + break; + default: + break; + } + ++i; + } + return isizeMedianStd; +} diff --git a/src/sophiaAnnotate.cpp b/src/sophiaAnnotate.cpp new file mode 100644 index 0000000..7170c92 --- /dev/null +++ b/src/sophiaAnnotate.cpp @@ -0,0 +1,282 @@ +/* + * sophiaAnnotate.cpp + * + * Created on: 28 
Apr 2016 + * Author: umuttoprak + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "BreakpointReduced.h" +#include "AnnotationProcessor.h" +#include "SuppAlignment.h" +#include "SuppAlignmentAnno.h" +#include "SvEvent.h" +#include "strtk-wrap.h" +#include "MrefEntryAnno.h" +#include "HelperFunctions.h" + + +int main(int argc, char** argv) { + using namespace sophia; + + std::string assemblyName = "classic_hg37"; + int artifactlofreq { 33 }; + int artifacthifreq { 50 }; + int clonalitylofreq { 5 }; + int clonalitystrictlofreq { 20 }; + int clonalityhifreq { 85 }; + int bpFreq { 3 }; + int germlineOffset { 5 }; + int germlineDbLimit { 5 }; + int pidsInMref { 0 }; + ChrSize defaultReadLengthTumor { 0 }; + ChrSize defaultReadLengthControl { 0 }; + + try { + std::ios_base::sync_with_stdio(false); + namespace po = boost::program_options; + std::cin.tie(nullptr); + po::options_description options("Allowed options for sophiaAnnotate"); + options.add_options() + ("help", + "produce help message") + ("mref", + po::value(), + "path to mref file") + ("tumorresults", + po::value(), + "path to _bps.bed.gz file from `sophia` for the tumor, or control for a no-tumor analysis") + ("controlresults", + po::value(), + "path to _bps.bed.gz file from `sophia` for the control") + ("assemblyname", + po::value(&assemblyName)->default_value(assemblyName), + "assembly name (classic_hg37, hg38, ...)") + ("defaultreadlengthtumor", + po::value(&defaultReadLengthTumor), + "Default read length for the technology used in sequencing 101,151 etc., tumor") + ("defaultreadlengthcontrol", + po::value(&defaultReadLengthControl), + "Default read length for the technology used in sequencing 101,151 etc., tumor") + ("pidsinmref", + po::value(&pidsInMref)->default_value(pidsInMref), + "Number of PIDs in the MREF") + ("artifactlofreq", + po::value(&artifactlofreq)->default_value(artifactlofreq), + "PERCENTAGE frequency of artifact supports for 
bps to be considered as artifact_like") + ("artifacthifreq", + po::value(&artifacthifreq)->default_value(artifacthifreq), + "PERCENTAGE frequency of artifact supports for bps to be considered as artifacts") + ("clonalitylofreq", + po::value(&clonalitylofreq)->default_value(clonalitylofreq), + "PERCENTAGE clonality for bps to be considered as extreme_subclonal") + ("clonalitystrictlofreq", + po::value(&clonalitystrictlofreq)->default_value(clonalitystrictlofreq), + "PERCENTAGE clonality for bps to be considered as extreme_subclonal") + ("clonalityhifreq", + po::value(&clonalityhifreq)->default_value(clonalityhifreq), + "PERCENTAGE clonality for bps to be considered as homozygous") + ("bpfreq", + po::value(&bpFreq)->default_value(bpFreq), + "PERCENTAGE frequency of a BP for consideration as rare") + ("germlineoffset", + po::value(&germlineOffset)->default_value(germlineOffset), + "Minimum offset a germline bp and a control bp") + ("germlinedblimit", + po::value(&germlineDbLimit)->default_value(germlineDbLimit), + "Maximum occurrence of germline variants in the db") + ("DEBUG_MODE", + "DEBUG_MODE") + ; + + po::variables_map inputVariables { }; + po::store(po::parse_command_line(argc, argv, options), inputVariables); + po::notify(inputVariables); + + if (inputVariables.count("help")) { + std::cout << options << std::endl; + return 0; + } + + std::optional assemblyNameOpt { }; + if (inputVariables.count("assemblyname")) { + assemblyNameOpt = inputVariables["assemblyname"].as(); + } + setApplicationConfig(assemblyNameOpt); + + CompressedMrefIndex vectorSize = + GlobalAppConfig::getInstance().getChrConverter().nChromosomesCompressedMref(); + + std::vector> mref { static_cast(vectorSize), std::vector { } }; + if (!inputVariables.count("mref")) { + std::cerr << "No mref file given, exiting" << std::endl; + return 1; + } + + std::string tumorResults; + if (inputVariables.count("tumorresults")) { + tumorResults = inputVariables["tumorresults"].as(); + } else { + std::cerr << 
"No input file given, exiting" << std::endl; + return 1; + } + + if (inputVariables.count("pidsinmref")) { + pidsInMref = inputVariables["pidsinmref"].as(); + } else { + std::cerr << "number of PIDS in the MREF not given, exiting" << std::endl; + return 1; + } + + if (inputVariables.count("defaultreadlengthtumor")) { + defaultReadLengthTumor = inputVariables["defaultreadlengthtumor"].as(); + } else { + std::cerr << "Default read length not given, exiting" << std::endl; + return 1; + } + if (defaultReadLengthTumor < 1) { + std::cerr << "Default read length tumor " << std::to_string(defaultReadLengthTumor) + << " is invalid." << std::endl; + return 1; + } + + + if (inputVariables.count("artifactlofreq")) { + artifactlofreq = inputVariables["artifactlofreq"].as(); + } + + if (inputVariables.count("artifacthifreq")) { + artifacthifreq = inputVariables["artifacthifreq"].as(); + } + + if (inputVariables.count("clonalitylofreq")) { + clonalitylofreq = inputVariables["clonalitylofreq"].as(); + } + + if (inputVariables.count("clonalitystrictlofreq")) { + clonalitystrictlofreq = inputVariables["clonalitystrictlofreq"].as(); + } + + if (inputVariables.count("clonalityhifreq")) { + clonalityhifreq = inputVariables["clonalityhifreq"].as(); + } + + if (inputVariables.count("bpfreq")) { + bpFreq = inputVariables["bpfreq"].as(); + } + + if (inputVariables.count("germlineoffset")) { + germlineOffset = inputVariables["germlineoffset"].as(); + } + + if (inputVariables.count("germlinedblimit")) { + germlineDbLimit = inputVariables["germlinedblimit"].as(); + } + + MrefEntryAnno::PIDS_IN_MREF = pidsInMref; + std::unique_ptr mrefInputHandle + { std::make_unique(inputVariables["mref"].as(), + std::ios_base::in | std::ios_base::binary) }; + std::unique_ptr mrefGzHandle + { std::make_unique() }; + + mrefGzHandle->push(boost::iostreams::gzip_decompressor()); + mrefGzHandle->push(*mrefInputHandle); + std::string line { }; + + const ChrConverter &chrConverter = 
GlobalAppConfig::getInstance().getChrConverter(); + + while (error_terminating_getline(*mrefGzHandle, line)) { + if (line.front() == '#') { + continue; + }; + ChrIndex globalIndex; + try { + globalIndex = chrConverter.parseChrAndReturnIndex(line.cbegin(), line.cend(), '\t'); + } catch (const DomainError &e) { + e << + error_info_string("line = " + line); + throw e; + } + CompressedMrefIndex chrIndex; + if (!chrConverter.isCompressedMref(globalIndex)) { + continue; + } else { + chrIndex = chrConverter.indexToCompressedMrefIndex(globalIndex); + mref[static_cast(chrIndex)].emplace_back(line); + } + } + SvEvent::ARTIFACT_FREQ_LOW_THRESHOLD = (artifactlofreq + 0.0) / 100; + SvEvent::ARTIFACT_FREQ_HIGH_THRESHOLD = (artifacthifreq + 0.0) / 100; + BreakpointReduced::ARTIFACT_FREQ_HIGH_THRESHOLD = SvEvent::ARTIFACT_FREQ_HIGH_THRESHOLD; + SvEvent::CLONALITY_LOW_THRESHOLD = (clonalitylofreq + 0.0) / 100; + SvEvent::CLONALITY_STRICT_LOW_THRESHOLD = (clonalitystrictlofreq + 0.0) / 100; + BreakpointReduced::CLONALITY_STRICT_LOW_THRESHOLD = SvEvent::CLONALITY_STRICT_LOW_THRESHOLD; + SvEvent::CLONALITY_HIGH_THRESHOLD = (clonalityhifreq + 0.0) / 100; + SvEvent::BP_FREQ_THRESHOLD = pidsInMref * (bpFreq + 0.0) / 100; + SvEvent::RELAXED_BP_FREQ_THRESHOLD = 3 * SvEvent::BP_FREQ_THRESHOLD; + SvEvent::PIDS_IN_MREF_STR = strtk::type_to_string(pidsInMref); + BreakpointReduced::PIDS_IN_MREF_STR = SvEvent::PIDS_IN_MREF_STR; + BreakpointReduced::DEFAULT_READ_LENGTH = defaultReadLengthTumor; + Breakpoint::DEFAULT_READ_LENGTH = defaultReadLengthTumor; + SuppAlignment::DEFAULT_READ_LENGTH = defaultReadLengthTumor; + SuppAlignmentAnno::DEFAULT_READ_LENGTH = defaultReadLengthTumor; + SvEvent::HALF_DEFAULT_READ_LENGTH = round(defaultReadLengthTumor / 2.0); + SvEvent::GERMLINE_OFFSET_THRESHOLD = germlineOffset; + SvEvent::GERMLINE_DB_LIMIT = germlineDbLimit; + SvEvent::ABRIDGED_OUTPUT = true; + if (inputVariables.count("DEBUG_MODE")) { + SvEvent::DEBUG_MODE = true; + } else { + SvEvent::DEBUG_MODE = 
false; + } + AnnotationProcessor::ABRIDGED_OUTPUT = true; + Breakpoint::BP_SUPPORT_THRESHOLD = 3; + if (inputVariables.count("controlresults")) { + std::string controlResults { inputVariables["controlresults"].as() }; + if (inputVariables.count("defaultreadlengthcontrol")) { + defaultReadLengthControl = inputVariables["defaultreadlengthcontrol"].as(); + } else { + std::cerr << "Default read length control not given, exiting" << std::endl; + return 1; + } + if (defaultReadLengthControl < 1) { + std::cerr << "Default read length control " << std::to_string(defaultReadLengthControl) + << " is invalid." << std::endl; + return 1; + } + + auto lowQualControl = 0; + auto pathogenInControl = false; + { + SvEvent::NO_CONTROL_MODE = true; + AnnotationProcessor annotationProcessorControlCheck { controlResults, mref, defaultReadLengthControl, true, germlineDbLimit }; + lowQualControl = annotationProcessorControlCheck.getMassiveInvFilteringLevel(); + pathogenInControl = annotationProcessorControlCheck.isContaminationObserved(); + SvEvent::NO_CONTROL_MODE = false; + } + AnnotationProcessor annotationProcessor { tumorResults, mref, controlResults, defaultReadLengthTumor, defaultReadLengthControl, germlineDbLimit, lowQualControl, pathogenInControl }; + annotationProcessor.printFilteredResults(pathogenInControl, lowQualControl); + } else { + SvEvent::NO_CONTROL_MODE = true; + AnnotationProcessor annotationProcessor { tumorResults, mref, defaultReadLengthTumor, false, germlineDbLimit }; + annotationProcessor.printFilteredResults(false, 0); + } + + return 0; + } catch (boost::exception &e) { + std::cerr << "Error: " << boost::diagnostic_information(e) << std::endl; + return 1; + } catch (std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} diff --git a/src/sophiaMref.cpp b/src/sophiaMref.cpp new file mode 100644 index 0000000..4dbf856 --- /dev/null +++ b/src/sophiaMref.cpp @@ -0,0 +1,127 @@ +/* + * sophiaMref.cpp + * + * Created on: 27 Apr 2016 + 
* Author: umuttoprak + */ + +#include +#include +#include +#include +#include +#include "MasterRefProcessor.h" +#include "MrefEntry.h" +#include "HelperFunctions.h" +#include "GlobalAppConfig.h" +#include "Hg37ChrConverter.h" +#include "GenericChrConverter.h" +#include "ChrInfo.h" +#include "ChrInfoTable.h" +#include "ChrCategory.h" + + +int main(int argc, char** argv) { + + using namespace sophia; + + ChrSize defaultReadLength { 0 }; + std::string assemblyName = "classic_hg37"; + + try { + boost::program_options::options_description desc("Allowed options for sophiaMref"); + desc.add_options() + ("help", + "produce help message") + ("gzins", + boost::program_options::value(), + "A file containing the the paths of the of all gzipped control beds, line-by-line") + ("outputrootname", + boost::program_options::value(), + "base name/path for the output files") + ("version", + boost::program_options::value(), + "version string used to match the PID in the BED files with the pattern\n `.*/$pidName.{1}$version.+`") + ("assemblyname", + boost::program_options::value(&assemblyName)->default_value("classic_hg37"), + "assembly name (classic_hg37, hg38, ...)") + ("defaultreadlength", + boost::program_options::value(&defaultReadLength), + "Default read length for the technology used in sequencing, e.g. 
101 or 151.") + ; + + boost::program_options::variables_map inputVariables { }; + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), + inputVariables); + boost::program_options::notify(inputVariables); + + if (inputVariables.count("help")) { + std::cout << desc << std::endl; + return 0; + } + + std::optional assemblyNameOpt { }; + if (inputVariables.count("assemblyname")) { + assemblyNameOpt = inputVariables["assemblyname"].as(); + } + setApplicationConfig(assemblyNameOpt); + + std::string gzInFilesList; + if (inputVariables.count("gzins")) { + gzInFilesList = inputVariables["gzins"].as(); + } else { + std::cerr << "No gzipped control bed list file given, exiting" << std::endl; + return 1; + } + + std::ifstream gzInFilesHandle { gzInFilesList }; + std::vector gzListIn; + for (std::string line; error_terminating_getline(gzInFilesHandle, line);) { + gzListIn.push_back(line); + } + + std::string version { }; + if (inputVariables.count("version")) { + version = inputVariables["version"].as(); + } else { + std::cerr << "No input version given, exiting" << std::endl; + return 1; + } + + if (inputVariables.count("defaultreadlength")) { + defaultReadLength = inputVariables["defaultreadlength"].as(); + } else { + std::cerr << "Default read length not given, exiting" << std::endl; + return 1; + } + if (defaultReadLength < 1) { + std::cerr << "Default read length " << std::to_string(defaultReadLength) + << " is invalid." << std::endl; + return 1; + } + + + std::string outputRoot { }; + if (inputVariables.count("outputrootname")) { + outputRoot = inputVariables["outputrootname"].as(); + } else { + std::cerr << "No output file root name given, exiting" << std::endl; + return 1; + } + + SuppAlignment::DEFAULT_READ_LENGTH = defaultReadLength; + SuppAlignmentAnno::DEFAULT_READ_LENGTH = defaultReadLength; + MrefEntry::NUM_PIDS = gzListIn.size(); + + std::cerr << "Running sophiaMref on " << MrefEntry::NUM_PIDS << " PIDs ..." 
<< std::endl; + MasterRefProcessor mRefProcessor { gzListIn, outputRoot, version, defaultReadLength }; + + return 0; + } catch (boost::exception &e) { + std::cerr << "Error: " << boost::diagnostic_information(e) << std::endl; + return 1; + } catch (std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} diff --git a/tests/Breakpoint_test.cpp b/tests/Breakpoint_test.cpp new file mode 100644 index 0000000..96f8406 --- /dev/null +++ b/tests/Breakpoint_test.cpp @@ -0,0 +1,24 @@ +#include +#include + +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" +#include "Breakpoint.h" + +namespace sophia { + + TEST(BreakpointTest_Parse, BasicAssertions) { + + const std::string test1 = "chr22\t10525762\t10525763\t0,0,7,0,0,0,4,0,0,9,0,0\t4,4\t|chrUn_KI270749v1:73502-73503(0,2,4?/8);|chr22_KI270735v1_random:17508-17509_INV(0,1,3?/6)\t.\t."; + EXPECT_EQ(Breakpoint::parse(test1, true).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr22")); + + // Breakpoint contains `:` + const std::string test2 = "chr22\t37934897\t37934898\t0,0,4,4,0,0,32,2,0,0,0,0\t32,36\t.\tHLA-DRB1*13:01:01:2914|(4,0,0?/0);chr1:28514292-28514293_INV|(4,0,1?/5);HLA-DRB1*13:01:01:2919|(4,0,0?/0);chr13:20527706-20527707_INV|(4,0,1?/5);HLA-DRB1*13:01:01:2922|(4,0,0?/0);chr1:120948617-120948618_INV|(4,0,1?/5);HLA-DRB1*13:01:01:2923|(4,0,0?/0);chr13:92964918-92964919_INV|(4,0,1?/5)\t>28974_1:ATGTCCACGGTAAAAAATTTGAATTTTATTT|(4)"; + EXPECT_EQ(Breakpoint::parse(test2, true).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr22")); + + // TODO Add tests to *write/print* the breakpoint in the correct way. 
+ } + +} // namespace sophia \ No newline at end of file diff --git a/tests/ChrCategory_test.cpp b/tests/ChrCategory_test.cpp new file mode 100644 index 0000000..a6f0afb --- /dev/null +++ b/tests/ChrCategory_test.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include "ChrCategory.h" + +namespace sophia { + + TEST(ChrCategoryTest, ChrCategoryTest_from_string) { + ASSERT_EQ(ChrCategory::from_string("AUTOSOME"), ChrCategory::AUTOSOME); + ASSERT_EQ(ChrCategory::from_string("X"), ChrCategory::X); + ASSERT_EQ(ChrCategory::from_string("x"), ChrCategory::X); + ASSERT_EQ(ChrCategory::from_string("Y"), ChrCategory::Y); + ASSERT_EQ(ChrCategory::from_string("extraChromosomal"), ChrCategory::EXTRACHROMOSOMAL); + ASSERT_EQ(ChrCategory::from_string("uNasSIGNED"), ChrCategory::UNASSIGNED); + ASSERT_EQ(ChrCategory::from_string("alt"), ChrCategory::ALT); + ASSERT_EQ(ChrCategory::from_string("hLa"), ChrCategory::HLA); + ASSERT_EQ(ChrCategory::from_string("decoy"), ChrCategory::DECOY); + ASSERT_EQ(ChrCategory::from_string("virus"), ChrCategory::VIRUS); + ASSERT_EQ(ChrCategory::from_string("technical"), ChrCategory::TECHNICAL); + } + + TEST(ChrCategoryTest, ChrCategoryTest_from_string_invalid) { + ASSERT_THROW(ChrCategory::from_string("invalid"), std::invalid_argument); + } + + TEST(ChrCategoryTest, ChrCategoryTest_getName) { + ASSERT_EQ(ChrCategory::AUTOSOME.getName(), "AUTOSOME"); + ASSERT_EQ(ChrCategory::X.getName(), "X"); + ASSERT_EQ(ChrCategory::Y.getName(), "Y"); + ASSERT_EQ(ChrCategory::EXTRACHROMOSOMAL.getName(), "EXTRACHROMOSOMAL"); + ASSERT_EQ(ChrCategory::UNASSIGNED.getName(), "UNASSIGNED"); + ASSERT_EQ(ChrCategory::ALT.getName(), "ALT"); + ASSERT_EQ(ChrCategory::HLA.getName(), "HLA"); + ASSERT_EQ(ChrCategory::DECOY.getName(), "DECOY"); + ASSERT_EQ(ChrCategory::VIRUS.getName(), "VIRUS"); + ASSERT_EQ(ChrCategory::TECHNICAL.getName(), "TECHNICAL"); + } + + TEST(ChrCategoryTest, ChrCategoryTest_numCategories) { + ASSERT_EQ(ChrCategory::numCategories(), 10); + } + + 
TEST(ChrCategoryTest, ChrCategoryTest_getCategories) { + std::vector categories = ChrCategory::getCategories(); + ASSERT_EQ(categories.size(), 10); + ASSERT_EQ(categories[0], ChrCategory::AUTOSOME); + ASSERT_EQ(categories[1], ChrCategory::X); + ASSERT_EQ(categories[2], ChrCategory::Y); + ASSERT_EQ(categories[3], ChrCategory::EXTRACHROMOSOMAL); + ASSERT_EQ(categories[4], ChrCategory::UNASSIGNED); + ASSERT_EQ(categories[5], ChrCategory::ALT); + ASSERT_EQ(categories[6], ChrCategory::HLA); + ASSERT_EQ(categories[7], ChrCategory::VIRUS); + ASSERT_EQ(categories[8], ChrCategory::DECOY); + ASSERT_EQ(categories[9], ChrCategory::TECHNICAL); + } + +} // namespace sophia \ No newline at end of file diff --git a/tests/ChrInfoTable_test.cpp b/tests/ChrInfoTable_test.cpp new file mode 100644 index 0000000..6ac678c --- /dev/null +++ b/tests/ChrInfoTable_test.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include "Fixtures.h" +#include "GlobalAppConfig.h" +#include "GenericChrConverter.h" + +namespace sophia { + + TEST_F(ChrInfoTableFixture, ChrInfoTableTest_nChromosomes) { + EXPECT_EQ(chr_info_table->nChromosomes(), 3367); + } + + TEST_F(ChrInfoTableFixture, ChrInfoTableTest_getNames) { + EXPECT_EQ(chr_info_table->getNames()[0], "chr1"); + EXPECT_EQ(chr_info_table->getNames()[21], "chr22"); + EXPECT_EQ(chr_info_table->getNames()[22], "chrX"); + EXPECT_EQ(chr_info_table->getNames()[23], "chrY"); + EXPECT_EQ(chr_info_table->getNames()[24], "chrM"); + EXPECT_EQ(chr_info_table->getNames()[3366], "phix"); + EXPECT_EQ(chr_info_table->getNames()[88], "chrUn_KI270423v1"); + } + + TEST_F(ChrInfoTableFixture, ChrInfoTableTest_getNamesByCategory) { + EXPECT_EQ(chr_info_table->getNames(ChrCategory::AUTOSOME).size(), 22); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::AUTOSOME)[0], "chr1"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::X).size(), 1); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::X)[0], "chrX"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::Y).size(), 
1); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::Y)[0], "chrY"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::EXTRACHROMOSOMAL).size(), 1); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::EXTRACHROMOSOMAL)[0], "chrM"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::UNASSIGNED).size(), 169); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::UNASSIGNED)[0], "chr1_KI270706v1_random"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::TECHNICAL).size(), 1); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::TECHNICAL)[0], "phix"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::VIRUS).size(), 1); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::VIRUS)[0], "chrEBV"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::DECOY).size(), 2385); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::DECOY)[0], "chrUn_KN707606v1_decoy"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::ALT).size(), 261); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::ALT)[0], "chr1_KI270762v1_alt"); + + EXPECT_EQ(chr_info_table->getNames(ChrCategory::HLA).size(), 525); + EXPECT_EQ(chr_info_table->getNames(ChrCategory::HLA)[0], "HLA-A*01:01:01:01"); + } + +} // namespace sophia \ No newline at end of file diff --git a/tests/Fixtures.h b/tests/Fixtures.h new file mode 100644 index 0000000..ee928e7 --- /dev/null +++ b/tests/Fixtures.h @@ -0,0 +1,61 @@ +#ifndef FIXTURES_H_ +#define FIXTURES_H_ + +#include +#include +#include +#include +#include +#include "global.h" +#include "ChrInfoTable.h" +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" + + +namespace sophia { + + class ChrInfoTableFixture : public ::testing::Test { + + protected: + + ChrInfoTable *chr_info_table = nullptr; + + void SetUp() override { + std::string chromosome_file = "resources/hg38_test.tsv"; + std::vector chr_info = read_chr_info(chromosome_file); + chr_info_table = new ChrInfoTable(chr_info); + } + + void TearDown() override { + delete chr_info_table; + } + + }; + + class 
GenericChrConverterFixture : public ChrInfoTableFixture { + + protected: + + void SetUp() { + ChrInfoTableFixture::SetUp(); + try { + std::unique_ptr converter = + std::unique_ptr( + new GenericChrConverter("hg38", *chr_info_table)); + // TODO Find a way to deal with the singleton, without having to refactor the whole code + GlobalAppConfig::init(std::move(converter)); + } catch (const std::logic_error& e) { + // In case the singleton is already set, this will throw a logic_error, which + // we just ignore. + } + } + + void TearDown() override { + // TODO Maybe remove the singleton here? At least for non-parallel testing + } + + }; + +} + +#endif /* FIXTURES_H_ */ diff --git a/tests/GenericChrConverter_test.cpp b/tests/GenericChrConverter_test.cpp new file mode 100644 index 0000000..e1aca66 --- /dev/null +++ b/tests/GenericChrConverter_test.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include "Fixtures.h" +#include "GlobalAppConfig.h" +#include "GenericChrConverter.h" + +namespace sophia { + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_chrNameToIndex) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.chrNameToIndex("chr1"), 0); + EXPECT_EQ(converter.chrNameToIndex("chr22"), 21); + EXPECT_EQ(converter.chrNameToIndex("chrX"), 22); + EXPECT_EQ(converter.chrNameToIndex("chrY"), 23); + EXPECT_EQ(converter.chrNameToIndex("chrM"), 24); + EXPECT_EQ(converter.chrNameToIndex("chrUn_KI270423v1"), 88); + EXPECT_EQ(converter.chrNameToIndex("phix"), 3366); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_assemblyName) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.getAssemblyName(), "hg38"); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_nChromosomes) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + 
EXPECT_EQ(converter.nChromosomes(), 3367); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_indexToChrName) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.indexToChrName(0), "chr1"); + EXPECT_EQ(converter.indexToChrName(21), "chr22"); + EXPECT_EQ(converter.indexToChrName(22), "chrX"); + EXPECT_EQ(converter.indexToChrName(23), "chrY"); + EXPECT_EQ(converter.indexToChrName(24), "chrM"); + EXPECT_EQ(converter.indexToChrName(88), "chrUn_KI270423v1"); + EXPECT_EQ(converter.indexToChrName(3366), "phix"); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_chrNameToIndex) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.chrNameToIndex("chr1"), 0); + EXPECT_EQ(converter.chrNameToIndex("chr22"), 21); + EXPECT_EQ(converter.chrNameToIndex("chrX"), 22); + EXPECT_EQ(converter.chrNameToIndex("chrY"), 23); + EXPECT_EQ(converter.chrNameToIndex("chrM"), 24); + EXPECT_EQ(converter.chrNameToIndex("chrUn_KI270423v1"), 88); + EXPECT_EQ(converter.chrNameToIndex("phix"), 3366); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_nChromosomesCompressedMref) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.nChromosomesCompressedMref(), 3365); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_is_category) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_TRUE(converter.isAutosome(0)); // chr1 + EXPECT_TRUE(converter.isAutosome(21)); // chr22 + EXPECT_TRUE(converter.isGonosome(22)); // chrX + EXPECT_TRUE(converter.isGonosome(23)); // chrY + EXPECT_TRUE(converter.isExtrachromosomal(24)); // chrM + EXPECT_TRUE(converter.isUnassigned(88)); // chrUn_KI270423v1 + EXPECT_TRUE(converter.isTechnical(3366)); // phix + 
EXPECT_TRUE(converter.isVirus(455)); // chrEBV + EXPECT_TRUE(converter.isDecoy(456)); // chrUn_KN707606v1_decoy + EXPECT_TRUE(converter.isALT(194)); // chr1_KI270762v1_alt + EXPECT_TRUE(converter.isHLA(2841)); // HLA-A*01:01:01:01 + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_isCompressedMrefIndex) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_TRUE(converter.isCompressedMref(0)); // chr1 + EXPECT_TRUE(converter.isCompressedMref(21)); // chr22 + EXPECT_TRUE(converter.isCompressedMref(22)); // chrX + EXPECT_TRUE(converter.isCompressedMref(23)); // chrY + EXPECT_TRUE(! converter.isCompressedMref(24)); // chrM + EXPECT_TRUE(converter.isCompressedMref(88)); // chrUn_KI270423v1 + EXPECT_TRUE(! converter.isCompressedMref(3366)); // phix + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_compressedMrefIndexToIndex) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.compressedMrefIndexToIndex(0), 0); // chr1 + EXPECT_EQ(converter.compressedMrefIndexToIndex(23), 23); // chrY + // chrM = global::24 is missing from compressed mrefs, + // therefore, compressed::24 = global::25 + EXPECT_EQ(converter.compressedMrefIndexToIndex(24), 25); // chr1_KI270706v1_random + EXPECT_THROW(converter.compressedMrefIndexToIndex(3366), std::logic_error); // phix + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_indexToCompressedMrefIndex) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.indexToCompressedMrefIndex(0), 0); // chr1 + EXPECT_EQ(converter.indexToCompressedMrefIndex(23), 23); // chrY + // chrM = global::24 is missing from compressed mrefs, ... 
+ // [this is an interesting case, because it demonstrates that the GenericChrConverter + // is not constrained to support only compressed Mref chromosomes strictly separated from + // the rest of the chromosomes, but that the isCompressedMref() flag can freely be used.] + EXPECT_THROW(converter.indexToCompressedMrefIndex(24), std::logic_error); // chrM + // ... therefore, compressed::24 = global::25 + EXPECT_EQ(converter.indexToCompressedMrefIndex(25), 24); // chr1_KI270706v1_random + EXPECT_THROW(converter.indexToCompressedMrefIndex(3366), std::logic_error); // phix + } + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_ParseSimpleStrings) { + const std::string test1 = "chr1\tsomething\telse\n"; + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.parseChr(test1.begin(), test1.end(), '\t'), + "chr1"); + } + + TEST_F(GenericChrConverterFixture, GenericChrConverter_chrSizeCompressedMref) { + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + EXPECT_EQ(converter.chrSizeCompressedMref(0), 248956422); // chr1 + EXPECT_EQ(converter.chrSizeCompressedMref(23), 57227415); // chrY + EXPECT_EQ(converter.chrSizeCompressedMref(454), 171823); // chrEBV; 454 is the index in compressed mrefs! 
+ // chrM is not in compressed mrefs + EXPECT_EQ(converter.chrSizeCompressedMref(24), 175055); // chr1_KI270706v1_random + } + + TEST_F(GenericChrConverterFixture, GenericChrConverterTest_ParseBreakPointStrings) { + const std::string stopChars = "|(,!/?;"; + const GenericChrConverter &converter = + dynamic_cast(GlobalAppConfig::getInstance().getChrConverter()); + + + const std::string test1 = "HLA-DRB1*13:01:01:2914|(4,0,0?/0)"; + EXPECT_EQ(converter.parseChr(test1.begin(), test1.end(), ':', stopChars), + "HLA-DRB1*13:01:01"); + + const std::string test2 = "chrUn_KI270749v1:13653-13654(1,0,3?/4)"; + EXPECT_EQ(converter.parseChr(test2.begin(), test2.end(), ':', stopChars), + "chrUn_KI270749v1"); + + const std::string test3 = test1 + ";" + test2; + EXPECT_EQ(converter.parseChr(test3.begin(), test3.end(), ':', stopChars), + "HLA-DRB1*13:01:01"); + + } + +} // namespace sophia \ No newline at end of file diff --git a/tests/Hg37ChrConverter_test.cpp b/tests/Hg37ChrConverter_test.cpp new file mode 100644 index 0000000..2fc9a67 --- /dev/null +++ b/tests/Hg37ChrConverter_test.cpp @@ -0,0 +1,16 @@ +#include +#include +#include +#include +#include "global.h" +#include "Hg37ChrConverter.h" + +namespace sophia { + + TEST(Hg37ChrConverterTest, Hg37ChrConverterTest_chrNameToIndex) { + std::vector mrefIndex {1003, 0, 1003, 1}; + std::vector result {1, 3}; + EXPECT_EQ(Hg37ChrConverter::_buildCompressedMrefIndexToIndex(2, mrefIndex), + result); + } +} // namespace sophia \ No newline at end of file diff --git a/tests/IndexRange_test.cpp b/tests/IndexRange_test.cpp new file mode 100644 index 0000000..9fc2f26 --- /dev/null +++ b/tests/IndexRange_test.cpp @@ -0,0 +1,30 @@ +#include +#include + +#include "IndexRange.h" + +namespace sophia { + + TEST(IndexRangeTest, IndexRangeTest_constructor) { + IndexRange range(1, 10); + EXPECT_EQ(range.start(), 1); + EXPECT_EQ(range.end(), 10); + + EXPECT_THROW(IndexRange(1, 0), std::invalid_argument); + } + + TEST(IndexRangeTest, 
IndexRangeTest_contains) { + IndexRange range(1, 11); + EXPECT_FALSE(range.contains(0)); + EXPECT_TRUE(range.contains(1)); + EXPECT_TRUE(range.contains(5)); + EXPECT_TRUE(range.contains(10)); + EXPECT_FALSE(range.contains(11)); + } + + TEST(IndexRangeTest, IndexRangeTest_width) { + IndexRange range(1, 11); + EXPECT_EQ(range.width(), 10); + } + +} // namespace sophia \ No newline at end of file diff --git a/tests/SuppAlignmentAnno_test.cpp b/tests/SuppAlignmentAnno_test.cpp new file mode 100644 index 0000000..76ab6dc --- /dev/null +++ b/tests/SuppAlignmentAnno_test.cpp @@ -0,0 +1,160 @@ +#include +#include +#include + +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" +#include "SuppAlignmentAnno.h" +#include "Fixtures.h" + +namespace sophia { + + using namespace testing; + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor1) { + const std::string test = "chr1:1041693|(8,0,!/0)"; + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + + EXPECT_EQ(anno.getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr1")); + + EXPECT_EQ(anno.isEncounteredM(), false); + EXPECT_EQ(anno.isInverted(), false); + + EXPECT_EQ(anno.getSupport(), 8); + anno.setSupport(6); + EXPECT_EQ(anno.getSupport(), 6); + + EXPECT_EQ(anno.getMateSupport(), 0); + anno.incrementMateSupport(); + EXPECT_EQ(anno.getMateSupport(), 1); + anno.setMateSupport(4); + EXPECT_EQ(anno.getMateSupport(), 4); + + EXPECT_EQ(anno.getExtendedPos(), 1041693); + EXPECT_EQ(anno.getPos(), 1041693); + + EXPECT_EQ(anno.getSecondarySupport(), 0); + anno.setSecondarySupport(5); + EXPECT_EQ(anno.getSecondarySupport(), 5); + + EXPECT_EQ(anno.isToRemove(), false); + anno.setToRemove(true); + EXPECT_EQ(anno.isToRemove(), true); + + EXPECT_EQ(anno.isDistant(), true); + + EXPECT_EQ(anno.isSuspicious(), true); + anno.setSuspicious(false); + EXPECT_EQ(anno.isSuspicious(), false); + + EXPECT_EQ(anno.isSemiSuspicious(), false); + anno.setSemiSuspicious(true); + 
EXPECT_EQ(anno.isSemiSuspicious(), true); + + EXPECT_EQ(anno.getExpectedDiscordants(), 0); + anno.setExpectedDiscordants(10); + EXPECT_EQ(anno.getExpectedDiscordants(), 10); + + EXPECT_EQ(anno.isFuzzy(), false); + anno.setFuzzy(true); + EXPECT_EQ(anno.isFuzzy(), true); + + EXPECT_EQ(anno.isStrictFuzzy(), false); + + EXPECT_EQ(anno.isStrictFuzzyCandidate(), false); + + EXPECT_EQ(anno.isProperPairErrorProne(), false); + + EXPECT_EQ(anno.getSupportingIndices().size(), 0); + anno.addSupportingIndices(std::vector({1, 2, 3})); + EXPECT_THAT(anno.getSupportingIndices(), ElementsAre(1, 2, 3)); + + // !suspicious, semisuspicious, 10 expected discordants, 6 supporting, 5 secondary supporting, 4 mate supporting + EXPECT_EQ(anno.print(), "chr1:1041693|(6,5,4?/10)"); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor2) { + const std::string test = "chr16:1041693_INV|(8,0,!/0)"; + + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + + EXPECT_EQ(anno.getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr16")); + + EXPECT_EQ(anno.isEncounteredM(), false); + EXPECT_EQ(anno.isInverted(), true); + EXPECT_EQ(anno.isSuspicious(), true); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor3) { + const std::string test = "chr16:32084614-32084615_INV|(0,1,3?/6)"; + + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + + EXPECT_EQ(anno.getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr16")); + + EXPECT_EQ(anno.isEncounteredM(), false); + EXPECT_EQ(anno.isInverted(), true); + EXPECT_EQ(anno.isSuspicious(), false); + EXPECT_EQ(anno.isSemiSuspicious(), true); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor4) { + const std::string test = "|chr4:49107794-49107795(0,1,2?/89)"; + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + EXPECT_EQ(anno.getChrIndex(), + 
GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr4")); + + EXPECT_EQ(anno.isEncounteredM(), true); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor5) { + const std::string test = "HLA-DRB1*13:01:01:2914|(4,0,0?/0)"; + EXPECT_EQ(SuppAlignmentAnno(test).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("HLA-DRB1*13:01:01")); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor6) { + const std::string test = "|chr17:64795390_INV(8,0,6?/12)"; + + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + + EXPECT_TRUE(anno.isInverted()); + EXPECT_EQ(anno.isEncounteredM(), true); + + EXPECT_EQ(anno.getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr17")); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor7) { + const std::string test = "chr5:10019-11689|(11,0,13?/13)"; + + SuppAlignmentAnno anno = SuppAlignmentAnno(test); + + EXPECT_EQ(anno.getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr5")); + + EXPECT_EQ(anno.isEncounteredM(), false); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor8) { + const std::string test = "chr5:10006-10487|(11,0,18?/18)"; + EXPECT_EQ(SuppAlignmentAnno(test).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr5")); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor9) { + const std::string test = "chr5:10041-11708|(21,0,90?/90)"; + EXPECT_EQ(SuppAlignmentAnno(test).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr5")); + } + + TEST_F(GenericChrConverterFixture, SuppAlignmentAnnoTest_ParsingConstructor10) { + const std::string test = "chr18:10007-10586|(8,0,9/9)"; + EXPECT_EQ(SuppAlignmentAnno(test).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr18")); + } + +} // 
namespace sophia \ No newline at end of file diff --git a/tests/SuppAlignment_test.cpp b/tests/SuppAlignment_test.cpp new file mode 100644 index 0000000..98b4892 --- /dev/null +++ b/tests/SuppAlignment_test.cpp @@ -0,0 +1,30 @@ +#include +#include + +#include "GenericChrConverter.h" +#include "GlobalAppConfig.h" +#include "SuppAlignment.h" +#include "Fixtures.h" + +namespace sophia { + + TEST_F(GenericChrConverterFixture, SuppAlignmentTest_ParseSaSupport) { + + const std::string test1 = "chr16:1041693_INV|(8,0,!/0)"; + EXPECT_EQ(SuppAlignment::parseSaSupport(test1).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr16")); + + const std::string test2 = "chr16:32084614-32084615_INV|(0,1,3?/6)"; + EXPECT_EQ(SuppAlignment::parseSaSupport(test2).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr16")); + + const std::string test3 = "|chr4:49107794-49107795(0,1,2?/89)"; + EXPECT_EQ(SuppAlignment::parseSaSupport(test3).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("chr4")); + + const std::string test4 = "HLA-DRB1*13:01:01:2914|(4,0,0?/0)"; + EXPECT_EQ(SuppAlignment::parseSaSupport(test4).getChrIndex(), + GlobalAppConfig::getInstance().getChrConverter().chrNameToIndex("HLA-DRB1*13:01:01")); + } + +} // namespace sophia \ No newline at end of file