From bd95e3b0eaf39240a48e9ef30cd777de2005da7d Mon Sep 17 00:00:00 2001 From: jonperdomo Date: Tue, 15 Aug 2023 11:14:29 -0400 Subject: [PATCH] Remove some unused files and functions --- include/ComFunction.h | 32 ------ include/ComStruct.h | 244 ------------------------------------------ include/hts_reader.h | 1 - setup.py | 2 +- src/ComFunction.cpp | 46 -------- src/ComStruct.cpp | 102 ------------------ src/bam_module.cpp | 1 - src/fast5_module.cpp | 66 +----------- src/hts_reader.cpp | 1 - src/output_data.cpp | 48 +++------ src/seqtxt_module.cpp | 3 +- tests/SCRIPTS.txt | 102 ------------------ 12 files changed, 21 insertions(+), 627 deletions(-) delete mode 100644 include/ComFunction.h delete mode 100644 include/ComStruct.h delete mode 100644 src/ComFunction.cpp delete mode 100644 src/ComStruct.cpp delete mode 100644 tests/SCRIPTS.txt diff --git a/include/ComFunction.h b/include/ComFunction.h deleted file mode 100644 index 01093a0..0000000 --- a/include/ComFunction.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef COMFUNCTION_H_ -#define COMFUNCTION_H_ - -#include - -#include -#include -#include - -#include -#include - -#include "ComStruct.h" - -#define get_array_size(m_a) sizeof(m_a)/sizeof(m_a[0]) - -// Rounding function used for timing the modules -#define round3(d) (((double)((int)(d*1000+0.5)))/1000) - -#define UNUSED(expr) do { (void)(expr); } while (0) - -#ifdef WINDOWS - #include -#else - #include -#endif - -bool isExpectedLength(char * destination_buffer, const char * input_string, int max_buffer_size); - -std::vector readRepeatDataFromString(const std::string & m_str, std::string multi_delimiters=WhiteSpace, bool contain_delimiter=false); - -#endif diff --git a/include/ComStruct.h b/include/ComStruct.h deleted file mode 100644 index 1851ff6..0000000 --- a/include/ComStruct.h +++ /dev/null @@ -1,244 +0,0 @@ -#ifndef COMSTRUCT_H_ -#define COMSTRUCT_H_ - -#include -#include -#include -#include - -///////////////////////////////// -#define MAX_F5EVENT_SIZE 3000000 -#define MAX_F5SIGNAL_SIZE 15000000 - -#define KMER_SIZE 6 - -#define NORM_SIGNAL_RANGE 4 -#define INVAIL_SIGNAL -1000 - -////////////////////////////////// -#define MODULE_NUM 5 -#define CHAR_SIZE 1024 - -#define WhiteSpace "\t\n\v\f\r " - -// for feature generation for deepmod -// , , -// training need large , , -//#define DP_Max_Instance_size 1000000 -//for testing with smaller , -#define DP_Max_Instance_size 100000 -#define RNN_Window 21 -#define Feature_Size 7 - -////////////////////////////////// - -typedef struct F5Event{ - float mean; - float stdv; - uint64_t start; - uint64_t length; - char model_state[KMER_SIZE]; - uint32_t move; -} F5Event; - -typedef struct F5EventOld1{ - float mean; - float stdv; - float start; - float length; - char model_state[KMER_SIZE]; - uint32_t move; -} F5EventOld1; - -typedef struct F5EventOld0{ - double mean; - double start; - double stdv; - double length; - char model_state[KMER_SIZE]; - uint64_t move; -} F5EventOld0; - - -typedef struct F5AnnoEvent{ - float mean; - float stdv; - uint64_t start; - uint64_t length; - char model_state[KMER_SIZE]; - uint64_t ref_pos; - uint64_t qry_pos; - uint64_t map_type; -} F5AnnoEvent; - -/////////////////////////////////////// -typedef struct RankPos{ - double value; - uint64_t pos; -} RankPos; - - -/////////////////////////////////// -typedef std::basic_string uc8string; - -typedef struct Map1Base{ - char qry_base; - char ref_base; -} Map1Base; - -// 0-based, like bam and bed format -typedef struct Map1BasePos{ - uint64_t qry_pos; - uint64_t ref_pos; - uint64_t map_type; -} Map1BasePos; - -#define Pos_Match 1 -#define Minus_Match 2 -#define Plus_Match 4 -class MapPos1Adj{ - public: - uint16_t adjust; - Map1BasePos map_pos; - - double _t_pos_dif; - double _t_minus_dif; - double _t_plus_dif; - - MapPos1Adj(const Map1BasePos& mps, uint16_t p_adj=0); - void add_adjust(uint16_t p_adj); - int get_adjust_num(); -}; - -typedef struct Map1BasePosPred{ - uint64_t qry_pos; - uint64_t ref_pos; - uint64_t map_type; - float pred; -} Map1BasePosPred; - -typedef struct MapRecord{ - std::string ref_chr; - uint16_t ref_strand; - std::string qry_readname; - uint64_t _start_; - - std::vector map_detail; -} MapRecord; - -class Fast5ReaderRunOption{ -public: - uint64_t group_size; - std::string read_num; - std::string read_id; - - F5Event *f5events; - F5EventOld0 *f5eventsOld0; - F5EventOld1 *f5eventsOld1; - int16_t * f5signals; - int8_t * f5moves; - double *group_dif; - double * group_sum; - - Fast5ReaderRunOption(); - ~Fast5ReaderRunOption(); -}; - -// 0-based, like bam and bed format; end-not-included -typedef struct GenomicRegion{ - uint64_t start_pos; - uint64_t end_pos; - char chrn[CHAR_SIZE]; -} GenomicRegion; - -// 0-based, like bam and bed format; end-not-included -// Data structure for storing a repeat region -typedef struct RepeatRegion{ - uint64_t start_pos; - uint64_t end_pos; - char repeat_size[CHAR_SIZE]; - int len_repeat_unit; -} RepeatRegion; - -////////////////////////////// - -struct kmer_signal_model_struct{ - float signal_mean; - float signal_std; -}; - -class kmer_signal_model_struct_region { - private: - std::vector region_kmer_signal; - uint64_t ref_start_pos; - public: - kmer_signal_model_struct_region(uint64_t p_ref_start_pos); - uint64_t get_ref_end_pos(); - uint64_t get_ref_start_pos(); - void set_ref_start_pos(uint64_t p_ref_start_pos); - const kmer_signal_model_struct& operator[](size_t idx) const; - void add(const kmer_signal_model_struct& ksms); - uint64_t size(); -}; - -struct Ref_Position_Base{ - uint64_t map_pos; - char ref_base; -}; - -struct Ref_Position{ - uint64_t _position_; - uint16_t _strand_; -}; - -struct Signal_ST{ - double z_st; - double mean1; - double mean2; - double std1; - double std2; - - uint64_t depth1; - uint64_t depth2; - - double z_st_nb; -}; - -class ComparedPositionWithSignal{ - public: - Signal_ST signal_st; - Ref_Position_Base ref_pos; - std::vector signal1; - std::vector signal2; - std::vector signal1_seg_num; - std::vector signal2_seg_num; - - ComparedPositionWithSignal(); - void reset(); -}; - -struct F5AnnoIndexRecord{ - std::string f5_file; - std::string qry_name; - std::string f5anno_file; - uint16_t pri_sec_sup; - - //uint16_t map_strand; - std::string map_strand; - std::string map_chr; - uint64_t ref_start_pos; - uint64_t ref_end_pos; -}; - - -struct Pred_Mod_Info{ - char strand; - uint64_t ref_pos; - - uint64_t mod_coverage; - uint64_t unmod_coverage; -}; - - -#endif - diff --git a/include/hts_reader.h b/include/hts_reader.h index 4fa0004..d811db3 100644 --- a/include/hts_reader.h +++ b/include/hts_reader.h @@ -10,7 +10,6 @@ #include #include -#include "ComStruct.h" #include "output_data.h" #define BAM_UN_OPEN 1 diff --git a/setup.py b/setup.py index fbd61f6..89edb2d 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ author="WGLab", description="""A fast and flexible QC tool for long read sequencing data""", ext_modules=[lrst_mod], - script_args=['build_ext', '--inplace', '--build-lib', 'lib'], + script_args=['build_ext', '--build-lib', 'lib'], py_modules=['lrst'], packages=setuptools.find_packages(), headers=project_headers, diff --git a/src/ComFunction.cpp b/src/ComFunction.cpp deleted file mode 100644 index 74c641d..0000000 --- a/src/ComFunction.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include "ComFunction.h" - -#include "glob.h" - -#include -#include -#include - -#include -#include - -#include - - -#include - -// Returns false if the input is larger than the specified buffer size -bool isExpectedLength(char * destination_buffer, const char * input_string, int max_buffer_size){ - int buffer_count = snprintf(destination_buffer, max_buffer_size, "%s", input_string); - if (buffer_count<0 || buffer_count>=max_buffer_size){ - fprintf(stderr, "Input (%s) is larger than the expected buffer size (%d)", input_string, max_buffer_size); - return false; - } - return true; -} - -// Convert the repeat pattern string into a vector containing its data (Should be 4 elements). -std::vector readRepeatDataFromString(const std::string & m_str, std::string multi_delimiters, bool contain_delimiter){ - std::vector m_substr_list; - m_substr_list.reserve(10); - - size_t cur_pos = 0; - size_t last_pos = 0; - while ((cur_pos=m_str.find_first_of(multi_delimiters, last_pos))!=std::string::npos){ - m_substr_list.push_back(m_str.substr(last_pos, cur_pos-last_pos+(contain_delimiter?1:0))); - last_pos = cur_pos+1; - if (last_pos==std::string::npos || last_pos>=m_str.size()){ - break; - } - } - if (!(last_pos==std::string::npos || last_pos>m_str.size())){ - m_substr_list.push_back(m_str.substr(last_pos)); - } - - return m_substr_list; -} diff --git a/src/ComStruct.cpp b/src/ComStruct.cpp deleted file mode 100644 index ce92d8c..0000000 --- a/src/ComStruct.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include "ComStruct.h" - -Fast5ReaderRunOption::Fast5ReaderRunOption(){ - f5events = new F5Event[MAX_F5EVENT_SIZE]; - f5eventsOld0 = new F5EventOld0[MAX_F5EVENT_SIZE]; - f5eventsOld1 = new F5EventOld1[MAX_F5EVENT_SIZE]; - f5signals = new int16_t[MAX_F5SIGNAL_SIZE]; - f5moves = new int8_t[MAX_F5SIGNAL_SIZE]; - group_dif = new double[MAX_F5SIGNAL_SIZE+1]; - group_sum = new double[MAX_F5SIGNAL_SIZE+1]; -} - -Fast5ReaderRunOption::~Fast5ReaderRunOption(){ - delete [] f5events; - delete [] f5eventsOld0; - delete [] f5eventsOld1; - delete [] f5signals; - delete [] f5moves; - delete [] group_dif; - delete [] group_sum; -} - - -kmer_signal_model_struct_region::kmer_signal_model_struct_region(uint64_t p_ref_start_pos){ - region_kmer_signal.reserve(50000); - ref_start_pos = p_ref_start_pos; -} -uint64_t kmer_signal_model_struct_region::get_ref_end_pos(){ - return ref_start_pos + region_kmer_signal.size(); -} - -uint64_t kmer_signal_model_struct_region::get_ref_start_pos(){ - return ref_start_pos; -} - -void kmer_signal_model_struct_region::set_ref_start_pos(uint64_t p_ref_start_pos){ - ref_start_pos = p_ref_start_pos; - region_kmer_signal.clear(); -} - -const kmer_signal_model_struct& kmer_signal_model_struct_region::operator[](size_t idx) const{ - return region_kmer_signal[idx]; -} - -void kmer_signal_model_struct_region::add(const kmer_signal_model_struct& ksms){ - region_kmer_signal.push_back(ksms); -} - -uint64_t kmer_signal_model_struct_region::size(){ - return region_kmer_signal.size(); -} - -MapPos1Adj::MapPos1Adj(const Map1BasePos& mps, uint16_t p_adj){ - map_pos = mps; - adjust = p_adj; - _t_pos_dif = 100; - _t_minus_dif = 100; - _t_plus_dif = 100; - -} -void MapPos1Adj::add_adjust(uint16_t p_adj){ - adjust |= p_adj; -} - -int MapPos1Adj::get_adjust_num(){ - int adj_num = 0; - if ((adjust&Pos_Match)>0){ adj_num+=1; } - if ((adjust&Minus_Match)>0){ adj_num+=1; } - if ((adjust&Plus_Match)>0){ adj_num+=1; } - return adj_num; -} - -ComparedPositionWithSignal::ComparedPositionWithSignal(){ - signal1.reserve(500); - signal2.reserve(500); - signal1_seg_num.reserve(500); - signal2_seg_num.reserve(500); - reset(); -} -void ComparedPositionWithSignal::reset(){ - signal1.clear(); - signal2.clear(); - signal1_seg_num.clear(); - signal2_seg_num.clear(); - - signal_st.mean1 = 0; - signal_st.mean2 = 0; - signal_st.std1 = 0; - signal_st.std2 = 0; - - signal_st.depth1 = 0; - signal_st.depth2 = 0; - - signal_st.z_st = 0; - signal_st.z_st_nb = 0; - - ref_pos.map_pos = 0; - ref_pos.ref_base = 0; -} - - - diff --git a/src/bam_module.cpp b/src/bam_module.cpp index 03b5c15..7cda381 100644 --- a/src/bam_module.cpp +++ b/src/bam_module.cpp @@ -9,7 +9,6 @@ Class for generating BAM file statistics. Records are accessed using multi-threa #include #include "bam_module.h" -#include "ComFunction.h" int BAM_Module::calculateStatistics(Input_Para& input_params, Output_BAM& final_output){ diff --git a/src/fast5_module.cpp b/src/fast5_module.cpp index 9199624..2c8c3bb 100644 --- a/src/fast5_module.cpp +++ b/src/fast5_module.cpp @@ -3,27 +3,15 @@ F5_module.cpp: Class for calling FAST5 statistics modules. */ -#include -#include // std::sort, copy -#include // std::accumulate #include -#include // std::begin, std::end -#include #include -#include // std::ofstream +#include +#include +#include -#include -#include -#include -#include - -#include -#include -#include +#include "H5Cpp.h" #include "fast5_module.h" -#include "ComFunction.h" -#include "H5Cpp.h" using namespace H5; @@ -310,13 +298,8 @@ Base_Signals getReadBaseSignalData(H5::H5File f5, std::string read_name, bool si { // Grab the signal int end_index = base_start_index + block_stride_value; -// std::vector block_signal(block_stride_value); - - // Get the signal from base_start_index to end_index -// block_signal = std::vector(f5signals.begin() + base_start_index, f5signals.begin() + end_index); // Append the signal to the current base signal vector -// called_base_signal.insert( called_base_signal.end(), block_signal.begin(), block_signal.end() ); called_base_signal.insert( called_base_signal.end(), f5signals.begin() + base_start_index, f5signals.begin() + end_index ); // Check whether a basecall occurred @@ -580,47 +563,6 @@ int generateQCForFAST5(Input_Para &_input_data, Output_FAST5 &output_data) read_details_file = _input_data.output_folder + "/FAST5_details.txt"; read_summary_file = _input_data.output_folder + "/FAST5_summary.txt"; - output_data.long_read_info.total_num_reads = ZeroDefault; // total number of long reads - output_data.long_read_info.total_num_bases = ZeroDefault; // total number of bases - - output_data.long_read_info.longest_read_length = ZeroDefault; // the length of longest reads - output_data.long_read_info.n50_read_length = MoneDefault; // N50 - output_data.long_read_info.n95_read_length = MoneDefault; // N95 - output_data.long_read_info.n05_read_length = MoneDefault; // N05; - output_data.long_read_info.mean_read_length = MoneDefault; // mean of read length - - output_data.long_read_info.NXX_read_length.clear(); - output_data.long_read_info.median_read_length = MoneDefault; // median of read length - - output_data.long_read_info.total_a_cnt = ZeroDefault; // A content - output_data.long_read_info.total_c_cnt = ZeroDefault; // C content - output_data.long_read_info.total_g_cnt = ZeroDefault; // G content - output_data.long_read_info.total_tu_cnt = ZeroDefault; // T content for DNA, or U content for RNA - output_data.long_read_info.total_n_cnt = ZeroDefault; // N content - output_data.long_read_info.gc_cnt = ZeroDefault; // GC ratio - - //int64_t *read_length_count; // statistics of read length: each position is the number of reads with the length of the index; - - output_data.long_read_info.read_gc_content_count.clear(); - output_data.long_read_info.read_length_count.clear(); - //output_data.seq_quality_info.base_quality_distribution.clear(); - output_data.seq_quality_info.read_average_base_quality_distribution.clear(); - - output_data.long_read_info.read_length_count.resize(MAX_READ_LENGTH + 1, 0); - // read_length_count[x] is the number of reads that length is equal to x. MAX_READ_LENGTH is a initial max value, the vector can expand if thre are reads longer than MAX_READ_LENGTH. - - output_data.long_read_info.read_gc_content_count.resize(101, 0); - // read_gc_content_count[x], x is a integer in the range of [0, 101). read_gc_content_count[x] means number of reads that average GC is x%. - - output_data.long_read_info.NXX_read_length.resize(101, 0); - // NXX_read_length[50] means N50 read length; NXX_read_length[95] means N95 read length; - - //output_data.seq_quality_info.base_quality_distribution.resize(256, 0); - // base_quality_distribution[x] means number of bases that quality = x. - - output_data.seq_quality_info.read_average_base_quality_distribution.resize(256, 0); - // base_quality_distribution[x] means number of reads that average base quality = x. - // Set up the output summary text file read_details_fp = fopen(read_details_file.c_str(), "w"); if (NULL == read_details_fp) diff --git a/src/hts_reader.cpp b/src/hts_reader.cpp index 7f09dda..75ecff3 100644 --- a/src/hts_reader.cpp +++ b/src/hts_reader.cpp @@ -13,7 +13,6 @@ Class for reading a set number of records from a BAM file. Used for multi-thread #include #include "hts_reader.h" -#include "ComFunction.h" // HTSReader constructor HTSReader::HTSReader(const std::string & bam_file_name){ diff --git a/src/output_data.cpp b/src/output_data.cpp index 8fcfaab..48591dd 100644 --- a/src/output_data.cpp +++ b/src/output_data.cpp @@ -15,10 +15,9 @@ Output_Info::Output_Info(){ // Base class for storing basic QC data Basic_Seq_Statistics::Basic_Seq_Statistics(){ - read_length_count.resize(MAX_READ_LENGTH); - for(int _i_=0; _i_ #include "seqtxt_module.h" -#include "ComFunction.h" size_t SeqTxt_Module::batch_size_of_record=3000; @@ -221,7 +220,7 @@ int SeqTxt_Module::generateStatistics( Output_SeqTxt& t_output_SeqTxt_info){ t_output_SeqTxt_info.global_sum(); auto relapse_end_time = std::chrono::high_resolution_clock::now(); - std::cout<<"Elapsed time (seconds): "<(relapse_end_time - relapse_start_time).count() << std::endl; std::cout<<"sequencing_summary.txt QC "<< (has_error==0?"generated":"failed") << std::endl; diff --git a/tests/SCRIPTS.txt b/tests/SCRIPTS.txt deleted file mode 100644 index af0479d..0000000 --- a/tests/SCRIPTS.txt +++ /dev/null @@ -1,102 +0,0 @@ -srun --mem=30G -c 12 --pty bash -conda build -c bioconda -c conda-forge . - -swig -c++ -python -outdir lib -Iinclude -o src/lrst_wrap.cpp src/lrst.i - -/mnt/isilon/wang_lab/shared/ont_data_of_projects/PeiLab_SingleCell/20221221_SingleCellcDNA_Test/LV_Heart - -fq -t 12 -i /mnt/isilon/wang_lab/shared/ont_data_of_projects/PeiLab_SingleCell/20221221_SingleCellcDNA_Test/LV_Heart/20221221_1520_X4_AMY216_b8b78e5b/fastq_pass/AMY216_pass_b8b78e5b_6ced03d2_100.fastq.gz -o SampleOutputs - -# CDNA tests: -/mnt/isilon/wang_lab/shared/datasets/RNA_Modification/datasets/HeLa_104968/guppy6_basecalled/alignment/aligned.grch38.cdna.bam -bam -t 12 -i /mnt/isilon/wang_lab/shared/datasets/RNA_Modification/datasets/HeLa_104968/guppy6_basecalled/alignment/aligned.grch38.cdna.bam -o SampleOutputs -bam -t 12 -i /mnt/isilon/wang_lab/shared/datasets/RNA_Modification/datasets/HEK293T_103785/guppy6_basecalled/alignment/aligned.grch38.cdna.bam - -samtools view -r chr22 /mnt/isilon/wang_lab/shared/datasets/RNA_Modification/datasets/HeLa_104968/guppy6_basecalled/alignment/aligned.grch38.cdna.bam -o rnaseq_chr22.bam -bam -t 4 -i SampleData/rnaseq_chr22.bam -o RNAModOut - -Performance tests: -/mnt/isilon/wang_lab/shared/datasets/HG002_NA24385/nanopore/ONT_Official/basecalls/gm24385_q20_2021.10/multi_fast5/20210805_1713_5C_PAH79257_0e41e938/alignment/ - -Small RNA dataset: -bam -t 12 -i /mnt/isilon/wang_lab/shared/ont_data_of_projects/directRNA_K562_R9_flowcell/no_sample/20220222_0015_X4_FAO30221_b0226a85 -o RNAOut1 - - -# CAG tandem repeats from base index 19675-22000 - -f5s -t 12 -i /mnt/isilon/wang_lab/shared/datasets/DNA_Methylation/datasets/nanopore-wgs-consortium/WGS/basecalls/chr4/workspace/83/4b070e74-b067-485b-83dd-6c8481d0a3df.fast5 -o CAG_Repeats - -# VBZ-compressed fast5s: -f5s -t 12 -i /mnt/isilon/wang_lab/shared/ont_data_of_projects/PeiLab_SingleCell/20221221_SingleCellcDNA_Test/LV_Heart/20221221_1520_X4_AMY216_b8b78e5b/fast5_pass/AMY216_pass_b8b78e5b_6ced03d2_100.fast5 -o SampleOutputs - -# Additional path variables: -PATH=$PATH:~/miniconda3/envs/lrst_py39/bin;HDF5_PLUGIN_PATH=/home/perdomoj/github/LongReadSum/ont-vbz-hdf-plugin-1.0.1-Linux/usr/local/hdf5/lib/plugin/ - -# Test VBZ plugin: -PATH=$PATH:HDF5_PLUGIN_PATH=/home/perdomoj/github/LongReadSum/ont-vbz-hdf-plugin-1.0.1-Linux/usr/local/hdf5/lib/plugin/ - -# Basic tests: -bam -t 12 -i SampleData/guppy.bam -o SampleOutputs -NanoPlot -t 12 --bam SampleData/guppy.bam -o NanoPlot_Out - -# RAM usage test: -python src/ bam -i /mnt/isilon/wang_lab/shared/datasets/RNA_Modification/datasets/HeLa_104968/guppy6_basecalled/alignment/aligned.grch38.cdna.bam -o LargeBAM - - -python src/ bam -t 12 -i SampleData/guppy.bam -o SampleOutputs - - -KidsFirst-like Large BAM file testing: -/mnt/isilon/wang_lab/shared/datasets/HG002_NA24385/nanopore/guppy.4.2.2/HG002.Guppy_4.2.2_prom.bam - -bam -t 12 -i /mnt/isilon/wang_lab/shared/datasets/HG002_NA24385/nanopore/guppy.4.2.2/HG002.Guppy_4.2.2_prom.bam -o KidsFirstBAM - - -Filetype tests: - -bam -t 12 -i SampleData/guppy.bam -o BamOut_LessDeps -bam -t 12 -i SampleData/pacbio_unmapped_trim.bam -o UBAMOut_LessDeps -fa -i SampleData/fasta_trim1.fa -o FASTAOut_LessDeps -fq -i SampleData/guppy.fastq -o FASTQOut_LessDeps -f5 -i SampleData/kelvin_20160810_FN_MN17519_sequencing_run_160810_na12878_PCRSssI_92870_ch100_read6082_strand.fast5 -o FAST5Out_LessDeps -f5s -i SampleData/kelvin_20160810_FN_MN17519_sequencing_run_160810_na12878_PCRSssI_92870_ch100_read6082_strand.fast5 -o FAST5OutSig_LessDeps -seqtxt -i SampleData/sequencing_summary.txt -o SEQTXTOut_LessDeps - -xport LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/perdomoj/github/LongReadSum/lib -# SCENARIO TESTS - -cd /home/perdomoj/github/LongReadSum -srun --mem=30G -c 12 --pty bash -conda activate lrst_py39 -conda activate longreadsum - -RRMS test: -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/joe/Adaptive_Sampling/RRMS/HG002_RRMS_3/HG002_RRMS_3.sorted.bam -o LongReadSum/output/manuscript/RRMS_HG002 - -direct mRNA test: -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/direct_mRNA/direct_mRNA_merged.bam -o LongReadSum/output/manuscript/dmRNA - -cDNA test: -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/cDNA/cDNA_merged.bam -o LongReadSum/output/manuscript/cDNA - -WGS R9.4 test: -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/WGS/WGS_merged.bam -o LongReadSum/M_WGS -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/WGS_Kit11_R9_4_1/WGS_merged.bam -o LongReadSum/output/manuscript/WGS_R9 -python LongReadSum fq -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/WGS_Kit11_R9_4_1/merged.fastq -o LongReadSum/output/manuscript/WGS_R9_FQ - -WGS R10.4 test: -python LongReadSum bam -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/WGS_Kit14_R10_4_1/WGS_merged_sorted.bam -o LongReadSum/output/manuscript/WGS_R10_r2 -python LongReadSum fq -t 12 -i /mnt/isilon/wang_lab/perdomoj/data/lrs_testing_data/WGS_Kit14_R10_4_1/merged.fastq -o LongReadSum/output/manuscript/WGS_R10_FQ - -Large BAM test: -python LongReadSum bam --thread 4 --input LongReadSum/SampleData/guppy.bam --log test.log --outprefix test_basename --outputfolder LongReadSum/KidsFirstTest - -python LongReadSum bam --thread 4 --input /mnt/isilon/wang_lab/shared/datasets/HG002_NA24385/nanopore/guppy.4.2.2/HG002.Guppy_4.2.2_prom.bam --log test.log --outprefix test_basename --outputfolder LongReadSum/KidsFirstTest2 - -Aquizu BAM test: -sbatch --mem=30G -c 12 --pty bash -python LongReadSum bam --thread 12 --input /mnt/isilon/wang_lab/shared/datasets/HG002_NA24385/nanopore/guppy.4.2.2/HG002.Guppy_4.2.2_prom.bam --log test.log --outprefix test_basename --outputfolder LongReadSum/KidsFirstTest2 - -# Repeat regions: -python . f5s -i /scr1/users/ahsanm1/ont_kit14/basecalls/20221109_1654_5A_PAG65784_f306681d/workspace/PAG65784_pass_f306681d_16a70748_102.fast5 -o output/G4C2_RepeatRegion -python . f5s -i InputRepeat/PAG65784_pass_f306681d_16a70748_102.fast5 -o output/G4C2_RepeatRegion -r 41208c6c-aa37-4df5-86a8-7795015a36a7,41208c6c-aa37-4df5-86a8-7795015a36a7,d57e5a3b-8647-4853-8f8f-166302ab045a,32fedea8-a42e-415b-a945-a50f33dc47a6,77f52939-46a2-48c4-9949-95597ae7b854,5eabff89-c20e-4639-b345-30d17b9feed2,4d57ff17-3489-4ff6-bf51-2a6e111f7f59,eff47b76-de1d-492a-8ec2-f95999b256fb,741a5ce0-bdd0-4d3d-8f85-de8a00045b72,e0b5b1d9-bdfc-4899-acad-951edad4f689,8c23e735-2419-4a7a-9142-89d0cf2fa5fb,ea13ea0b-d186-4482-a59d-a5e81b76a55a,13693f20-a759-4126-a472-c7555e2f09fc,ed2e4ea7-f3b7-4282-bd7b-81bf8753e403,a779f471-d9ed-491d-b72a-f9edd830b557,5a33d2c3-99e3-4f86-bfee-a5d0e9856dc2,2ac47147-011d-410c-9216-89efa2378807,5f218fa5-7459-460b-a9b0-e06e0fa9473d,d01a5cd4-d20f-427b-86ea-ba87dce2d785,6367073b-3fea-4458-b4cd-a7a4ea7cdf4f,9e2aef52-a956-45cd-a0d3-83c1fd645901,c8c5e3a3-3d7a-4782-a19e-980fe2c28840,3f062064-c151-485e-86ef-0d3a67fcb98b,50acc532-70da-4b37-894a-53afc59d38b2,4a4e7021-9cb6-4df5-852a-2504fb8d396f,ced37b99-cdcd-4988-8a35-599a5ef9d57c,24c55c33-9cae-4e3d-9b31-a7b5b9c562e4,88f5b081-7f05-411f-a135-0049e10733eb,99f5e3a5-bf3c-4ecf-b5a5-1336c83b89b7,3cfceb00-48d8-4331-92ee-7b04931689ae,4990ca3c-d209-4afd-8fca-5784145f7487,