Skip to content

Commit

Permalink
test sra sqlite select
Browse files Browse the repository at this point in the history
  • Loading branch information
ktmeaton committed Apr 14, 2020
1 parent 63b6e59 commit 72831be
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
1 change: 1 addition & 0 deletions annot_biosample.txt
Original file line number Diff line number Diff line change
Expand Up @@ -707,3 +707,4 @@ SAMN13675363 4822 Burkholderia pseudomallei 2018 Viet Nam REMOVE: Not Yersin
SAMN13698231 134 Burkholderia pseudomallei 1985 Thailand Homo sapiens REMOVE: Not Yersinia pestis
SAMN13698233 134K Burkholderia pseudomallei 2019 Thailand REMOVE: Not Yersinia pestis
SAMN08892357 139 Yersinia pestis 1966 Kazakhstan Rhombomis opinus REMOVE: Pairwise alignment 60 percent coverage of reference
SAMEA3541827 PRJEB10885 RISE509 Yersinia pestis ERS848976 SAMEA3325401 "14C cal BC low: -2677, 14C cal BC High: -2887" Russia: Bateni Homo sapiens KEEP
6 changes: 3 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ params{
iqtree_rng = "4154541355"

// SQLite queries used to select records from the Master table
// sqlite_select_command = "\'SELECT AssemblyFTPGenbank FROM Master WHERE BioSampleComment NOT LIKE \"%REMOVE%\"\'"
sqlite_select_command = "\"SELECT AssemblyFTPGenbank FROM Master WHERE (BioSampleComment NOT LIKE \'%REMOVE%\') AND (TRIM(BioSampleHost) > \'\') AND (TRIM(LOWER(BioSampleHost)) IS NOT \'missing\') AND (TRIM(BioSampleCollectionDate) > \'\') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'not applicable\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'unknown\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'n/a\' ) AND (TRIM(BioSampleGeographicLocation) > '') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\') AND (TRIM(AssemblyFTPGenbank) > \'\') \""

// sqlite_select_command_asm = "\'SELECT AssemblyFTPGenbank FROM Master WHERE BioSampleComment NOT LIKE \"%REMOVE%\"\'"
sqlite_select_command_asm = "\"SELECT AssemblyFTPGenbank FROM Master WHERE (BioSampleComment NOT LIKE \'%REMOVE%\') AND (TRIM(BioSampleHost) > \'\') AND (TRIM(LOWER(BioSampleHost)) IS NOT \'missing\') AND (TRIM(BioSampleCollectionDate) > \'\') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'not applicable\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'unknown\' AND TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'n/a\' ) AND (TRIM(BioSampleGeographicLocation) > '') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\') AND (TRIM(LOWER(BioSampleCollectionDate)) IS NOT \'missing\') AND (TRIM(AssemblyFTPGenbank) > \'\') \""
sqlite_select_command_sra = "\"SELECT SRARunAccession FROM Master WHERE (BioSampleComment LIKE \'%KEEP%\')\""
// MultiQC
multiqc_config = "$baseDir/multiqc_config.yaml"

Expand Down
42 changes: 41 additions & 1 deletion pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ if( (params.sqlite || ( params.ncbimeta_update && params.ncbimeta_annot) ) && !p
*/
// Other variables and config
tag "$sqlite"
echo true
publishDir "${outdir}/sqlite_import", mode: 'copy'
// Set the sqlite channel to update or sqlite import depending on ncbimeta mode
// TODO: raise an error if both parameters are specified.
Expand All @@ -261,14 +262,20 @@ if( (params.sqlite || ( params.ncbimeta_update && params.ncbimeta_annot) ) && !p
// Shell script to execute
script:
"""
sqlite3 ${sqlite} ${params.sqlite_select_command} | grep . | head -n ${params.max_datasets} | sed -E -e 's/ |;/\\n/g' | while read line;
# Select the Genbank Assemblies
sqlite3 ${sqlite} ${params.sqlite_select_command_asm} | grep . | head -n ${params.max_datasets} | sed -E -e 's/ |;/\\n/g' | while read line;
do
if [[ ! -z \$line ]]; then
asm_ftp=`echo \$line | \
awk -F "/" -v suffix=${params.genbank_assembly_gz_suffix} '{print \$0 FS \$NF suffix}'`;
echo \$asm_ftp >> ${params.file_assembly_for_download_ftp}
fi;
done;
# Select the SRA Run Accessions
sqlite3 ${sqlite} ${params.sqlite_select_command_sra} | grep . | head -n ${params.max_datasets} | sed -E -e 's/ |;/\\n/g' | while read line;
do
echo "\$line";
done;
"""
}

Expand Down Expand Up @@ -316,6 +323,39 @@ if (!params.skip_assembly_download){
}

}

// -------------------------------------------------------------------------- //
// SRA Download //
// -------------------------------------------------------------------------- //
/*
process sra_download{
Input:
ch_():
Output:
ch_ ():
Publish:
// Other variables and config
tag ""
publishDir
// IO and conditional behavior
input:
output:
// Shell script to execute
script:
"""
"""
}
*/


// -------------------------------------------------------------------------- //
// Reference Genome Processing //
// -------------------------------------------------------------------------- //
Expand Down

0 comments on commit 72831be

Please sign in to comment.