-
Notifications
You must be signed in to change notification settings - Fork 0
/
kraken.build
56 lines (40 loc) · 1.93 KB
/
kraken.build
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
. databases.config
DATE=`date +%Y%m%d`
set -e
install -g ${GROUP} --mode=755 -d ${PREFIX}/kraken
install -g ${GROUP} --mode=755 -d ${PREFIX}/kraken/kraken-${DATE}
install -g ${GROUP} --mode=755 -d ${PREFIX}/kraken/kraken-${DATE}/taxonomy
git clone https://github.com/mw55309/Kraken_db_install_scripts.git
cd Kraken_db_install_scripts
patch -p 1 < ../patches/kraken_cachedir.patch
perl download_viral.pl
perl download_archaea.pl
perl download_bacteria.pl
kraken-build --db newDB --download-taxonomy
mkdir additional
cd additional
# human CHM13 t2t genome - https://github.com/marbl/CHM13
../../scripts/kraken_dl.pl https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/analysis_set/chm13v2.0.fa.gz 9606
# Acidoferrum panamensis - taxid 2741543
../../scripts/kraken_dl.pl https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/450/225/GCA_013450225.1_ASM1345022v1/GCA_013450225.1_ASM1345022v1_genomic.fna.gz 2741543
# Acidoferrum typicum - taxid 1953112
../../scripts/kraken_dl.pl https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/478/115/GCA_002478115.1_ASM247811v1/GCA_002478115.1_ASM247811v1_genomic.fna.gz 1953112
cd ..
for d in viral archaea bacteria additional; do
find $d -type f -name \*.fna | while read f; do
dustmasker -in ${f} -outfmt fasta | sed -e '/^>/!s/[a-z]/N/g' > /tmp/xx.fna
mv /tmp/xx.fna ${f}
kraken-build --db newDB --add-to-library ${f}
done
done
kraken-build --threads ${THREADS} --db newDB --build
install -g ${GROUP} --mode=644 newDB/database.idx ${PREFIX}/kraken/kraken-${DATE}
install -g ${GROUP} --mode=644 newDB/database.kdb ${PREFIX}/kraken/kraken-${DATE}
install -g ${GROUP} --mode=644 newDB/taxonomy/names.dmp ${PREFIX}/kraken/kraken-${DATE}/taxonomy/
install -g ${GROUP} --mode=644 newDB/taxonomy/nodes.dmp ${PREFIX}/kraken/kraken-${DATE}/taxonomy/
cd ..
rm -rf Kraken_db_install_scripts
cd ${PREFIX}/kraken/kraken-${DATE}
cd ..
rm -f current
ln -s kraken-${DATE} current