Skip to content

Commit

Permalink
Add CDD to databases downloader #410
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Feb 10, 2021
1 parent 04b27f9 commit d5717e8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
15 changes: 14 additions & 1 deletion data/workflow/databases.sh
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ case "${SELECTION}" in
fi
INPUT_TYPE="eggNOG"
;;
"CDD")
    # NCBI Conserved Domain Database: fetched as a tarball of FASTA MSAs.
    # The guard tests the same path the tarball is written to below, so an
    # already-downloaded archive is reused instead of being fetched again
    # (nothing in this arm ever creates "msa.msa.gz").
    if notExists "${TMP_PATH}/msa.tar.gz"; then
        downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.info" "${TMP_PATH}/version"
        downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/fasta.tar.gz" "${TMP_PATH}/msa.tar.gz"
    fi
    INPUT_TYPE="FASTA_MSA"
    # sed expression that removes the ".FASTA" suffix; the FASTA_MSA stage
    # applies it to the msa.lookup file when this variable is non-empty.
    FASTA_MSA_SED='s|\.FASTA||g'
    # Extra argument appended to the msa2profile call for this database.
    FASTA_MSA_MSA2PROFILE_PAR="--skip-query"
    ;;
"Resfinder")
if notExists "${TMP_PATH}/download.done"; then
downloadFile "https://api.bitbucket.org/2.0/repositories/genomicepidemiology/resfinder_db/commit/master?fields=hash,date" "${TMP_PATH}/version"
Expand Down Expand Up @@ -302,9 +311,13 @@ case "${INPUT_TYPE}" in
# shellcheck disable=SC2086
"${MMSEQS}" tar2db "${TMP_PATH}/msa.tar.gz" "${TMP_PATH}/msa" --output-dbtype 11 ${THREADS_PAR} \
|| fail "tar2db died"
if [ -n "${FASTA_MSA_SED}" ]; then
sed "${FASTA_MSA_SED}" "${TMP_PATH}/msa.lookup" > "${TMP_PATH}/msa.lookup_tmp"
mv -f "${TMP_PATH}/msa.lookup_tmp" "${TMP_PATH}/msa.lookup"
fi
rm -f "${TMP_PATH}/msa.tar.gz"
# shellcheck disable=SC2086
"${MMSEQS}" msa2profile "${TMP_PATH}/msa" "${OUTDB}" --match-mode 1 --match-ratio 0.5 ${THREADS_PAR} \
"${MMSEQS}" msa2profile "${TMP_PATH}/msa" "${OUTDB}" --match-mode 1 --match-ratio 0.5 ${FASTA_MSA_MSA2PROFILE_PAR} ${THREADS_PAR} \
|| fail "msa2profile died"
if [ -n "${REMOVE_TMP}" ]; then
# shellcheck disable=SC2086
Expand Down
7 changes: 7 additions & 0 deletions src/workflow/Databases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ std::vector<DatabaseDownload> downloads = {{
"https://xfam.wordpress.com/2020/06/30/a-new-pfam-b-is-released",
false, Parameters::DBTYPE_HMM_PROFILE, databases_sh, databases_sh_len,
{ }
}, {
// CDD entry of the downloads table: database name, one-line description,
// citation, and homepage URL, in that order.
"CDD",
"Conserved Domain Database is a protein annotation resource consisting of well-annotated MSAs for ancient domains and full-length proteins.",
"Lu et al: CDD/SPARCLE: the conserved domain database in 2020. Nucleic Acids Res 48(D1), D265–D268 (2020)",
"https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml",
// Registered with the HMM profile db type and the same databases_sh /
// databases_sh_len pair as the preceding entry.
// NOTE(review): the meaning of the leading `false` is not visible in this
// hunk — confirm against the DatabaseDownload declaration.
false, Parameters::DBTYPE_HMM_PROFILE, databases_sh, databases_sh_len,
// Trailing list is left empty for this entry.
{ }
}, {
"eggNOG",
"eggNOG is a hierarchical, functionally and phylogenetically annotated orthology resource",
Expand Down

0 comments on commit d5717e8

Please sign in to comment.