-
Notifications
You must be signed in to change notification settings - Fork 180
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
Squashed commit of the following:
commit cf66ef8a27146afe575b64e135d117b212f7bd64 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 14:04:42 2024 +0100 Updated tests. commit eae157eaae352ce9b38c268cd647ff5ffa6bdf61 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 13:57:45 2024 +0100 Updated changelog. commit 6b2f8ebc1d421cd096ee9df7c5951fc373e54982 Merge: 8666f7e8 0c888e84 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 13:23:40 2024 +0100 Merge branch 'master' into dev commit 0c888e84367dfba1d1ab6d23a3aa663ad3c61440 Merge: 19c730d5 56f5d14 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 13:23:25 2024 +0100 Merge branch 'master' of https://github.com/bbuchfink/diamond commit 8666f7e853efe8c8a1d7d0bdde46ad810db7475c Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 12:34:15 2024 +0100 Fixed output. commit a70a755c04135b310d119e69ab9a899a6ace94b0 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 31 11:09:09 2024 +0100 Changed max block size for vsens mode. commit df9be960f04c8c41b49495746790e44784e7b138 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 16:56:36 2024 +0100 Fixed taxon format. commit 565bd4fd37d4116cb6cbd147eed851fd627c81af Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 16:45:54 2024 +0100 Fixed error. commit 67df3d4af021e7d39fa559064b3122dd825ab96b Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 16:43:01 2024 +0100 Added taxon lineage option. commit e4203e82775fb50789bfe5826b0dc50aa2fc8e67 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 15:57:39 2024 +0100 Added gapped seqs to help. commit 98b64461cae30e158fbd2e27eaf9d00d9fb53a59 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 15:47:50 2024 +0100 Fixed warnings. commit ab40c4a23dc16a11c9af50657ba40bf3ea734f8d Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 15:41:20 2024 +0100 Fixed warnings. 
commit d9dc1ae43167fb8f496b97742e021b0300b24526 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 15:26:53 2024 +0100 Fixed warnings. commit ad6c31999575a2f0b3e84192a91a73fa0eab473e Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 15:02:00 2024 +0100 Updated version. commit 30f509a8476c8b1ed4dd1497244da554301412ae Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 14:39:42 2024 +0100 Added shapes-30x10 mode. commit dc64958415be88c5ecfbb4b47e48ad5c34f78e4d Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 14:29:17 2024 +0100 Add per round cutoff options. commit 4193abd0cf7da091bd25b794c278577075c0b9f8 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 13:43:06 2024 +0100 Added setting ccd per round. commit d78caa14b78cd8f690a02df08ef7951934486e41 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jan 30 12:59:40 2024 +0100 Added shapes30x10 mode. commit 30221fa8e1b1564fc28ab669a4f08a89da45b44b Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 29 16:16:44 2024 +0100 Removed famsa, incremental clustering. commit ba15651e286bfc4460343faeb5b08dd98bc97176 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 29 15:58:38 2024 +0100 Update reassign for mutual cover. commit cf5a84ec02058870e0c42020c66dcf423baf4383 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 29 15:30:01 2024 +0100 Updated recluster for mutual coverage. commit e6032ba102c805d565096b4cfc1e0c0d34ad9168 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jan 26 17:52:49 2024 +0100 Update recluster for mutual cover. commit bdee2616dfbe9195841bac59438d5cc44733a5d4 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jan 26 16:37:24 2024 +0100 Fixed issue with length lookup. commit 739be0ea92aee7d32837ab39c70efc2a792f37aa Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jan 26 11:20:14 2024 +0100 Fixed issue with round values. 
commit 34f0b3a73204117b41efc20883fa583e3a89339a Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Jan 25 13:28:44 2024 +0100 Reworked storing target ids in hits logic. commit 76e17c7443d4362bd821f855a2e8dc8598c67710 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 24 18:00:39 2024 +0100 Rework target ids in seed array. commit 31f0f34a3c06e52c9a87f565260375376601553e Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 24 15:08:28 2024 +0100 Fixed error. commit 69680a20ad46fbc9618d5fca59cb7b76104946ca Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 24 13:20:32 2024 +0100 Use ccd=0 in last round. commit a428f0d9dfcd3225ec34bb358984ac1c6a2e4cd0 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 24 13:07:51 2024 +0100 Added round approx id setting. commit 261cf75522b9a703644329bfb24164d53d126e84 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jan 24 12:49:19 2024 +0100 Fixed error. commit 1f3f8a7acd480f4814f5a0f9eaee5b17630c7b11 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 22 17:00:42 2024 +0100 Rework round coverage option commit fb55d8eba76e8726b241d3f4fee81e73922af753 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 22 15:55:01 2024 +0100 Apply evalue max in cascaded clustering. commit ab498d0bbffe523b4e3087149cd7b6be5c29b096 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jan 22 15:25:31 2024 +0100 Apply coverage increment. commit f5dd1367549c7932185c78218b0eb6c4618e7e01 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jan 19 16:49:23 2024 +0100 Fixed evalue calculation. commit 87aff2d0eab54fc4440a5046f8d06f12440fec1b Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jan 12 12:26:15 2024 +0100 Added shapes. commit a49026b0319b04675ac3fa65c851bb116f792c43 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Jan 11 14:52:17 2024 +0100 Added new weight 10 modes. 
commit 590e1756fd198b288b4010414010d8b9baed089a Author: Vincent Spath <90271444+v-spath@users.noreply.github.com> Date: Wed Nov 29 17:24:01 2023 +0100 DNA only mapping (#18) * seed repetition cutoff * remove overhead * back to array * test * repetitive filter * changed float to double for parser * fixed count typo * removed io header * min heap seed filtering * fix heap * filter multithreaded * multithreaded optimized * locked guard * test new multithreading filter * filter multithreaded optimized * pass thread index during creation of thread * check smallest element before adding to queue * remove unordered set, replace by checking against cutoff value * refactor * new heuristics * add ungapped extension to set * new timer * temp * new timer * ssh * d * Delete Arabidopsis_thaliana.TAIR10.dna_sm.toplevel.fa.gz * new timer * timer * remove timer * temp * Delete Arabidopsis_thaliana.TAIR10.dna_sm.toplevel.fa.gz * temp * Delete Arabidopsis_thaliana.TAIR10.dna.chromosome.Pt.fa.gz * temp * Delete Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz * refactor * seed filter * target filter * target filter * target filter * revert * r_ungapped to score * fix * wfadaptive * setheuristicnone * remove ungapped eval * repetition cutoff 0 * fix filter for small index * backup chaining and apple specifics * revert * test ksw2 for mac * chaining and bug fixes and apple specifics * backtracking * backup chaining, with old out commented code, 1 bug left * working_chaining * add dna chaining condition * basic chaining * Delete src/align/align.cpp.save * repetition-cutoff into WITH_DNA * Delete libwfa2cpp.a * add fast approximation of log2 * Delete setup.cpp.save * delete binary obj * remove key in RepetitiveCounter * removed vector of priority_queues as class member * Update dna_index.cpp check size repetitive before adding to queue * extended struct SeedMatch * fixed bug 100 % ungapped score * struct for data an chain * removed std output * update chain.cpp * change built queue * add 
sorting before chaining * chaining only for one target at a time * head * comments * include wfa2 in comment * include wfa comment * Update log2_fast.h * chain_end two parameters * fixed bug backtrack * make variables const if possible * parameters struct * struct anchor data * update documentation * backtrack simplification * update backtrack * delete wrong commit * map hsp * remove RepetitiveCounter struct * updated chain struct * construction Chain Object * chain Standard Komparator * use iterator * mapping * correction syntax * update indices * mapping quality * fixed index t_id * mapping output includes n_anchors (cm) * remove redundancy * fixed bug match build * primary computation for all targets * overlap percentage of shorter chain * fix compile error * dna extension * with dna compile * debug test * Update extension.cpp * fix segmentation fault * test why slow * test only map best * test map percentage * test 1 * sort score * todo * move semantics * forward/reverse together * test seed lookup * Update extension.cpp * add chaining penalties * new primary chain computation * Update extension.cpp * Update extension.cpp * simplify chain structure * use PAF when only mapping * Update config.cpp * compile error on mac * Merge branch 'personal_dev' into dev # Conflicts: # src/basic/config.cpp # src/run/config.cpp # src/run/double_indexed.cpp # src/search/stage0.cpp * Todo: align long reads * basic chaining alignment * fix bug short reads reverse (now correct) * chain alignment * integrate ksw2 correctly * update WFA * corrected anchor alignment * fixed residue matches mapping * change semantics * refacator chain alignment * integrate WFA * correct primary computation * Update extension.h * chaining penalties scaling factors * correction chain alignment * compute correct alignment score * correct paf format DNA * clean up includes * filter chains * comments * wfa low memory mode * filter chains with lower bound --------- Co-authored-by: Dimi <dimitrios_K@gmx.de> 
Co-authored-by: Dimitrios Koutsogiannis <dkoutso@taco.eb.local> Co-authored-by: Dimi99 <73211787+Dimi99@users.noreply.github.com> Co-authored-by: vinceaps <90271444+vinceaps@users.noreply.github.com> Co-authored-by: Benjamin Buchfink <buchfink@gmail.com> commit b8f12f41864bc2c2795bcf9ed22b867d717bc5da Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Nov 22 10:52:38 2023 +0100 Add cmake flag for famsa. commit 7fdecdb2879997b6b6a1079ed28ee2ecacf25229 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Nov 15 16:52:41 2023 +0100 Added hit culling. commit e3eadb911af4df39aa08bf0f560ec1068d4f5211 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Nov 9 17:02:54 2023 +0100 Fixed non x86 compile. commit b258c2f5be5421c5c80e44b77b4870d24ac6c603 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Nov 9 16:58:40 2023 +0100 Fixed non x86 compile. commit e055ce95c60e769da4f496232108b3968d0c4bf9 Author: Vincent Spath <90271444+v-spath@users.noreply.github.com> Date: Tue Nov 7 15:59:40 2023 +0100 DNA chaining (#17) * seed repetition cutoff * remove overhead * back to array * test * repetitive filter * changed float to double for parser * fixed count typo * removed io header * min heap seed filtering * fix heap * filter multithreaded * multithreaded optimized * locked guard * test new multithreading filter * filter multithreaded optimized * pass thread index during creation of thread * check smallest element before adding to queue * remove unordered set, replace by checking against cutoff value * refactor * new heuristics * add ungapped extension to set * new timer * temp * new timer * ssh * d * Delete Arabidopsis_thaliana.TAIR10.dna_sm.toplevel.fa.gz * new timer * timer * remove timer * temp * Delete Arabidopsis_thaliana.TAIR10.dna_sm.toplevel.fa.gz * temp * Delete Arabidopsis_thaliana.TAIR10.dna.chromosome.Pt.fa.gz * temp * Delete Arabidopsis_thaliana.TAIR10.dna.toplevel.fa.gz * refactor * seed filter * target filter * target filter * target filter * 
revert * r_ungapped to score * fix * wfadaptive * setheuristicnone * remove ungapped eval * repetition cutoff 0 * fix filter for small index * backup chaining and apple specifics * revert * test ksw2 for mac * chaining and bug fixes and apple specifics * backtracking * backup chaining, with old out commented code, 1 bug left * working_chaining * add dna chaining condition * basic chaining * Delete src/align/align.cpp.save * repetition-cutoff into WITH_DNA * Delete libwfa2cpp.a * add fast approximation of log2 * Delete setup.cpp.save * delete binary obj * remove key in RepetitiveCounter * removed vector of priority_queues as class member * Update dna_index.cpp check size repetitive before adding to queue * extended struct SeedMatch * fixed bug 100 % ungapped score * struct for data an chain * removed std output * update chain.cpp * change built queue * add sorting before chaining * chaining only for one target at a time * head * comments * include wfa2 in comment * include wfa comment * Update log2_fast.h * chain_end two parameters * fixed bug backtrack * make variables const if possible * parameters struct * struct anchor data * update documentation * backtrack simplification * update backtrack * delete wrong commit * remove RepetitiveCounter struct * updated chain struct * construction Chain Object * chain Standard Komparator * use iterator * correction syntax * fixed index t_id * move semantics --------- Co-authored-by: Dimi <dimitrios_K@gmx.de> Co-authored-by: Dimitrios Koutsogiannis <dkoutso@taco.eb.local> Co-authored-by: Dimi99 <73211787+Dimi99@users.noreply.github.com> Co-authored-by: vinceaps <90271444+vinceaps@users.noreply.github.com> Co-authored-by: Benjamin Buchfink <buchfink@gmail.com> commit 19c730d59620efa02e9b7a6928283962767a64c3 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Nov 7 15:54:53 2023 +0100 Add output field for lineage. 
commit 7cb96085fa2950f0d500e468331d0f7a8367c182 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Nov 7 10:48:11 2023 +0100 Check for identical block. commit 8438e3387278dad3405ed9c2f7b1b6a29be94407 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Oct 26 14:23:56 2023 +0200 remove alignment computation. commit 52649efac7cc76c4ab7822c7848cd74763582c07 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Oct 25 17:35:38 2023 +0200 Filter by mutual coverage. commit 1bfa4e2a82e52fb91241c48eeb349a29f7dbed53 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Oct 25 17:08:11 2023 +0200 Added alignment. commit 1149045d6c55fcf40604248e57c8c93b3ddbd4ab Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Oct 25 12:55:15 2023 +0200 Fix centroid lookup. commit dcef57b958768f36c69dc6a1c19527082a043f9e Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Oct 24 16:35:01 2023 +0200 Added parallel processing. commit c0ebcc97a3f0201ca5a271199795a144f97c49c5 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Oct 24 13:48:51 2023 +0200 Fixed bug. commit ff8b053116a7df875e59fafff7e933da10c2bddf Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Oct 24 13:43:18 2023 +0200 Check for indirection. commit 539416b50a933cde0594c9a110b4945d6c366d57 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Oct 24 12:33:32 2023 +0200 Write output. commit 0fdfe23a8695efeb8e824fb07e79a959e5ce317a Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Oct 23 16:59:07 2023 +0200 Process alignments. commit 83b0442e0efc20f28e703d16ebf52503015189d0 Merge: 8d0ce419 5171828f Author: Benjamin Buchfink <buchfink@gmail.com> Date: Sat Oct 21 14:39:44 2023 +0200 Merge branch 'dev' of https://github.com/bbuchfink/diamond_dev into dev commit 8d0ce419c8bf9c835403ca1afd1ee0b8be5d4b5e Author: Benjamin Buchfink <buchfink@gmail.com> Date: Sat Oct 21 14:39:34 2023 +0200 Fixed error. 
commit 5171828faf97348653d5447f4765e8c041075c58 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Oct 20 14:50:12 2023 +0200 Fixed error. commit 367ac23fc68acbb4c195d6136f5d83d2e24a9770 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Oct 19 17:38:31 2023 +0200 Added profile alignment. commit 03b51a98073196fb03494979fa03ee917448b376 Merge: 13a3e512 6096ef28 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Oct 18 16:26:27 2023 +0200 Merge branch 'dev' of https://github.com/bbuchfink/diamond_dev into dev commit 13a3e512efd362d6746359fd17580fc4dfee6908 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Oct 18 16:26:08 2023 +0200 Fixed blastdb. commit 6096ef2841d6e0c87d58292b4238eb9ce3d6b17d Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Oct 17 16:53:37 2023 +0200 Added local alignment of profiles. commit 8a5eca0d72348012775177f929447ddcfd5db0a7 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Oct 13 15:32:15 2023 +0200 Fix leak. commit d2563fd8c85c6891064ce2a8010a5c38e94c63b9 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Oct 13 15:04:16 2023 +0200 Added profile-recluster workflow. commit d4b638e4fee52ae4024d35e109e81bed1f9ee32d Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Oct 12 17:03:33 2023 +0200 Added MSA computation. commit a7e375040af808cdea6ae2bfc9a43b2fb285b1db Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Oct 12 14:24:55 2023 +0200 Added FAMSA. commit 88119a03726d7288c406237d0b975c1ad304c426 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Sep 27 16:52:47 2023 +0200 Use letter count of subdb in clustering. commit bed1a484f7f28beb654d12817431c8a7610faca0 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Sep 27 16:28:07 2023 +0200 Added length lookup. commit 81843d6fa42081219f89d0833838b8d481dfec23 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Sep 26 13:54:08 2023 +0200 Use coverage increment in earlier rounds. 
commit 53a20c61e5f4e68b41505bc08dcb776070cbca35 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Sep 26 13:40:00 2023 +0200 Use ccd=0 in linear stages. commit e5699a10fb0b8da76f1152ac5773d3cafdb304a1 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Sep 18 16:52:45 2023 +0200 Use median sequence. commit da749bc1fb1826bebbeadd80ad8a46adc70dc118 Merge: eb336685 a1c35d03 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Sep 18 11:44:00 2023 +0200 Merge branch 'dev' of https://github.com/bbuchfink/diamond_dev into dev commit eb33668574a28a84ec585613c8d31de644d89ff6 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Sep 18 11:43:25 2023 +0200 Fixed subject source range. commit a1c35d03f4ba0f4a56461d431df6dbde277b85eb Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Sep 11 14:50:49 2023 +0200 Auto set min length ratio. commit 612df402ca9fafc2c5cfed2d54b62354041edb8f Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Sep 11 13:43:35 2023 +0200 Added mutual cover clustering. commit c909a44381fa52d6a359d44b607fedc59690eeff Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Sep 8 11:15:33 2023 +0200 Added connected component depth. commit b613c12c6d4f5abaa9914075626414f39392b7b0 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Sep 6 16:58:28 2023 +0200 Added cc clustering. commit abd593504534f23632b98787f5053d5d93d23dc4 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Sep 5 17:04:45 2023 +0200 Added callback for bidirectional clustering. commit 76b222058f50e2fee6664159e0df47cb518b667b Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Sep 5 15:46:27 2023 +0200 Added --no-reassign to gvc. commit 268090426a40c2b22f04b8a0ced5101b6195ba5d Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Sep 5 10:31:51 2023 +0200 Fixed approx_id for anchored swipe. commit 4acb106d2495d0cd67481567607c81184081fbf9 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Aug 28 14:56:25 2023 +0200 Fixed bug. 
commit ccae0326a15e745e07c54edced47e3284b92c949 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Aug 28 13:26:12 2023 +0200 Added length ratio filter. commit 43eb577fd11eca0a84901b558013a409edcfd5c6 Merge: 49c3fffb 7d89b2b0 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Aug 25 16:04:42 2023 +0200 Merge branch 'dev' of https://github.com/bbuchfink/diamond_dev into dev commit 49c3fffb58f22405b1496b2f18fe8c04e35298b2 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Aug 25 16:03:43 2023 +0200 Added len ratio filter. commit 7d89b2b0197901a8727e2814ea35605e2cf5cdaf Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Jul 20 16:12:54 2023 +0200 Fixed warning. commit 6c4c4d7994f80f8603027cabf1d966ead048b428 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Thu Jul 20 16:02:58 2023 +0200 Added sorting. commit 97c350241a5b309b3fc1862b3666365834cfbe39 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jul 19 11:23:49 2023 +0200 Fixed full_qqual field. commit 4ba1920cbe2d4ed3cb45d803126296a2bcc1a775 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jul 18 18:50:34 2023 +0200 Add len-ratio filter. commit 58bf56ec153b9f5944b672db9f136aec0e1af705 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Tue Jul 18 16:19:29 2023 +0200 Added symmetric option to gvc. commit 4d8f6d6aee2d895d7a62ace47689432dc31e6de3 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Mon Jul 17 16:43:32 2023 +0200 Added mutual cov lin-stage1. commit 6693d01a2004346cd35f209f448aa5803a347737 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jul 14 16:59:11 2023 +0200 Added linclust stage with mutual cov. commit dd3fb04acc23760257664ea6c4f6127dd576f8c6 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jul 14 12:41:03 2023 +0200 Added mutual coverage. commit 97c1e2e54dd5a540800b022a7e3bb513ae5da429 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jul 12 15:55:04 2023 +0200 Fixed sam query field. 
commit 2af1fa4028f30bbb9da4363d80f268ead311e3f5 Merge: e1d1c047 23a1ba7 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Wed Jul 12 15:54:11 2023 +0200 Merge branch 'master' into dev commit e1d1c047d6459ced42b85ce897a5d42a7640714e Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jun 23 15:11:53 2023 +0200 Added length ratio filter. commit 59a52cd0bcf1458bc311b36ba3911fd00bee1319 Merge: 561b9632 af4fc64 Author: Benjamin Buchfink <buchfink@gmail.com> Date: Fri Jun 23 14:24:04 2023 +0200 Merge branch 'master' into dev commit 561b96324e32731019b3e3ca7d3df5b54caa9217 Merge: 70409de0 749de49c Author: Dimi99 <73211787+Dimi99@users.noreply.github.com> Date: Wed Jun 21 14:21:55 2023 +0200 Merge pull request #12 from Dimi99/dev Add the WFA Extension-Option commit 749de49c7d90b6fee3c331bd009fb86dfcce4557 Merge: 141f517e 52cdcbae Author: Dimi <dimitrios_K@gmx.de> Date: Wed Jun 21 14:13:18 2023 +0200 Merge branch 'dev' of github.com:Dimi99/diamond_dev into dev commit 141f517ee52480501ac3a79b72be8ca05818cde0 Author: Dimi <dimitrios_K@gmx.de> Date: Wed Jun 21 14:13:09 2023 +0200 changes commit 52cdcbae9023e38c94235778a33abb4542425b6a Merge: c6c9bfff 70409de0 Author: Dimi99 <73211787+Dimi99@users.noreply.github.com> Date: Wed Jun 21 12:35:12 2023 +0200 Merge branch 'dev' into dev commit c6c9bfff21d275992a55da542553120d6af0231a Author: Dimi <dimitrios_K@gmx.de> Date: Wed Jun 21 12:31:25 2023 +0200 cmake commit 9a19d041c72f09aca50a41913b2adb8e4f10eae8 Author: Dimi <dimitrios_K@gmx.de> Date: Wed Jun 21 12:30:59 2023 +0200 cmake commit 5dd3ac5e6cd6db4062e2da9b726d155ba2f823e6 Author: Dimi <dimitrios_K@gmx.de> Date: Wed Jun 21 12:23:41 2023 +0200 timer
Showing
200 changed files
with
6,108 additions
and
448,714 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,297 @@ | ||
/**** | ||
DIAMOND protein aligner | ||
Copyright (C) 2023 Vincent Spath | ||
Code developed by Vincent Spath <vincent.spath@tuebingen.mpg.de> | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
****/ | ||
|
||
#include "chain.h" | ||
#include "../util/util.h" | ||
#include <cstdint> | ||
#include "../util/math/log2_fast.h" | ||
|
||
namespace Dna { | ||
|
||
|
||
/** | ||
* find the start of a chain | ||
* @param max_drop | ||
* @param score_end | ||
* @param index_end | ||
* @param aD | ||
* @return index of the start of the chain | ||
*/ | ||
static int64_t chain_start(const int32_t max_drop, const uint64_t score_end, const uint64_t index_end, const AnchorData &aD) { | ||
|
||
int64_t i = index_end; | ||
int64_t max_i = i; | ||
int32_t max_s = 0; | ||
// check for invalid or visited anchor | ||
if (i < 0 || aD.temp_marking[i] != 0) return i; | ||
// iterate over all anchors in chain while anchor has not been visited | ||
do { | ||
// move to the previous anchor in the chain | ||
i = aD.predecessor_anchor[i]; | ||
// compute score difference between anchors (if valid anchor) | ||
const int32_t s = i < 0 ? score_end : (int32_t)score_end - aD.best_score_anchor[i]; | ||
// new max score? (best extension so far) | ||
if (s > max_s) { | ||
max_s = s; | ||
max_i = i; | ||
} | ||
// chain does not extend if score drops too much between anchors | ||
else if (max_s - s > max_drop) break; | ||
} while (i >= 0 && aD.temp_marking[i] == 0); | ||
return max_i; | ||
} | ||
|
||
|
||
/** | ||
* backtrack to find chains | ||
* @param n | ||
* @param aD | ||
* @param min_chain_score | ||
* @param max_drop | ||
* @param hits | ||
* @return vector of chains | ||
*/ | ||
std::vector<Chain> chain_backtrack(AnchorData &aD, const int min_chain_score, const int max_drop, const SeedMatch *begin, bool reverse) { | ||
|
||
// potential starting positions of valid chains (score, index) | ||
std::vector<std::pair<uint64_t, uint64_t>> z_end; | ||
for (int64_t i = 0; i < aD.best_score_anchor.size(); ++i) | ||
if (aD.best_score_anchor[i] >= min_chain_score) z_end.emplace_back(aD.best_score_anchor[i], i); | ||
|
||
// sort by score | ||
std::sort(z_end.begin(), z_end.end()); | ||
|
||
// size of z_end is the number of potential end positions of valid chains | ||
const int64_t n_z = z_end.size(); | ||
if (n_z == 0) return {}; | ||
|
||
// chains found during the chaining process | ||
std::vector<Chain> chains; | ||
aD.temp_marking.assign(aD.temp_marking.size(), 0); | ||
// iterate over all potential end positions (highest to lowest) | ||
for (int64_t k = n_z - 1; k >= 0; --k) { | ||
// position not been used yet? | ||
if (aD.temp_marking[z_end[k].second] != 0) continue; | ||
// calculate the end of the current chain | ||
const int64_t start_i = chain_start(max_drop, z_end[k].first, z_end[k].second, aD); | ||
// iterate over all positions in chain, marks used anchors | ||
Chain chain_t = Chain(reverse); | ||
int64_t i; // so? brauche es ja für sc später | ||
for (i = z_end[k].second; i != start_i; i = aD.predecessor_anchor[i]) { | ||
aD.temp_marking[i] = 1; | ||
chain_t.anchors.emplace_back(begin[i].i(), begin[i].j()); | ||
} | ||
// score of current chain | ||
const int32_t sc = i < 0 ? z_end[k].first : (int32_t)z_end[k].first - aD.best_score_anchor[i]; | ||
// valid chain? | ||
// prev_n_chain_anchors stores the number of anchors before processing the current chain | ||
if (sc >= min_chain_score && !chain_t.anchors.empty()){ | ||
chain_t.target_id = begin->id(); | ||
chain_t.chain_score = sc; | ||
chains.push_back(std::move(chain_t)); | ||
} | ||
} | ||
return chains; | ||
} | ||
|
||
|
||
/** | ||
* compute score between two anchors | ||
* @param hit_i | ||
* @param hit_j | ||
* @param q_span | ||
* @param p | ||
* @return score of the extension | ||
*/ | ||
int32_t compute_score(const SeedMatch &hit_i, const SeedMatch &hit_j, int q_span, const ChainingParameters &p) { | ||
|
||
// distance on query | ||
const int32_t dq = hit_i.i() - hit_j.i(); | ||
if (dq <= 0 || dq > p.MAX_DIST_X) return INT32_MIN; | ||
|
||
// distance on target | ||
const int32_t dr = hit_i.j() - hit_j.j(); | ||
if (dr == 0 || dr > p.MAX_DIST_Y) return INT32_MIN; | ||
|
||
// absolute difference (in positions) between query and target | ||
const int32_t dd = dr > dq? dr - dq : dq - dr; | ||
|
||
// too big distance on query or target | ||
if (dd > p.BAND_WIDTH) return INT32_MIN; | ||
|
||
// smaller distance on query or target (gap) | ||
const int32_t dg = std::min(dr, dq); | ||
|
||
// initial score: smaller q_span or gap | ||
int32_t sc = std::min(q_span, dg); | ||
if (dd || dg > q_span) { | ||
float lin_pen = p.CHAIN_PEN_GAP * (float)dd + p.CHAIN_PEN_SKIP * (float)dg; | ||
float log_pen = dd >= 1? log2_ap(dd + 1) : 0.0f; // log2() only works for dd>=2 | ||
sc -= (int)(lin_pen + .5f * log_pen); | ||
} | ||
|
||
return sc; | ||
} | ||
|
||
|
||
|
||
/** | ||
* identifies the primary chains and their mapping quality | ||
* @param chains | ||
* @param kmer_size | ||
*/ | ||
void compute_primary_chains(std::vector<Chain> &chains, const int kmer_size) { // new version | ||
|
||
std::vector<int> score_secondary(chains.size(), 0); | ||
// first chain is always primary | ||
std::vector<size_t> primary_chains = {0}; | ||
std::vector<int> chain_span; | ||
chain_span.reserve(chains.size()); | ||
// pre compute chain span | ||
for (Chain chain : chains) { | ||
chain_span.push_back(chain.anchors[0].i + kmer_size - chain.anchors.back().i); | ||
} | ||
|
||
// iterate over all chains | ||
for (size_t i = 1; i < chains.size(); ++i) { | ||
// chain overlaps with a primary? | ||
bool primary = true; | ||
for (size_t c : primary_chains) { | ||
// calculate overlap length between chains | ||
int overlapLength = chains[i].overlap_query(chains[c], kmer_size); | ||
// no overlap | ||
if (overlapLength <= 0) { | ||
continue; | ||
} | ||
|
||
// calculate overlap percentage of shorter chain | ||
double overlapPercentage = static_cast<double>(overlapLength) / std::min(chain_span[i], chain_span[c]); | ||
|
||
if (overlapPercentage >= MIN_OVERLAP_PERCENTAGE) { | ||
primary = false; | ||
// Chain i is the best secondary to Chain c | ||
score_secondary[c] = std::max(score_secondary[c], chains[i].chain_score); | ||
} | ||
} | ||
// primary chain added to vector | ||
if (primary) { | ||
primary_chains.push_back(i); | ||
} | ||
} | ||
|
||
// mapping quality | ||
for (size_t i : primary_chains) { | ||
chains[i].compute_mapping_quality(score_secondary[i]); | ||
} | ||
} | ||
|
||
|
||
/** | ||
* dynamic programming chaining algorithm | ||
* @param hits | ||
* @param window | ||
* @param kmer_size | ||
* @param p | ||
* @return vector of chains | ||
*/ | ||
std::vector<Chain> chain_dp(const int window, const int kmer_size, const ChainingParameters &p, const SeedMatch* begin, | ||
const SeedMatch* end, bool reverse) { | ||
|
||
// number of total matches | ||
const auto n = std::distance(begin, end); | ||
|
||
const int32_t max_drop = p.BAND_WIDTH; // zur Übersicht und testen | ||
int32_t mmax_f = 0; | ||
|
||
// initialize vectors for chaining | ||
AnchorData aD(n); | ||
|
||
//code is for 1 vector of matches for 1 query and 1 target | ||
int64_t max_i_s = -1; | ||
for (int64_t i = 0; i < n; ++i) { | ||
int64_t max_j = -1; | ||
int32_t max_f = kmer_size; | ||
int32_t n_skip = 0; | ||
// increase st until same id or t_pos of st is in range of i | ||
int64_t st = 0; | ||
while (st < i && begin[i].j() > begin[st].j() + p.MAX_DIST_X) ++st; | ||
// stay in range of max iterations | ||
st = std::max(st, i - p.MAX_ITER); | ||
// iterate over all hits from st to i | ||
int64_t j; | ||
for (j = i - 1; j >= st; --j) { | ||
int32_t sc = compute_score(begin[i], begin[j], kmer_size, p); | ||
if (sc == INT32_MIN) continue; | ||
sc += aD.best_score_anchor[j]; | ||
// new max score? | ||
if (sc > max_f) { | ||
max_f = sc; | ||
max_j = j; | ||
// pending skipped seeds? | ||
if (n_skip > 0) --n_skip; | ||
// already in chain? | ||
} else if (aD.temp_marking[j] == (int32_t)i) { | ||
// increment number of skipped seeds and break if too many | ||
if (++n_skip > p.MAX_SKIP) break; | ||
} | ||
// updates the seed information if previous seed was part of the chain | ||
if (aD.predecessor_anchor[j] >= 0) aD.temp_marking[aD.predecessor_anchor[j]] = i; | ||
} | ||
int64_t end_j = j; | ||
if (max_i_s < 0 || begin[i].j() - begin[max_i_s].j() > (int64_t)p.MAX_DIST_X) { | ||
int32_t max = INT32_MIN; | ||
max_i_s = -1; | ||
// find seed with the highest score | ||
for (j = i - 1; j >= st; --j) { | ||
if (max < aD.best_score_anchor[j]){ | ||
max = aD.best_score_anchor[j]; | ||
max_i_s = j; | ||
} | ||
} | ||
} | ||
// valid max score | ||
if (max_i_s >= 0 && max_i_s < end_j) { | ||
// score of extending the current anchor to the best scoring anchor | ||
const int32_t tmp = compute_score(begin[i], begin[max_i_s], kmer_size, p); | ||
// score is valid and higher than the current max score | ||
if (tmp != INT32_MIN && max_f < tmp + aD.best_score_anchor[max_i_s]){ | ||
max_f = tmp + aD.best_score_anchor[max_i_s]; | ||
max_j = max_i_s; | ||
} | ||
} | ||
// setting max score at seed i and index of the seed that contributes to the maximum score of the chain ending at seed i | ||
// peak_score_anchor keeps the peak score up to i; best_scores_anchors is the score ending at i, not always the peak | ||
aD.best_score_anchor[i] = max_f; | ||
aD.predecessor_anchor[i] = max_j; | ||
aD.peak_score_anchor[i] = max_j >= 0 && aD.peak_score_anchor[max_j] > max_f? aD.peak_score_anchor[max_j] : max_f; | ||
if (max_i_s < 0 || (aD.best_score_anchor[max_i_s] < aD.best_score_anchor[i])) | ||
max_i_s = i; | ||
// maximum score in entire chaining | ||
mmax_f = std::max(mmax_f, max_f); | ||
} | ||
|
||
return chain_backtrack(aD, p.MIN_CHAIN_SCORE, max_drop, begin, reverse); | ||
|
||
} | ||
|
||
// Chain constructor: creates an empty chain with zero score, target id and
// mapping quality. Initializers are listed in member declaration order, which
// is the order in which they are actually executed.
Chain::Chain(bool rev) : chain_score(0), target_id(0), mapping_quality(0), reverse(rev), anchors() {}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
/**** | ||
DIAMOND protein aligner | ||
Copyright (C) 2023 Vincent Spath | ||
Code developed by Vincent Spath <vincent.spath@tuebingen.mpg.de> | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU General Public License for more details. | ||
You should have received a copy of the GNU General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
****/ | ||
|
||
#include "seed_set_dna.h" | ||
|
||
namespace Dna { | ||
|
||
// Overlap threshold used by compute_primary_chains(): a chain whose query
// overlap with a primary chain, divided by the shorter of the two query spans,
// is at least this fraction (0.5 = 50%) is treated as secondary.
constexpr double MIN_OVERLAP_PERCENTAGE = 0.5;
|
||
// Tunable values of the chaining algorithm. Only the two penalties are
// supplied by the caller; everything else carries a fixed default.
struct ChainingParameters {
    // maximum query distance between two chained anchors (compute_score);
    // also used by chain_dp to bound the target distance of the search window
    int MAX_DIST_X{5000};
    // maximum target distance between two chained anchors (compute_score)
    int MAX_DIST_Y{5000};
    // maximum difference between query and target distance of two anchors;
    // chain_dp also uses it as the score drop threshold for backtracking
    const int BAND_WIDTH{500};
    // number of skipped predecessors after which chain_dp stops scanning
    const int MAX_SKIP{25};
    // maximum number of predecessors examined per anchor in chain_dp
    const int MAX_ITER{5000};
    // minimum score of a reported chain
    const int MIN_CHAIN_SCORE{40};
    // not referenced by the chaining code in chain.cpp
    const int MIN_NUMBER_MINIMIZERS{3};
    // not referenced by the chaining code in chain.cpp
    const float MAP_PERCENTAGE{0.5f};

    // penalty factor applied to the query/target distance difference
    const float CHAIN_PEN_GAP;
    // penalty factor applied to the smaller of the two distances
    const float CHAIN_PEN_SKIP;

    // constructor for the caller-supplied penalties
    ChainingParameters(float gap, float skip)
        : CHAIN_PEN_GAP(gap),
          CHAIN_PEN_SKIP(skip) {}
};
|
||
// Per-anchor working arrays of the chaining algorithm; all four vectors have
// one entry per anchor and start out zero-filled.
struct AnchorData {
    // index of the optimal predecessor of each anchor (negative: none)
    std::vector<int64_t> predecessor_anchor;
    // best score of a chain ending at each anchor
    std::vector<int32_t> best_score_anchor;
    // highest score seen along the chain up to each anchor (peak)
    std::vector<int32_t> peak_score_anchor;

    // scratch markings: written by chain_dp while scanning predecessors and
    // reused by chain_backtrack to flag anchors that are already part of a chain
    std::vector<int32_t> temp_marking;

    // allocates n entries in every array
    explicit AnchorData(int n)
        : predecessor_anchor(n)
        , best_score_anchor(n)
        , peak_score_anchor(n)
        , temp_marking(n, 0)
    {}
};
|
||
|
||
|
||
struct Chain { | ||
explicit Chain(bool rev); | ||
int32_t chain_score; | ||
BlockId target_id; | ||
uint8_t mapping_quality; | ||
bool reverse; | ||
|
||
struct Anchor { | ||
Loc i; | ||
Loc j; | ||
Anchor(Loc i, Loc j) : i(i), j(j) {} | ||
}; | ||
// query/target starting position in reverse (i, j) | ||
std::vector<Anchor> anchors; | ||
|
||
int overlap_query(const Chain &other, const int kmer_size) const { | ||
return (std::min(anchors[0].i, other.anchors[0].i) + kmer_size) - | ||
std::max(anchors.back().i, other.anchors.back().i); | ||
|
||
} | ||
|
||
void compute_mapping_quality(int score_secondary) { | ||
double sc_ratio = static_cast<double>(score_secondary) / chain_score; | ||
double quality = 40 * (1 - sc_ratio) * std::min(1.0, static_cast<double>(anchors.size()) / 10) * std::log(chain_score); | ||
// compress to 0-60 | ||
mapping_quality = static_cast<uint8_t>(quality * 60 / 312); | ||
} | ||
|
||
bool operator>(const Chain& other)const | ||
{return this->chain_score > other.chain_score;} | ||
|
||
}; | ||
|
||
|
||
|
||
|
||
|
||
|
||
// Chains the seed matches in [begin, end) by dynamic programming and returns
// the resulting chains; `reverse` is stored in every chain.
std::vector<Chain> chain_dp(int window,
                            int kmer_size,
                            const ChainingParameters &p,
                            const SeedMatch* begin,
                            const SeedMatch* end,
                            bool reverse);

// Determines which of the chains are primary and computes the mapping quality
// of those primary chains in place.
void compute_primary_chains(std::vector<Chain> &chains, int kmer_size);
} | ||
|
Oops, something went wrong.