Skip to content
Steve Bond edited this page Dec 7, 2016 · 7 revisions

--trimal, -trm

Description

This reimplements portions of the excellent software trimAl, allowing the removal of poorly aligned regions to increase overall alignment quality.

Note: If working from annotated GenBank files, any feature spanning columns that are removed will be truncated as appropriate, and will be deleted completely if all of its columns are removed.

Arguments

Mode ( { float, int, mode_str } )

The trimal function contains four distinct modes: 'clean', 'all' (or 'no_gaps'), 'gappyout', and a threshold mode controlled by supplying a number.

clean: Removes any columns that are 100% gaps (example 1).

all: Removes all columns with any number of gaps (example 2).

gappyout: A more involved algorithm that takes into account the distribution of alignment gaps to automatically select the most appropriate threshold (example 3).

Threshold ( int ): Specifically set the maximum number of gaps a column may contain without being removed (example 4).

Threshold ( float ): Use a fractional proportion less than 1 (0.05, 0.65, 0.99, etc.) to set the maximum proportion of gaps a column may contain without being removed (example 5).

Examples

Input file: Mnemiopsis.nex

#NEXUS
begin data;
	dimensions ntax=7 nchar=216;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  --ML-ILSKF--KGVTPFKGR-QYTGSVIS--GFKKFG--STFAEDY-VRLLHN--PVES-DQELFSIGD--FKTYGSEW----IKKLKL-EDNLATEA-FLRDDSA---IKHMYFQRKL-RGNGKAL----------------------------------------------------------------------------------------
Mle_Panxα7A -MGV-ILFPI--RATAP-KSRQQLGGAYIA--GFSRDEEYERFAEEW-VAMLQN--PVED-GFEKFKFGS--FAEYGLLW----DTRNP--LNNVQSLS-SLDRSPAVGVISKLYLKDESPRAEVLKRRSKKVKVPSPRKPKLLFHEEIKKKLIKRTERKDDNLTKYAQEDVLDSEYVVVEQSVPETMTEQESVEESVPEISKAEQEGGSSDHIDV
Mle_Panxα8  -MVL-V-ALF--PRLAPFKVR-SYTGSVIE--GFLKVP--VEFAKDY-ISMLHN--PVET-AEELFHIGS--FFSYGVDW----AAEEPEPGEDYKTLA-FLRDEPA---YSEIWRVGGE-----------------------------VRCLVERC-------------FSLNRPHI--------------------PHLVK-------------
Mle_Panxα5  -MIYWV-AVF--KRMAPFKVG-IIA-STIK--GFEDSFRSEAFVDEY-INLLHN--PLTRDEEELFKIGN--FVTYGFEW----VGSQVP-NGSTYTLT-FWND-WN---LRHLYWNEYV-QYNGHLKTS--------------------------------------------------------------------------------------
Mle_Panxα4  -MVIEL-AGY--KGLSPFKDR-QYS-TLIA--GFTKFH--PQFAEDY-IKLLCL--ASGSPEQQMFEYGSNTWYRYGADWYGTRFSSYHE-TNNSITLG-MLPDKPS---YRYVFMTTSK-NADVIENLS-----------------------------------------KLDSSVI----------------------------ELGSKDSI--
Mle_Panxα3  MLLLGS-GTI--KNLS-FKDS-QYT-KNIS--GFTKFG--EDFSQDY-LKIMSDYYHCTT-ETPSFQVGD--FKTYGIEW----LRQFPN-PENYSTSGHFSHKHPG---WKFMYYKHLR-IGHVPGE-------------------------------------------YLTDPA---------------------------------------
Mle_Panxα6  -MLLEI-ANF--KGAT-FKER-QYT-GIIA--GLTKFS--AAFAEDYILAMLHN--PVDG-DDVTFESGR--YLTYGSEW----FASLDK-QSNYTSFC-FLKENSK---LKFIYFEEKN-KQHLKGSKD-----------------------------------------ILV------------------------------------------
;
end;

Usage example 1

$: alb Mnemiopsis.nex -trm clean

Output

 #NEXUS
begin data;
	dimensions ntax=7 nchar=212;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  --ML-ILSKFKGVTPFKGR-QYTGSVISGFKKFG--STFAEDY-VRLLHN--PVES-DQELFSIGD--FKTYGSEW----IKKLKL-EDNLATEA-FLRDDSA---IKHMYFQRKL-RGNGKAL----------------------------------------------------------------------------------------
Mle_Panxα7A -MGV-ILFPIRATAP-KSRQQLGGAYIAGFSRDEEYERFAEEW-VAMLQN--PVED-GFEKFKFGS--FAEYGLLW----DTRNP--LNNVQSLS-SLDRSPAVGVISKLYLKDESPRAEVLKRRSKKVKVPSPRKPKLLFHEEIKKKLIKRTERKDDNLTKYAQEDVLDSEYVVVEQSVPETMTEQESVEESVPEISKAEQEGGSSDHIDV
Mle_Panxα8  -MVL-V-ALFPRLAPFKVR-SYTGSVIEGFLKVP--VEFAKDY-ISMLHN--PVET-AEELFHIGS--FFSYGVDW----AAEEPEPGEDYKTLA-FLRDEPA---YSEIWRVGGE-----------------------------VRCLVERC-------------FSLNRPHI--------------------PHLVK-------------
Mle_Panxα5  -MIYWV-AVFKRMAPFKVG-IIA-STIKGFEDSFRSEAFVDEY-INLLHN--PLTRDEEELFKIGN--FVTYGFEW----VGSQVP-NGSTYTLT-FWND-WN---LRHLYWNEYV-QYNGHLKTS--------------------------------------------------------------------------------------
Mle_Panxα4  -MVIEL-AGYKGLSPFKDR-QYS-TLIAGFTKFH--PQFAEDY-IKLLCL--ASGSPEQQMFEYGSNTWYRYGADWYGTRFSSYHE-TNNSITLG-MLPDKPS---YRYVFMTTSK-NADVIENLS-----------------------------------------KLDSSVI----------------------------ELGSKDSI--
Mle_Panxα3  MLLLGS-GTIKNLS-FKDS-QYT-KNISGFTKFG--EDFSQDY-LKIMSDYYHCTT-ETPSFQVGD--FKTYGIEW----LRQFPN-PENYSTSGHFSHKHPG---WKFMYYKHLR-IGHVPGE-------------------------------------------YLTDPA---------------------------------------
Mle_Panxα6  -MLLEI-ANFKGAT-FKER-QYT-GIIAGLTKFS--AAFAEDYILAMLHN--PVDG-DDVTFESGR--YLTYGSEW----FASLDK-QSNYTSFC-FLKENSK---LKFIYFEEKN-KQHLKGSKD-----------------------------------------ILV------------------------------------------
;
end;

Usage example 2

$: alb Mnemiopsis.nex -trm all

Output

 #NEXUS
begin data;
	dimensions ntax=7 nchar=89;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  MLISKFKGVTKGRQYTSVISGFKKFGSTFAEDYVRLLHNPVESDQELFSIGDFKTYGSEWIKKLKEDNLATEAFLRDSAIKHMYFQRKL
Mle_Panxα7A GVIFPIRATAKSRQLGAYIAGFSRDEERFAEEWVAMLQNPVEDGFEKFKFGSFAEYGLLWDTRNPLNNVQSLSSLDRPAISKLYLKDES
Mle_Panxα8  VLVALFPRLAKVRSYTSVIEGFLKVPVEFAKDYISMLHNPVETAEELFHIGSFFSYGVDWAAEEPGEDYKTLAFLRDPAYSEIWRVGGE
Mle_Panxα5  IYVAVFKRMAKVGIIASTIKGFEDSFEAFVDEYINLLHNPLTREEELFKIGNFVTYGFEWVGSQVNGSTYTLTFWNDWNLRHLYWNEYV
Mle_Panxα4  VILAGYKGLSKDRQYSTLIAGFTKFHPQFAEDYIKLLCLASGSEQQMFEYGSWYRYGADWFSSYHTNNSITLGMLPDPSYRYVFMTTSK
Mle_Panxα3  LLSGTIKNLSKDSQYTKNISGFTKFGEDFSQDYLKIMSDHCTTETPSFQVGDFKTYGIEWLRQFPPENYSTSGFSHKPGWKFMYYKHLR
Mle_Panxα6  LLIANFKGATKERQYTGIIAGLTKFSAAFAEDYLAMLHNPVDGDDVTFESGRYLTYGSEWFASLDQSNYTSFCFLKESKLKFIYFEEKN
;
end;

Usage example 3

$: alb Mnemiopsis.nex -trm gappyout

Output

 #NEXUS
begin data;
	dimensions ntax=7 nchar=104;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  -MLISKFKGVTPFKGRQYTSVISGFKKFGSTFAEDYVRLLHNPVESDQELFSIGDFKTYGSEWIKKLKLEDNLATEAFLRDDSAIKHMYFQRKLRGNGKAL---
Mle_Panxα7A MGVIFPIRATAP-KSRQLGAYIAGFSRDEERFAEEWVAMLQNPVEDGFEKFKFGSFAEYGLLWDTRNP-LNNVQSLSSLDRSPAISKLYLKDESRAEVLKRVLD
Mle_Panxα8  MVLVALFPRLAPFKVRSYTSVIEGFLKVPVEFAKDYISMLHNPVETAEELFHIGSFFSYGVDWAAEEPEGEDYKTLAFLRDEPAYSEIWRVGGE-------SLN
Mle_Panxα5  MIYVAVFKRMAPFKVGIIASTIKGFEDSFEAFVDEYINLLHNPLTREEELFKIGNFVTYGFEWVGSQVPNGSTYTLTFWND-WNLRHLYWNEYVQYNGHLK---
Mle_Panxα4  MVILAGYKGLSPFKDRQYSTLIAGFTKFHPQFAEDYIKLLCLASGSEQQMFEYGSWYRYGADWFSSYHETNNSITLGMLPDKPSYRYVFMTTSKNADVIENKLD
Mle_Panxα3  LLLSGTIKNLS-FKDSQYTKNISGFTKFGEDFSQDYLKIMSDHCTTETPSFQVGDFKTYGIEWLRQFPNPENYSTSGFSHKHPGWKFMYYKHLRIGHVPGEYLT
Mle_Panxα6  MLLIANFKGAT-FKERQYTGIIAGLTKFSAAFAEDYLAMLHNPVDGDDVTFESGRYLTYGSEWFASLDKQSNYTSFCFLKENSKLKFIYFEEKNKQHLKGSILV
;
end;

Usage example 4

$: alb Mnemiopsis.nex -trm 3

Output

 #NEXUS
begin data;
	dimensions ntax=7 nchar=110;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  -ML-ISKFKGVTPFKGRQYTSVISGFKKFGSTFAEDYVRLLHNPVESDQELFSIGDFKTYGSEWIKKLKLEDNLATEAFLRDDSAIKHMYFQRKLRGNGKAL--------
Mle_Panxα7A MGV-IFPIRATAP-KSRQLGAYIAGFSRDEERFAEEWVAMLQNPVEDGFEKFKFGSFAEYGLLWDTRNP-LNNVQSLSSLDRSPAISKLYLKDESRAEVLKRRSVLDSEY
Mle_Panxα8  MVL-VALFPRLAPFKVRSYTSVIEGFLKVPVEFAKDYISMLHNPVETAEELFHIGSFFSYGVDWAAEEPEGEDYKTLAFLRDEPAYSEIWRVGGE---------SLNRPH
Mle_Panxα5  MIYWVAVFKRMAPFKVGIIASTIKGFEDSFEAFVDEYINLLHNPLTREEELFKIGNFVTYGFEWVGSQVPNGSTYTLTFWND-WNLRHLYWNEYVQYNGHLKTS------
Mle_Panxα4  MVIELAGYKGLSPFKDRQYSTLIAGFTKFHPQFAEDYIKLLCLASGSEQQMFEYGSWYRYGADWFSSYHETNNSITLGMLPDKPSYRYVFMTTSKNADVIENLSKLDSSV
Mle_Panxα3  LLLGSGTIKNLS-FKDSQYTKNISGFTKFGEDFSQDYLKIMSDHCTTETPSFQVGDFKTYGIEWLRQFPNPENYSTSGFSHKHPGWKFMYYKHLRIGHVPGE--YLTDPA
Mle_Panxα6  MLLEIANFKGAT-FKERQYTGIIAGLTKFSAAFAEDYLAMLHNPVDGDDVTFESGRYLTYGSEWFASLDKQSNYTSFCFLKENSKLKFIYFEEKNKQHLKGSKDILV---
;
end;

Usage example 5

$: alb Mnemiopsis.nex -trm 0.75

Output

 #NEXUS
begin data;
	dimensions ntax=7 nchar=138;
	format datatype=protein missing=? gap=-;
matrix
Mle_Panxα9  -ML-ILSKFKGVTPFKGRQYTGSVISGFKKFG--STFAEDYVRLLHNPVES-DQELFSIGDFKTYGSEWIKKLKLEDNLATEAFLRDDSAIKHMYFQRKLRGNGKAL-------------------------------
Mle_Panxα7A MGV-ILFPIRATAP-KSRQLGGAYIAGFSRDEEYERFAEEWVAMLQNPVED-GFEKFKFGSFAEYGLLWDTRNP-LNNVQSLSSLDRSPAISKLYLKDESRAEVLKRRSKKKLIKRTDVLDSEYVPEISKEGGSSDHI
Mle_Panxα8  MVL-V-ALFPRLAPFKVRSYTGSVIEGFLKVP--VEFAKDYISMLHNPVET-AEELFHIGSFFSYGVDWAAEEPEGEDYKTLAFLRDEPAYSEIWRVGGE---------VRCLVERCFSLNRPHIPHLVK--------
Mle_Panxα5  MIYWV-AVFKRMAPFKVGIIA-STIKGFEDSFRSEAFVDEYINLLHNPLTRDEEELFKIGNFVTYGFEWVGSQVPNGSTYTLTFWND-WNLRHLYWNEYVQYNGHLKTS-----------------------------
Mle_Panxα4  MVIEL-AGYKGLSPFKDRQYS-TLIAGFTKFH--PQFAEDYIKLLCLASGSPEQQMFEYGSWYRYGADWFSSYHETNNSITLGMLPDKPSYRYVFMTTSKNADVIENLS---------KLDSSVI-----ELGSKDSI
Mle_Panxα3  LLLGS-GTIKNLS-FKDSQYT-KNISGFTKFG--EDFSQDYLKIMSDHCTT-ETPSFQVGDFKTYGIEWLRQFPNPENYSTSGFSHKHPGWKFMYYKHLRIGHVPGE-----------YLTDPA--------------
Mle_Panxα6  MLLEI-ANFKGAT-FKERQYT-GIIAGLTKFS--AAFAEDYLAMLHNPVDG-DDVTFESGRYLTYGSEWFASLDKQSNYTSFCFLKENSKLKFIYFEEKNKQHLKGSKD---------ILV-----------------
;
end;

Main Toolkit Pages





Further Reading

Clone this wiki locally