-
Notifications
You must be signed in to change notification settings - Fork 1
/
heca
91 lines (73 loc) · 4.66 KB
/
heca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
################ggtree###########################
# Draw a midpoint-rooted tree and print isolate metadata columns beside the tips.
library("treeio")
library("readxl")
library("tidyverse")
library("ggplot2")
library("phytools")
library("ggtree")

# NOTE(review): `tree` is never created in this file; it must already exist in
# the session (presumably a phylo object read from a tree file -- confirm).
metadata <- read_xlsx("metadata.xlsx")

# Tip labels that have no matching entry in metadata$Isolates. The logical mask
# is computed over tree$tip.label, so it must index tree$tip.label too; using it
# to index metadata$Isolates returned unrelated entries.
tree$tip.label[!tree$tip.label %in% metadata$Isolates]

# Base figure: `%<+%` attaches the metadata table to the tree so that its
# columns can be referenced inside aes() by later layers.
gg_simple <- ggtree(midpoint.root(tree)) %<+% metadata +
  geom_treescale(x = 0, y = 0, width = 0.001) +
  geom_tiplab(size = 3, align = TRUE) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(1, 1, 1, 1, "cm"))

# Save the base figure. `plot` is named explicitly so the file written does not
# depend on whichever plot object happened to be built last.
ggsave(
  filename = "tree.pdf",
  plot = gg_simple,
  device = "pdf",
  width = 12,
  height = 6,
  units = "in",
  limitsize = FALSE
)

# Display the tree with one aligned text column per metadata field; the offsets
# place the columns side by side to the right of the tip labels.
# NOTE(review): MLST is drawn twice (offsets 0.0025 and 0.01) -- the last layer
# may have been meant for a different column; confirm.
gg_simple +
  geom_tiplab(aes(label = MLST), color = "black", offset = 0.0025, size = 2, align = TRUE, linetype = NA) +
  geom_tiplab(aes(label = Serotype), color = "black", offset = 0.003, size = 2, align = TRUE, linetype = NA) +
  geom_tiplab(aes(label = Farm_Site), color = "black", offset = 0.0035, size = 2, align = TRUE, linetype = NA) +
  geom_tiplab(aes(label = Tissues_Disease), color = "black", offset = 0.0053, size = 2, align = TRUE, linetype = NA) +
  geom_tiplab(aes(label = MLST), color = "black", offset = 0.01, size = 2, align = TRUE, linetype = NA)

# Close the display device only if one is open; dev.off() on the null device
# raises an error.
if (dev.cur() > 1L) dev.off()
# Alternative layouts for a second tree object, `pp`.
# NOTE(review): `pp` is not defined anywhere in this file -- presumably a tree
# loaded earlier in the session; confirm.

# Option 1: turn clipping off and reserve a wide right-hand margin so the long
# aligned tip labels are not cut off.
ggtree(midpoint.root(pp), color = "blue", size = 3, ladderize = FALSE) +
  coord_cartesian(clip = "off") +
  geom_tiplab(size = 6, align = TRUE) +
  theme_tree2(plot.margin = margin(6, 500, 6, 6))

# OR
# Option 2: make room for the labels by setting the x-axis limits instead.
ggtree(midpoint.root(pp), color = "blue", size = 3) +
  xlim(0, 1) +
  geom_tiplab(size = 6, align = TRUE) +
  theme_tree2()

# Plot `tree` re-rooted at node 26.
ggtree(root(tree, node = 26))
#################################################
# Load the two tab-separated input tables from the working directory.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable variable.
data <- read.csv("bigtable.tsv", header = TRUE, sep = "\t")
taxonCounts <- read.csv("taxonLevelCounts.tsv", header = TRUE, sep = "\t")
View(data)

# Two alternative definitions of `viruses`. If both lines are run, the second
# assignment replaces the first.
# Alternative 1: every row whose kingdom is "Viruses".
viruses <- data %>% filter(kingdom == "Viruses")
# OR
# Alternative 2: only rows from the eight listed families.
viruses <- filter(
  data,
  family %in% c(
    "Partitiviridae", "Herpesviridae", "Reoviridae", "Coronaviridae",
    "Astroviridae", "Arenaviridae", "Paramyxoviridae", "Poxviridae"
  )
)
View(viruses)
# Family-level rows of taxonCounts whose taxonPath contains "k_Viruses".
viralCounts1 <- filter(
  taxonCounts,
  taxonLevel == "family",
  grepl(pattern = "k_Viruses", x = taxonPath)
)

# From those rows, keep the ones whose taxonPath names one of the eight
# families below (the taxonLevel condition is repeated from the step above).
viralCounts <- filter(
  viralCounts1,
  taxonLevel == "family",
  grepl(
    pattern = "f_Partitiviridae|f_Herpesviridae|f_Reoviridae|f_Coronaviridae|f_Astroviridae|f_Arenaviridae|f_Paramyxoviridae|f_Poxviridae",
    x = taxonPath
  )
)
# Keep only the rows of `data` that belong to the eight listed families.
# The original call filtered `df`, which is never assigned in this file and
# therefore resolved to the stats::df() function instead of a data frame;
# `data` is the table read from bigtable.tsv that carries the `family` column.
df2 <- filter(
  data,
  family %in% c(
    "Partitiviridae", "Herpesviridae", "Reoviridae", "Coronaviridae",
    "Astroviridae", "Arenaviridae", "Paramyxoviridae", "Poxviridae"
  )
)
virusesFiltered <- df2

# Sum normCount for every sampleID/family pair.
# `.groups = "drop"` returns an ungrouped table and silences the
# "`summarise()` has grouped output by 'sampleID'" console message that had
# been pasted into this script as if it were code.
viralFiltCounts <- virusesFiltered %>%
  group_by(sampleID, family) %>%
  summarise(n = sum(normCount), .groups = "drop")

# Horizontal bars, one per sample; position = "fill" rescales each bar so the
# families are shown as proportions of that sample's total.
ggplot(viralFiltCounts) +
  geom_bar(aes(x = sampleID, y = n, fill = family), position = "fill", stat = "identity") +
  coord_flip() +
  theme_bw()

# Sum normCount per family over all samples and sort from largest to smallest.
# (The console prompt characters `>` and `+` were removed so this parses as R.)
viralFamCounts <- viruses %>%
  group_by(family) %>%
  summarise(n = sum(normCount)) %>%
  arrange(desc(n))

# Make `family` a factor whose levels follow the sorted row order; discrete
# ggplot axes are drawn in level order.
viralFamCounts$family <- factor(viralFamCounts$family, levels = viralFamCounts$family)
#################### HARYANA ###################
# Show the current working directory, then switch to the Haryana data folder.
# NOTE(review): the hard-coded setwd() ties this script to one machine's
# directory layout; consider project-relative paths instead.
getwd()
setwd("~/vdl/dtra/haryana/")

# Read the two tab-separated tables from that folder.
# `header = TRUE` is spelled out because `T` is an ordinary, reassignable variable.
data <- read.csv("bigtable.tsv", header = TRUE, sep = "\t")
taxonCounts <- read.csv("taxonLevelCounts.tsv", header = TRUE, sep = "\t")

# Keep only rows whose family is one of the 17 families listed below.
viruses <- filter(
  data,
  family %in% c(
    "Orthomyxoviridae", "Paramyxoviridae", "Herpesviridae", "Coronaviridae",
    "Astroviridae", "Poxviridae", "Pneumoviridae", "Arenaviridae",
    "Adenoviridae", "Rhabdoviridae", "Peribunyaviridae", "Picornaviridae",
    "Reoviridae", "Flaviviridae", "Papillomaviridae", "Circoviridae",
    "Caliciviridae"
  )
)
# Scatter plot of pident against alnlen for every row of `viruses`, one panel
# per family. Points are coloured by alnType, sized by count, and drawn nearly
# transparent (alpha = 0.1) so overlapping points remain distinguishable.
ggplot(
  viruses,
  aes(x = alnlen, y = pident, color = alnType, size = count)
) +
  geom_point(alpha = 0.1) +
  facet_wrap(vars(family))
# Family-level rows of taxonCounts for Paramyxoviridae only.
paramixoCounts <- filter(
  taxonCounts,
  taxonLevel == "family",
  taxonName == "Paramyxoviridae"
)

# Horizontal bar chart of normCount per sample for that family.
ggplot(paramixoCounts) +
  geom_col(aes(x = sampleID, y = normCount), fill = "#0099f9") +
  coord_flip()
# Sum normCount per family across all samples, largest total first.
viralFamCounts <- viruses %>%
  group_by(family) %>%
  summarise(n = sum(normCount)) %>%
  arrange(desc(n))

# Turn `family` into a factor whose levels follow the sorted row order, so the
# bars below are drawn in that order rather than alphabetically.
viralFamCounts[["family"]] <- factor(
  viralFamCounts[["family"]],
  levels = viralFamCounts[["family"]]
)

# Horizontal bar chart of the per-family totals.
ggplot(viralFamCounts, aes(x = family, y = n)) +
  geom_bar(stat = "identity") +
  coord_flip()
# Family-level rows of taxonCounts whose taxonPath contains the name of any of
# the 17 families in the pattern below.
viralCounts <- filter(
  taxonCounts,
  taxonLevel == "family",
  grepl(
    pattern = "Orthomyxoviridae|Paramyxoviridae|Herpesviridae|Coronaviridae|Astroviridae|Poxviridae|Pneumoviridae|Arenaviridae|Adenoviridae|Rhabdoviridae|Peribunyaviridae|Picornaviridae|Reoviridae|Flaviviridae|Papillomaviridae|Circoviridae|Caliciviridae",
    x = taxonPath
  )
)

# Horizontal stacked bars: one bar per sample, segments coloured by taxonName,
# segment length equal to normCount.
ggplot(viralCounts, aes(x = sampleID, y = normCount, fill = taxonName)) +
  geom_bar(position = "stack", stat = "identity") +
  coord_flip()
# Sum normCount for every sampleID/family pair in `viruses`.
# `.groups = "drop"` returns an ungrouped table; without it summarise() leaves
# the result grouped by sampleID and prints a message saying so.
viralFiltCounts <- viruses %>%
  group_by(sampleID, family) %>%
  summarise(n = sum(normCount), .groups = "drop")

# Horizontal bars, one per sample; position = "fill" rescales each bar so the
# families are shown as proportions of that sample's total.
ggplot(viralFiltCounts) +
  geom_bar(aes(x = sampleID, y = n, fill = family), position = "fill", stat = "identity") +
  coord_flip() +
  theme_bw()