forked from Tong-Chen/s-plot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sp_hclust.sh
executable file
·161 lines (147 loc) · 3.34 KB
/
sp_hclust.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
#######################################
# Print the help text of this script.
# Globals:
#   txtcyn, txtrst, txtbld, txtred, bldblu, bldred (read) - not assigned
#     anywhere in this file, so they expand to empty strings unless the
#     calling environment defines them (presumably terminal colour codes).
# Arguments:
#   None
# Outputs:
#   Writes the usage message to stdout. The here-document delimiter is
#   unquoted, so $0 and the variables above are expanded.
#######################################
usage()
{
cat <<EOF
${txtcyn}
***CREATED BY Chen Tong (chentong_biology@163.com)***
Usage:
$0 options${txtrst}
${bldblu}Function${txtrst}:
This script is used to do clustering using hclust, which is clustered
by row. So transpose your data when necessary.
${txtbld}OPTIONS${txtrst}:
-f Data file (with header line, the first column holds the
row names, tab separated)${bldred}[NECESSARY]${txtrst}
-k If the names of your rows and columns start with a numeric value,
this can be set to FALSE to avoid modifying these names to be
legal variable names. But duplicates can not be picked out.
[${bldred}Default TRUE${txtrst}]
Accept FALSE.
-s Scale the data.[${bldred}Default FALSE${txtrst}]
Accept TRUE.
-b Transpose data. [${bldred}Default FALSE${txtrst}]
Default cluster by rows when -b is FALSE. Or cluster
by cols when -b is TRUE.
-t Title of picture[${txtred}Default empty title${txtrst}]
-x xlab of picture[${txtred}Default empty xlab${txtrst}.
If set, use the words which represent the
meaning of your columns]
-y ylab of picture[${txtred}Default empty ylab${txtrst}.
If set, use the words which represent the
meaning of your rows]
-d Dist method[${bldred}Default "euclidean"${txtrst}]
Accept "euclidean", "maximum", "manhattan",
"canberra", "binary" or "minkowski"
-c hclust method[${txtred}Default "ward"${txtrst}]
Accept "single", "complete", "average", "mcquitty", "median"
or "centroid".
-z Is there a header[${bldred}Default TRUE${txtrst}]
Accept FALSE.
-e Execute or not[${bldred}Default TRUE${txtrst}]
Accept FALSE.
-a Number of final clusters[${txtred}Default 0${txtrst}, no cluster
boxes are drawn; choose an integer(>=2) to draw them]
EOF
}
# ---- Option defaults (the flag that overrides each one is noted) ----
file=               # -f  input data file; empty until supplied
checkNames='TRUE'   # -k
scale='FALSE'       # -s
title=''            # -t
xlab=''             # -x
ylab=''             # -y
dm='euclidean'      # -d  distance method
hm='ward'           # -c  hclust method
header='TRUE'       # -z
execute='TRUE'      # -e
num=0               # -a  0 means no cluster rectangles are drawn
transpose='FALSE'   # -b

# ---- Command-line parsing ----
# Every flag except -h takes an argument. -h and anything getopts rejects
# (reported as "?") both print the help text and exit with status 1.
while getopts "hf:k:s:t:x:y:d:c:z:e:a:b:" OPTION; do
  case "$OPTION" in
    f) file=$OPTARG ;;
    k) checkNames=$OPTARG ;;
    s) scale=$OPTARG ;;
    t) title=$OPTARG ;;
    x) xlab=$OPTARG ;;
    y) ylab=$OPTARG ;;
    d) dm=$OPTARG ;;
    c) hm=$OPTARG ;;
    z) header=$OPTARG ;;
    e) execute=$OPTARG ;;
    a) num=$OPTARG ;;
    b) transpose=$OPTARG ;;
    h | ?)
      usage
      exit 1
      ;;
  esac
done
# Stop when no data file was given with -f: print the help text and the
# deprecation notice, then exit with status 1.
# "$file" is quoted so a path containing whitespace or glob characters is
# tested as one word instead of breaking the [ ... ] expression.
if [ -z "$file" ]; then
  usage
  echo "This is deprecated, please use **s-plot hcluster**"
  exit 1
fi
# Suffix placed between the input path and the generated file extensions.
# ".scale" is appended when -s TRUE was given, so scaled runs get their own
# file names.
mid='.hclust'
if [ "$scale" = 'TRUE' ]; then
  mid="${mid}.scale"
fi

# Write the R script to "<file><mid>.r". The here-document delimiter is left
# unquoted on purpose: the shell substitutes the option values into the R
# source before it is written. The redirect target is quoted so input paths
# containing spaces do not trigger an "ambiguous redirect".
# read.table takes `quote` (not `quotes`); quote="" disables quote handling.
cat <<EOF >"${file}${mid}.r"
library(graphics)
data1 = read.table("$file", header=$header, quote="",
sep="\t",row.names=1, comment.char="", check.names=${checkNames})
x <- as.matrix(data1)
if ($transpose){
x <- t(x)
}
if ($scale){
x <- scale(x)
}
d <- dist(x, method="$dm")
fit <- hclust(d, method="$hm")
#postscript(file="${file}${mid}.eps", onefile=FALSE,horizontal=FALSE)
png(file="${file}${mid}.png", width=600, height=900, res=100)
plot(fit, hang=-1, main="$title", xlab="$xlab", ylab="$ylab")
if ($num){
rect.hclust(fit, k=$num, border="red")
}
dev.off()
EOF
# Run the generated R script unless -e was set to something other than TRUE.
if [ "${execute}" = 'TRUE' ]; then
  # The script path is quoted so input files whose names contain spaces are
  # passed to Rscript and rm as a single argument.
  # The generated .r file is deleted only after Rscript succeeds; after a
  # failed run it is left in place.
  if Rscript "${file}${mid}.r"; then
    /bin/rm -f -- "${file}${mid}.r"
  fi
  # Disabled legacy EPS conversion steps, kept for reference:
  #epstopdf ${file}${mid}.eps
  #if [ $? -eq 0 ]; then
  #convert -density 200 -flatten ${file}${mid}.eps ${file}${mid}.png
  #if [ $num -ne 0 ]; then
  # convert -density 200 -flatten ${file}${mid}.${num}.eps ${file}${mid}.${num}.png
  #fi
  #fi
fi