#!/bin/bash
# SUCCESSFUL WORKING SESSION - GATK4 ran in Google Cloud
# (c) copyright 2016 Martin Lurie 7/7/2016 sample code not supported
#==============
# Notes on port forwarding to get to the CM web UI:
#   use this: ssh -i martygooglecloud.key -g -L 9090:10.142.0.3:7180 104.196.10.156
#   10.x.x.x is the target host we want to reach - in this case the CM host
#   104.x.x.x is the gateway host, which could be the same as the CM host
#===========
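# Run the ssh command above from your local machine; the CM web UI is then
# reachable at http://localhost:9090 (forwarded to port 7180 on the CM host).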
# Start a Google Cloud cluster with Cloudera Director,
# then log into a data node on the cluster.
# Install build dependencies and the Google Cloud SDK
sudo yum install -y curl unzip gcc python-setuptools git automake gcc-c++
wget https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && unzip google-cloud-sdk.zip && rm google-cloud-sdk.zip
sudo google-cloud-sdk/install.sh --usage-reporting=true --path-update=true --bash-completion=true --rc-path=/home/$USER/.bashrc --disable-installation-options
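# Sanity check: gcloud should now be on the PATH (open a new shell or
# source the rc file first if it is not picked up yet)
gcloud version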
# Set up a working directory for the GATK tools
mkdir -p /home/$USER/gatk
cd /home/$USER/gatk
# Download the Cromwell workflow engine and the wdltool jar
wget https://github.com/broadinstitute/cromwell/releases/download/0.19.3/cromwell-0.19.3.jar
pwd
wget https://github.com/broadinstitute/wdltool/releases/download/0.1/wdltool0.1.jar
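# Quick check that both jars arrived intact
ls -lh cromwell-0.19.3.jar wdltool0.1.jar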
git clone git://github.com/broadinstitute/gatk
cd gatk
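# Record the GATK commit being built, for reproducibility
git log -1 --oneline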
echo rebuild gatk
./gradlew clean
./gradlew installAll
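# Smoke test (assumption: the gatk-launch wrapper produced by installAll
# prints usage when invoked with --help)
./gatk-launch --help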
#
# set up an HDFS home directory for this user (must be created by the hdfs superuser)
export NN=$(hdfs getconf -namenodes)
sudo su - hdfs -c "hadoop fs -mkdir /user/$USER"
sudo su - hdfs -c "hadoop fs -chown $USER:$USER /user/$USER"
hadoop fs -ls hdfs://$NN:8020/user/$USER
#
#
# Verification test - download a genomic file and run CountReadsSpark on it
echo "get genomic file"
hadoop fs -ls
wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA12878/alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam
hadoop fs -put NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam
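# Confirm the BAM landed in HDFS
hadoop fs -ls NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam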
# export NN="cloudera-director-16c6f850-e688-40a9-a165-258ac6e68f7a.c.mydirector-1280.internal"
# make sure the job doesn't fail due to an existing output directory
hadoop fs -rm -r hdfs://$NN:8020/user/$USER/counted.out
./gatk-launch CountReadsSpark --input hdfs://$NN:8020/user/$USER/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam --output hdfs://$NN:8020/user/$USER/counted.out --sparkRunner SPARK --sparkMaster yarn-client -- --num-executors 4 --executor-cores 2 --executor-memory 1g --driver-memory=1g --conf spark.yarn.executor.memoryOverhead=450 --conf spark.yarn.am.memoryOverhead=450
hadoop fs -ls counted.out
# counted.out is a directory of Spark part files, so cat its contents
hadoop fs -cat counted.out/*
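# The part file(s) should contain the total read count for the BAM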