forked from ocp-power-automation/openshift-install-power
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopenshift-install-powervs
executable file
·1710 lines (1546 loc) · 60.5 KB
/
openshift-install-powervs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env bash
: '
Copyright (C) 2020 IBM Corporation
Licensed under the Apache License, Version 2.0 (the “License”);
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an “AS IS” BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'
#-------------------------------------------------------------------------
# Abort the script as soon as an unguarded command returns a non-zero status.
set -e
# Version string of this script.
VERSION="v1.12.1"
#-------------------------------------------------------------------------
# Display help
#-------------------------------------------------------------------------
function help {
# Print the usage text (commands, arguments and environment variables) to
# stdout. The here-document is unquoted, so it is subject to shell expansion.
cat <<-EOF
Automation for deploying OpenShift 4.X on PowerVS
Usage:
openshift-install-powervs [command] [<args> [<value>]]
Available commands:
setup Install all the required packages/binaries in current directory
variables Interactive way to populate the variables file
create Create an OpenShift cluster
destroy Destroy an OpenShift cluster
output Display the cluster information. Runs terraform output [NAME]
access-info Display the access information of installed OpenShift cluster
help Display this information
Where <args>:
-var Terraform variable to be passed to the create/destroy command
-var-file Terraform variable file name in current directory. (By default using var.tfvars)
-flavor Cluster compute template to use eg: small, medium, large
-force-destroy Not ask for confirmation during destroy command
-ignore-os-checks Ignore operating system related checks
-ignore-warnings Warning messages will not be displayed. Should be specified first, before any other args.
-verbose Enable verbose for terraform console messages
-all-images List all the images available during variables prompt
-trace Enable tracing of all executed commands
-version, -v Display the script version
Environment Variables:
IBMCLOUD_API_KEY IBM Cloud API key
RELEASE_VER OpenShift release version (Default: 4.15)
ARTIFACTS_VERSION Tag or Branch name of ocp4-upi-powervs repository (Default: main)
RHEL_SUBS_PASSWORD RHEL subscription password if not provided in variables
NO_OF_RETRY Number of retries/attempts to run repeatable actions such as create (Default: 5)
Submit issues at: https://github.com/ocp-power-automation/openshift-install-power/issues
EOF
# Showing the help always terminates the script successfully.
exit 0
}
# --- Settings that can be overridden from the environment -----------------
: "${RELEASE_VER:=4.15}"
: "${ARTIFACTS_REPO:=https://github.com/ocp-power-automation/ocp4-upi-powervs}"
: "${ARTIFACTS_VERSION:=main}"
#ARTIFACTS_VERSION=${ARTIFACTS_VERSION:-"release-$RELEASE_VER"}
#ARTIFACTS_VERSION="v4.5.3"
: "${NO_OF_RETRY:=5}"

# --- Paths: everything lives under the directory the script is started in --
INSTALL_DIR="$PWD"
TF="${INSTALL_DIR}/terraform"
CLI_PATH="${INSTALL_DIR}/ibmcloud"
OC_PATH="${INSTALL_DIR}/oc"
ARTIFACTS_DIR='automation'
LOGFILE="ocp4-upi-powervs_$(date '+%Y%m%d%H%M%S')"
BOOTSTRAP_DELETE_VARFILE='bootstrap-delete.tfvars'

# --- Flags, all off (0) by default ------------------------------------------
TRACE=0
TF_TRACE=0
FORCE_DESTROY=0
IGNORE_OS=0
IGNORE_WARN=0
DISPLAY_ALL_IMAGES=0
IS_HA=0

# --- Timing: SLEEP_TIME is passed to sleep; the reboot timeouts are
# --- multiplied by 60 where they are compared against $SECONDS ------------
SLEEP_TIME=10
REBOOT_TIMEOUT=15
REBOOT_TIMEOUT_BASTION=30

# Remember the field separator so that error() can restore it before exiting
OLD_IFS="$IFS"

# All the ibmcloud configs and targeted power-iaas CRN will be stored under current directory named '.bluemix'
export IBMCLOUD_HOME="$PWD"
#-------------------------------------------------------------------------
# Trap ctrl-c interrupt and call ctrl_c()
#-------------------------------------------------------------------------
trap ctrl_c INT
# SIGINT handler: reports through error(), with a more specific message when
# a terraform state lock file is present in the current directory or in the
# artifacts directory.
ctrl_c() {
  local lock_file=".terraform.tfstate.lock.info"
  if [[ ! -f ./$lock_file && ! -f ./"$ARTIFACTS_DIR"/$lock_file ]]; then
    error "Exiting on user interrupt!"
  else
    error "Terraform process was running when the script was interrupted. Please run create command again to continue OR destroy command to clean up resources."
  fi
}
#-- ANSI color escape sequences; stored unexpanded and interpreted later by
#-- the `echo -e` calls in the logging helpers
NRM='\033[0m'
RED='\033[1;31m'
GRN='\033[1;32m'
LGRN='\033[0;32m'
YEL='\033[1;33m'
PUR='\033[1;35m'
CYN='\033[0;36m'
# Print an informational message prefixed with the calling function's name.
#   $1 - message text
log() {
  local caller=${FUNCNAME[1]}
  echo -e "${CYN}[${caller}]${NRM} $1"
}
# Print a warning prefixed with the calling function's name, unless warnings
# are suppressed (IGNORE_WARN is non-zero). Always returns 0.
#   $1 - message text
warn() {
  [[ $IGNORE_WARN -eq 0 ]] || return 0
  local caller=${FUNCNAME[1]}
  echo -e "${YEL}[${caller}]${NRM} ${YEL}WARN${NRM}: $1"
}
# Print a FAILED message prefixed with the calling function's name.
#   $1 - message text
failure() {
  local caller=${FUNCNAME[1]}
  echo -e "${PUR}[${caller}]${NRM} ${PUR}FAILED${NRM}: $1"
}
# Print a SUCCESS message prefixed with the calling function's name.
#   $1 - message text
success() {
  local caller=${FUNCNAME[1]}
  echo -e "${GRN}[${caller}]${NRM} ${GRN}SUCCESS${NRM}: $1"
}
# Print the given text wrapped in the light-green color sequence.
#   $1 - text to emphasize
highlight() {
  local text=$1
  echo -e "${LGRN}${text}${NRM}"
}
# Print an ERROR message prefixed with the calling function's name, restore
# IFS from OLD_IFS and terminate the shell.
#   $1 - message text
#   $2 - exit status to use (optional; 1 when empty or missing)
error() {
  local caller=${FUNCNAME[1]}
  echo -e "${RED}[${caller}]${NRM} ${RED}ERROR${NRM}: $1"
  ret_code=${2:-1}
  IFS=$OLD_IFS
  exit $ret_code
}
# Toggle shell command tracing (xtrace): turn it off when it is on and on when
# it is off. Does nothing when TRACE is 0.
debug_switch() {
  [[ $TRACE == 0 ]] && return 0
  case $- in
    *x*) set +x ;;
    *) set -x ;;
  esac
}
#-------------------------------------------------------------------------
# Display the cluster output variables
#-------------------------------------------------------------------------
# Change into the artifacts directory and run `terraform output` for the name
# held in the global output_var.
output() {
  cd "./${ARTIFACTS_DIR}"
  $TF output "${output_var}"
}
#-------------------------------------------------------------------------
# Util for retrying any command, special case for curl downloads
#-------------------------------------------------------------------------
# Run a command up to NO_OF_RETRY times until it succeeds.
#   $1 - command line to run; it is word-split on execution
# After the last failed attempt error() is called with the command's exit
# status; between attempts the function sleeps for one second.
retry() {
  cmd=$1
  for i in $(seq 1 "$NO_OF_RETRY"); do
    echo "Attempt: $i/$NO_OF_RETRY"
    if $cmd; then
      ret_code=0
      break
    else
      ret_code=$?
    fi
    if [[ "$i" == "$NO_OF_RETRY" ]]; then
      error "All retry attempts failed! Please try running the script again after some time" $ret_code
    else
      sleep 1s
    fi
  done
}
#-------------------------------------------------------------------------
# Progress bar
#-------------------------------------------------------------------------
# Draw a one-line progress bar for the global PERCENT: one '#' per 2 percent,
# padded with spaces, ending in a carriage return so the next draw overwrites
# it. Nothing is drawn when TF_TRACE is 1.
show_progress() {
  [[ "$TF_TRACE" -eq 1 ]] && return 0
  str="-"
  n=0
  while ((n < PERCENT)); do
    str+="#"
    n=$((n + 2))
  done
  n=$((PERCENT))
  while ((n <= 100)); do
    str+=" "
    n=$((n + 2))
  done
  echo -ne "$str($PERCENT%)\r"
}
#-------------------------------------------------------------------------
# Check if ping to an IP is working
#-------------------------------------------------------------------------
# Ping an address once (2 second deadline) through the command stored in
# BASTION_SSH_CMD.
#   $1 - address to ping
# Returns 1 when no address is given, otherwise the status of the ping command.
check_ping() {
  local target=$1
  if [[ -n $target ]]; then
    $BASTION_SSH_CMD ping -w 2 -c 1 "$target" &>/dev/null
  else
    return 1
  fi
}
#-------------------------------------------------------------------------
# Check if a resource exists in the terraform state and is not tainted
#-------------------------------------------------------------------------
# Succeeds when the given address appears in `terraform state list` and its
# `terraform state show` output does not contain "(tainted)".
#   $1 - resource address (matched as a fixed string)
checkState() {
  local resource=$1
  if ! $TF state list 2>/dev/null | grep -F "$resource" >/dev/null 2>&1; then
    return 1
  fi
  if $TF state show "$resource" 2>/dev/null | grep "(tainted)" >/dev/null; then
    return 1
  fi
}
#-------------------------------------------------------------------------
# Check if the given name is present in the terraform output
#-------------------------------------------------------------------------
# Succeeds when the given fixed string occurs anywhere in the text printed by
# `terraform output`.
#   $1 - text to look for (typically an output name)
checkOutput() {
  local wanted=$1
  $TF output | grep -F "$wanted" >/dev/null 2>&1
}
#-------------------------------------------------------------------------
# Start or stop a node
# instance_action: $1: start/stop (default: stop)
# node_type: $2: bootstrap/master/worker (optional, will include all when empty)
#-------------------------------------------------------------------------
# Start or stop the cluster node instances recorded in the terraform state.
#   $1 - action: "start" or "stop" (defaults to "stop" when empty)
#   $2 - optional filter applied to the state addresses (e.g. master, worker)
# An instance is only acted upon when its current status allows it: ACTIVE for
# stop, SHUTOFF for start. Any other action only produces a warning.
take_action_node() {
  instance_action=${1:-stop}
  node_type=$2
  case $instance_action in
    stop) ALLOWED_STATUS="ACTIVE" ;;
    start) ALLOWED_STATUS="SHUTOFF" ;;
    *)
      warn "only supported actions are start and stop, default is stop"
      return 0
      ;;
  esac
  for node in $($TF state list 2>/dev/null | grep "module.nodes.ibm_pi_instance" | grep "$node_type"); do
    instance_name=$($TF state show "$node" | grep pi_instance_name | awk '{print $3}' | sed 's/"//g')
    instance_id=$($CLI_PATH pi ins ls 2>/dev/null | grep "$instance_name" | awk '{print $1}')
    # Instance is unknown to the cloud CLI: nothing to act on
    if [[ -z $instance_id ]]; then
      continue
    fi
    status=$($CLI_PATH pi ins get "$instance_id" 2>/dev/null | grep '^Status' | awk '{print $2}')
    if [[ $status == "$ALLOWED_STATUS" ]]; then
      $CLI_PATH pi ins action "$instance_id" -o $instance_action
    fi
  done
}
#-------------------------------------------------------------------------
# Check if cluster nodes resources are created
#-------------------------------------------------------------------------
# Derive PERCENT from how many cluster node instances
# (module.nodes.ibm_pi_instance) are present in the terraform state.
# Globals: TF, TOTAL_RHCOS (read); no_of_nodes, current_percent, PERCENT (written)
# Returns 1 (PERCENT untouched) when no node is in the state yet, else 0.
#   - all planned nodes present -> PERCENT=65
#   - otherwise                 -> PERCENT=14 + 50 * present / planned
checkAllNodes() {
  # grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps the
  # assignment from tripping `set -e` if this is ever called unguarded.
  no_of_nodes=$($TF state list 2>/dev/null | grep -c "module.nodes.ibm_pi_instance" || true)
  if [[ $no_of_nodes -eq 0 ]]; then
    return 1
  fi
  # Use >= (not ==) so a zero/unset TOTAL_RHCOS cannot cause a division by
  # zero below, and extra nodes in the state cannot push PERCENT past 65.
  if [[ $no_of_nodes -ge ${TOTAL_RHCOS:-0} ]]; then
    PERCENT=65
  else
    current_percent=$((50 * no_of_nodes / TOTAL_RHCOS))
    PERCENT=$((14 + current_percent))
  fi
}
#-------------------------------------------------------------------------
# Reboot node if ELAPSED_TIME is greater than REBOOT_TIMEOUT
#-------------------------------------------------------------------------
# Hard-reboot a node once it has been unreachable for longer than
# REBOOT_TIMEOUT minutes.
#   $1 - instance name to look up in the `pi ins ls` listing
# The first call only starts the timer (ELAPSED_TIME); the timer is restarted
# after every reboot request.
reboot_node() {
  NODE=$1
  if [[ -z $ELAPSED_TIME ]]; then
    ELAPSED_TIME=$SECONDS
    return 0
  fi
  local waited=$((SECONDS - ELAPSED_TIME))
  local limit=$((REBOOT_TIMEOUT * 60))
  if [[ $waited -le $limit ]]; then
    return 0
  fi
  warn "Unable to connect to $NODE. Rebooting the node"
  instance_id=$($CLI_PATH pi ins ls | grep "$NODE" | awk '{print $1}')
  $CLI_PATH pi ins action "$instance_id" -o hard-reboot
  ELAPSED_TIME=$SECONDS
}
#-------------------------------------------------------------------------
# Check if the infra setup is working
#-------------------------------------------------------------------------
function checkClusterSetup {
# Advance PERCENT through the cluster setup stages, based on the terraform
# outputs and on markers found in the current log file ($LOG_FILE), and ask
# reboot_node to restart nodes that take too long to show up in the log.
# Stages set here: 71 ignition file reachable, 72 bootstrap connected,
# 73+i master-i connected, 82 compute node play started (or bootstrap already
# deleted), 83+i worker-i connected.
# Returns 1 while the node IP outputs or the ignition file are not available;
# the other explicit exits return 0.
if [[ -f $BOOTSTRAP_DELETE_VARFILE ]]; then
# No need to check setup if bootstrap node does not exist
PERCENT=82
return
fi
# Check if every node has an IP
if ! checkOutput "bastion_ssh_command" || ! checkOutput "bootstrap_ip" || ! checkOutput "master_ips" || ! checkOutput "worker_ips"; then
return 1
fi
if [[ $PERCENT -lt 71 ]]; then
# Keep only the part of the output before the first comma and add ssh options
BASTION_SSH_CMD="$($TF output -raw bastion_ssh_command | sed 's/,.*//') -q -o StrictHostKeyChecking=no"
# Check if ign file is available for download
ign_url="http://${NAME_PREFIX}bastion-0:8080/ignition/bootstrap.ign"
if $BASTION_SSH_CMD curl -o /dev/null -sIw '%{http_code}' "$ign_url" | grep -q 200; then
PERCENT=71
else
return 1
fi
fi
# Check bootstrap connection
if [[ $PERCENT -lt 72 ]]; then
if grep -E "ok: \[.*bootstrap\] => {\"changed\"" "$LOG_FILE" > /dev/null; then
PERCENT=72
# Restart the timeout clock for the next node
unset ELAPSED_TIME
else
reboot_node "${NAME_PREFIX}bootstrap"
return 0
fi
fi
# Check masters connection
# Masters are checked in index order; the first one not yet seen in the log
# ends this pass with status 0.
for ((i=0;i<MASTER_COUNT;i++)); do
if [[ $PERCENT -lt $((73 + i)) ]]; then
if grep -E "ok: \[.*master-$i\] => {\"changed\"" "$LOG_FILE" > /dev/null; then
# once a node is connected subsequent nodes should be faster to connect
REBOOT_TIMEOUT=1
unset ELAPSED_TIME
PERCENT=$((73 + i))
else
if reboot_node "${NAME_PREFIX}master-$i"; then
REBOOT_TIMEOUT=5
fi
return 0
fi
fi
done
# Check wait-for-bootstrap completion
# Implies that wait-for-bootstrap is complete when compute node check has started
if grep -F "PLAY [Check and configure compute nodes]" "$LOG_FILE" >/dev/null; then
if [[ $PERCENT -lt 82 ]]; then
PERCENT=82
fi
else
# all masters are up; reset time for checking workers
unset ELAPSED_TIME
REBOOT_TIMEOUT=5
return 0
fi
# Check workers connection
# Same scheme as for the masters, starting at 83.
for ((i=0;i<WORKER_COUNT;i++)); do
if [[ $PERCENT -lt $((83 + i)) ]]; then
if grep -E "ok: \[.*worker-$i\] => {\"changed\"" "$LOG_FILE" > /dev/null; then
PERCENT=$((83 + i))
REBOOT_TIMEOUT=1
unset ELAPSED_TIME
else
if reboot_node "${NAME_PREFIX}worker-$i"; then
REBOOT_TIMEOUT=5
fi
return 0
fi
fi
done
# TODO: Check wait-for-complete
}
#-------------------------------------------------------------------------
# Check and reboot bastion nodes if ELAPSED_TIME is greater than REBOOT_TIMEOUT_BASTION
#-------------------------------------------------------------------------
# Inspect every bastion that is not yet in the terraform state.
# A bastion whose cloud health status is WARNING gets a timer on first sight
# (PERCENT is bumped by 2) and is hard-rebooted once it has been in that state
# for more than REBOOT_TIMEOUT_BASTION minutes.
# Sets PERCENT=11 when all bastions are in the state.
# Returns 1 only when some bastion is missing and PERCENT is still <= 3.
check_bastion() {
  bothReady=true
  for ((i = 0; i < BASTION_COUNT; i++)); do
    # Already in the terraform state: nothing to monitor
    checkState "module.prepare.ibm_pi_instance.bastion[${i}]" && continue
    bothReady=false

    instance_name="${NAME_PREFIX}bastion-$i"
    instance_id=$($CLI_PATH pi ins ls 2>/dev/null | grep "$instance_name" | awk '{print $1}')
    instance_health_status=$($CLI_PATH pi ins get "$instance_id" 2>/dev/null | grep "^Health Status" | awk '{print $3}')
    [[ $instance_health_status == "WARNING" ]] || continue

    if [[ -z ${BASTION_ELAPSED_TIME[$i]} ]]; then
      # First time this bastion is seen in WARNING: start its timer
      BASTION_ELAPSED_TIME[$i]=$SECONDS
      PERCENT=$((PERCENT + 2))
    elif [[ $((SECONDS - BASTION_ELAPSED_TIME[i])) -gt $((REBOOT_TIMEOUT_BASTION * 60)) ]]; then
      warn "Node $instance_name is in WARNING state for more than $REBOOT_TIMEOUT_BASTION mins. Rebooting the node"
      $CLI_PATH pi ins action "$instance_id" -o hard-reboot
      BASTION_ELAPSED_TIME[$i]=$SECONDS
    fi
  done

  if [[ $bothReady == true ]]; then
    PERCENT=11
    return 0
  fi
  if [[ $PERCENT -le 3 ]]; then
    return 1
  fi
  return 0
}
#-------------------------------------------------------------------------
# Evaluate the progress
#-------------------------------------------------------------------------
# Work out the overall progress (PERCENT) of a running apply.
# The checks go from the latest stage to the earliest and stop at the first one
# that holds; the check* helpers that succeed set PERCENT themselves.
monitor() {
  # Without a name prefix in the outputs nothing has been created yet
  if ! checkOutput "name_prefix"; then
    PERCENT=0
    return 0
  fi
  NAME_PREFIX=$($TF output -raw "name_prefix" 2>/dev/null)

  if grep -F "module.install.null_resource.install: Creation complete after" "$LOG_FILE" >/dev/null; then
    PERCENT=99
    return 0
  fi
  if checkClusterSetup; then
    return 0
  fi
  if checkState "module.install.null_resource.config"; then
    PERCENT=70
    return 0
  fi
  if checkAllNodes; then
    return 0
  fi
  if checkState "module.prepare.null_resource.bastion_packages[0]"; then
    PERCENT=14
    return 0
  fi
  if checkState "module.prepare.null_resource.bastion_init[0]"; then
    PERCENT=12
    return 0
  fi
  if check_bastion; then
    return 0
  fi
  if checkState "module.prepare.ibm_pi_network.public_network"; then
    PERCENT=3
    return 0
  fi
  if checkState "module.prepare.ibm_pi_key.key"; then
    PERCENT=2
    return 0
  fi
  PERCENT=1
}
#-------------------------------------------------------------------------
# Monitor loop for the progress of apply command
#-------------------------------------------------------------------------
# Poll every SLEEP_TIME seconds for as long as the log file has been modified
# within the last minute. During an apply each round also refreshes and
# redraws the progress.
monitor_loop() {
  until [[ -z $(find "${LOG_FILE}" -mmin -1 -print) ]]; do
    if [[ $action == "apply" ]]; then
      monitor
      show_progress
    fi
    sleep $SLEEP_TIME
  done
}
#-------------------------------------------------------------------------
# Read the info from the plan file
#-------------------------------------------------------------------------
# Count the lines of ./tfplan that mention the given instance resource,
# leaving out lines containing "has changed". Prints the count.
#   $1 - pattern to look for, e.g. ibm_pi_instance.master
# grep -c prints 0 but exits 1 when the count is zero; `|| true` keeps that
# exit status from aborting the script under `set -e`.
_plan_instance_count() {
  grep "$1" tfplan | grep -c -v "has changed" || true
}

# Read the per-role instance counts from the saved plan output (./tfplan).
# Globals written: BASTION_COUNT, BOOTSTRAP_COUNT, MASTER_COUNT, WORKER_COUNT,
#                  TOTAL_RHCOS (bootstrap + master + worker)
plan_info() {
  BASTION_COUNT=$(_plan_instance_count ibm_pi_instance.bastion)
  BOOTSTRAP_COUNT=$(_plan_instance_count ibm_pi_instance.bootstrap)
  MASTER_COUNT=$(_plan_instance_count ibm_pi_instance.master)
  WORKER_COUNT=$(_plan_instance_count ibm_pi_instance.worker)
  TOTAL_RHCOS=$((BOOTSTRAP_COUNT + MASTER_COUNT + WORKER_COUNT))
}
#-------------------------------------------------------------------------
# Check if terraform is already running
#-------------------------------------------------------------------------
# Decide from the newest file in ../logs whether another terraform run is
# still active (log modified within the last minute).
#   - no log file:  return immediately
#   - active log:   wait for that run via plan_info + monitor_loop
#   - stale log:    remove a leftover terraform state lock file, if any
# LOG_FILE is left pointing at the newest log file (or empty).
is_terraform_running() {
  LOG_FILE=$(ls -Art ../logs | tail -n 1)
  if [[ -z $LOG_FILE ]]; then
    return 0
  fi
  LOG_FILE="../logs/$LOG_FILE"

  # Give a very recent run one minute before looking at the log again
  if [[ -n $(find "${LOG_FILE}" -mmin -1 -print) ]]; then
    warn "Last run was less than a min ago... please wait a minute"
    sleep 60
  fi

  if [[ -z $(find "${LOG_FILE}" -mmin -1 -print) ]]; then
    # No log files updated in last 1 min; Invalid TF lock file
    if [[ -f ./.terraform.tfstate.lock.info ]]; then
      rm -f ./.terraform.tfstate.lock.info
    fi
    return
  fi

  warn "Existing Terraform process is already running... please wait"
  plan_info
  monitor_loop
  log "Starting a new terraform process... please wait"
}
#-------------------------------------------------------------------------
# Delete stale nodes on PowerVS resource
#-------------------------------------------------------------------------
# Delete instances of one node type that exist on the cloud but are missing
# from (or tainted in) the terraform state, so a retry can recreate them.
#   $1 - node type: bastion, bootstrap, master or worker
#   $2 - number of instances of that type
# Globals: NAME_PREFIX, CLI_PATH (read); NODE, COUNT, instance_name,
#          instance_id, n (written)
delete_failed_instance() {
  NODE=$1
  COUNT=$2
  instance_name=""
  # Bastion instances live under module.prepare (see check_bastion); all the
  # other node types are under module.nodes. Looking a bastion up under
  # module.nodes never matches and would delete healthy bastions.
  local module="module.nodes"
  if [[ "$NODE" == "bastion" ]]; then
    module="module.prepare"
  fi
  for ((n = 0; n < COUNT; n++)); do
    # Present and not tainted in the state: leave it alone
    if checkState "${module}.ibm_pi_instance.${NODE}[${n}]"; then
      continue
    fi
    # The bootstrap instance name carries no index suffix
    if [[ "$NODE" == "bootstrap" ]]; then
      instance_name="${NAME_PREFIX}${NODE}"
    else
      instance_name="${NAME_PREFIX}${NODE}-$n"
    fi
    warn "$instance_name: Trying to delete the instance that exist on the cloud when status is not BUILD"
    instance_id=$($CLI_PATH pi ins ls | grep "$instance_name" | awk '{print $1}')
    # Not listed on the cloud: nothing to delete. Passing an empty id on would
    # make the CLI calls below fail, which aborts the script under `set -e`.
    if [[ -z $instance_id ]]; then
      continue
    fi
    while [[ $($CLI_PATH pi ins get "$instance_id" | grep "^Status" | awk '{print $2}') == "BUILD" ]]; do
      # Cannot delete instance in BUILD status
      sleep 30
    done
    $CLI_PATH pi ins delete "$instance_id" --delete-data-volumes
    # Some breather for the delete action to complete
    sleep 30
  done
}
#-------------------------------------------------------------------------
# Retry and monitor the terraform commands
#-------------------------------------------------------------------------
function retry_terraform {
# Run a terraform action in the background, follow it through its log file and
# retry it (up to NO_OF_RETRY attempts) when the log contains "Error:".
# $1 - terraform action (callers in this file pass apply or destroy)
# $2 - extra options appended to the terraform command line
# After a clean apply the loop is restarted once with the bootstrap-delete var
# file so that the bootstrap node is removed.
PERCENT=0
action=$1
options=$2
cmd="$TF $action $options -auto-approve"
# bootstrap node already deleted; subsequent cmds should have bootstrap count as 0
[[ -f "$BOOTSTRAP_DELETE_VARFILE" ]] && cmd="$cmd -var-file $BOOTSTRAP_DELETE_VARFILE"
while [[ -f ./tfplan ]] && [[ $(find ./tfplan -mmin -1 -print) ]]; do
# Concurrent plan requests will fail; last plan was in less than a min
sleep $SLEEP_TIME
done
is_terraform_running
if [[ $action == "apply" ]]; then
# Running terraform plan
# The plan text is saved to ./tfplan, which plan_info reads.
# shellcheck disable=SC2086
$TF plan $vars -input=false > ./tfplan
# TODO: If plan does not create new resource then exit
plan_info
fi
for (( ATTEMPT=1; ATTEMPT <= NO_OF_RETRY; ATTEMPT++ )); do
# One log file per action and attempt
LOG_FILE="../logs/${LOGFILE}_${action}_$ATTEMPT.log"
echo "Attempt: $ATTEMPT/$NO_OF_RETRY"
{
echo "========================"
echo "Attempt: $ATTEMPT/$NO_OF_RETRY"
echo "$cmd"
echo "========================"
} >> "$LOG_FILE"
# Terraform runs in the background; monitor_loop returns once the log file
# has not been modified for a minute.
if [[ "$TF_TRACE" -eq 0 ]]; then
$cmd >> "$LOG_FILE" 2>&1 &
else
# NOTE(review): tee is used without -a here, so it presumably overwrites the
# header block written just above - confirm whether that is intended.
$cmd 2>&1 | tee "$LOG_FILE" &
fi
monitor_loop
# Check if errors exist
if grep -c "Error:" "$LOG_FILE" >/dev/null; then
log "Encountered below errors:"
grep "Error:" "$LOG_FILE" | sort | uniq
# Handle unknown provisioning errors
if grep "failed to provision unknown error (status 504)" "$LOG_FILE" >/dev/null || grep "invalid name server name already exists for cloud-instance" "$LOG_FILE" >/dev/null; then
warn "Unknown issues were seen while provisioning cluster nodes. Verifying if failed nodes were created on the cloud..."
if [[ $PERCENT -ge 10 ]]; then
# PERCENT>=10 means bastion is already created
delete_failed_instance bootstrap "$BOOTSTRAP_COUNT"
delete_failed_instance master "$MASTER_COUNT"
delete_failed_instance worker "$WORKER_COUNT"
else
delete_failed_instance bastion "$BASTION_COUNT"
fi
elif grep "${NAME_PREFIX}pub-net network name already exists for cloud instance" "$LOG_FILE" >/dev/null; then
warn "Trying to delete the existing public network..."
network_name="${NAME_PREFIX}pub-net"
network_id=$($CLI_PATH pi snet ls | grep "$network_name" | awk '{print $1}')
[[ -n $network_id ]] && $CLI_PATH pi snet del "$network_id"
fi
# All tries exhausted
if [[ $ATTEMPT -eq $NO_OF_RETRY ]]; then
error "Terraform command failed after $NO_OF_RETRY attempts! Please check the log files"
fi
# Nothing to do other than retry
warn "Issues were seen while running the terraform command. Attempting to run again..."
sleep $SLEEP_TIME
else
# No errors in the log for this attempt
if [[ $action == "destroy" ]];then
rm -f "$BOOTSTRAP_DELETE_VARFILE"
elif [[ $action == "apply" ]] && [[ ! -f "$BOOTSTRAP_DELETE_VARFILE" ]]; then
log "Deleting the bootstrap node... please wait"
# Create the bootstrap delete var file
echo "bootstrap = {count = 0, memory = \"32\", processors = \"0.5\"}" > "$BOOTSTRAP_DELETE_VARFILE"
# Restart the loop for re-apply to delete the bootstrap node
# ATTEMPT=0 becomes 1 again through the loop increment; the changed action
# name keeps this branch from being entered a second time.
action="delete_bootstrap"
cmd="$cmd -var-file $BOOTSTRAP_DELETE_VARFILE"
ATTEMPT=0
continue
fi
break
fi
done
log "Completed running the terraform command."
}
#-------------------------------------------------------------------------
# Initialize and validate the Terraform code with plugins
#-------------------------------------------------------------------------
# Run `terraform init` (with retries) followed by `terraform validate`.
# On ppc64le the init is pointed at the parent directory for its plugins.
# The standard output of both commands is discarded.
init_terraform() {
  log "Initializing Terraform plugins and validating the code..."
  local init_cmd="$TF init"
  if [[ "$ARCH" == "ppc64le" ]]; then
    init_cmd="$init_cmd --plugin-dir ../"
  fi
  retry "$init_cmd" > /dev/null
  $TF validate > /dev/null
}
#-------------------------------------------------------------------------
# Verify if pull-secret.txt exists
# Check if SSH key-pair is provided else use users key or create a new one
#-------------------------------------------------------------------------
# Make sure the pull secret and an SSH key-pair are available in the artifacts
# data directory.
#   - pull secret: when the file named in the variables is missing or empty,
#     ./pull-secret.txt is copied; without it the script stops with an error
#   - SSH keys: when neither key file named in the variables has content,
#     ./id_rsa and ./id_rsa.pub are copied, after generating them if needed
verify_data() {
  local data_dir="./${ARTIFACTS_DIR}/data/"

  if [[ ! -s "$VAR_PULL_SECRET_FILE" ]]; then
    [[ -s "./pull-secret.txt" ]] || error "File pull-secret.txt not found"
    cp -f ./pull-secret.txt "$data_dir"
  fi

  # At least one of the configured key files has content: keep them
  if [[ -s "$VAR_PRIVATE_KEY_FILE" || -s "$VAR_PUBLIC_KEY_FILE" ]]; then
    return 0
  fi

  if [[ -s "./id_rsa" && -s "./id_rsa.pub" ]]; then
    log "Found id_rsa & id_rsa.pub in current directory"
  else
    warn "Creating new SSH key-pair..."
    ssh-keygen -t rsa -f ./id_rsa -N ''
  fi
  cp -f ./id_rsa ./id_rsa.pub "$data_dir"
}
#-------------------------------------------------------------------------
# Common checks for apply and destroy functions
#-------------------------------------------------------------------------
function precheck_input {
# Shared preparation for create and destroy: settle the terraform variable
# arguments, read the needed values from the variables file, export the
# secrets terraform expects as TF_VAR_*, verify the data files and finally
# change into the artifacts directory.
if [ -z "$vars" ]; then
# No variables were passed on the command line: fall back to var.tfvars
if [ ! -f "var.tfvars" ]; then
warn "No variables specified or var.tfvars does not exist.. running variables command" && variables
fi
varfile="var.tfvars"
# Relative to the artifacts directory that is entered at the end
vars="-var-file ../$varfile"
SERVICE_INSTANCE_ID=$(read_varfile "service_instance_id")
[[ -z "$SERVICE_INSTANCE_ID" ]] && error "Required input variable 'service_instance_id' not found"
# Tracing is toggled around the reads of the secret values
debug_switch
VAR_IBMCLOUD_API_KEY=$(read_varfile "ibmcloud_api_key")
VAR_RHEL_SUBS_PASS=$(read_varfile "rhel_subscription_password")
debug_switch
VAR_PULL_SECRET_FILE=$(read_varfile "pull_secret_file")
VAR_PRIVATE_KEY_FILE=$(read_varfile "private_key_file")
VAR_PUBLIC_KEY_FILE=$(read_varfile "public_key_file")
VAR_RHEL_SUBS_USER=$(read_varfile "rhel_subscription_username")
fi
if [[ -n "$flavor" ]]; then
vars+=" -var-file ./compute-vars/$flavor.tfvars"
fi
# Tracing is toggled again for the block that handles the secrets below
debug_switch
# A subscription user needs a password from the environment or the var file
if [ -n "$VAR_RHEL_SUBS_USER" ] && [ -z "$RHEL_SUBS_PASSWORD" ] && [ -z "$VAR_RHEL_SUBS_PASS" ]; then
error "Please export RHEL_SUBS_PASSWORD or set 'rhel_subscription_password' variable"
fi
[[ "${RHEL_SUBS_PASSWORD}" != "" ]] && export TF_VAR_rhel_subscription_password="$RHEL_SUBS_PASSWORD"
# If provided varfile does not have API key read from env
# (a key found in the var file replaces the one from the environment)
[[ -n $VAR_IBMCLOUD_API_KEY ]] && IBMCLOUD_API_KEY=$VAR_IBMCLOUD_API_KEY
if [[ -z "${IBMCLOUD_API_KEY}" ]]; then
error "Please export IBMCLOUD_API_KEY"
else
export TF_VAR_ibmcloud_api_key="$IBMCLOUD_API_KEY"
fi
debug_switch
verify_data
# The terraform commands that follow run from inside the artifacts directory
cd ./"$ARTIFACTS_DIR"
}
#-------------------------------------------------------------------------
# Login to IBM Cloud and target the service instance
#-------------------------------------------------------------------------
# Log in to IBM Cloud with IBMCLOUD_API_KEY and target the PowerVS workspace
# whose listing line contains SERVICE_INSTANCE_ID.
# Sets CRN (first column of that line) and SVCNAME (rest of the line); stops
# with an error when no workspace matches.
powervs_login() {
  log "Trying to login with the provided IBMCLOUD_API_KEY..."
  # Tracing is toggled around the command that carries the API key
  debug_switch
  $CLI_PATH login --apikey "$IBMCLOUD_API_KEY" -q --no-region > /dev/null
  debug_switch

  CRN=$($CLI_PATH pi ws ls 2>/dev/null | grep "${SERVICE_INSTANCE_ID}" | awk '{print $1}')
  if [[ -z $CRN ]]; then
    error "Cannot find PowerVS service instance with ID: $SERVICE_INSTANCE_ID for this account"
  fi
  # Everything after the first column, with the leading blanks removed
  SVCNAME=$($CLI_PATH pi ws ls | grep "${SERVICE_INSTANCE_ID}" | awk '{$1=""; print $0}' | sed 's/^[ ]*//g')
  $CLI_PATH pi ws tg "$CRN" 1>/dev/null
  log "Targeting '$SVCNAME' with Id $CRN"
}
#-------------------------------------------------------------------------
# Check and run setup
#-------------------------------------------------------------------------
# Make sure the artifacts directory exists, running the setup command when it
# does not.
precheck_artifacts() {
  # Run setup if no artifacts
  [[ -d $ARTIFACTS_DIR ]] || {
    warn "Cannot find artifacts directory... running setup command"
    setup
  }
  # ignore check for dev preview install
  # (no further check follows in this function, so both outcomes of the test
  # below end the function the same way)
  if grep -F openshift_install_tarball "$ARTIFACTS_DIR"/var.tfvars | grep -q "ocp-dev-preview"; then
    return
  fi
}
#-------------------------------------------------------------------------
# Create the cluster
#-------------------------------------------------------------------------
# Create the cluster: prepare artifacts and inputs, log in, initialize
# terraform, run the apply with retries and finally print the access details.
# Returns the status of cluster_access_info when that step fails.
apply() {
  # Run setup if no artifacts
  precheck_artifacts
  precheck_input
  powervs_login
  init_terraform

  log "Running terraform apply... please wait"
  retry_terraform apply "$vars -input=false"

  cluster_access_info || return
  success "Congratulations! create command completed"
}
#-------------------------------------------------------------------------
# Destroy the cluster
#-------------------------------------------------------------------------
# Destroy the cluster described by the terraform state in the artifacts
# directory. Stops with an error when there is no state file, when the state
# holds no resources (the empty state file is removed first) or when the user
# does not confirm. FORCE_DESTROY other than 0 skips the confirmation.
destroy() {
  local state_file="$ARTIFACTS_DIR/terraform.tfstate"

  if [[ ! -d $ARTIFACTS_DIR || ! -f $state_file ]]; then
    error "No artifacts or state file exist!"
  fi

  if [[ $($TF state list -state="$state_file" | wc -l) -eq 0 ]]; then
    rm -f "$state_file"
    error "Nothing to destroy!" && return
  fi

  if [[ $FORCE_DESTROY -eq 0 ]]; then
    question "Are you sure you want to proceed with destroy?" "yes no"
    if [[ "${value}" != "yes" ]]; then
      error "Exiting on user request" && return
    fi
  fi

  precheck_input
  log "Running terraform destroy... please wait"
  retry_terraform destroy "$vars -input=false"
  # Remove the state file from the directory the script is in after precheck_input
  rm -f ./terraform.tfstate
  success "Done! destroy command completed"
}
#-------------------------------------------------------------------------
# Display the cluster access information
#-------------------------------------------------------------------------
# Print how to reach an installed cluster: bastion login command, kubeconfig
# location, web console URL, kubeadmin password and, when terraform reports
# any, the DNS / hosts entries. The kubeconfig is fetched from the bastion into
# ./kubeconfig.
# Returns 1 when there is no state file in the current directory or the install
# resource is not in the terraform state.
cluster_access_info() {
  if [[ ! -f ./terraform.tfstate ]] || ! checkState "module.install.null_resource.install"; then
    return 1
  fi

  # TODO: Find a way to change the bastion user as per TF variable; default is root
  if [ -s "$VAR_PRIVATE_KEY_FILE" ]; then
    echo "Login to bastion: $(highlight "$($TF output -raw bastion_ssh_command | sed 's/,.*//')") and start using the 'oc' command."
  else
    # No usable key file configured: rewrite "data" in the command to point
    # inside the artifacts directory
    echo "Login to bastion: $(highlight "$($TF output -raw bastion_ssh_command | sed -e 's/,.*//' -e "s/data/${ARTIFACTS_DIR}\/data/")") and start using the 'oc' command."
  fi

  # Run the ssh command from the terraform output to copy the kubeconfig
  $($TF output -raw bastion_ssh_command | sed 's/,.*//') -q -o StrictHostKeyChecking=no cat /root/openstack-upi/auth/kubeconfig > ./kubeconfig
  echo "To access the cluster on local system when using 'oc' run: $(highlight "export KUBECONFIG=$PWD/kubeconfig")"
  echo "NOTE: 'oc' on local system will not work for WDC04 and DAL13 DC. Login to bastion system to use 'oc'"
  echo "Access the OpenShift web-console here: $(highlight "$($TF output web_console_url)")"
  echo "Login to the console with user: $(highlight "kubeadmin") and password: $(highlight "$($($TF output -raw bastion_ssh_command | sed 's/,.*//') -q -o StrictHostKeyChecking=no cat /root/openstack-upi/auth/kubeadmin-password)")"

  if [[ -n $($TF output etc_hosts_entries 2>/dev/null) ]]; then
    echo "Add the following records to your DNS server: $(highlight "$($TF output -raw dns_entries)")"
    echo "Alternatively, you can add the line on local system 'hosts' file: $(highlight "$($TF output -raw etc_hosts_entries)")"
  fi
}
#-------------------------------------------------------------------------
# Display the access information of installed OpenShift cluster
#-------------------------------------------------------------------------
# Entry point of the access-info command: enter the artifacts directory when it
# exists and print the cluster access details, stopping with an error when the
# cluster is not installed.
function access-info {
  if [[ -d $ARTIFACTS_DIR ]]; then
    cd ./"$ARTIFACTS_DIR"
  fi
  cluster_access_info || error "Cluster is not installed"
}
# -------------------------------------------------------------------------
# Function to read sensitive data by masking with asterisk
# -------------------------------------------------------------------------
# Read a secret one character at a time, showing an asterisk per character.
# The typed text is appended to the global `value`; backspace (DEL) removes the
# last character. Terminal settings are changed for the duration of the read
# and reset with `stty sane` afterwards.
read_sensitive_data() {
  stty -icanon
  stty -echo
  charcount=0
  # Empty prompt
  prompt=''
  while IFS= read -r -s -n 1 -p "$prompt" ch; do
    case $ch in
      '')
        # Enter - accept password
        break
        ;;
      $'\177')
        # Backspace
        if [ $charcount -gt 0 ]; then
          charcount=$((charcount - 1))
          # Step back, blank the asterisk, step back again
          prompt=$'\b \b'
          value="${value%?}"
        else
          prompt=''
        fi
        ;;
      *)
        charcount=$((charcount + 1))
        prompt='*'
        value+="$ch"
        ;;
    esac
  done
  stty sane
  # New line
  echo
}
#-------------------------------------------------------------------------
# Util for questions prompt
# 1.multi-choice 2.free-style input 3.free-style with a default value
#-------------------------------------------------------------------------
function question {
  # Ask the user a question and leave the answer in the global $value.
  #   $1 - question text
  #   $2 - option list as a single string, e.g. "a b c"; the special value
  #        "-sensitive" requests masked input via read_sensitive_data
  #   $3 - when non-empty, force a selection menu even for a single option
  # Prompt style by number of options: more than one (or $3 set) gives a
  # menu, exactly one gives free input with that option as the default,
  # none gives plain free input.
  local message force_select len
  local -a options

  value=""
  message=$1
  # Deliberately unquoted: the list is split into words using the IFS that
  # is in effect at the time of the call.
  # shellcheck disable=SC2206
  options=($2)
  len=${#options[@]}
  force_select=$3

  if [[ ${options[0]} == "-sensitive" ]]; then
    log "> $message"
    read_sensitive_data
    return 0
  fi

  if [[ $len -gt 1 ]] || [[ -n "$force_select" ]]; then
    # Multi-choice
    # A selection menu is shown even for a single option when forced.
    log "> $message"
    # Handle Ctrl-C when in select loop
    # https://unix.stackexchange.com/questions/513466/bash-ignoring-sigint-trap-when-select-loop-is-running
    set -o posix
    # shellcheck disable=SC2068
    select value in ${options[@]}
    do
      if [ "$value" == "" ]; then
        echo 'Invalid value... please re-select'
      else
        break
      fi
    done
    set +o posix
  elif [[ $len -eq 1 ]]; then
    # Input question with default value
    # If only 1 option is sent then use it for default value prompt.
    log "> $message (${options[0]})"
    # -r keeps backslashes in the answer intact
    read -r -p "? " value
    if [[ -z "${value}" ]]; then
      value="${options[0]}"
    fi
  else
    # Input question without any default value.
    log "> $message"
    # -r keeps backslashes in the answer intact
    read -r -p "? " value
  fi
  echo "- You have answered: $value"
}
#-------------------------------------------------------------------------
# Interactive prompts for nodes configuration
#-------------------------------------------------------------------------
function variables_nodes {
  # Collects the node sizing (bastion, bootstrap, master, worker) and
  # appends it to "$VAR_TEMPLATE".
  # Reads:  flavor, IS_HA, OS, VAR_TEMPLATE, and $value as set by `question`
  # Writes: IS_HA (when HA is chosen), no_of_bastion, and "$VAR_TEMPLATE"
  # Sources of the sizing, in order of precedence:
  #   1. $flavor already set  -> its .tfvars file is appended verbatim
  #   2. flavor picked in menu -> its .tfvars file, bastion count set to 2 for HA
  #   3. CUSTOM               -> built-in defaults or per-node prompts
  local memory proc count rc flavor_file
  local -a flavor_list=()

  # The HA question is only asked when no flavor was supplied
  if [[ -z $flavor ]]; then
    question "Do you want to configure High Availability for bastion nodes?" "yes no"
    if [[ "${value}" == "yes" ]]; then
      IS_HA=1
    fi
  fi

  if [[ $IS_HA -eq 1 ]]; then
    no_of_bastion=2
  else
    no_of_bastion=1
  fi

  if [[ -n $flavor ]]; then
    cat automation/compute-vars/"$flavor".tfvars >> "$VAR_TEMPLATE"
    return 0
  fi

  # Build the menu from the *.tfvars files themselves instead of parsing
  # 'ls', so that only entries with a matching .tfvars file are offered.
  for flavor_file in automation/compute-vars/*.tfvars; do
    # An unmatched glob stays literal; skip it
    [[ -e $flavor_file ]] || continue
    flavor_file=${flavor_file##*/}
    flavor_list+=("${flavor_file%.tfvars}")
  done
  flavor_list+=(CUSTOM)

  question "Select the flavor for the cluster nodes:" "${flavor_list[*]}" yes
  if [[ "${value}" != "CUSTOM" ]]; then
    cat automation/compute-vars/"$value".tfvars >> "$VAR_TEMPLATE"
    if [[ "${no_of_bastion}" -eq 2 ]]; then
      # Rewrite the trailing " 1" on the bastion line to " 2".
      # 'sed -i' needs an explicit empty suffix when sed on darwin is not GNU sed.
      rc=$(sed --version 2>/dev/null | grep -oc GNU)
      if [[ "$OS" == "darwin" && $rc == 0 ]]; then
        sed -i '' -e 's/\(bastion.*\) 1/\1 2/' "$VAR_TEMPLATE"
      else
        sed -i 's/\(bastion.*\) 1/\1 2/' "$VAR_TEMPLATE"
      fi
    fi
    return 0
  fi

  question "Do you want to use the default configuration for all the cluster nodes?" "yes no"
  if [[ "${value}" == "yes" ]]; then
    _append_node_config bastion 16 1 "$no_of_bastion"
    _append_node_config bootstrap 32 0.5 1
    _append_node_config master 32 0.5 3
    _append_node_config worker 32 0.5 2
    return 0
  fi

  # Bastion node config
  question "Do you want to use the default configuration for bastion node? (memory(GB)=16 processors=1)" "yes no"
  if [[ "${value}" == "yes" ]]; then
    _append_node_config bastion 16 1 "$no_of_bastion"
  else
    question "Enter the memory required for bastion nodes" "16"
    memory="${value}"
    question "Enter the processors required for bastion nodes" "1"
    proc="${value}"
    _append_node_config bastion "$memory" "$proc" "$no_of_bastion"
  fi

  # Bootstrap node config
  question "Do you want to use the default configuration for bootstrap node? (memory(GB)=32 processors=0.5)" "yes no"
  if [[ "${value}" == "yes" ]]; then
    _append_node_config bootstrap 32 0.5 1
  else
    question "Enter the memory required for bootstrap node" "32"
    memory="${value}"
    question "Enter the processors required for bootstrap node" "0.5"
    proc="${value}"
    _append_node_config bootstrap "$memory" "$proc" 1
  fi

  # Master nodes config
  question "Do you want to use the default configuration for master nodes? (memory(GB)=32 processors=0.5 count=3)" "yes no"
  if [[ "${value}" == "yes" ]]; then
    _append_node_config master 32 0.5 3
  else
    question "Enter the memory required for master nodes" "32"
    memory="${value}"
    question "Enter the processors required for master nodes" "0.5"
    proc="${value}"
    # TODO: Uncomment when we have support for masters >3
    #question "Select the count of master nodes" "3 5"
    #count="${value}"
    count=3
    _append_node_config master "$memory" "$proc" "$count"
  fi

  # Worker nodes config
  question "Do you want to use the default configuration for worker nodes? (memory(GB)=32 processors=0.5 count=2)" "yes no"
  if [[ "${value}" == "yes" ]]; then
    _append_node_config worker 32 0.5 2
  else
    question "Enter the memory required for worker nodes" "32"
    memory="${value}"
    question "Enter the processors required for worker nodes" "0.5"
    proc="${value}"
    question "Enter the count of worker nodes" "2"
    count="${value}"
    _append_node_config worker "$memory" "$proc" "$count"
  fi
}
#-------------------------------------------------------------------------
# Append one node definition line to "$VAR_TEMPLATE"
# $1 - node type   $2 - memory   $3 - processors   $4 - count
#-------------------------------------------------------------------------
function _append_node_config {
  printf '%s = {memory = "%s", processors = "%s", "count" = %s}\n' \
    "$1" "$2" "$3" "$4" >> "$VAR_TEMPLATE"
}
#-------------------------------------------------------------------------
# Interactive prompts to populate the var.tfvars file
#-------------------------------------------------------------------------
function variables {
precheck_artifacts
VAR_TEMPLATE="./var.tfvars.tmp"
VAR_FILE="./var.tfvars"
rm -f "$VAR_TEMPLATE"
# To handle input data with spaces in it
IFS=$'\n'
debug_switch
[[ -n $VAR_IBMCLOUD_API_KEY ]] && IBMCLOUD_API_KEY=$VAR_IBMCLOUD_API_KEY
[ "${IBMCLOUD_API_KEY}" == "" ] && error "Please export IBMCLOUD_API_KEY"
log "Trying to login with the provided IBMCLOUD_API_KEY..."
$CLI_PATH login --apikey "$IBMCLOUD_API_KEY" -q --no-region > /dev/null