-
Notifications
You must be signed in to change notification settings - Fork 7
/
maintain.sh
executable file
·135 lines (118 loc) · 6.79 KB
/
maintain.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
set -e
echo "*******************************************************************************************"
echo "** **"
echo "** Maintenance script for HAWQ and Greenplum by PivotalGuru.com **"
echo "** **"
echo "*******************************************************************************************"
# check PGDATABASE
if [ -z $PGDATABASE ]; then
db=$USER
fi
v=`psql -t -A -c "SELECT CASE WHEN POSITION ('HAWQ' in version()) > 0 AND POSITION ('Greenplum' IN version()) > 0 THEN 'hawq' WHEN POSITION ('HAWQ' in version()) = 0 AND POSITION ('Greenplum' IN version()) > 0 THEN 'gp' ELSE 'OTHER' END;"`
if [ "$v" == "hawq" ]; then
search_path=public,pg_catalog,hawq_toolkit
# get the release number of HAWQ
r=`psql -t -A -c "SELECT REPLACE((SPLIT_PART(SUBSTR(version, POSITION ('HAWQ' IN version) + 5), ' ', 1)), '.', '') as release FROM version();"`
else
search_path=public,pg_catalog,gp_toolkit
fi
s=_stats_missing
stats_missing=$v$s
echo ""
echo "*******************************************************************************************"
echo "** VACUUM ANALYZE the pg_catalog **"
echo "** **"
echo "** Creating and dropping database objects will cause the catalog to grow in size so that **"
echo "** there is a read consistent view. VACUUM is recommended on a regular basis to prevent **"
echo "** the catalog from suffering from bloat. ANALYZE is also recommended for the cost based **"
echo "** optimizer to create the best query plans possble when querying the catalog. **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
psql -t -A -c "SET search_path=$search_path; SELECT 'VACUUM ANALYZE \"' || n.nspname || '\".\"' || c.relname || '\";' FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE n.nspname = 'pg_catalog' AND c.relkind = 'r'" | psql -e
t=`date`
echo "Finish: $t"
if [ "$v" == "gp" ] || ([ "$v" == "hawq" ] && [ $r -ge 1300 ]); then
echo "*******************************************************************************************"
echo "** REINDEX the pg_catalog . **"
echo "** **"
echo "** Reindexing the catalog indexes will help prevent bloat or poor performance when **"
echo "** querying the catalog. **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
reindexdb -s
t=`date`
echo "Finish: $t"
fi
echo "*******************************************************************************************"
echo "** ANALYZE all tables/partitions with missing statistics. **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
if [ "$v" == "hawq" ]; then
if [ $r -ge 1300 ]; then
analyzedb -d $db -a
else
psql -t -A -c "SET search_path=$search_path; select 'ANALYZE \"' || smischema || '\".\"' || smitable || '\";' from hawq_stats_missing" | psql -e
fi
else
psql -t -A -c "SET search_path=$search_path; SELECT 'ANALYZE \"' || n.nspname || '\".\"' || c.relname || '\";'
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
JOIN gp_stats_missing g ON g.smischema = n.nspname AND g.smitable = c.relname
LEFT JOIN (--top level partitioned tables
SELECT c.oid
FROM pg_class c
LEFT JOIN pg_inherits i ON c.oid = i.inhrelid
WHERE i.inhseqno IS NULL
) pt ON c.oid = pt.oid
WHERE c.relkind = 'r'
AND pt.oid IS NULL" | psql -e
fi
t=`date`
echo "Finish: $t"
echo "*******************************************************************************************"
echo "** VACUUM all tables near the vacuum_freeze_min_age to prevent transaction wraparound **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
vacuum_freeze_min_age=`psql -t -A -c "show vacuum_freeze_min_age;"`
psql -t -A -c "SET search_path=$search_path; SELECT 'VACUUM \"' || n.nspname || '\".\"' || c.relname || '\";' FROM pg_class c join pg_namespace n ON c.relnamespace = n.oid WHERE age(relfrozenxid) > $vacuum_freeze_min_age AND c.relkind = 'r'" | psql -e
t=`date`
echo "Finish: $t"
if [ "$v" == "gp" ]; then
echo "*******************************************************************************************"
echo "** VACUUM all heap tables with bloat **"
echo "** **"
echo "** Utilize the toolkit schema to identify heap tables that have excessive bloat and need **"
echo "** to be vacuumed. **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
psql -t -A -c "SET search_path=$search_path; SELECT 'VACUUM \"' || bdinspname || '\".\"' || bdirelname || '\";' FROM gp_bloat_diag WHERE bdinspname <> 'pg_catalog'" | psql -e
t=`date`
echo "Finish: $t"
echo "*******************************************************************************************"
echo "** VACUUM all append optimized tables with bloat **"
echo "** **"
echo "** Utilize the toolkit schema to identify ao tables that have excessive bloat and need **"
echo "** to be vacuumed. **"
echo "*******************************************************************************************"
t=`date`
echo "Start: $t"
psql -t -A -c "SET search_path=$search_path; SELECT 'VACUUM ANALYZE \"' || schema_name || '\".\"' || table_name || '\";'
FROM (
SELECT n.nspname AS schema_name, c.relname AS table_name, c.reltuples AS num_rows, (__gp_aovisimap_hidden_info(c.oid)).total_tupcount AS ao_num_rows
FROM pg_appendonly a
JOIN pg_class c ON c.oid = a.relid
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE c.relkind = 'r'
AND c.reltuples > 0
) AS sub
GROUP BY schema_name, table_name, num_rows
HAVING sum(ao_num_rows) > num_rows * 1.05" | psql -e
t=`date`
echo "Finish: $t"
fi