Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cluster/spec): check alertmanager,prometheus's config #1074

Merged
3 changes: 1 addition & 2 deletions pkg/cluster/embed/autogen_pkger.go

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion pkg/cluster/spec/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,10 @@ func (i *AlertManagerInstance) InitConfig(
if err := config.NewAlertManagerConfig().ConfigToFile(configPath); err != nil {
return err
}
return i.TransferLocalConfigFile(ctx, e, configPath, dst)
if err := i.TransferLocalConfigFile(ctx, e, configPath, dst); err != nil {
return err
}
return checkConfig(ctx, e, i.ComponentName(), clusterVersion, i.OS(), i.Arch(), i.ComponentName()+".yml", paths, nil)
}

// ScaleConfig deploy temporary config on scaling
Expand Down
4 changes: 2 additions & 2 deletions pkg/cluster/spec/grafana.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func (i *GrafanaInstance) initDashboards(ctx context.Context, e ctxt.Executor, s
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/bin/*.json %[1]s",
`find %[2]s/bin -maxdepth 1 -type f -name "*.json" -exec cp {} %[1]s \;`,
}
_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), dashboardsDir, paths.Deploy), false)
if err != nil {
Expand Down Expand Up @@ -275,7 +275,7 @@ func (i *GrafanaInstance) installDashboards(ctx context.Context, e ctxt.Executor
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -maxdepth 1 -type f -name "*.json" -delete`,
"cp %[2]s/dm-master/scripts/*.json %[1]s",
`find %[2]s/dm-master/scripts -type f -name "*.json" -exec cp {} %[1]s \;`,
"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand Down
10 changes: 7 additions & 3 deletions pkg/cluster/spec/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,11 @@ func (i *MonitorInstance) InitConfig(
return err
}
dst = filepath.Join(paths.Deploy, "conf", "prometheus.yml")
return e.Transfer(ctx, fp, dst, false)
if err := e.Transfer(ctx, fp, dst, false); err != nil {
return err
}

return checkConfig(ctx, e, i.ComponentName(), clusterVersion, i.OS(), i.Arch(), i.ComponentName()+".yml", paths, nil)
}

// We only really installRules for dm cluster because the rules(*.rules.yml) packed with the prometheus
Expand Down Expand Up @@ -314,7 +318,7 @@ func (i *MonitorInstance) installRules(ctx context.Context, e ctxt.Executor, dep
cmds := []string{
"mkdir -p %[1]s",
`find %[1]s -type f -name "*.rules.yml" -delete`,
"cp %[2]s/dm-master/conf/*.rules.yml %[1]s",
`find %[2]s/dm-master/conf -type f -name "*.rules.yml" -exec cp %[1]s \;`,
"rm -rf %[2]s",
}
_, stderr, err = e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), targetDir, tmp), false)
Expand All @@ -336,7 +340,7 @@ func (i *MonitorInstance) initRules(ctx context.Context, e ctxt.Executor, spec P
cmds := []string{
"mkdir -p %[1]s/conf",
`find %[1]s/conf -type f -name "*.rules.yml" -delete`,
`cp %[1]s/bin/prometheus/*.rules.yml %[1]s/conf/`,
`find %[1]s/bin/prometheus -maxdepth 1 -type f -name "*.rules.yml" -exec cp {} %[1]s/conf/ \;`,
}
_, stderr, err := e.Execute(ctx, fmt.Sprintf(strings.Join(cmds, " && "), paths.Deploy), false)
if err != nil {
Expand Down
112 changes: 112 additions & 0 deletions pkg/cluster/spec/prometheus_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package spec

import (
"context"
"io/ioutil"
"os"
"os/user"
"path"
"path/filepath"
"testing"

"github.com/pingcap/tiup/pkg/checkpoint"
"github.com/pingcap/tiup/pkg/cluster/executor"
"github.com/pingcap/tiup/pkg/meta"
"github.com/stretchr/testify/assert"
)

// TestLocalRuleDirs runs initRules for a Prometheus spec that sets RuleDir and
// asserts that, afterwards, <deploy>/conf contains every file found in the
// local rule directory and does not contain the rule file that was seeded
// under <deploy>/bin/prometheus.
func TestLocalRuleDirs(t *testing.T) {
	deployDir, err := ioutil.TempDir("", "tiup-*")
	assert.NoError(t, err)
	defer os.RemoveAll(deployDir)
	err = os.MkdirAll(path.Join(deployDir, "bin/prometheus"), 0755)
	assert.NoError(t, err)
	localDir, err := filepath.Abs("./testdata/rules")
	assert.NoError(t, err)

	// Seed a rule file in the bin directory; the assertions below expect it
	// NOT to reach conf/ when a local rule directory is configured.
	err = ioutil.WriteFile(path.Join(deployDir, "bin/prometheus", "dummy.rules.yml"), []byte("dummy"), 0644)
	assert.NoError(t, err)

	topo := new(Specification)
	topo.Monitors = append(topo.Monitors, PrometheusSpec{
		Host:    "127.0.0.1",
		Port:    9090,
		RuleDir: localDir,
	})

	comp := MonitorComponent{topo}
	ints := comp.Instances()

	// Stop here on mismatch: indexing ints[0] on an empty slice would panic
	// and hide the real assertion failure.
	if !assert.Len(t, ints, 1) {
		return
	}
	promInstance := ints[0].(*MonitorInstance)

	// Named currentUser so the os/user package is not shadowed.
	currentUser, err := user.Current()
	if !assert.NoError(t, err) {
		return
	}
	// NOTE(review): SSHTypeNone presumably executes commands on the local
	// machine without SSH — confirm against the executor package.
	e, err := executor.New(executor.SSHTypeNone, false, executor.SSHConfig{Host: "127.0.0.1", User: currentUser.Username})
	if !assert.NoError(t, err) {
		return
	}

	ctx := checkpoint.NewContext(context.Background())
	err = promInstance.initRules(ctx, e, promInstance.InstanceSpec.(PrometheusSpec), meta.DirPaths{Deploy: deployDir})
	assert.NoError(t, err)

	assert.NoFileExists(t, path.Join(deployDir, "conf", "dummy.rules.yml"))
	fs, err := ioutil.ReadDir(localDir)
	assert.NoError(t, err)
	for _, f := range fs {
		assert.FileExists(t, path.Join(deployDir, "conf", f.Name()))
	}
}

// TestNoLocalRuleDirs runs initRules for a Prometheus spec that does NOT set
// RuleDir and asserts that, afterwards, <deploy>/conf contains the rule file
// seeded under <deploy>/bin/prometheus and none of the files from the local
// testdata rule directory.
func TestNoLocalRuleDirs(t *testing.T) {
	deployDir, err := ioutil.TempDir("", "tiup-*")
	assert.NoError(t, err)
	defer os.RemoveAll(deployDir)
	err = os.MkdirAll(path.Join(deployDir, "bin/prometheus"), 0755)
	assert.NoError(t, err)
	// Only used below to enumerate the file names that must be absent.
	localDir, err := filepath.Abs("./testdata/rules")
	assert.NoError(t, err)

	// Seed a rule file in the bin directory; the assertions below expect it
	// to be copied into conf/ when no local rule directory is configured.
	err = ioutil.WriteFile(path.Join(deployDir, "bin/prometheus", "dummy.rules.yml"), []byte("dummy"), 0644)
	assert.NoError(t, err)

	topo := new(Specification)
	topo.Monitors = append(topo.Monitors, PrometheusSpec{
		Host: "127.0.0.1",
		Port: 9090,
	})

	comp := MonitorComponent{topo}
	ints := comp.Instances()

	// Stop here on mismatch: indexing ints[0] on an empty slice would panic
	// and hide the real assertion failure.
	if !assert.Len(t, ints, 1) {
		return
	}
	promInstance := ints[0].(*MonitorInstance)

	// Named currentUser so the os/user package is not shadowed.
	currentUser, err := user.Current()
	if !assert.NoError(t, err) {
		return
	}
	// NOTE(review): SSHTypeNone presumably executes commands on the local
	// machine without SSH — confirm against the executor package.
	e, err := executor.New(executor.SSHTypeNone, false, executor.SSHConfig{Host: "127.0.0.1", User: currentUser.Username})
	if !assert.NoError(t, err) {
		return
	}

	ctx := checkpoint.NewContext(context.Background())
	err = promInstance.initRules(ctx, e, promInstance.InstanceSpec.(PrometheusSpec), meta.DirPaths{Deploy: deployDir})
	assert.NoError(t, err)

	assert.FileExists(t, path.Join(deployDir, "conf", "dummy.rules.yml"))
	fs, err := ioutil.ReadDir(localDir)
	assert.NoError(t, err)
	for _, f := range fs {
		assert.NoFileExists(t, path.Join(deployDir, "conf", f.Name()))
	}
}
55 changes: 32 additions & 23 deletions pkg/cluster/spec/server_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,35 +236,44 @@ func mergeImported(importConfig []byte, specConfigs ...map[string]interface{}) (
type BindVersion func(comp string, version string) (bindVersion string)

func checkConfig(ctx context.Context, e ctxt.Executor, componentName, clusterVersion, nodeOS, arch, config string, paths meta.DirPaths, bindVersion BindVersion) error {
repo, err := clusterutil.NewRepository(nodeOS, arch)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
var cmd string
configPath := path.Join(paths.Deploy, "conf", config)
switch componentName {
case ComponentPrometheus:
cmd = fmt.Sprintf("%s/bin/prometheus/promtool check config %s", paths.Deploy, configPath)
case ComponentAlertmanager:
cmd = fmt.Sprintf("%s/bin/alertmanager/amtool check-config %s", paths.Deploy, configPath)
default:
repo, err := clusterutil.NewRepository(nodeOS, arch)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}

ver := clusterVersion
if bindVersion != nil {
ver = bindVersion(componentName, clusterVersion)
}
ver := clusterVersion
if bindVersion != nil {
ver = bindVersion(componentName, clusterVersion)
}

entry, err := repo.ComponentBinEntry(componentName, ver)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
entry, err := repo.ComponentBinEntry(componentName, ver)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
binPath := path.Join(paths.Deploy, "bin", entry)

binPath := path.Join(paths.Deploy, "bin", entry)
// Skip old versions
if !hasConfigCheckFlag(ctx, e, binPath) {
return nil
}
// Skip old versions
if !hasConfigCheckFlag(ctx, e, binPath) {
return nil
}

// Hack tikv --pd flag
extra := ""
if componentName == ComponentTiKV {
extra = `--pd=""`
// Hack tikv --pd flag
extra := ""
if componentName == ComponentTiKV {
extra = `--pd=""`
}
cmd = fmt.Sprintf("%s --config-check --config=%s %s", binPath, configPath, extra)
}

configPath := path.Join(paths.Deploy, "conf", config)
_, _, err = e.Execute(ctx, fmt.Sprintf("%s --config-check --config=%s %s", binPath, configPath, extra), false)
_, _, err := e.Execute(ctx, cmd, false)
if err != nil {
return perrs.Annotate(ErrorCheckConfig, err.Error())
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/cluster/spec/testdata/rules/tidb.rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# magic-string-for-test
groups:
- name: alert.rules
rules:
- alert: TiDB_schema_error
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
for: 1m
labels:
env: ENV_LABELS_ENV
level: emergency
expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0
annotations:
description: "cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}"
value: "{{ $value }}"
summary: TiDB schema error
44 changes: 0 additions & 44 deletions templates/config/dm/prometheus.yml.tpl

This file was deleted.

9 changes: 6 additions & 3 deletions templates/config/prometheus.yml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ global:

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
{{- if .LocalRules}}
{{- range .LocalRules}}
- '{{.}}'
{{- end}}
{{- else}}
{{- if .MonitoredServers}}
- 'node.rules.yml'
- 'blacker.rules.yml'
- 'bypass.rules.yml'
{{- end}}
{{- range .LocalRules}}
- '{{.}}'
{{- end}}
{{- if .PDAddrs}}
- 'pd.rules.yml'
{{- end}}
Expand Down Expand Up @@ -48,6 +50,7 @@ rule_files:
{{- if .DMMasterAddrs}}
- 'dm_master.rules.yml'
{{- end}}
{{- end}}

{{- if .AlertmanagerAddrs}}
alerting:
Expand Down
3 changes: 3 additions & 0 deletions tests/tiup-cluster/script/cmd_subtest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ function cmd_subtest() {
tiup-cluster $client --yes deploy $name $version $topo -i ~/.ssh/id_rsa --skip-create-user

# check the local config
tiup-cluster $client exec $name -N n1 --command "grep tidb.rules.yml /home/tidb/deploy/prometheus-9090/conf/prometheus.yml"
! tiup-cluster $client exec $name -N n1 --command "grep node.rules.yml /home/tidb/deploy/prometheus-9090/conf/prometheus.yml"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/prometheus-9090/conf/tidb.rules.yml"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/grafana-3000/dashboards/tidb.json"
tiup-cluster $client exec $name -N n1 --command "grep magic-string-for-test /home/tidb/deploy/alertmanager-9093/conf/alertmanager.yml"
Expand Down Expand Up @@ -124,6 +126,7 @@ function cmd_subtest() {
tiup-cluster $client push $name test_transfer_1.txt "{{ .DeployDir }}/test_transfer.txt" -R grafana
tiup-cluster $client pull $name "{{ .DeployDir }}/test_transfer.txt" test_transfer_2.txt -R grafana
diff test_transfer_1.txt test_transfer_2.txt
rm -f test_transfer_{1,2}.txt

echo "checking cleanup data and log"
tiup-cluster $client exec $name -N n1 --command "ls /home/tidb/deploy/prometheus-9090/log/prometheus.log"
Expand Down