From 17166c55762ff047496f6289489e1b7c883967f7 Mon Sep 17 00:00:00 2001 From: xiaojingchen Date: Tue, 12 Mar 2019 17:33:16 +0800 Subject: [PATCH 1/4] add log dump --- tests/actions.go | 10 ++--- tests/cmd/e2e/main.go | 7 +++- tests/log_dump.go | 96 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 tests/log_dump.go diff --git a/tests/actions.go b/tests/actions.go index 6636337c22..8b87c5a3b0 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -35,11 +35,12 @@ import ( "k8s.io/client-go/kubernetes" ) -func NewOperatorActions(cli versioned.Interface, kubeCli kubernetes.Interface) OperatorActions { +func NewOperatorActions(cli versioned.Interface, kubeCli kubernetes.Interface, logDir string) OperatorActions { return &operatorActions{ cli: cli, kubeCli: kubeCli, pdControl: controller.NewDefaultPDControl(), + logDir: logDir, } } @@ -47,7 +48,7 @@ type OperatorActions interface { DeployOperator(info *OperatorInfo) error CleanOperator(info *OperatorInfo) error UpgradeOperator(info *OperatorInfo) error - DumpAllLogs(info *OperatorInfo, clusterInfo *TidbClusterInfo) error + DumpAllLogs(info *OperatorInfo, clusterInfos []*TidbClusterInfo) error DeployTidbCluster(info *TidbClusterInfo) error CleanTidbCluster(info *TidbClusterInfo) error CheckTidbClusterStatus(info *TidbClusterInfo) error @@ -91,6 +92,7 @@ type operatorActions struct { cli versioned.Interface kubeCli kubernetes.Interface pdControl controller.PDControlInterface + logDir string } type OperatorInfo struct { @@ -199,10 +201,6 @@ func (oa *operatorActions) UpgradeOperator(info *OperatorInfo) error { return nil } -func (oa *operatorActions) DumpAllLogs(info *OperatorInfo, clusterInfo *TidbClusterInfo) error { - return nil -} - func (oa *operatorActions) DeployTidbCluster(info *TidbClusterInfo) error { cmd := fmt.Sprintf("helm install /charts/%s/tidb-cluster --name %s --namespace %s --set-string %s", info.OperatorTag, info.ClusterName, info.Namespace, info.HelmSetString()) diff --git a/tests/cmd/e2e/main.go b/tests/cmd/e2e/main.go index 46af075764..160006934e 100644 --- a/tests/cmd/e2e/main.go +++ b/tests/cmd/e2e/main.go @@ -42,7 +42,7 @@ func main() { glog.Fatalf("failed to get kubernetes Clientset: %v", err) } - oa := tests.NewOperatorActions(cli, kubeCli) + oa := tests.NewOperatorActions(cli, kubeCli, "/logDir") operatorInfo := &tests.OperatorInfo{ Namespace: "pingcap", @@ -53,9 +53,11 @@ func main() { LogLevel: "2", } if err := oa.CleanOperator(operatorInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{}) glog.Fatal(err) } if err := oa.DeployOperator(operatorInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{}) glog.Fatal(err) } @@ -71,12 +73,15 @@ func main() { Args: map[string]string{}, } if err := oa.CleanTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) glog.Fatal(err) } if err := oa.DeployTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) glog.Fatal(err) } if err := oa.CheckTidbClusterStatus(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) glog.Fatal(err) } } diff --git a/tests/log_dump.go b/tests/log_dump.go new file mode 100644 index 0000000000..9802e7e011 --- /dev/null +++ b/tests/log_dump.go @@ -0,0 +1,96 @@ +package tests + +import ( + "bufio" + "fmt" + "os" + "os/exec" + "path/filepath" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func (oa *operatorActions) DumpAllLogs(operatorInfo *OperatorInfo, testClusters []*TidbClusterInfo) error { + logPath := fmt.Sprintf("/%s", oa.logDir) + if _, err := os.Stat(logPath); os.IsNotExist(err) { + err = os.MkdirAll(logPath, os.ModePerm) + if err != nil { + return err + } + } + + // dump all resources info + resourceLogFile, err := os.Create(filepath.Join(logPath, "resources")) + if err != nil { + return err + } + defer resourceLogFile.Close() + resourceWriter := bufio.NewWriter(resourceLogFile) + dumpLog("kubectl get po -owide -n kube-system", resourceWriter) + dumpLog(fmt.Sprintf("kubectl get po -owide -n %s", operatorInfo.Namespace), resourceWriter) + dumpLog("kubectl get pv", resourceWriter) + dumpLog("kubectl get pv -oyaml", resourceWriter) + for _, testCluster := range testClusters { + dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -owide -n %s", testCluster.Namespace), resourceWriter) + dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -n %s -oyaml", testCluster.Namespace), resourceWriter) + } + + // dump operator components's log + operatorPods, err := oa.kubeCli.CoreV1().Pods(operatorInfo.Namespace).List(metav1.ListOptions{}) + if err != nil { + return err + } + for _, pod := range operatorPods.Items { + err := dumpPod(logPath, &pod) + if err != nil { + return err + } + } + + // dump all test clusters's logs + for _, testCluster := range testClusters { + clusterPodList, err := oa.kubeCli.CoreV1().Pods(testCluster.Namespace).List(metav1.ListOptions{}) + if err != nil { + return err + } + for _, pod := range clusterPodList.Items { + err := dumpPod(logPath, &pod) + if err != nil { + return err + } + } + } + + return nil +} + +func dumpPod(logPath string, pod *corev1.Pod) error { + logFile, err := os.Create(filepath.Join(logPath, fmt.Sprintf("%s-%s.log", pod.Name, pod.Namespace))) + if err != nil { + return err + } + defer logFile.Close() + plogFile, err := os.Create(filepath.Join(logPath, fmt.Sprintf("%s-%s-p.log", pod.Name, pod.Namespace))) + if err != nil { + return err + } + defer plogFile.Close() + logWriter := bufio.NewWriter(logFile) + plogWriter := bufio.NewWriter(plogFile) + for _, c := range pod.Spec.Containers { + dumpLog(fmt.Sprintf("kubectl logs -n %s %s -c %s", pod.Namespace, pod.GetName(), c.Name), logWriter) + dumpLog(fmt.Sprintf("kubectl logs -n %s %s -c %s -p", pod.Namespace, pod.GetName(), c.Name), plogWriter) + } + + return nil +} + +func dumpLog(cmdStr string, writer *bufio.Writer) { + writer.WriteString(fmt.Sprintf("$ %s\n", cmdStr)) + data, err := exec.Command("/bin/sh", "-c", "/usr/local/bin/"+cmdStr).CombinedOutput() + if err != nil { + writer.WriteString(err.Error()) + } + writer.WriteString(string(data)) +} From 7c302393cd0b4cb01ec4330713566b0123007dfc Mon Sep 17 00:00:00 2001 From: xiaojingchen Date: Wed, 13 Mar 2019 17:38:35 +0800 Subject: [PATCH 2/4] add volume mount --- tests/log_dump.go | 2 +- tests/manifests/e2e.yaml | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/log_dump.go b/tests/log_dump.go index 9802e7e011..2ffb0e15fb 100644 --- a/tests/log_dump.go +++ b/tests/log_dump.go @@ -12,7 +12,7 @@ import ( ) func (oa *operatorActions) DumpAllLogs(operatorInfo *OperatorInfo, testClusters []*TidbClusterInfo) error { - logPath := fmt.Sprintf("/%s", oa.logDir) + logPath := fmt.Sprintf("/%s/%s", oa.logDir, "operator-stability") if _, err := os.Stat(logPath); os.IsNotExist(err) { err = os.MkdirAll(logPath, os.ModePerm) if err != nil { diff --git a/tests/manifests/e2e.yaml b/tests/manifests/e2e.yaml index ed50c4dcc5..404d1bf4f6 100644 --- a/tests/manifests/e2e.yaml +++ b/tests/manifests/e2e.yaml @@ -27,7 +27,15 @@ spec: serviceAccount: tidb-operator-e2e containers: - name: tidb-operator-e2e - image: "" + image: "localhost:5000/pingcap/tidb-operator-e2e:latest" imagePullPolicy: Always command: ["sh", "-c", "/usr/local/bin/e2e"] + volumeMounts: + - mountPath: /logDir + name: logdir + volumes: + - name: logdir + hostPath: + path: /var/log + type: Directory restartPolicy: Never From d1038cfcfe5d38e61cc318ca10aee3d49eb92a28 Mon Sep 17 00:00:00 2001 From: xiaojingchen Date: Fri, 15 Mar 2019 14:50:47 +0800 Subject: [PATCH 3/4] address comment --- tests/cmd/e2e/main.go | 4 ++-- tests/log_dump.go | 34 ++++++++++++++++++++++++---------- tests/manifests/e2e.yaml | 2 +- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/tests/cmd/e2e/main.go b/tests/cmd/e2e/main.go index 2b8b771cfc..d631c44930 100644 --- a/tests/cmd/e2e/main.go +++ b/tests/cmd/e2e/main.go @@ -53,11 +53,11 @@ func main() { LogLevel: "2", } if err := oa.CleanOperator(operatorInfo); err != nil { - oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{}) + oa.DumpAllLogs(operatorInfo, nil) glog.Fatal(err) } if err := oa.DeployOperator(operatorInfo); err != nil { - oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{}) + oa.DumpAllLogs(operatorInfo, nil) glog.Fatal(err) } diff --git a/tests/log_dump.go b/tests/log_dump.go index 2ffb0e15fb..97de55c0ad 100644 --- a/tests/log_dump.go +++ b/tests/log_dump.go @@ -31,9 +31,13 @@ func (oa *operatorActions) DumpAllLogs(operatorInfo *OperatorInfo, testClusters dumpLog(fmt.Sprintf("kubectl get po -owide -n %s", operatorInfo.Namespace), resourceWriter) dumpLog("kubectl get pv", resourceWriter) dumpLog("kubectl get pv -oyaml", resourceWriter) + dumpedNamespace := map[string]bool{} for _, testCluster := range testClusters { - dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -owide -n %s", testCluster.Namespace), resourceWriter) - dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -n %s -oyaml", testCluster.Namespace), resourceWriter) + if _, exist := dumpedNamespace[testCluster.Namespace]; !exist { + dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -owide -n %s", testCluster.Namespace), resourceWriter) + dumpLog(fmt.Sprintf("kubectl get po,pvc,svc,cm,cronjobs,jobs,statefulsets,tidbclusters -n %s -oyaml", testCluster.Namespace), resourceWriter) + dumpedNamespace[testCluster.Namespace] = true + } } // dump operator components's log @@ -49,16 +53,20 @@ func (oa *operatorActions) DumpAllLogs(operatorInfo *OperatorInfo, testClusters } // dump all test clusters's logs + dumpedNamespace = map[string]bool{} for _, testCluster := range testClusters { - clusterPodList, err := oa.kubeCli.CoreV1().Pods(testCluster.Namespace).List(metav1.ListOptions{}) - if err != nil { - return err - } - for _, pod := range clusterPodList.Items { - err := dumpPod(logPath, &pod) + if _, exist := dumpedNamespace[testCluster.Namespace]; !exist { + clusterPodList, err := oa.kubeCli.CoreV1().Pods(testCluster.Namespace).List(metav1.ListOptions{}) if err != nil { return err } + for _, pod := range clusterPodList.Items { + err := dumpPod(logPath, &pod) + if err != nil { + return err + } + } + dumpedNamespace[testCluster.Namespace] = true } } @@ -76,8 +84,12 @@ func dumpPod(logPath string, pod *corev1.Pod) error { return err } defer plogFile.Close() + logWriter := bufio.NewWriter(logFile) plogWriter := bufio.NewWriter(plogFile) + defer logWriter.Flush() + defer plogWriter.Flush() + for _, c := range pod.Spec.Containers { dumpLog(fmt.Sprintf("kubectl logs -n %s %s -c %s", pod.Namespace, pod.GetName(), c.Name), logWriter) dumpLog(fmt.Sprintf("kubectl logs -n %s %s -c %s -p", pod.Namespace, pod.GetName(), c.Name), plogWriter) @@ -88,9 +100,11 @@ func dumpPod(logPath string, pod *corev1.Pod) error { func dumpLog(cmdStr string, writer *bufio.Writer) { writer.WriteString(fmt.Sprintf("$ %s\n", cmdStr)) - data, err := exec.Command("/bin/sh", "-c", "/usr/local/bin/"+cmdStr).CombinedOutput() + cmd := exec.Command("/bin/sh", "-c", "/usr/local/bin/"+cmdStr) + cmd.Stderr = writer + cmd.Stdout = writer + err := cmd.Run() if err != nil { writer.WriteString(err.Error()) } - writer.WriteString(string(data)) } diff --git a/tests/manifests/e2e.yaml b/tests/manifests/e2e.yaml index 404d1bf4f6..77e44a4559 100644 --- a/tests/manifests/e2e.yaml +++ b/tests/manifests/e2e.yaml @@ -27,7 +27,7 @@ spec: serviceAccount: tidb-operator-e2e containers: - name: tidb-operator-e2e - image: "localhost:5000/pingcap/tidb-operator-e2e:latest" + image: "" imagePullPolicy: Always command: ["sh", "-c", "/usr/local/bin/e2e"] volumeMounts: From 94cf8bdba2f04b685befe0599b3d733ddf244896 Mon Sep 17 00:00:00 2001 From: xiaojingchen Date: Mon, 18 Mar 2019 15:01:55 +0800 Subject: [PATCH 4/4] remove perror --- tests/cmd/e2e/main.go | 64 +++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 14 deletions(-) diff --git a/tests/cmd/e2e/main.go b/tests/cmd/e2e/main.go index 892f6c789f..73b303323c 100644 --- a/tests/cmd/e2e/main.go +++ b/tests/cmd/e2e/main.go @@ -25,7 +25,7 @@ import ( "k8s.io/client-go/rest" ) -func perror(err error, info *tests.OperatorInfo, clusterInfos []*tests.TidbClusterInfo) { +func perror(err error) { if err != nil { glog.Fatal(err) } @@ -59,9 +59,14 @@ func main() { SchedulerImage: "gcr.io/google-containers/hyperkube:v1.12.1", LogLevel: "2", } - oa.DumpAllLogs(operatorInfo, nil) - perror(oa.CleanOperator(operatorInfo), operatorInfo, nil) - perror(oa.DeployOperator(operatorInfo), operatorInfo, nil) + if err := oa.CleanOperator(operatorInfo); err != nil { + oa.DumpAllLogs(operatorInfo, nil) + glog.Fatal(err) + } + if err = oa.DeployOperator(operatorInfo); err != nil { + oa.DumpAllLogs(operatorInfo, nil) + glog.Fatal(err) + } clusterInfo := &tests.TidbClusterInfo{ Namespace: "tidb", @@ -89,17 +94,38 @@ func main() { Args: map[string]string{}, } - perror(oa.CleanTidbCluster(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.DeployTidbCluster(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.CheckTidbClusterStatus(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + if err = oa.CleanTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } + if err = oa.DeployTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } + if err = oa.CheckTidbClusterStatus(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } clusterInfo = clusterInfo.ScaleTiDB(3) - perror(oa.ScaleTidbCluster(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.CheckTidbClusterStatus(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + if err := oa.ScaleTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } + if err = oa.CheckTidbClusterStatus(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } clusterInfo = clusterInfo.UpgradeAll("v2.1.4") - perror(oa.UpgradeTidbCluster(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.CheckTidbClusterStatus(clusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + if err = oa.UpgradeTidbCluster(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } + if err = oa.CheckTidbClusterStatus(clusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + glog.Fatal(err) + } restoreClusterInfo := &tests.TidbClusterInfo{ Namespace: "tidb", @@ -127,13 +153,23 @@ func main() { Args: map[string]string{}, } - perror(oa.CleanTidbCluster(restoreClusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.DeployTidbCluster(restoreClusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) - perror(oa.CheckTidbClusterStatus(restoreClusterInfo), operatorInfo, []*tests.TidbClusterInfo{clusterInfo}) + if err = oa.CleanTidbCluster(restoreClusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo, restoreClusterInfo}) + glog.Fatal(err) + } + if err = oa.DeployTidbCluster(restoreClusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo, restoreClusterInfo}) + glog.Fatal(err) + } + if err = oa.CheckTidbClusterStatus(restoreClusterInfo); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo, restoreClusterInfo}) + glog.Fatal(err) + } backupCase := backup.NewBackupCase(oa, clusterInfo, restoreClusterInfo) if err := backupCase.Run(); err != nil { + oa.DumpAllLogs(operatorInfo, []*tests.TidbClusterInfo{clusterInfo, restoreClusterInfo}) glog.Fatal(err) } }