From 443adedb16b02fdf04c0ab4daa0acbefa9a4a5b0 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Thu, 22 Apr 2021 19:58:40 +0200 Subject: [PATCH 1/5] cli: add a missing unit test The node selection logic was untested. This commit adds the missing test. Release note: None --- pkg/cli/BUILD.bazel | 1 + pkg/cli/zip_helpers_test.go | 58 +++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 pkg/cli/zip_helpers_test.go diff --git a/pkg/cli/BUILD.bazel b/pkg/cli/BUILD.bazel index 97fba1db081f..1b5b081f5117 100644 --- a/pkg/cli/BUILD.bazel +++ b/pkg/cli/BUILD.bazel @@ -284,6 +284,7 @@ go_test( "start_test.go", "statement_diag_test.go", "userfiletable_test.go", + "zip_helpers_test.go", "zip_test.go", ], data = glob(["testdata/**"]), diff --git a/pkg/cli/zip_helpers_test.go b/pkg/cli/zip_helpers_test.go new file mode 100644 index 000000000000..056c27835ecf --- /dev/null +++ b/pkg/cli/zip_helpers_test.go @@ -0,0 +1,58 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cli + +import ( + "testing" + + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +func TestNodeSelection(t *testing.T) { + defer leaktest.AfterTest(t)() + + testCases := []struct { + incl string + excl string + accepts []roachpb.NodeID + rejects []roachpb.NodeID + }{ + {"", "", []roachpb.NodeID{1, 2, 3, 100}, nil}, + {"1-10", "", []roachpb.NodeID{1, 2, 3}, []roachpb.NodeID{100}}, + {"1-2,4", "", []roachpb.NodeID{1, 2, 4}, []roachpb.NodeID{3, 5}}, + {"", "3-10", []roachpb.NodeID{1, 2, 100}, []roachpb.NodeID{4, 5}}, + } + + for _, tc := range testCases { + var sel nodeSelection + if tc.incl != "" { + if err := sel.inclusive.Set(tc.incl); err != nil { + t.Fatal(err) + } + } + if tc.excl != "" { + if err := sel.exclusive.Set(tc.excl); err != nil { + t.Fatal(err) + } + } + for _, n := range tc.accepts { + if !sel.isIncluded(n) { + t.Errorf("incl=%s, excl=%s: mistakenly does not include %d", &sel.inclusive, &sel.exclusive, n) + } + } + for _, n := range tc.rejects { + if sel.isIncluded(n) { + t.Errorf("incl=%s, excl=%s: mistakenly includes %d", &sel.inclusive, &sel.exclusive, n) + } + } + } +} From 2f4f6805adff4d825c030af945a46ed5997ecf95 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Thu, 22 Apr 2021 20:50:33 +0200 Subject: [PATCH 2/5] cli/debug: allow the user to refine which files get included We have observed in practice that the majority of the size of generated `debug zip` outputs is attributable to the discrete files retrieved from the server: - log files - goroutine dumps - heap profiles As clusters get larger and older, these files increase in number, and the size of the `debug zip` output grows accordingly. Meanwhile, in practice, there are many support cases where we know ahead of running the `debug zip` command that only certain files are going to be useful. Therefore, we wish to be able to restrict which files get included / excluded from the generated archive. Hence the functional change described below. (Note: there is another expressed wish to limit the size of generated archives based on a *time or date range*, not just file names / types. This will be handled in a separate commit.) Release note (cli change): It is now possible to fine tune which files get retrieved from the server nodes by the `debug zip` command, using the new flag `--include-files` and `--exclude-files`. These flags take glob patterns that are applied to the file names server-side. For example, to include only log files, use `--include-files='*.log'`. The command `cockroach debug list-files` also accepts these flags and can thus be used to experiment with the new flags before running the `debug zip` command. --- pkg/cli/cliflags/flags.go | 44 +++++++ pkg/cli/context.go | 4 + pkg/cli/debug_list_files.go | 12 +- pkg/cli/flags.go | 2 + pkg/cli/interactive_tests/test_zip_filter.tcl | 35 ++++++ pkg/cli/testdata/zip/partial1 | 36 +++--- pkg/cli/testdata/zip/partial1_excluded | 24 ++-- pkg/cli/testdata/zip/partial2 | 24 ++-- pkg/cli/testdata/zip/testzip | 4 +- pkg/cli/testdata/zip/testzip_concurrent | 60 ++++----- pkg/cli/testdata/zip/unavailable | 12 +- pkg/cli/zip.go | 4 + pkg/cli/zip_helpers.go | 53 ++++++++ pkg/cli/zip_helpers_test.go | 33 +++++ pkg/cli/zip_per_node.go | 119 +++++++++++++++++- 15 files changed, 379 insertions(+), 87 deletions(-) create mode 100644 pkg/cli/interactive_tests/test_zip_filter.tcl diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index b9d732032c76..2862daaa3aaf 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -1288,6 +1288,50 @@ list of node IDs or ranges of node IDs, for example: 5,10-20,23. The default is to not exclude any node.`, } + ZipIncludedFiles = FlagInfo{ + Name: "include-files", + Description: ` +List of glob patterns that determine files that can be included +in the output. The list can be specified as a comma-delimited +list of patterns, or by using the flag multiple times. +The patterns apply to the base name of the file, without +a path prefix. +The default is to include all files. +
+
+
+This flag is applied before --exclude-files; for example, +including '*.log' and then excluding '*foo*.log' will +exclude 'barfoos.log'. +
+
+
+You can use the 'debug list-files' command to explore how +this flag is applied.`, + } + + ZipExcludedFiles = FlagInfo{ + Name: "exclude-files", + Description: ` +List of glob patterns that determine files that are to +be excluded from the output. The list can be specified +as a comma-delimited list of patterns, or by using the +flag multiple times. +The patterns apply to the base name of the file, without +a path prefix. +
+
+
+This flag is applied after --include-files; for example, +including '*.log' and then excluding '*foo*.log' will +exclude 'barfoos.log'. +
+
+
+You can use the 'debug list-files' command to explore how +this flag is applied.`, + } + ZipRedactLogs = FlagInfo{ Name: "redact-logs", Description: ` diff --git a/pkg/cli/context.go b/pkg/cli/context.go index db43c5ea383b..40014746b354 100644 --- a/pkg/cli/context.go +++ b/pkg/cli/context.go @@ -330,6 +330,9 @@ type zipContext struct { // How much concurrency to use during the collection. The code // attempts to access multiple nodes concurrently by default. concurrency int + + // The log/heap/etc files to include. + files fileSelection } // setZipContextDefaults set the default values in zipCtx. This @@ -337,6 +340,7 @@ type zipContext struct { // test that exercises command-line parsing. func setZipContextDefaults() { zipCtx.nodes = nodeSelection{} + zipCtx.files = fileSelection{} zipCtx.redactLogs = false zipCtx.cpuProfDuration = 5 * time.Second zipCtx.concurrency = 15 diff --git a/pkg/cli/debug_list_files.go b/pkg/cli/debug_list_files.go index 87f2d526b3bd..5d357f9cab2c 100644 --- a/pkg/cli/debug_list_files.go +++ b/pkg/cli/debug_list_files.go @@ -36,6 +36,10 @@ var debugListFilesCmd = &cobra.Command{ } func runDebugListFiles(cmd *cobra.Command, _ []string) error { + if err := zipCtx.files.validate(); err != nil { + return err + } + ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -110,7 +114,7 @@ func runDebugListFiles(cmd *cobra.Command, _ []string) error { NodeId: nodeIDs, ListOnly: true, Type: serverpb.FileType(fileType), - Patterns: []string{"*"}, + Patterns: zipCtx.files.retrievalPatterns(), }) if err != nil { log.Warningf(ctx, "cannot retrieve %s file list from node %d: %v", serverpb.FileType_name[fileType], nodeID, err) @@ -132,6 +136,9 @@ func runDebugListFiles(cmd *cobra.Command, _ []string) error { for _, nodeID := range nodeList { nodeIDs := fmt.Sprintf("%d", nodeID) for _, logFile := range logFiles[nodeID] { + if !zipCtx.files.isIncluded(logFile.Name) { + continue + } totalSize += logFile.SizeBytes ctime := formatTimeSimple(extractTimeFromFileName(logFile.Name)) mtime := formatTimeSimple(timeutil.Unix(0, logFile.ModTimeNanos)) @@ -140,6 +147,9 @@ func runDebugListFiles(cmd *cobra.Command, _ []string) error { for _, ft := range fileTypes { fileType := int32(ft) for _, other := range otherFiles[nodeID][fileType] { + if !zipCtx.files.isIncluded(other.Name) { + continue + } totalSize += other.FileSize ctime := formatTimeSimple(extractTimeFromFileName(other.Name)) mtime := ctime diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 5a1cfe21ed96..1599c3e1b5a9 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -631,6 +631,8 @@ func init() { f := cmd.Flags() varFlag(f, &zipCtx.nodes.inclusive, cliflags.ZipNodes) varFlag(f, &zipCtx.nodes.exclusive, cliflags.ZipExcludeNodes) + stringSliceFlag(f, &zipCtx.files.includePatterns, cliflags.ZipIncludedFiles) + stringSliceFlag(f, &zipCtx.files.excludePatterns, cliflags.ZipExcludedFiles) } // Decommission command. diff --git a/pkg/cli/interactive_tests/test_zip_filter.tcl b/pkg/cli/interactive_tests/test_zip_filter.tcl new file mode 100644 index 000000000000..2bdbbad9d0d6 --- /dev/null +++ b/pkg/cli/interactive_tests/test_zip_filter.tcl @@ -0,0 +1,35 @@ +#! /usr/bin/env expect -f + +source [file join [file dirname $argv0] common.tcl] + +spawn /bin/bash +send "PS1=':''/# '\r" +eexpect ":/# " + +start_server $argv + +# A server start populates a log directory with multiple files. We expect +# at least cockroach-stderr.*, cockroach.* and cockroach-pebble.*. + +start_test "Check that --include-files excludes files that do not match the pattern" +send "$argv debug zip --cpu-profile-duration=0 --include-files='cockroach.*' /dev/null\r" + +eexpect "log files found" +eexpect "skipping excluded log file: cockroach-stderr." +eexpect "skipping excluded log file: cockroach-pebble." +eexpect "\[log file: cockroach." +eexpect ":/# " +end_test + +start_test "Check that --excludes-files excludes files that match the pattern" +send "$argv debug zip --cpu-profile-duration=0 --exclude-files='cockroach.*' /dev/null\r" + +eexpect "log files found" +eexpect "\[log file: cockroach-stderr." +eexpect "\[log file: cockroach-pebble." +eexpect "skipping excluded log file: cockroach." +eexpect ":/# " +end_test + + +stop_server $argv diff --git a/pkg/cli/testdata/zip/partial1 b/pkg/cli/testdata/zip/partial1 index e9681de95767..78f84b854e91 100644 --- a/pkg/cli/testdata/zip/partial1 +++ b/pkg/cli/testdata/zip/partial1 @@ -54,12 +54,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting threads... received response... writing binary output: debug/nodes/1/threads.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done -[node 1] requesting heap files... received response... -[node 1] requesting heap files: last request failed: rpc error: ... -[node 1] requesting heap files: creating error output: debug/nodes/1/heapprof.err.txt... done -[node 1] requesting goroutine dumps... received response... -[node 1] requesting goroutine dumps: last request failed: rpc error: ... -[node 1] requesting goroutine dumps: creating error output: debug/nodes/1/goroutines.err.txt... done +[node 1] requesting heap file list... received response... +[node 1] requesting heap file list: last request failed: rpc error: ... +[node 1] requesting heap file list: creating error output: debug/nodes/1/heapprof.err.txt... done +[node 1] requesting goroutine dump list... received response... +[node 1] requesting goroutine dump list: last request failed: rpc error: ... +[node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... done [node 1] requesting log file ... [node 1] 1 log file ... [node 1] [log file ... @@ -173,12 +173,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 2] requesting heap profile... received response... [node 2] requesting heap profile: last request failed: rpc error: ... [node 2] requesting heap profile: creating error output: debug/nodes/2/heap.pprof.err.txt... done -[node 2] requesting heap files... received response... -[node 2] requesting heap files: last request failed: rpc error: ... -[node 2] requesting heap files: creating error output: debug/nodes/2/heapprof.err.txt... done -[node 2] requesting goroutine dumps... received response... -[node 2] requesting goroutine dumps: last request failed: rpc error: ... -[node 2] requesting goroutine dumps: creating error output: debug/nodes/2/goroutines.err.txt... done +[node 2] requesting heap file list... received response... +[node 2] requesting heap file list: last request failed: rpc error: ... +[node 2] requesting heap file list: creating error output: debug/nodes/2/heapprof.err.txt... done +[node 2] requesting goroutine dump list... received response... +[node 2] requesting goroutine dump list: last request failed: rpc error: ... +[node 2] requesting goroutine dump list: creating error output: debug/nodes/2/goroutines.err.txt... done [node 2] requesting log file ... [node 2] requesting log file ... [node 2] requesting log file ... @@ -210,12 +210,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting threads... received response... writing binary output: debug/nodes/3/threads.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done -[node 3] requesting heap files... received response... -[node 3] requesting heap files: last request failed: rpc error: ... -[node 3] requesting heap files: creating error output: debug/nodes/3/heapprof.err.txt... done -[node 3] requesting goroutine dumps... received response... -[node 3] requesting goroutine dumps: last request failed: rpc error: ... -[node 3] requesting goroutine dumps: creating error output: debug/nodes/3/goroutines.err.txt... done +[node 3] requesting heap file list... received response... +[node 3] requesting heap file list: last request failed: rpc error: ... +[node 3] requesting heap file list: creating error output: debug/nodes/3/heapprof.err.txt... done +[node 3] requesting goroutine dump list... received response... +[node 3] requesting goroutine dump list: last request failed: rpc error: ... +[node 3] requesting goroutine dump list: creating error output: debug/nodes/3/goroutines.err.txt... done [node 3] requesting log file ... [node 3] 1 log file ... [node 3] [log file ... diff --git a/pkg/cli/testdata/zip/partial1_excluded b/pkg/cli/testdata/zip/partial1_excluded index 855e22cd6128..61f74fc5d3e7 100644 --- a/pkg/cli/testdata/zip/partial1_excluded +++ b/pkg/cli/testdata/zip/partial1_excluded @@ -54,12 +54,12 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting threads... received response... writing binary output: debug/nodes/1/threads.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done -[node 1] requesting heap files... received response... -[node 1] requesting heap files: last request failed: rpc error: ... -[node 1] requesting heap files: creating error output: debug/nodes/1/heapprof.err.txt... done -[node 1] requesting goroutine dumps... received response... -[node 1] requesting goroutine dumps: last request failed: rpc error: ... -[node 1] requesting goroutine dumps: creating error output: debug/nodes/1/goroutines.err.txt... done +[node 1] requesting heap file list... received response... +[node 1] requesting heap file list: last request failed: rpc error: ... +[node 1] requesting heap file list: creating error output: debug/nodes/1/heapprof.err.txt... done +[node 1] requesting goroutine dump list... received response... +[node 1] requesting goroutine dump list: last request failed: rpc error: ... +[node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... done [node 1] requesting log file ... [node 1] 1 log file ... [node 1] [log file ... @@ -128,12 +128,12 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting threads... received response... writing binary output: debug/nodes/3/threads.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done -[node 3] requesting heap files... received response... -[node 3] requesting heap files: last request failed: rpc error: ... -[node 3] requesting heap files: creating error output: debug/nodes/3/heapprof.err.txt... done -[node 3] requesting goroutine dumps... received response... -[node 3] requesting goroutine dumps: last request failed: rpc error: ... -[node 3] requesting goroutine dumps: creating error output: debug/nodes/3/goroutines.err.txt... done +[node 3] requesting heap file list... received response... +[node 3] requesting heap file list: last request failed: rpc error: ... +[node 3] requesting heap file list: creating error output: debug/nodes/3/heapprof.err.txt... done +[node 3] requesting goroutine dump list... received response... +[node 3] requesting goroutine dump list: last request failed: rpc error: ... +[node 3] requesting goroutine dump list: creating error output: debug/nodes/3/goroutines.err.txt... done [node 3] requesting log file ... [node 3] 1 log file ... [node 3] [log file ... diff --git a/pkg/cli/testdata/zip/partial2 b/pkg/cli/testdata/zip/partial2 index ddc771e64bc3..bfec3323dd77 100644 --- a/pkg/cli/testdata/zip/partial2 +++ b/pkg/cli/testdata/zip/partial2 @@ -54,12 +54,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting threads... received response... writing binary output: debug/nodes/1/threads.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done -[node 1] requesting heap files... received response... -[node 1] requesting heap files: last request failed: rpc error: ... -[node 1] requesting heap files: creating error output: debug/nodes/1/heapprof.err.txt... done -[node 1] requesting goroutine dumps... received response... -[node 1] requesting goroutine dumps: last request failed: rpc error: ... -[node 1] requesting goroutine dumps: creating error output: debug/nodes/1/goroutines.err.txt... done +[node 1] requesting heap file list... received response... +[node 1] requesting heap file list: last request failed: rpc error: ... +[node 1] requesting heap file list: creating error output: debug/nodes/1/heapprof.err.txt... done +[node 1] requesting goroutine dump list... received response... +[node 1] requesting goroutine dump list: last request failed: rpc error: ... +[node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... done [node 1] requesting log file ... [node 1] 1 log file ... [node 1] [log file ... @@ -127,12 +127,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null [node 3] requesting stacks... received response... writing binary output: debug/nodes/3/stacks.txt... done [node 3] requesting threads... received response... writing binary output: debug/nodes/3/threads.txt... done [node 3] requesting heap profile... received response... writing binary output: debug/nodes/3/heap.pprof... done -[node 3] requesting heap files... received response... -[node 3] requesting heap files: last request failed: rpc error: ... -[node 3] requesting heap files: creating error output: debug/nodes/3/heapprof.err.txt... done -[node 3] requesting goroutine dumps... received response... -[node 3] requesting goroutine dumps: last request failed: rpc error: ... -[node 3] requesting goroutine dumps: creating error output: debug/nodes/3/goroutines.err.txt... done +[node 3] requesting heap file list... received response... +[node 3] requesting heap file list: last request failed: rpc error: ... +[node 3] requesting heap file list: creating error output: debug/nodes/3/heapprof.err.txt... done +[node 3] requesting goroutine dump list... received response... +[node 3] requesting goroutine dump list: last request failed: rpc error: ... +[node 3] requesting goroutine dump list: creating error output: debug/nodes/3/goroutines.err.txt... done [node 3] requesting log file ... [node 3] 1 log file ... [node 3] [log file ... diff --git a/pkg/cli/testdata/zip/testzip b/pkg/cli/testdata/zip/testzip index 4baa8527ed59..777299f357e8 100644 --- a/pkg/cli/testdata/zip/testzip +++ b/pkg/cli/testdata/zip/testzip @@ -57,9 +57,9 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting threads... received response... writing binary output: debug/nodes/1/threads.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done -[node 1] requesting heap files... received response... done +[node 1] requesting heap file list... received response... done [node 1] 0 heap profiles found -[node 1] requesting goroutine dumps... received response... done +[node 1] requesting goroutine dump list... received response... done [node 1] 0 goroutine dumps found [node 1] requesting log file ... [node 1] 0 log file ... diff --git a/pkg/cli/testdata/zip/testzip_concurrent b/pkg/cli/testdata/zip/testzip_concurrent index 7b48a189190a..8ee43e438295 100644 --- a/pkg/cli/testdata/zip/testzip_concurrent +++ b/pkg/cli/testdata/zip/testzip_concurrent @@ -304,16 +304,16 @@ zip [node 1] requesting data for debug/nodes/1/gossip: done [node 1] requesting data for debug/nodes/1/gossip: received response... [node 1] requesting data for debug/nodes/1/gossip: writing binary output: debug/nodes/1/gossip.json... -[node 1] requesting goroutine dumps... -[node 1] requesting goroutine dumps: creating error output: debug/nodes/1/goroutines.err.txt... -[node 1] requesting goroutine dumps: done -[node 1] requesting goroutine dumps: last request failed: rpc error: ... -[node 1] requesting goroutine dumps: received response... -[node 1] requesting heap files... -[node 1] requesting heap files: creating error output: debug/nodes/1/heapprof.err.txt... -[node 1] requesting heap files: done -[node 1] requesting heap files: last request failed: rpc error: ... -[node 1] requesting heap files: received response... +[node 1] requesting goroutine dump list... +[node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... +[node 1] requesting goroutine dump list: done +[node 1] requesting goroutine dump list: last request failed: rpc error: ... +[node 1] requesting goroutine dump list: received response... +[node 1] requesting heap file list... +[node 1] requesting heap file list: creating error output: debug/nodes/1/heapprof.err.txt... +[node 1] requesting heap file list: done +[node 1] requesting heap file list: last request failed: rpc error: ... +[node 1] requesting heap file list: received response... [node 1] requesting heap profile... [node 1] requesting heap profile: done [node 1] requesting heap profile: received response... @@ -557,16 +557,16 @@ zip [node 2] requesting data for debug/nodes/2/gossip: done [node 2] requesting data for debug/nodes/2/gossip: received response... [node 2] requesting data for debug/nodes/2/gossip: writing binary output: debug/nodes/2/gossip.json... -[node 2] requesting goroutine dumps... -[node 2] requesting goroutine dumps: creating error output: debug/nodes/2/goroutines.err.txt... -[node 2] requesting goroutine dumps: done -[node 2] requesting goroutine dumps: last request failed: rpc error: ... -[node 2] requesting goroutine dumps: received response... -[node 2] requesting heap files... -[node 2] requesting heap files: creating error output: debug/nodes/2/heapprof.err.txt... -[node 2] requesting heap files: done -[node 2] requesting heap files: last request failed: rpc error: ... -[node 2] requesting heap files: received response... +[node 2] requesting goroutine dump list... +[node 2] requesting goroutine dump list: creating error output: debug/nodes/2/goroutines.err.txt... +[node 2] requesting goroutine dump list: done +[node 2] requesting goroutine dump list: last request failed: rpc error: ... +[node 2] requesting goroutine dump list: received response... +[node 2] requesting heap file list... +[node 2] requesting heap file list: creating error output: debug/nodes/2/heapprof.err.txt... +[node 2] requesting heap file list: done +[node 2] requesting heap file list: last request failed: rpc error: ... +[node 2] requesting heap file list: received response... [node 2] requesting heap profile... [node 2] requesting heap profile: done [node 2] requesting heap profile: received response... @@ -810,16 +810,16 @@ zip [node 3] requesting data for debug/nodes/3/gossip: done [node 3] requesting data for debug/nodes/3/gossip: received response... [node 3] requesting data for debug/nodes/3/gossip: writing binary output: debug/nodes/3/gossip.json... -[node 3] requesting goroutine dumps... -[node 3] requesting goroutine dumps: creating error output: debug/nodes/3/goroutines.err.txt... -[node 3] requesting goroutine dumps: done -[node 3] requesting goroutine dumps: last request failed: rpc error: ... -[node 3] requesting goroutine dumps: received response... -[node 3] requesting heap files... -[node 3] requesting heap files: creating error output: debug/nodes/3/heapprof.err.txt... -[node 3] requesting heap files: done -[node 3] requesting heap files: last request failed: rpc error: ... -[node 3] requesting heap files: received response... +[node 3] requesting goroutine dump list... +[node 3] requesting goroutine dump list: creating error output: debug/nodes/3/goroutines.err.txt... +[node 3] requesting goroutine dump list: done +[node 3] requesting goroutine dump list: last request failed: rpc error: ... +[node 3] requesting goroutine dump list: received response... +[node 3] requesting heap file list... +[node 3] requesting heap file list: creating error output: debug/nodes/3/heapprof.err.txt... +[node 3] requesting heap file list: done +[node 3] requesting heap file list: last request failed: rpc error: ... +[node 3] requesting heap file list: received response... [node 3] requesting heap profile... [node 3] requesting heap profile: done [node 3] requesting heap profile: received response... diff --git a/pkg/cli/testdata/zip/unavailable b/pkg/cli/testdata/zip/unavailable index 9b6b1881cecd..129e22ad69cb 100644 --- a/pkg/cli/testdata/zip/unavailable +++ b/pkg/cli/testdata/zip/unavailable @@ -92,12 +92,12 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null --timeout=.5s [node 1] requesting stacks... received response... writing binary output: debug/nodes/1/stacks.txt... done [node 1] requesting threads... received response... writing binary output: debug/nodes/1/threads.txt... done [node 1] requesting heap profile... received response... writing binary output: debug/nodes/1/heap.pprof... done -[node 1] requesting heap files... received response... -[node 1] requesting heap files: last request failed: rpc error: ... -[node 1] requesting heap files: creating error output: debug/nodes/1/heapprof.err.txt... done -[node 1] requesting goroutine dumps... received response... -[node 1] requesting goroutine dumps: last request failed: rpc error: ... -[node 1] requesting goroutine dumps: creating error output: debug/nodes/1/goroutines.err.txt... done +[node 1] requesting heap file list... received response... +[node 1] requesting heap file list: last request failed: rpc error: ... +[node 1] requesting heap file list: creating error output: debug/nodes/1/heapprof.err.txt... done +[node 1] requesting goroutine dump list... received response... +[node 1] requesting goroutine dump list: last request failed: rpc error: ... +[node 1] requesting goroutine dump list: creating error output: debug/nodes/1/goroutines.err.txt... done [node 1] requesting log file ... [node 1] 1 log file ... [node 1] [log file ... diff --git a/pkg/cli/zip.go b/pkg/cli/zip.go index 3df7b8f8666b..4780dbdae6e3 100644 --- a/pkg/cli/zip.go +++ b/pkg/cli/zip.go @@ -137,6 +137,10 @@ func (zc *debugZipContext) forAllNodes( type nodeLivenesses = map[roachpb.NodeID]livenesspb.NodeLivenessStatus func runDebugZip(cmd *cobra.Command, args []string) (retErr error) { + if err := zipCtx.files.validate(); err != nil { + return err + } + ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/pkg/cli/zip_helpers.go b/pkg/cli/zip_helpers.go index d883077472d5..24a6b3407e46 100644 --- a/pkg/cli/zip_helpers.go +++ b/pkg/cli/zip_helpers.go @@ -262,6 +262,59 @@ func (r *rangeSelection) items() map[int]struct{} { return s } +// fileSelection is used to define a subset of the files on the command line. +type fileSelection struct { + includePatterns []string + excludePatterns []string +} + +// validate checks that all specified patterns are valid. +func (fs *fileSelection) validate() error { + for _, p := range append(fs.includePatterns, fs.excludePatterns...) { + if _, err := filepath.Match(p, ""); err != nil { + return err + } + } + return nil +} + +// retrievalPatterns returns the list of glob patterns to send to the +// server, when listing which files are remotely available. We perform +// this filtering server-side so that the inclusion pattern can be +// used to reduce the amount of data retrieved in the "get file list" +// response. +func (fs *fileSelection) retrievalPatterns() []string { + if len(fs.includePatterns) == 0 { + // No include pattern defined: retrieve all files. + return []string{"*"} + } + return fs.includePatterns +} + +// isIncluded determine whether the given file name is included in the selection. +func (fs *fileSelection) isIncluded(filename string) bool { + // To be included, a file must be included in at least one of the retrieval patterns. + included := false + for _, p := range fs.retrievalPatterns() { + if matched, _ := filepath.Match(p, filename); matched { + included = true + break + } + } + if !included { + return false + } + // Then it must not match any of the exclusion patterns. + for _, p := range fs.excludePatterns { + if matched, _ := filepath.Match(p, filename); matched { + included = false + break + } + } + + return included +} + // to prevent interleaved output. var zipReportingMu syncutil.Mutex diff --git a/pkg/cli/zip_helpers_test.go b/pkg/cli/zip_helpers_test.go index 056c27835ecf..be4e2c9bd473 100644 --- a/pkg/cli/zip_helpers_test.go +++ b/pkg/cli/zip_helpers_test.go @@ -17,6 +17,39 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) +func TestFileSelection(t *testing.T) { + defer leaktest.AfterTest(t)() + + type s = string + testCases := []struct { + incl []s + excl []s + accepts []s + rejects []s + }{ + {nil, nil, []s{"anything", "really"}, nil}, + {nil, []s{"any*"}, []s{"really", "unrelated"}, []s{"anything"}}, + {nil, []s{"any*", "*lly"}, []s{"unrelated"}, []s{"anything", "really"}}, + {[]s{"any*"}, nil, []s{"anything"}, []s{"really", "unrelated"}}, + {[]s{"any*", "*lly"}, nil, []s{"anything", "really"}, []s{"unrelated"}}, + {[]s{"any*", "*lly"}, []s{"re*"}, []s{"anything", "oreilly"}, []s{"unrelated", "really"}}, + } + + for _, tc := range testCases { + sel := fileSelection{includePatterns: tc.incl, excludePatterns: tc.excl} + for _, f := range tc.accepts { + if !sel.isIncluded(f) { + t.Errorf("incl=%+v, excl=%+v: mistakenly does not include %q", sel.includePatterns, sel.excludePatterns, f) + } + } + for _, f := range tc.rejects { + if sel.isIncluded(f) { + t.Errorf("incl=%+v, excl=%+v: mistakenly includes %q", sel.includePatterns, sel.excludePatterns, f) + } + } + } +} + func TestNodeSelection(t *testing.T) { defer leaktest.AfterTest(t)() diff --git a/pkg/cli/zip_per_node.go b/pkg/cli/zip_per_node.go index e3a1d2f71f93..add34a9f066e 100644 --- a/pkg/cli/zip_per_node.go +++ b/pkg/cli/zip_per_node.go @@ -274,14 +274,15 @@ func (zc *debugZipContext) collectPerNodeData( } var profiles *serverpb.GetFilesResponse - s = nodePrinter.start("requesting heap files") + s = nodePrinter.start("requesting heap file list") if requestErr := zc.runZipFn(ctx, s, func(ctx context.Context) error { var err error profiles, err = zc.status.GetFiles(ctx, &serverpb.GetFilesRequest{ NodeId: id, Type: serverpb.FileType_HEAP, - Patterns: []string{"*"}, + Patterns: zipCtx.files.retrievalPatterns(), + ListOnly: true, }) return err }); requestErr != nil { @@ -290,25 +291,69 @@ func (zc *debugZipContext) collectPerNodeData( } } else { s.done() + + // Now filter the list of files and for each file selected, + // retrieve it. + // + // We retrieve the files one by one to avoid loading up multiple + // files' worth of data server-side in one response in RAM. This + // sequential processing is not significantly slower than + // requesting multiple files at once, because these files are + // large and the transfer time is mostly incurred in the data + // transmission, not the request-response roundtrip latency. + // Additionally, cross-node concurrency is parallelizing these + // transfers somehow. + nodePrinter.info("%d heap profiles found", len(profiles.Files)) for _, file := range profiles.Files { + if !zipCtx.files.isIncluded(file.Name) { + nodePrinter.info("skipping excluded heap profile: %s", file.Name) + continue + } + fName := maybeAddProfileSuffix(file.Name) name := prefix + "/heapprof/" + fName - if err := zc.z.createRaw(nodePrinter.start("writing profile"), name, file.Contents); err != nil { + fs := nodePrinter.start("retrieving %s", file.Name) + var oneprof *serverpb.GetFilesResponse + if fileErr := zc.runZipFn(ctx, fs, func(ctx context.Context) error { + var err error + oneprof, err = zc.status.GetFiles(ctx, &serverpb.GetFilesRequest{ + NodeId: id, + Type: serverpb.FileType_HEAP, + Patterns: []string{file.Name}, + ListOnly: false, // Retrieve the file contents. + }) return err + }); fileErr != nil { + if err := zc.z.createError(fs, name, fileErr); err != nil { + return err + } + } else { + fs.done() + + if len(oneprof.Files) < 1 { + // This is possible if the file was removed in-between + // the list request above and the content retrieval request. + continue + } + file := oneprof.Files[0] + if err := zc.z.createRaw(nodePrinter.start("writing profile"), name, file.Contents); err != nil { + return err + } } } } var goroutinesResp *serverpb.GetFilesResponse - s = nodePrinter.start("requesting goroutine dumps") + s = nodePrinter.start("requesting goroutine dump list") if requestErr := zc.runZipFn(ctx, s, func(ctx context.Context) error { var err error goroutinesResp, err = zc.status.GetFiles(ctx, &serverpb.GetFilesRequest{ NodeId: id, Type: serverpb.FileType_GOROUTINES, - Patterns: []string{"*"}, + Patterns: zipCtx.files.retrievalPatterns(), + ListOnly: true, }) return err }); requestErr != nil { @@ -317,12 +362,56 @@ func (zc *debugZipContext) collectPerNodeData( } } else { s.done() + + // Now filter the list of files and for each file selected, + // retrieve it. + // + // We retrieve the files one by one to avoid loading up multiple + // files' worth of data server-side in one response in RAM. This + // sequential processing is not significantly slower than + // requesting multiple files at once, because these files are + // large and the transfer time is mostly incurred in the data + // transmission, not the request-response roundtrip latency. + // Additionally, cross-node concurrency is parallelizing these + // transfers somehow. + nodePrinter.info("%d goroutine dumps found", len(goroutinesResp.Files)) for _, file := range goroutinesResp.Files { + if !zipCtx.files.isIncluded(file.Name) { + nodePrinter.info("skipping excluded goroutine dump: %s", file.Name) + continue + } + // NB: the files have a .txt.gz suffix already. name := prefix + "/goroutines/" + file.Name - if err := zc.z.createRaw(nodePrinter.start("writing dump"), name, file.Contents); err != nil { + + fs := nodePrinter.start("retrieving %s", file.Name) + var onedump *serverpb.GetFilesResponse + if fileErr := zc.runZipFn(ctx, fs, func(ctx context.Context) error { + var err error + onedump, err = zc.status.GetFiles(ctx, &serverpb.GetFilesRequest{ + NodeId: id, + Type: serverpb.FileType_GOROUTINES, + Patterns: []string{file.Name}, + ListOnly: false, // Retrieve the file contents. + }) return err + }); fileErr != nil { + if err := zc.z.createError(fs, name, fileErr); err != nil { + return err + } + } else { + fs.done() + + if len(onedump.Files) < 1 { + // This is possible if the file was removed in-between + // the list request above and the content retrieval request. + continue + } + file := onedump.Files[0] + if err := zc.z.createRaw(nodePrinter.start("writing dump"), name, file.Contents); err != nil { + return err + } } } } @@ -341,8 +430,26 @@ func (zc *debugZipContext) collectPerNodeData( } } else { s.done() + + // Now filter the list of files and for each file selected, + // retrieve it. + // + // We retrieve the files one by one to avoid loading up multiple + // files' worth of data server-side in one response in RAM. This + // sequential processing is not significantly slower than + // requesting multiple files at once, because these files are + // large and the transfer time is mostly incurred in the data + // transmission, not the request-response roundtrip latency. + // Additionally, cross-node concurrency is parallelizing these + // transfers somehow. + nodePrinter.info("%d log files found", len(logs.Files)) for _, file := range logs.Files { + if !zipCtx.files.isIncluded(file.Name) { + nodePrinter.info("skipping excluded log file: %s", file.Name) + continue + } + logPrinter := nodePrinter.withPrefix("log file: %s", file.Name) name := prefix + "/logs/" + file.Name var entries *serverpb.LogEntriesResponse From d13fda9ebd285d10c59e63f8ff359d29961395f6 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Mon, 26 Apr 2021 12:57:29 +0200 Subject: [PATCH 3/5] cli: fix a comment Release note: None --- pkg/cli/flags.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 1599c3e1b5a9..32df49394860 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -626,7 +626,7 @@ func init() { durationFlag(f, &zipCtx.cpuProfDuration, cliflags.ZipCPUProfileDuration) intFlag(f, &zipCtx.concurrency, cliflags.ZipConcurrency) } - // List-nodes + Zip commands. + // List-files + Zip commands. for _, cmd := range []*cobra.Command{debugZipCmd, debugListFilesCmd} { f := cmd.Flags() varFlag(f, &zipCtx.nodes.inclusive, cliflags.ZipNodes) From 1e9840f8852a563706ef2e3e1a020fda905caa1c Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 23 Apr 2021 13:01:19 +0200 Subject: [PATCH 4/5] cli/zip: clamp file retrieval by date, default to last 48 hours The size of generated `debug zip` outputs is mostly attributable to the discrete files retrieved from the server: - log files - goroutine dumps - heap profiles Even though we have various garbage collection techniques applied to these files, they still yield very large `debug zip` outputs: - the default retained cap for log files is 100MB per group, so with the default of up to 5 log groups per node, we are retaining up to 500MB of log files per node. - the default retained cap for goroutine dumps, heap profiles and memory stats is 128MB for each, so we are retaining up to 384MB of these files per node. For example, for a 10-node cluster we are looking at up to 7GB of data retained in these files in the default configuration. While this amount of data is relatively innocuous while it remains in the data directories server-side, the `debug zip` behavior which was previously to retrieve *all* of it was detrimental to the process of troubleshooting anomalies: this is a lot of data to move around, it cannot be e-mailed effectively, etc. In comparison, the other items retrieved by the `debug zip` command (range descriptors, SQL system tables) are typically just kilobytes large. Even a large cluster with tens of thousands of ranges and system tables typically incurs `zip` payloads no greater than a dozen megabytes. Hence the change described in the release note below. Release note (cli change): The `cockroach debug zip` command now retrieves only the log files, goroutine dumps and heap profiles pertaining to the last 48 hours prior to the command invocation. This behavior is supported entirely client-side, i.e. it is not necessary to upgrade the server nodes to effect these newly configurable limits. The other data items retrieved by `debug zip` are not affected by this time limit. This behavior can be customized by the two new flags `--files-from` and `--files-until`. Both are optional. See the command-line help text for details. The two new flags are also supported by `cockroach debug list-files`. It is advised to experiment with `list-files` prior to issuing a `debug zip` command that may retrieve a large amount of data. --- pkg/cli/cliflags/flags.go | 46 ++++++++++++++++++++ pkg/cli/context.go | 9 ++++ pkg/cli/debug_list_files.go | 18 ++++---- pkg/cli/flags.go | 2 + pkg/cli/zip_helpers.go | 56 ++++++++++++++++++++++-- pkg/cli/zip_helpers_test.go | 85 ++++++++++++++++++++++++++++++++++++- pkg/cli/zip_per_node.go | 10 +++-- 7 files changed, 210 insertions(+), 16 deletions(-) diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index 2862daaa3aaf..a68dcc59bd81 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -1327,6 +1327,52 @@ including '*.log' and then excluding '*foo*.log' will exclude 'barfoos.log'.
 
+
+You can use the 'debug list-files' command to explore how +this flag is applied.`, + } + + ZipFilesFrom = FlagInfo{ + Name: "files-from", + Description: ` +Limit file collection to those files modified after the +specified timestamp, inclusive. +The timestamp can be expressed as YYYY-MM-DD, +YYYY-MM-DD HH:MM or YYYY-MM-DD HH:MM:SS and is interpreted +in the UTC time zone. +The default value for this flag is 48 hours before now. +
+
+
+When customizing this flag to capture a narrow range +of time, consider adding extra seconds/minutes +to the range to accommodate clock drift and uncertainties. +
+
+
+You can use the 'debug list-files' command to explore how +this flag is applied.`, + } + + ZipFilesUntil = FlagInfo{ + Name: "files-until", + Description: ` +Limit file collection to those files created before the +specified timestamp, inclusive. +The timestamp can be expressed as YYYY-MM-DD, +YYYY-MM-DD HH:MM or YYYY-MM-DD HH:MM:SS and is interpreted +in the UTC time zone. +The default value for this flag is some time beyond +the current time, to ensure files created during +the collection are also included. +
+
+
+When customizing this flag to capture a narrow range +of time, consider adding extra seconds/minutes +to the range to accommodate clock drift and uncertainties. +
+
 
You can use the 'debug list-files' command to explore how this flag is applied.`, diff --git a/pkg/cli/context.go b/pkg/cli/context.go index 40014746b354..73586fbb8251 100644 --- a/pkg/cli/context.go +++ b/pkg/cli/context.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/util/log/logconfig" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/mattn/go-isatty" "github.com/spf13/cobra" "github.com/spf13/pflag" @@ -344,6 +345,14 @@ func setZipContextDefaults() { zipCtx.redactLogs = false zipCtx.cpuProfDuration = 5 * time.Second zipCtx.concurrency = 15 + + // File selection covers the last 48 hours by default. + // We add 24 hours to now for the end timestamp to ensure + // that files created during the zip operation are + // also included. + now := timeutil.Now() + zipCtx.files.startTimestamp = timestampValue(now.Add(-48 * time.Hour)) + zipCtx.files.endTimestamp = timestampValue(now.Add(24 * time.Hour)) } // dumpCtx captures the command-line parameters of the `dump` command. diff --git a/pkg/cli/debug_list_files.go b/pkg/cli/debug_list_files.go index 5d357f9cab2c..1f626f8547e3 100644 --- a/pkg/cli/debug_list_files.go +++ b/pkg/cli/debug_list_files.go @@ -136,24 +136,26 @@ func runDebugListFiles(cmd *cobra.Command, _ []string) error { for _, nodeID := range nodeList { nodeIDs := fmt.Sprintf("%d", nodeID) for _, logFile := range logFiles[nodeID] { - if !zipCtx.files.isIncluded(logFile.Name) { + ctime := extractTimeFromFileName(logFile.Name) + mtime := timeutil.Unix(0, logFile.ModTimeNanos) + if !zipCtx.files.isIncluded(logFile.Name, ctime, mtime) { continue } totalSize += logFile.SizeBytes - ctime := formatTimeSimple(extractTimeFromFileName(logFile.Name)) - mtime := formatTimeSimple(timeutil.Unix(0, logFile.ModTimeNanos)) - rows = append(rows, []string{nodeIDs, "log", logFile.Name, ctime, mtime, fmt.Sprintf("%d", logFile.SizeBytes)}) + ctimes := formatTimeSimple(ctime) + mtimes := formatTimeSimple(mtime) + rows = append(rows, []string{nodeIDs, "log", logFile.Name, ctimes, mtimes, fmt.Sprintf("%d", logFile.SizeBytes)}) } for _, ft := range fileTypes { fileType := int32(ft) for _, other := range otherFiles[nodeID][fileType] { - if !zipCtx.files.isIncluded(other.Name) { + ctime := extractTimeFromFileName(other.Name) + if !zipCtx.files.isIncluded(other.Name, ctime, ctime) { continue } totalSize += other.FileSize - ctime := formatTimeSimple(extractTimeFromFileName(other.Name)) - mtime := ctime - rows = append(rows, []string{nodeIDs, fileTypeNames[fileType], other.Name, ctime, mtime, fmt.Sprintf("%d", other.FileSize)}) + ctimes := formatTimeSimple(ctime) + rows = append(rows, []string{nodeIDs, fileTypeNames[fileType], other.Name, ctimes, ctimes, fmt.Sprintf("%d", other.FileSize)}) } } } diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 32df49394860..813bf870ddb9 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -633,6 +633,8 @@ func init() { varFlag(f, &zipCtx.nodes.exclusive, cliflags.ZipExcludeNodes) stringSliceFlag(f, &zipCtx.files.includePatterns, cliflags.ZipIncludedFiles) stringSliceFlag(f, &zipCtx.files.excludePatterns, cliflags.ZipExcludedFiles) + varFlag(f, &zipCtx.files.startTimestamp, cliflags.ZipFilesFrom) + varFlag(f, &zipCtx.files.endTimestamp, cliflags.ZipFilesUntil) } // Decommission command. diff --git a/pkg/cli/zip_helpers.go b/pkg/cli/zip_helpers.go index 24a6b3407e46..e4fbd8cb24db 100644 --- a/pkg/cli/zip_helpers.go +++ b/pkg/cli/zip_helpers.go @@ -266,6 +266,8 @@ func (r *rangeSelection) items() map[int]struct{} { type fileSelection struct { includePatterns []string excludePatterns []string + startTimestamp timestampValue + endTimestamp timestampValue } // validate checks that all specified patterns are valid. @@ -292,7 +294,7 @@ func (fs *fileSelection) retrievalPatterns() []string { } // isIncluded determine whether the given file name is included in the selection. -func (fs *fileSelection) isIncluded(filename string) bool { +func (fs *fileSelection) isIncluded(filename string, ctime, mtime time.Time) bool { // To be included, a file must be included in at least one of the retrieval patterns. included := false for _, p := range fs.retrievalPatterns() { @@ -311,8 +313,20 @@ func (fs *fileSelection) isIncluded(filename string) bool { break } } - - return included + if !included { + return false + } + // Then its mtime must not be before the selected "from" time. + if mtime.Before(time.Time(fs.startTimestamp)) { + return false + } + // And the selected "until" time must not be before the ctime. + // Note: the inverted call is because `Before` uses strict + // inequality. + if (*time.Time)(&fs.endTimestamp).Before(ctime) { + return false + } + return true } // to prevent interleaved output. @@ -520,3 +534,39 @@ func (z *zipReporter) result(err error) error { } return z.fail(err) } + +// timestampValue is a wrapper around time.Time which supports the +// pflag.Value interface and can be initialized from a command line flag. +// It recognizes the following input formats: +// YYYY-MM-DD +// YYYY-MM-DD HH:MM +// YYYY-MM-DD HH:MM:SS +type timestampValue time.Time + +// Type implements the pflag.Value interface. +func (t *timestampValue) Type() string { + return "YYYY-MM-DD [HH:MM[:SS]]" +} + +func (t *timestampValue) String() string { + return (*time.Time)(t).Format("2006-01-02 15:04:05") +} + +// Set implements the pflag.Value interface. +func (t *timestampValue) Set(v string) error { + v = strings.TrimSpace(v) + var tm time.Time + var err error + if len(v) <= len("YYYY-MM-DD") { + tm, err = time.ParseInLocation("2006-01-02", v, time.UTC) + } else if len(v) <= len("YYYY-MM-DD HH:MM") { + tm, err = time.ParseInLocation("2006-01-02 15:04", v, time.UTC) + } else { + tm, err = time.ParseInLocation("2006-01-02 15:04:05", v, time.UTC) + } + if err != nil { + return err + } + *t = timestampValue(tm) + return nil +} diff --git a/pkg/cli/zip_helpers_test.go b/pkg/cli/zip_helpers_test.go index be4e2c9bd473..570221f56cfa 100644 --- a/pkg/cli/zip_helpers_test.go +++ b/pkg/cli/zip_helpers_test.go @@ -12,6 +12,7 @@ package cli import ( "testing" + "time" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -35,21 +36,68 @@ func TestFileSelection(t *testing.T) { {[]s{"any*", "*lly"}, []s{"re*"}, []s{"anything", "oreilly"}, []s{"unrelated", "really"}}, } + var zt time.Time for _, tc := range testCases { sel := fileSelection{includePatterns: tc.incl, excludePatterns: tc.excl} for _, f := range tc.accepts { - if !sel.isIncluded(f) { + if !sel.isIncluded(f, zt, zt) { t.Errorf("incl=%+v, excl=%+v: mistakenly does not include %q", sel.includePatterns, sel.excludePatterns, f) } } for _, f := range tc.rejects { - if sel.isIncluded(f) { + if sel.isIncluded(f, zt, zt) { t.Errorf("incl=%+v, excl=%+v: mistakenly includes %q", sel.includePatterns, sel.excludePatterns, f) } } } } +func TestFileTimeBounds(t *testing.T) { + defer leaktest.AfterTest(t)() + + const tfmt = "2006-01-02 15:04" + var fs fileSelection + const startTs = "2021-04-21 10:00" + const endTs = "2021-04-23 10:00" + _ = fs.startTimestamp.Set(startTs) + _ = fs.endTimestamp.Set(endTs) + + testCases := []struct { + ctime string + mtime string + included bool + }{ + // Completely before. + {"2021-03-01 01:00", "2021-04-01 12:00", false}, + // Completely after. + {"2021-04-25 01:00", "2021-04-30 12:00", false}, + // mtime exactly at the start of the range. + {"2021-04-20 01:00", startTs, true}, + // ctime exactly at the end of the range. + {endTs, "2021-04-25 15:00", true}, + // Straddles at the beginning. + {"2021-04-20 01:00", "2021-04-21 15:00", true}, + // Straddles at the end. + {"2021-04-21 14:00", "2021-04-25 15:00", true}, + // Straddles over the entire range. + {"2021-04-20 01:00", "2021-04-25 15:00", true}, + // Straddles over the entire range, ctime at start timestamp. + {startTs, "2021-04-21 11:00", true}, + // Straddles over the entire range, mtime at end timestamp. + {"2021-04-20 10:00", endTs, true}, + // Coincides exactly. + {startTs, endTs, true}, + } + + for i, tc := range testCases { + ct, _ := time.ParseInLocation(tfmt, tc.ctime, time.UTC) + mt, _ := time.ParseInLocation(tfmt, tc.mtime, time.UTC) + if expected, actual := tc.included, fs.isIncluded("unused", ct, mt); expected != actual { + t.Errorf("%d: file(ctime=%s,mtime=%s) expected included %v, got %v", i, ct, mt, expected, actual) + } + } +} + func TestNodeSelection(t *testing.T) { defer leaktest.AfterTest(t)() @@ -89,3 +137,36 @@ func TestNodeSelection(t *testing.T) { } } } + +func TestTimestampFlag(t *testing.T) { + defer leaktest.AfterTest(t)() + + testCases := []struct { + in string + exp string + }{ + {"", `parsing time "" as "2006-01-02": cannot parse "" as "2006"`}, + {"2011-01-02", "2011-01-02 00:00:00"}, + {" 2011-01-02", "2011-01-02 00:00:00"}, + {"2011-01-02 ", "2011-01-02 00:00:00"}, + {"2011-01-02 03", `parsing time "2011-01-02 03" as "2006-01-02 15:04": cannot parse "" as ":"`}, + {"2011-01-02 03:04", "2011-01-02 03:04:00"}, + {" 2011-01-02 03:04", "2011-01-02 03:04:00"}, + {"2011-01-02 03:04 ", "2011-01-02 03:04:00"}, + {"2011-01-02 03:04:06", "2011-01-02 03:04:06"}, + {" 2011-01-02 03:04:06", "2011-01-02 03:04:06"}, + {"2011-01-02 03:04:06 ", "2011-01-02 03:04:06"}, + } + + for _, tc := range testCases { + var tt timestampValue + err := tt.Set(tc.in) + actual := tt.String() + if err != nil { + actual = err.Error() + } + if actual != tc.exp { + t.Errorf("%s: expected %q, got %q", tc.in, tc.exp, actual) + } + } +} diff --git a/pkg/cli/zip_per_node.go b/pkg/cli/zip_per_node.go index add34a9f066e..89d6dcd88910 100644 --- a/pkg/cli/zip_per_node.go +++ b/pkg/cli/zip_per_node.go @@ -306,7 +306,8 @@ func (zc *debugZipContext) collectPerNodeData( nodePrinter.info("%d heap profiles found", len(profiles.Files)) for _, file := range profiles.Files { - if !zipCtx.files.isIncluded(file.Name) { + ctime := extractTimeFromFileName(file.Name) + if !zipCtx.files.isIncluded(file.Name, ctime, ctime) { nodePrinter.info("skipping excluded heap profile: %s", file.Name) continue } @@ -377,7 +378,8 @@ func (zc *debugZipContext) collectPerNodeData( nodePrinter.info("%d goroutine dumps found", len(goroutinesResp.Files)) for _, file := range goroutinesResp.Files { - if !zipCtx.files.isIncluded(file.Name) { + ctime := extractTimeFromFileName(file.Name) + if !zipCtx.files.isIncluded(file.Name, ctime, ctime) { nodePrinter.info("skipping excluded goroutine dump: %s", file.Name) continue } @@ -445,7 +447,9 @@ func (zc *debugZipContext) collectPerNodeData( nodePrinter.info("%d log files found", len(logs.Files)) for _, file := range logs.Files { - if !zipCtx.files.isIncluded(file.Name) { + ctime := extractTimeFromFileName(file.Name) + mtime := timeutil.Unix(0, file.ModTimeNanos) + if !zipCtx.files.isIncluded(file.Name, ctime, mtime) { nodePrinter.info("skipping excluded log file: %s", file.Name) continue } From 6a2b422aac7d6622927af25068a80fb10fdb2444 Mon Sep 17 00:00:00 2001 From: Ricky Stewart Date: Mon, 26 Apr 2021 10:02:45 -0500 Subject: [PATCH 5/5] lint: check `make bazel-generate` earlier in the `teamcity-check` script We used to do this after `make buildshort` and `make generate`, but those can take a while. Do it before so obvious Bazel linter errors break the build sooner. Release note: None --- build/teamcity-check.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build/teamcity-check.sh b/build/teamcity-check.sh index bb78c044b7ce..30123143345f 100755 --- a/build/teamcity-check.sh +++ b/build/teamcity-check.sh @@ -38,16 +38,16 @@ fi tc_start_block "Ensure generated code is up-to-date" # Buffer noisy output and only print it on failure. +TEAMCITY_BAZEL_SUPPORT_LINT=1 # See teamcity-bazel-support.sh. +run run_bazel build/bazelutil/bazel-generate.sh &> artifacts/buildshort.log || (cat artifacts/buildshort.log && false) +rm artifacts/buildshort.log +check_clean "Run \`make bazel-generate\` to automatically regenerate these." run build/builder.sh make generate &> artifacts/generate.log || (cat artifacts/generate.log && false) rm artifacts/generate.log check_clean "Run \`make generate\` to automatically regenerate these." run build/builder.sh make buildshort &> artifacts/buildshort.log || (cat artifacts/buildshort.log && false) rm artifacts/buildshort.log check_clean "Run \`make buildshort\` to automatically regenerate these." -TEAMCITY_BAZEL_SUPPORT_LINT=1 # See teamcity-bazel-support.sh. -run run_bazel build/bazelutil/bazel-generate.sh &> artifacts/buildshort.log || (cat artifacts/buildshort.log && false) -rm artifacts/buildshort.log -check_clean "Run \`make bazel-generate\` to automatically regenerate these." tc_end_block "Ensure generated code is up-to-date" # generated code can generate new dependencies; check dependencies after generated code.