Skip to content

Commit

Permalink
Implement log analyzer
Browse files Browse the repository at this point in the history
Signed-off-by: Ben <belimele@redhat.com>
  • Loading branch information
Neon-White committed Dec 11, 2024
1 parent 1fd00fb commit d248009
Show file tree
Hide file tree
Showing 6 changed files with 372 additions and 3 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
github.com/kedacore/keda/v2 v2.7.0
github.com/kube-object-storage/lib-bucket-provisioner v0.0.0-20221122204822-d1a8c34382f1
github.com/libopenstorage/secrets v0.0.0-20240416031220-a17cf7f72c6c
github.com/lithammer/fuzzysearch v1.1.8
github.com/marstr/randname v0.0.0-20200428202425-99aca53a2176
github.com/onsi/ginkgo/v2 v2.20.2
github.com/onsi/gomega v1.34.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,8 @@ github.com/libopenstorage/operator v0.0.0-20200725001727-48d03e197117/go.mod h1:
github.com/libopenstorage/stork v1.3.0-beta1.0.20200630005842-9255e7a98775/go.mod h1:qBSzYTJVHlOMg5RINNiHD1kBzlasnrc2uKLPZLgu1Qs=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
Expand Down
2 changes: 1 addition & 1 deletion pkg/diagnostics/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (c *Collector) CollectPodsLogs(listOptions client.ListOptions) {

c.CollectDescribe("pod", pod.Name)

podLogs, _ := util.GetPodLogs(*pod)
podLogs, _ := util.GetPodLogs(*pod, nil, false)
for containerName, containerLog := range podLogs {
targetFile := fmt.Sprintf("%s/%s-%s.log", c.folderName, pod.Name, containerName)
err := util.SaveStreamToFile(containerLog, targetFile)
Expand Down
18 changes: 18 additions & 0 deletions pkg/diagnostics/diagnostics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ func Cmd() *cobra.Command {
CmdDbDump(),
CmdAnalyze(),
CmdReport(),
CmdLogAnalysis(),
)
return cmd
}
Expand Down Expand Up @@ -123,6 +124,23 @@ func CmdAnalyzeResources() *cobra.Command {
return cmd
}

// CmdLogAnalysis returns a CLI command
func CmdLogAnalysis() *cobra.Command {
cmd := &cobra.Command{
Use: "log-analysis",
Short: "Run the log analyzer on all NooBaa pods",
Run: RunLogAnalysis,
}
cmd.Flags().Bool("fuzzy", false, "(Experimental) Toggle fuzzy matching for the search string")
cmd.Flags().Int64("tail", 1000, "Number of lines to tail from the logs, minimum 1, default 1000")
cmd.Flags().Bool("noobaa-time", false, "Use NooBaa-provided timestamps instead of the Kubernetes ones (~10ms earlier than Kubernetes)")
cmd.Flags().BoolP("case-insensitive", "i", false, "Toggle search-string case insensitivity (similar to grep's -i flag)")
cmd.Flags().BoolP("whole-string", "w", false, "Match the whole search string as a single word (similar to grep's -w flag)")
cmd.Flags().Bool("prefer-line", false, "Prefer to print the line containing the search string when it doesn't contain a timestamp")

return cmd
}

/////// Deprecated Functions ///////

// CmdDbDumpDeprecated returns a CLI command
Expand Down
330 changes: 330 additions & 0 deletions pkg/diagnostics/log_analysis.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,330 @@
package diagnostics

import (
"bufio"
"fmt"
"regexp"
"strings"
"time"

"github.com/noobaa/noobaa-operator/v5/pkg/options"
"github.com/noobaa/noobaa-operator/v5/pkg/util"
"github.com/lithammer/fuzzysearch/fuzzy"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/spf13/cobra"
)

// LogAnalysisOptions holds parameters for log analysis
type LogAnalysisOptions struct {
tailLines int64
fuzzyMatch bool
searchString string
noobaaTimestamp bool
matchWholeString bool
caseInsensitivity bool
showLineIfNoTimestamp bool
}

const (
noobaaOutputTimestampFormat = "02/01/2006 15:04:05"
// To be used in case of additional NooBaa operators in the future
noobaaOperatorPodNamePrefix = "noobaa-operator"
operatorTimestampPrefix = `time="`
operatorWTimestampPrefix = "W"
)

var (
ansiRegex = regexp.MustCompile(`\x1b\[[0-9;]*m`)
)

// RunLogAnalysis runs a CLI command
func RunLogAnalysis(cmd *cobra.Command, args []string) {
tailLines, _ := cmd.Flags().GetInt64("tail")
fuzzyMatch, _ := cmd.Flags().GetBool("fuzzy")
noobaaTimestamp, _ := cmd.Flags().GetBool("noobaa-time")
showLineIfNoTimestamp, _ := cmd.Flags().GetBool("prefer-line")
matchWholeString, _ := cmd.Flags().GetBool("whole-string")
caseInsensitivity, _ := cmd.Flags().GetBool("case-insensitive")
searchString := ""

analysisOptions := LogAnalysisOptions{
tailLines: tailLines,
fuzzyMatch: fuzzyMatch,
searchString: searchString,
noobaaTimestamp: noobaaTimestamp,
matchWholeString: matchWholeString,
caseInsensitivity: caseInsensitivity,
showLineIfNoTimestamp: showLineIfNoTimestamp,
}

validateLogAnalysisParameters(&analysisOptions, args)
podSelector, _ := labels.Parse("app=noobaa")
listOptions := client.ListOptions{Namespace: options.Namespace, LabelSelector: podSelector}
CollectAndAnalyzeLogs(listOptions, &analysisOptions)
}

// validateLogAnalysisParameters validates the parameters for log analysis
func validateLogAnalysisParameters(analysisOptions *LogAnalysisOptions, args []string) {
log := util.Logger()
if len(args) != 1 || args[0] == "" {
analysisOptions.searchString = util.ShowStringPrompt("Provide a search string")
} else {
analysisOptions.searchString = args[0]
}

if analysisOptions.tailLines < 1 {
log.Fatalf("❌ Tail must be a whole positive number")
}

if analysisOptions.fuzzyMatch && analysisOptions.matchWholeString {
log.Fatalf("❌ Cannot use both fuzzy matching and whole-string matching")
}
}

// CollectAndAnalyzeLogs collects and analyzes logs of all existing noobaa pods
func CollectAndAnalyzeLogs(listOptions client.ListOptions, analysisOptions *LogAnalysisOptions) {
log := util.Logger()
chosenTimestamp := ""
if analysisOptions.noobaaTimestamp {
chosenTimestamp = "NooBaa"
} else {
chosenTimestamp = "Kubernetes"
}
log.Println()
log.Println("✨────────────────────────────────────────────✨")
log.Printf(" Collecting and analyzing pod logs -")
log.Printf(" Search string: %s", analysisOptions.searchString)
log.Printf(" Case insensitivity: %t", analysisOptions.caseInsensitivity)
log.Printf(" Match whole string: %t", analysisOptions.matchWholeString)
log.Printf(" From the last %d lines", analysisOptions.tailLines)
log.Printf(" Using %s timestamps", chosenTimestamp)
log.Println(" Found occurrences will be printed below")
log.Println(" in the format <pod name>:<container name>")
log.Println("✨────────────────────────────────────────────✨")
podList := &corev1.PodList{}
if !util.KubeList(podList, &listOptions) {
log.Printf(`❌ failed to get NooBaa pod list within namespace %s\n`, options.Namespace)
return
}
for i := range podList.Items {
pod := &podList.Items[i]
analyzePodLogs(pod, analysisOptions)
}
}

// analyzePodLogs will count the number of occurrences of the search string in a pod log
// as well as find and print the timestamps of the first and last occurrence of
// the search string in the logs
func analyzePodLogs(pod *corev1.Pod, analysisOptions *LogAnalysisOptions) {
log := util.Logger()
podLogs, err := util.GetPodLogs(*pod, &analysisOptions.tailLines, true)
if err != nil {
log.Printf("❌ Failed to get logs for pod %s: %v", pod.Name, err)
return
}
if analysisOptions.caseInsensitivity {
analysisOptions.searchString = strings.ToLower(analysisOptions.searchString)
}
stringBoundaryRegex := compileStringBoundaryRegex(analysisOptions) // Compiled here for better efficiency
for containerName, containerLog := range podLogs {
firstAppearanceFound := false
firstAppearanceTimestamp := ""
lastAppearanceTimestamp := ""
lastOccurrenceLine := ""
log.Printf("Analyzing %s:%s", pod.Name, containerName)
defer containerLog.Close()
occurrenceCounter := 0
scanner := bufio.NewScanner(containerLog)
for scanner.Scan() {
line := scanner.Text()
// Clean line from ANSI escape codes
if !strings.Contains(pod.Name, noobaaOperatorPodNamePrefix) {
line = sanitizeANSI(line)
}
lineContainsMatch := stringMatchCheck(line, stringBoundaryRegex, analysisOptions)
if lineContainsMatch {
if !firstAppearanceFound {
firstAppearanceFound = true
firstAppearanceTimestamp = extractTimeString(pod, line, *analysisOptions)
}
occurrenceCounter++
lastOccurrenceLine = line
}
}
lastAppearanceTimestamp = extractTimeString(pod, lastOccurrenceLine, *analysisOptions)
if occurrenceCounter == 0 {
log.Println("No occurrences found")
} else {
log.Printf("Hits: %d", occurrenceCounter)
log.Printf("Earliest appearance: %s", firstAppearanceTimestamp)
if occurrenceCounter > 1 {
log.Printf("Latest appearance: %s", lastAppearanceTimestamp)
}
}
log.Println("──────────────────────────────────────────────────────────────────────────────────")
}
}

// sanitizeANSI removes ANSI escape codes from a string
func sanitizeANSI(line string) string {
// Remove ANSI escape codes from the line
// This is done to avoid the terminal from interpreting them as color codes
// and printing them as garbage characters
// The regex pattern is taken from
return ansiRegex.ReplaceAllString(line, "")
}

// compileStringBoundaryRegex compiles a word boundary regex pattern for the search string
func compileStringBoundaryRegex(analysisOptions *LogAnalysisOptions) *regexp.Regexp {
var stringBoundarySearchPattern *regexp.Regexp
stringBoundaryPattern := fmt.Sprintf(`\b%s\b`, regexp.QuoteMeta(analysisOptions.searchString))
if analysisOptions.caseInsensitivity {
stringBoundarySearchPattern = regexp.MustCompile("(?i)" + stringBoundaryPattern)
} else {
stringBoundarySearchPattern = regexp.MustCompile(stringBoundaryPattern)
}
return stringBoundarySearchPattern
}

// stringMatchCheck checks if a line contains a match to the search string
func stringMatchCheck (line string, stringBoundaryRegex *regexp.Regexp, analysisOptions *LogAnalysisOptions) bool {
if analysisOptions.matchWholeString {
return wholestringMatchCheck(line, stringBoundaryRegex)
} else {
return partialMatchCheck(line, analysisOptions)
}
}

// wholestringMatchCheck checks if a line contains a whole string match to the search string
// Mostly used for readability and organization purposes
func wholestringMatchCheck (line string, stringBoundaryRegex *regexp.Regexp) bool {
return stringBoundaryRegex.MatchString(line)
}

// partialMatchCheck checks if a line contains a partial/fuzzy match to the search string
func partialMatchCheck (line string, analysisOptions *LogAnalysisOptions) bool {
if analysisOptions.fuzzyMatch {
fuzzyCaseInsensitiveMatch := analysisOptions.caseInsensitivity && fuzzy.MatchNormalized(analysisOptions.searchString, line)
fuzzyCaseSensitiveMatch := fuzzy.Match(analysisOptions.searchString, line)
return fuzzyCaseInsensitiveMatch || fuzzyCaseSensitiveMatch
} else {
// Check for a match by temporarily casting the line string to lowercase
// (the search string is cast in the beginning of analyzePodLogs)
caseInsensitiveMatch := analysisOptions.caseInsensitivity && strings.Contains(strings.ToLower(line), analysisOptions.searchString)
caseSensitiveMatch := strings.Contains(line, analysisOptions.searchString)
return caseInsensitiveMatch || caseSensitiveMatch
}
}

// extractTimeString extracts the timestamp from a log line by checking which pod
// it originated from and redirecting it to the appropriate extraction function
func extractTimeString(pod *corev1.Pod, line string, analysisOptions LogAnalysisOptions) string {
if analysisOptions.noobaaTimestamp {
if strings.Contains(pod.Name, noobaaOperatorPodNamePrefix) {
return extractOperatorTimestampString(line, analysisOptions.showLineIfNoTimestamp)
} else {
return extractCoreTimestampString(line, analysisOptions.showLineIfNoTimestamp)
}
} else {
return extractKubernetesTimestampString(line)
}
}

// extractKubernetesTimestampString extracts the timestamp from a Kubernetes log line
func extractKubernetesTimestampString(line string) string {
// Example log line:
// 2024-12-10T07:27:16.856641898Z Dec-10 7:27:16.847 [BGWorkers/36]...
splitLine := strings.SplitN(line, " ", 2)
return splitLine[0]
}

// extractCoreTimestampString extracts, parses and formats a timestamp string
// from pods running NooBaa Core code (core, endpoint, PV pod)
func extractCoreTimestampString(line string, showLineIfNoTimestamp bool) string {
// Example log line:
// Dec-9 15:16:31.621 [BGWorkers/36] [L0] ...
const minimumRequiredIndices = 2
// Example split result:
// ["Dec-9", "15:16:31.621", "[BGWorkers..."]
splitLine := strings.SplitN(line, " ", 3)
if len(splitLine) < minimumRequiredIndices {
return timeParsingError(showLineIfNoTimestamp, line)
}
lineDate := splitLine[0]
lineTime := splitLine[1]
// The year is assumed to be the current one since it's not provided
year := time.Now().Year()
layout := "Jan-2-2006 15:04:05.000"
timestampString := fmt.Sprintf("%s-%d %s", lineDate, year, lineTime)
parsedTime, err := time.Parse(layout, timestampString)
if err != nil {
return timeParsingError(showLineIfNoTimestamp, line)
}
return parsedTime.Format(noobaaOutputTimestampFormat)
}

// extractOperatorTimestampString extracts, parses, formats and returns a timestamp
// string from the NooBaa Operator pod logs
func extractOperatorTimestampString(line string, showLineIfNoTimestamp bool) string {
if strings.HasPrefix(line, operatorTimestampPrefix) {
return extractStandardOperatorTimestampString(line, showLineIfNoTimestamp)
} else if strings.HasPrefix(line, operatorWTimestampPrefix) {
return extractOperatorWTimestampString(line, showLineIfNoTimestamp)
}
return timeParsingError(showLineIfNoTimestamp, line)
}

// extractStandardOperatorTimestampString extracts the timestamp in case of a standard operator log line
// Example:
// time="2024-12-10T07:27:36Z" level=info msg="...
func extractStandardOperatorTimestampString(line string, showLineIfNoTimestamp bool) string {
secondQuotesIndex := strings.Index(line[len(operatorTimestampPrefix):], `"`)
if secondQuotesIndex == -1 {
return timeParsingError(showLineIfNoTimestamp, line)
}
timestampString := line[len(operatorTimestampPrefix) : len(operatorTimestampPrefix) + secondQuotesIndex]
// Parse the date using RFC3339 layout
const operatorTimestampFormat = time.RFC3339
parsedTimestamp, err := time.Parse(operatorTimestampFormat, timestampString)
if err != nil {
return timeParsingError(showLineIfNoTimestamp, line)
}
return parsedTimestamp.Format(noobaaOutputTimestampFormat)
}

// extractOperatorWTimestampString extracts the timestamp in case of a non-standard operator log line
// Example:
// W1209 13:41:05.890285 1 reflector.go:484...
func extractOperatorWTimestampString(line string, showLineIfNoTimestamp bool) string {
const minimumWTimeParseLength = 22
if len(line) < minimumWTimeParseLength {
return timeParsingError(showLineIfNoTimestamp, line)
}
wStringDayMonthStartIndex := 1
wStringDayMonthEndIndex := 5
wStringTimeStartIndex := 6
wStringTimeEndIndex := 21
datePart := line[wStringDayMonthStartIndex : wStringDayMonthEndIndex]
timePart := line[wStringTimeStartIndex : wStringTimeEndIndex]
day := datePart[2:]
month := datePart[:2]
// The year is assumed to be the current one since it's not provided
year := time.Now().Year()
fullTimeStr := fmt.Sprintf("%s/%s/%d %s", day, month, year, timePart)
return fullTimeStr
}

// timeParsingError returns an error message if the timestamp
// could not be parsed based on the user's prefer-line setting
func timeParsingError(showLineIfNoTimestamp bool, line string) string {
absentTimestampErrorWithLine := fmt.Sprintf("Could not parse timestamp in line %s", line)
const absentTimestampError = "No timestamp found"
if showLineIfNoTimestamp {
return absentTimestampErrorWithLine
}
return absentTimestampError
}
Loading

0 comments on commit d248009

Please sign in to comment.