Skip to content

Commit

Permalink
newlog: make filename parsing future-proof
Browse files Browse the repository at this point in the history
Instead of parsing filenames according to a certain schema,
we should use regexp to look for timestamps or UUIDs. This
way the file names can change in the future without affecting
backward compatibility with the existing code for parsing
filenames.

Signed-off-by: Paul Gaiduk <paulg@zededa.com>
  • Loading branch information
europaul committed Jan 13, 2025
1 parent 1436a36 commit 8be247a
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 36 deletions.
31 changes: 10 additions & 21 deletions pkg/pillar/cmd/loguploader/loguploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ func doFetchSend(ctx *loguploaderContext, zipDir string, iter *int) int {
if !strings.HasSuffix(f.Name(), ".gz") {
continue
}
timestamp, err := types.GetTimestampFromGzipName(f.Name())
timestamp, err := types.GetTimestampFromFileName(f.Name())
if err != nil {
continue
}
Expand Down Expand Up @@ -722,22 +722,14 @@ func doFetchSend(ctx *loguploaderContext, zipDir string, iter *int) int {
}

func buildAppUUIDMap(fName string) {
var appUUID string
if strings.HasPrefix(fName, types.AppPrefix) && strings.HasSuffix(fName, ".gz") {
fStr1 := strings.TrimPrefix(fName, types.AppPrefix)
fStr := strings.Split(fStr1, types.AppSuffix)
if len(fStr) != 2 {
err := fmt.Errorf("app split is not 2")
log.Error(err)
return
}
appUUID = fStr[0]
appUUID, err := types.GetUUIDFromFileName(fName)
if err != nil {
log.Errorf("buildAppUUIDMap: cannot parse app file name %s: %v", fName, err)
return
}

if len(appUUID) > 0 {
if _, ok := appGzipMap[appUUID]; !ok {
appGzipMap[appUUID] = true
}
if _, ok := appGzipMap[appUUID]; !ok {
appGzipMap[appUUID] = true
}
}

Expand Down Expand Up @@ -769,13 +761,10 @@ func sendToCloud(ctx *loguploaderContext, data []byte, iter int, fName string, f
var logsURL, appLogURL string
var sentFailed, serviceUnavailable bool
if isApp {
fStr1 := strings.TrimPrefix(fName, types.AppPrefix)
fStr := strings.Split(fStr1, types.AppSuffix)
if len(fStr) != 2 {
err := fmt.Errorf("app split is not 2")
log.Fatal(err)
appUUID, err := types.GetUUIDFromFileName(fName)
if err != nil {
return false, fmt.Errorf("sendToCloud: cannot parse app file name %s: %v", fName, err)
}
appUUID := fStr[0]
if ctx.zedcloudCtx.V2API {
appLogURL = fmt.Sprintf("apps/instanceid/%s/newlogs", appUUID)
} else {
Expand Down
53 changes: 38 additions & 15 deletions pkg/pillar/types/newlogtypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package types

import (
"fmt"
"regexp"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -85,21 +86,43 @@ type NewlogMetrics struct {
AppMetrics logfileMetrics // App metrics
}

// GetTimestampFromGzipName - get timestamp from gzip file name
func GetTimestampFromGzipName(fName string) (time.Time, error) {
// here are example file names:
// app.6656f860-7563-4bbf-8bba-051f5942982b.log.1730464687367.gz
// dev.log.keep.1730404601953.gz
// dev.log.upload.1730404601953.gz
// the timestamp is the number between the last two dots
nameParts := strings.Split(fName, ".")
if len(nameParts) < 2 {
return time.Time{}, fmt.Errorf("getTimestampFromGzipName: invalid log file name %s", fName)
// GetTimestampFromFileName extracts a microsecond timestamp from a filename
func GetTimestampFromFileName(filename string) (time.Time, error) {
// Regular expression to match a microsecond timestamp (sequence of 13-16 digits)
timestampRegex := regexp.MustCompile(`\b\d{13,16}\b`)

// Split the filename into parts using dots
parts := strings.Split(filename, ".")

// Check each part for a timestamp match
for _, part := range parts {
if timestampRegex.MatchString(part) {
// Convert the matched timestamp string to an integer
timestamp, err := strconv.ParseInt(part, 10, 64)
if err != nil {
return time.Time{}, fmt.Errorf("failed to parse timestamp: %s", err)
}
return time.Unix(0, timestamp*int64(time.Millisecond)), nil // Return the first valid timestamp found
}
}
timeStr := nameParts[len(nameParts)-2]
fTime, err := strconv.Atoi(timeStr)
if err != nil {
return time.Time{}, fmt.Errorf("getTimestampFromGzipName: %w", err)

return time.Time{}, fmt.Errorf("no timestamp found in filename: %s", filename)
}

// GetUUIDFromFileName extracts a UUID from a filename with dot-delimited parts
func GetUUIDFromFileName(filename string) (string, error) {
// UUID regex pattern (supports v4 UUIDs like "123e4567-e89b-12d3-a456-426614174000")
uuidRegex := regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`)

// Split the filename into parts using dots
parts := strings.Split(filename, ".")

// Check each part for a UUID match
for _, part := range parts {
if uuidRegex.MatchString(part) {
return part, nil // Return the first UUID found
}
}
return time.Unix(0, int64(fTime)*int64(time.Millisecond)), nil

return "", fmt.Errorf("no UUID found in filename: %s", filename)
}
180 changes: 180 additions & 0 deletions pkg/pillar/types/newlogtypes_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// Copyright (c) 2025 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package types

import (
"testing"
"time"

"github.com/onsi/gomega"
)

func TestGetTimestampFromFileName(t *testing.T) {
t.Parallel()
g := gomega.NewWithT(t)

tests := []struct {
name string
filename string
wantTime time.Time
wantError bool
}{
{
name: "Valid timestamp in filename",
filename: "dev.log.1731491904032.gz",
wantTime: time.Unix(0, 1731491904032*int64(time.Millisecond)),
wantError: false,
},
{
name: "Valid timestamp in regular filename",
filename: "dev.log.1731491904032",
wantTime: time.Unix(0, 1731491904032*int64(time.Millisecond)),
wantError: false,
},
{
name: "Valid timestamp with UUID",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
wantTime: time.Unix(0, 1731935033496*int64(time.Millisecond)),
wantError: false,
},
{
name: "Invalid timestamp in filename",
filename: "dev.log.invalidtimestamp.gz",
wantTime: time.Time{},
wantError: true,
},
{
name: "No timestamp in filename",
filename: "dev.log.gz",
wantTime: time.Time{},
wantError: true,
},
{
name: "Short timestamp in filename",
filename: "dev.log.123456789012.gz",
wantTime: time.Time{},
wantError: true,
},
{
name: "Old timestamp in filename",
filename: "dev.log.0000000000123.gz",
wantTime: time.Unix(0, 123*int64(time.Millisecond)),
wantError: false,
},
}

for _, tt := range tests {
tt := tt // create a new variable to hold the value of tt to avoid being overwritten by the next iteration (needed until Go 1.23)
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
gotTime, err := GetTimestampFromFileName(tt.filename)
if tt.wantError {
g.Expect(err).To(gomega.HaveOccurred())
} else {
g.Expect(err).NotTo(gomega.HaveOccurred())
g.Expect(gotTime).To(gomega.Equal(tt.wantTime))
}
})
}
}

func FuzzGetTimestampFromFileName(f *testing.F) {
testcases := []string{
"dev.log.1731491904032.gz",
"app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
"dev.log.invalidtimestamp.gz",
"dev.log.gz",
"dev.log.123456789012.gz",
"dev.log.1234567890123456.gz",
}

for _, tc := range testcases {
f.Add(tc)
}

f.Fuzz(func(t *testing.T, filename string) {
_, _ = GetTimestampFromFileName(filename)
})
}

func TestGetUUIDFromFileName(t *testing.T) {
t.Parallel()
g := gomega.NewWithT(t)

tests := []struct {
name string
filename string
wantUUID string
wantError bool
}{
{
name: "Valid UUID in filename",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
{
name: "Valid UUID in regular filename",
filename: "app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
{
name: "Valid UUID with timestamp",
filename: "app.123e4567-e89b-12d3-a456-426614174000.log.1731935033496.gz",
wantUUID: "123e4567-e89b-12d3-a456-426614174000",
wantError: false,
},
{
name: "No UUID in filename",
filename: "dev.log.1731491904032.gz",
wantUUID: "",
wantError: true,
},
{
name: "Invalid UUID in filename",
filename: "app.invalid-uuid-string.log.1731935033496.gz",
wantUUID: "",
wantError: true,
},
{
name: "UUID at the end of filename",
filename: "app.log.1731935033496.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.gz",
wantUUID: "8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d",
wantError: false,
},
}

for _, tt := range tests {
tt := tt // create a new variable to hold the value of tt to avoid being overwritten by the next iteration (needed until Go 1.23)
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
gotUUID, err := GetUUIDFromFileName(tt.filename)
if tt.wantError {
g.Expect(err).To(gomega.HaveOccurred())
} else {
g.Expect(err).NotTo(gomega.HaveOccurred())
g.Expect(gotUUID).To(gomega.Equal(tt.wantUUID))
}
})
}
}

func FuzzGetUUIDFromFileName(f *testing.F) {
testcases := []string{
"app.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.log.1731935033496.gz",
"app.123e4567-e89b-12d3-a456-426614174000.log.1731935033496.gz",
"dev.log.1731491904032.gz",
"app.invalid-uuid-string.log.1731935033496.gz",
"app.log.1731935033496.8ce1cc69-e1bb-4fe3-9613-e3eb1c5f5c4d.gz",
}

for _, tc := range testcases {
f.Add(tc)
}

f.Fuzz(func(t *testing.T, filename string) {
_, _ = GetUUIDFromFileName(filename)
})
}

0 comments on commit 8be247a

Please sign in to comment.