Skip to content

Commit

Permalink
feat: add wildcard and prefix support to cat
Browse files Browse the repository at this point in the history
  • Loading branch information
tarikozyurtt authored Jul 8, 2024
1 parent 706f74a commit a2e86d1
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 44 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# Changelog
## v2.3.0

#### Breaking changes
- Changed the exit code of `ls` from 1 to 0 when it is used with an empty bucket. It still exits with 1 if the bucket does not exist. ([#722](https://github.com/peak/s5cmd/issues/722))

#### Features
- Added prefix and wildcard support to `cat` command. ([#716](https://github.com/peak/s5cmd/issues/716))

## v2.2.2 - 13 Sep 2023

#### Bugfixes
Expand Down
62 changes: 45 additions & 17 deletions command/cat.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Examples:
2. Print specific version of a remote object's content to stdout
> s5cmd {{.HelpName}} --version-id VERSION_ID s3://bucket/prefix/object
3. Concatenate multiple objects matching a prefix or wildcard and print to stdout
> s5cmd {{.HelpName}} s3://bucket/prefix/*
`

func NewCatCommand() *cli.Command {
Expand Down Expand Up @@ -111,16 +114,43 @@ func (c Cat) Run(ctx context.Context) error {
printError(c.fullCommand, c.op, err)
return err
}
_, err = client.Stat(ctx, c.src)
if err != nil {
printError(c.fullCommand, c.op, err)
return err

// Initialize an empty channel to handle single or multiple objects
var objectChan <-chan *storage.Object

if c.src.IsWildcard() || c.src.IsPrefix() || c.src.IsBucket() {
objectChan = client.List(ctx, c.src, false)
} else {
_, err = client.Stat(ctx, c.src)
if err != nil {
printError(c.fullCommand, c.op, err)
return err
}
singleObjChan := make(chan *storage.Object, 1)
singleObjChan <- &storage.Object{URL: c.src}
close(singleObjChan)
objectChan = singleObjChan
}
buf := orderedwriter.New(os.Stdout)
_, err = client.Get(ctx, c.src, buf, c.concurrency, c.partSize)
if err != nil {
printError(c.fullCommand, c.op, err)
return err

return c.processObjects(ctx, client, objectChan)
}

// processObjects writes every object received on objectChan to stdout, one
// after another, in the order the channel delivers them.
//
// Entries reported as directories are skipped. The first listing error
// (obj.Err) or download error is reported via printError and returned
// immediately; remaining entries are not processed. It returns nil once the
// channel has been closed and fully consumed.
func (c Cat) processObjects(ctx context.Context, client *storage.S3, objectChan <-chan *storage.Object) error {
	for object := range objectChan {
		switch {
		case object.Err != nil:
			printError(c.fullCommand, c.op, object.Err)
			return object.Err
		case object.Type.IsDir():
			// Nothing to print for a directory entry.
			continue
		}

		// Each object gets its own writer on top of stdout.
		out := orderedwriter.New(os.Stdout)
		if _, err := client.Get(ctx, object.URL, out, c.concurrency, c.partSize); err != nil {
			printError(c.fullCommand, c.op, err)
			return err
		}
	}
	return nil
}
Expand All @@ -140,17 +170,15 @@ func validateCatCommand(c *cli.Context) error {
return fmt.Errorf("source must be a remote object")
}

if src.IsBucket() || src.IsPrefix() {
return fmt.Errorf("remote source must be an object")
}

if src.IsWildcard() {
return fmt.Errorf("remote source %q can not contain glob characters", src)
}

if err := checkVersioningWithGoogleEndpoint(c); err != nil {
return err
}

if src.IsWildcard() || src.IsPrefix() || src.IsBucket() {
if c.String("version-id") != "" {
return fmt.Errorf("wildcard/prefix operations are disabled with --version-id flag")
}
}

return nil
}
205 changes: 181 additions & 24 deletions e2e/cat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,30 +133,6 @@ func TestCatS3ObjectFail(t *testing.T) {
jsonCheck(true),
},
},
{
src: "s3://%v/prefix/file.txt/*",
name: "cat remote object with glob",
cmd: []string{
"--json",
"cat",
},
expected: map[int]compareFunc{
0: match(`{"operation":"cat","command":"cat s3:\/\/(.+)?\/prefix\/file\.txt\/\*","error":"remote source \\"s3:\/\/(.*)\/prefix\/file\.txt\/\*\\" can not contain glob characters"}`),
},
assertOps: []assertOp{
jsonCheck(true),
},
},
{
src: "s3://%v/prefix/",
name: "cat bucket",
cmd: []string{
"cat",
},
expected: map[int]compareFunc{
0: match(`ERROR "cat s3://(.+)?": remote source must be an object`),
},
},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -229,6 +205,41 @@ func TestCatLocalFileFail(t *testing.T) {
}
}

// TestCatInEmptyBucket checks how `cat` behaves against a bucket that exists
// but contains no objects: the bucket root (with or without a trailing slash)
// succeeds with no output, while a wildcard that matches nothing fails with
// "no object found".
func TestCatInEmptyBucket(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	// Bucket and bucket-prefix sources: exit code 0 and empty stdout.
	succeeding := []struct {
		name string
		src  string
	}{
		{name: "EmptyBucket", src: "s3://" + bucket},
		{name: "PrefixInEmptyBucket", src: "s3://" + bucket + "/"},
	}
	for _, tc := range succeeding {
		src := tc.src
		t.Run(tc.name, func(t *testing.T) {
			res := icmd.RunCmd(s5cmd("cat", src))

			res.Assert(t, icmd.Expected{ExitCode: 0})
			assertLines(t, res.Stdout(), nil)
		})
	}

	// Wildcard source: exit code 1 and an error line on stderr.
	t.Run("WildcardInEmptyBucket", func(t *testing.T) {
		res := icmd.RunCmd(s5cmd("cat", "s3://"+bucket+"/*"))

		res.Assert(t, icmd.Expected{ExitCode: 1})
		wantStderr := map[int]compareFunc{
			0: contains(fmt.Sprintf(`ERROR "cat s3://%v/*": no object found`, bucket)),
		}
		assertLines(t, res.Stderr(), wantStderr)
	})
}

// getSequentialFileContent creates a string with size bytes in size.
func getSequentialFileContent(size int64) (string, map[int]compareFunc) {
sb := strings.Builder{}
Expand Down Expand Up @@ -305,4 +316,150 @@ func TestCatByVersionID(t *testing.T) {
t.Errorf("(-want +got):\n%v", diff)
}
}

version := "1"

// wildcard and prefix fail cases
cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))

cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/folder/")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/folder/": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))

cmd = s5cmd("cat", "--version-id", version, "s3://"+bucket+"/*")
result = icmd.RunCmd(cmd)

result.Assert(t, icmd.Expected{ExitCode: 1})
assertLines(t, result.Stderr(), map[int]compareFunc{
0: equals(`ERROR "cat --version-id=%v s3://%v/*": wildcard/prefix operations are disabled with --version-id flag`, version, bucket),
}, strictLineCheck(false))
}

// TestCatPrefix verifies `cat` with prefix sources.
//
// The cases run in order against the same bucket, so files uploaded by an
// earlier case are still present for later ones. A case with nil files uploads
// nothing and only re-checks the output for a different prefix.
func TestCatPrefix(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)
	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	testCases := []struct {
		files    []string // keys to upload before running cat
		prefix   string   // key prefix appended to s3://bucket/
		expected string   // expected stdout of the cat command
	}{
		{files: []string{"file1.txt", "file2.txt"}, prefix: "", expected: "content0content1"},
		{files: []string{"dir/file3.txt", "dir/file4.txt"}, prefix: "", expected: "content0content1"},
		{files: nil, prefix: "dir/", expected: "content2content3"},
		{files: []string{"dir/nesteddir/file5.txt"}, prefix: "dir/", expected: "content2content3"},
		{files: nil, prefix: "dir/nesteddir/", expected: "content4"},
	}

	// offset counts the files uploaded by previous cases so that every file
	// gets a distinct body ("content0", "content1", ...).
	offset := 0
	for _, tc := range testCases {
		// Ranging over a nil slice is a no-op, so no explicit nil check is needed.
		for idx, file := range tc.files {
			putFile(t, s3client, bucket, file, fmt.Sprintf("content%d", idx+offset))
		}
		offset += len(tc.files)

		verifyCatCommand(t, s5cmd, bucket, tc.expected, tc.prefix)
	}
}

// TestCatWildcard uploads a fixed set of objects and verifies that `cat` with
// a wildcard source prints the concatenated contents of the matching objects.
func TestCatWildcard(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)
	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	type fixture struct {
		key     string
		content string
	}
	fixtures := []fixture{
		{key: "foo1.txt", content: "content0"},
		{key: "foo2.txt", content: "content1"},
		{key: "bar1.txt", content: "content2"},
		{key: "foolder/foo3.txt", content: "content3"},
		{key: "log-file-2024-01.txt", content: "content4"},
		{key: "log-file-2024-02.txt", content: "content5"},
		{key: "log-file-2023-01.txt", content: "content6"},
		{key: "log-file-2022-01.txt", content: "content7"},
	}
	for i := range fixtures {
		putFile(t, s3client, bucket, fixtures[i].key, fixtures[i].content)
	}

	type wildcardCase struct {
		prefix   string // wildcard pattern appended to s3://bucket/
		expected string // expected stdout of the cat command
	}
	cases := []wildcardCase{
		{prefix: "foo*", expected: "content0content1content3"},
		{prefix: "log-file-2024-*", expected: "content4content5"},
		{prefix: "log-file-*", expected: "content7content6content4content5"},
	}
	for i := range cases {
		verifyCatCommand(t, s5cmd, bucket, cases[i].expected, cases[i].prefix)
	}
}

// TestPrefixWildcardFail runs `cat` with a wildcard and a prefix source
// against a bucket that holds no objects and expects exit code 1 together
// with a "no object found" error, first in plain output mode and then with
// --json.
func TestPrefixWildcardFail(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)
	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	prefixes := []string{"foo*", "foolder/"}

	// Plain-text error output.
	for _, prefix := range prefixes {
		src := fmt.Sprintf("s3://%v/%v", bucket, prefix)
		res := icmd.RunCmd(s5cmd("cat", src))

		res.Assert(t, icmd.Expected{ExitCode: 1})
		wantStderr := map[int]compareFunc{
			0: equals(`ERROR "cat s3://%v/%v": no object found`, bucket, prefix),
		}
		assertLines(t, res.Stderr(), wantStderr, strictLineCheck(false))
	}

	// JSON error output.
	for _, prefix := range prefixes {
		src := fmt.Sprintf("s3://%v/%v", bucket, prefix)
		res := icmd.RunCmd(s5cmd("--json", "cat", src))

		res.Assert(t, icmd.Expected{ExitCode: 1})
		wantStderr := map[int]compareFunc{
			0: equals(`{"operation":"cat","command":"cat s3://%v/%v","error":"no object found"}`, bucket, prefix),
		}
		assertLines(t, res.Stderr(), wantStderr, strictLineCheck(false))
	}
}

// verifyCatCommand runs `s5cmd cat s3://<bucket>/<prefix>` and asserts that the
// command succeeds and that the first line of stdout equals expectedContent.
//
// It is a test helper: t.Helper is called so that a failing assertion is
// reported at the caller's line instead of inside this function.
func verifyCatCommand(t *testing.T, s5cmd func(...string) icmd.Cmd, bucket, expectedContent, prefix string) {
	t.Helper()

	cmd := s5cmd("cat", fmt.Sprintf("s3://%v/%v", bucket, prefix))
	result := icmd.RunCmd(cmd)

	result.Assert(t, icmd.Success)
	assertLines(t, result.Stdout(), map[int]compareFunc{
		0: equals(expectedContent),
	}, alignment(true))
}
18 changes: 18 additions & 0 deletions e2e/du_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,21 @@ func TestDiskUsageByVersionIDAndAllVersions(t *testing.T) {
})
}
}

// TestDiskUsageEmptyBucket checks that `du` on a bucket without objects
// succeeds and reports zero bytes in zero objects.
func TestDiskUsageEmptyBucket(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	res := icmd.RunCmd(s5cmd("du", "s3://"+bucket))
	res.Assert(t, icmd.Success)

	wantStdout := map[int]compareFunc{
		0: suffix(`0 bytes in 0 objects: s3://%v`, bucket),
	}
	assertLines(t, res.Stdout(), wantStdout)
}
16 changes: 16 additions & 0 deletions e2e/ls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -786,3 +786,19 @@ func TestListNestedLocalFolders(t *testing.T) {
2: match(filepath.ToSlash("file.txt")),
}, trimMatch(dateRe), alignment(true))
}

// TestEmptyBucket checks that `ls` on a bucket without objects succeeds and
// prints nothing to stdout.
func TestEmptyBucket(t *testing.T) {
	t.Parallel()

	s3client, s5cmd := setup(t)

	bucket := s3BucketFromTestName(t)
	createBucket(t, s3client, bucket)

	res := icmd.RunCmd(s5cmd("ls", "s3://"+bucket))
	res.Assert(t, icmd.Success)

	// No expected lines: stdout must be empty.
	assertLines(t, res.Stdout(), nil)
}
12 changes: 12 additions & 0 deletions e2e/select_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,18 @@ func TestSelectCommand(t *testing.T) {
outformat: "json",
expectedValue: "id0\n",
},
{
name: "input:json-lines,output:json-lines,all-versions:true",
cmd: []string{
"select", "json",
"--all-versions",
"--query", query,
},
informat: "json",
structure: "lines",
outformat: "json",
expectedValue: "",
},
},
"csv": {
{
Expand Down
Loading

0 comments on commit a2e86d1

Please sign in to comment.