fix: fix single object cases for sync (#740)
The problem is mainly caused by the `compareObjects` function inside
`command/sync.go`, where `s5cmd` compares the source and destination
paths and extracts the files that are present only in the source or
only in the destination (including objects nested under prefixes, i.e.
a name together with its prefixes), along with the objects common to
both. If both arguments are non-objects, such as a wildcard expression,
a prefix, or a bucket, getting the relative paths of the files with
`src.URL.Relative()` yields compatible, comparable paths. In that case
there is no problem, at least within the scope of this issue.
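
A minimal sketch of that merge-compare idea, simplified to sorted
string slices (the real `compareObjects` works on channels of
`*storage.Object` and returns channels of URLs):

```go
// Stand-alone illustration of a single-pass merge-compare over two
// name lists that are already sorted in ascending order.
package main

import "fmt"

func compareNames(src, dst []string) (srcOnly, dstOnly, common []string) {
	i, j := 0, 0
	for i < len(src) && j < len(dst) {
		switch {
		case src[i] == dst[j]:
			common = append(common, src[i])
			i++
			j++
		case src[i] < dst[j]:
			srcOnly = append(srcOnly, src[i])
			i++
		default:
			dstOnly = append(dstOnly, dst[j])
			j++
		}
	}
	// Whatever remains on either side exists only on that side.
	srcOnly = append(srcOnly, src[i:]...)
	dstOnly = append(dstOnly, dst[j:]...)
	return srcOnly, dstOnly, common
}

func main() {
	src := []string{"a.txt", "dir/b.txt", "dir/c.txt"}
	dst := []string{"a.txt", "dir/c.txt", "old.txt"}
	srcOnly, dstOnly, common := compareNames(src, dst)
	fmt.Println(srcOnly, dstOnly, common) // [dir/b.txt] [old.txt] [a.txt dir/c.txt]
}
```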

However, when a single object is selected as the source, it is not
assigned a relative path via `func (u *url.URL) SetRelative(base *url.URL)`,
so `src.URL.Relative()` returns its absolute path.

Say the source file has the absolute path `folder/foo.txt`. The
algorithm compares `folder/foo.txt` against `s3://bucket/remoteFolder/`
and looks for the item `s3://bucket/remoteFolder/folder/foo.txt`. Except
for the edge case where a duplicate item happens to exist at that path,
the files never match.
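
For illustration, with the hypothetical names from the example above,
the lookup the comparison effectively performs is:

```go
package main

import "fmt"

func main() {
	// Hypothetical names from the example above, not taken from s5cmd output.
	srcName := "folder/foo.txt"              // src.URL.Relative() falls back to the absolute path
	dstPrefix := "s3://bucket/remoteFolder/" // destination the user asked to sync into

	// The comparison effectively asks whether this object already exists remotely:
	fmt.Println(dstPrefix + srcName) // s3://bucket/remoteFolder/folder/foo.txt
	// The object the copy actually writes is s3://bucket/remoteFolder/foo.txt,
	// so the names never match.
}
```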

When copying files, `s5cmd` does not use relative paths, so `foo.txt`
is still written to the intended remote path. However, because the
names never match, the copy is repeated on every sync operation.

The problem is solved by using the base name of the single source
object as its comparison name. The algorithm then simply looks for a
matching file named `foo.txt` in the destination, as intended.
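
A minimal, stand-alone sketch of that fix (the helper name
`pickSrcName` is made up for illustration; the actual change is the
`isSrcBatch` branch inside `compareObjects`, shown in the diff below):

```go
package main

import (
	"fmt"
	"path"
)

// pickSrcName mirrors the idea of the patch: keep the relative name for
// batch sources (wildcard, prefix, bucket), but fall back to the base
// name when the source is a single object.
func pickSrcName(relative string, isSrcBatch bool) string {
	if isSrcBatch {
		return relative
	}
	return path.Base(relative)
}

func main() {
	fmt.Println(pickSrcName("folder/foo.txt", false)) // foo.txt -> matches remoteFolder/foo.txt
	fmt.Println(pickSrcName("folder/foo.txt", true))  // folder/foo.txt (batch behaviour unchanged)
}
```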

This PR adds new test cases for the sync command. The existing tests
did not cover cases where the source is a single object under a prefix,
as opposed to an object directly in the bucket root or multiple objects
selected by a wildcard or prefix expression.

If an object sits directly under the `s3://bucket/` path, its relative
path is the same as its absolute path, so the names match during
comparison and the file was not re-copied every time. The new test
cases cover all of these scenarios.

Resolves: #676.
tarikozyurtt authored Jul 12, 2024
1 parent 21fa2da commit 787bc88
Showing 2 changed files with 209 additions and 2 deletions.
7 changes: 5 additions & 2 deletions command/sync.go
@@ -203,7 +203,7 @@ func (s Sync) Run(c *cli.Context) error {
isBatch = obj != nil && obj.Type.IsDir()
}

- onlySource, onlyDest, commonObjects := compareObjects(sourceObjects, destObjects)
+ onlySource, onlyDest, commonObjects := compareObjects(sourceObjects, destObjects, isBatch)

sourceObjects = nil
destObjects = nil
@@ -242,7 +242,7 @@ func (s Sync) Run(c *cli.Context) error {
// sourceObjects and destObjects channels are already sorted in ascending order.
// Returns objects those in only source, only destination
// and both.
- func compareObjects(sourceObjects, destObjects chan *storage.Object) (chan *url.URL, chan *url.URL, chan *ObjectPair) {
+ func compareObjects(sourceObjects, destObjects chan *storage.Object, isSrcBatch bool) (chan *url.URL, chan *url.URL, chan *ObjectPair) {
var (
srcOnly = make(chan *url.URL, extsortChannelBufferSize)
dstOnly = make(chan *url.URL, extsortChannelBufferSize)
@@ -262,6 +262,9 @@ func compareObjects(sourceObjects, destObjects chan *storage.Object) (chan *url.
for {
if srcOk {
srcName = filepath.ToSlash(src.URL.Relative())
if !isSrcBatch {
srcName = src.URL.Base()
}
}
if dstOk {
dstName = filepath.ToSlash(dst.URL.Relative())
204 changes: 204 additions & 0 deletions e2e/sync_test.go
@@ -91,6 +91,100 @@ func TestSyncSingleS3ObjectToLocalTwice(t *testing.T) {
assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync s3://bucket/dir/source.go .
func TestSyncSinglePrefixedS3ObjectToCurrentDirectory(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
dirname = "dir"
filename = "source.go"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)
putFile(t, s3client, bucket, fmt.Sprintf("%s/%s", dirname, filename), "content")

srcpath := fmt.Sprintf("s3://%s/%s/%s", bucket, dirname, filename)

cmd := s5cmd("sync", srcpath, ".")
result := icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v %v`, srcpath, filename),
})

// rerunning same command should not download object, empty result expected
result = icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)
assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync s3://bucket/prefix/source.go dir/
func TestSyncPrefixedSingleS3ObjectToLocalDirectory(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
dirname = "dir"
filename = "source.go"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)
putFile(t, s3client, bucket, fmt.Sprintf("%s/%s", dirname, filename), "content")

srcpath := fmt.Sprintf("s3://%s/%s/%s", bucket, dirname, filename)
dstpath := "folder"

cmd := s5cmd("sync", srcpath, fmt.Sprintf("%v/", dstpath))
result := icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v %v/%v`, srcpath, dstpath, filename),
})

// rerunning same command should not download object, empty result expected
result = icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)
assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync s3://bucket/source.go dir/
func TestSyncSingleS3ObjectToLocalDirectory(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
filename = "source.go"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)
putFile(t, s3client, bucket, filename, "content")

srcpath := fmt.Sprintf("s3://%s/%s", bucket, filename)
dstpath := "folder"

cmd := s5cmd("sync", srcpath, fmt.Sprintf("%v/", dstpath))
result := icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v %v/%v`, srcpath, dstpath, filename),
})

// rerunning same command should not download object, empty result expected
result = icmd.RunCmd(cmd)
result.Assert(t, icmd.Success)
assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync file s3://bucket
func TestSyncLocalFileToS3Twice(t *testing.T) {
t.Parallel()
@@ -127,6 +221,116 @@ func TestSyncLocalFileToS3Twice(t *testing.T) {
assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync file s3://bucket/prefix/
func TestSyncLocalFileToS3Prefix(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
filename = "testfile1.txt"
content = "this is the content"
dirname = "dir"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)

workdir := fs.NewDir(t, t.Name(), fs.WithFile(filename, content))
defer workdir.Remove()

dstpath := fmt.Sprintf("s3://%v/%v", bucket, dirname)

cmd := s5cmd("sync", filename, fmt.Sprintf("%v/", dstpath))
result := icmd.RunCmd(cmd, withWorkingDir(workdir))

result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v %v/%v`, filename, dstpath, filename),
})

// rerunning same command should not upload files, empty result expected
result = icmd.RunCmd(cmd, withWorkingDir(workdir))
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync dir/file s3://bucket
func TestSyncLocalFileInDirectoryToS3(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
dirname = "dir"
filename = "testfile1.txt"
content = "this is the content"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)

workdir := fs.NewDir(t, t.Name(), fs.WithDir(dirname, fs.WithFile(filename, content)))
defer workdir.Remove()

srcpath := fmt.Sprintf("%v/%v", dirname, filename)
dstpath := fmt.Sprintf("s3://%v", bucket)

cmd := s5cmd("sync", srcpath, dstpath)
result := icmd.RunCmd(cmd, withWorkingDir(workdir))

result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v/%v %v/%v`, dirname, filename, dstpath, filename),
})

// rerunning same command should not upload files, empty result expected
result = icmd.RunCmd(cmd, withWorkingDir(workdir))
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync dir/file s3://bucket/prefix/
func TestSyncLocalFileInDirectoryToS3Prefix(t *testing.T) {
t.Parallel()

s3client, s5cmd := setup(t)

const (
dirname = "dir"
filename = "testfile1.txt"
content = "this is the content"
)

bucket := s3BucketFromTestName(t)
createBucket(t, s3client, bucket)

workdir := fs.NewDir(t, t.Name(), fs.WithDir(dirname, fs.WithFile(filename, content)))
defer workdir.Remove()

srcpath := fmt.Sprintf("%v/%v", dirname, filename)
dstpath := fmt.Sprintf("s3://%v/%v", bucket, dirname)

cmd := s5cmd("sync", srcpath, fmt.Sprintf("%v/", dstpath))
result := icmd.RunCmd(cmd, withWorkingDir(workdir))

result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{
0: equals(`cp %v/%v %v/%v`, dirname, filename, dstpath, filename),
})

// rerunning same command should not upload files, empty result expected
result = icmd.RunCmd(cmd, withWorkingDir(workdir))
result.Assert(t, icmd.Success)

assertLines(t, result.Stdout(), map[int]compareFunc{})
}

// sync --raw object* s3://bucket/prefix/
func TestCopyLocalFilestoS3WithRawFlag(t *testing.T) {
if runtime.GOOS == "windows" {