Skip to content

Commit

Permalink
feat(decomposedfs): check checksum in WriteChunk
Browse files Browse the repository at this point in the history
Signed-off-by: jkoberg <jkoberg@owncloud.com>
  • Loading branch information
kobergj committed Aug 13, 2024
1 parent dce7872 commit 7217b85
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 26 deletions.
5 changes: 5 additions & 0 deletions changelog/unreleased/tusd-checksums.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Enhancement: Tusd PATCH checksums

Checksums are now also verified for each chunk of a chunked upload during PATCH requests.

https://github.com/cs3org/reva/pull/4807
5 changes: 5 additions & 0 deletions pkg/rhttp/datatx/manager/tus/tus.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ func (m *manager) Handler(fs storage.FS) (http.Handler, error) {
}()
// set etag, mtime and file id
setHeaders(fs, w, r)
// set checksum
if v := r.Header.Get("Upload-Checksum"); v != "" {
ctx := context.WithValue(r.Context(), "checksum", v)
r = r.WithContext(ctx)
}
handler.PatchFile(w, r)
case "DELETE":
handler.DelFile(w, r)
Expand Down
19 changes: 12 additions & 7 deletions pkg/storage/utils/decomposedfs/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1354,10 +1354,6 @@ func enoughDiskSpace(path string, fileSize uint64) bool {

// CalculateChecksums calculates the sha1, md5 and adler32 checksums of a file
func CalculateChecksums(ctx context.Context, path string) (hash.Hash, hash.Hash, hash.Hash32, error) {
sha1h := sha1.New()
md5h := md5.New()
adler32h := adler32.New()

_, subspan := tracer.Start(ctx, "os.Open")
f, err := os.Open(path)
subspan.End()
Expand All @@ -1366,11 +1362,20 @@ func CalculateChecksums(ctx context.Context, path string) (hash.Hash, hash.Hash,
}
defer f.Close()

r1 := io.TeeReader(f, sha1h)
return CalculateChecksumsFromReader(ctx, f)
}

// CalculateChecksumsFromReader calculates the sha1, md5 and adler32 checksums of a io.Reader
func CalculateChecksumsFromReader(ctx context.Context, r io.Reader) (hash.Hash, hash.Hash, hash.Hash32, error) {
sha1h := sha1.New()
md5h := md5.New()
adler32h := adler32.New()

r1 := io.TeeReader(r, sha1h)
r2 := io.TeeReader(r1, md5h)

_, subspan = tracer.Start(ctx, "io.Copy")
_, err = io.Copy(adler32h, r2)
_, subspan := tracer.Start(ctx, "io.Copy")
_, err := io.Copy(adler32h, r2)
subspan.End()
if err != nil {
return nil, nil, nil, err
Expand Down
54 changes: 35 additions & 19 deletions pkg/storage/utils/decomposedfs/upload/upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package upload

import (
"bytes"
"context"
"encoding/hex"
"fmt"
Expand Down Expand Up @@ -57,6 +58,22 @@ var defaultFilePerm = os.FileMode(0664)
func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) {
ctx, span := tracer.Start(session.Context(ctx), "WriteChunk")
defer span.End()

// calculate checksum here
if checksum, ok := ctx.Value("checksum").(string); ok {
// we need to copy the contents into memory so we can write it to disk later
b := bytes.NewBuffer(nil)
sha1, md5, adler32, err := node.CalculateChecksumsFromReader(ctx, io.TeeReader(src, b))
if err != nil {
return 0, err
}

if err := verifyChecksum(checksum, sha1, md5, adler32); err != nil {
return 0, err
}
src = b
}

_, subspan := tracer.Start(ctx, "os.OpenFile")
file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm)
subspan.End()
Expand All @@ -65,10 +82,6 @@ func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io
}
defer file.Close()

// calculate checksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum
// TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ...
// It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used
// but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ...
_, subspan = tracer.Start(ctx, "io.Copy")
n, err := io.Copy(file, src)
subspan.End()
Expand Down Expand Up @@ -116,21 +129,7 @@ func (session *OcisSession) FinishUpload(ctx context.Context) error {
// compare if they match the sent checksum
// TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads
if session.info.MetaData["checksum"] != "" {
var err error
parts := strings.SplitN(session.info.MetaData["checksum"], " ", 2)
if len(parts) != 2 {
return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'")
}
switch parts[0] {
case "sha1":
err = checkHash(parts[1], sha1h)
case "md5":
err = checkHash(parts[1], md5h)
case "adler32":
err = checkHash(parts[1], adler32h)
default:
err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0])
}
err := verifyChecksum(session.info.MetaData["checksum"], sha1h, md5h, adler32h)
if err != nil {
session.store.Cleanup(ctx, session, true, false, false)
return err
Expand Down Expand Up @@ -376,3 +375,20 @@ func joinurl(paths ...string) string {

return s.String()
}

// verifyChecksum checks a checksum given as "[algorithm] [checksum]" against
// the hash computed for the named algorithm.
//
// The text before the first space selects one of sha1h, md5h or adler32h;
// everything after that space is handed to checkHash together with the
// selected hash, and the result of checkHash is returned unchanged. A value
// without a space, or one naming any other algorithm, yields an
// errtypes.BadRequest error.
func verifyChecksum(checksum string, sha1h hash.Hash, md5h hash.Hash, adler32h hash.Hash32) error {
	algorithm, expected, found := strings.Cut(checksum, " ")
	if !found {
		return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'")
	}

	// Pick the hash that belongs to the requested algorithm; the comparison
	// itself is delegated to checkHash.
	var computed hash.Hash
	switch algorithm {
	case "sha1":
		computed = sha1h
	case "md5":
		computed = md5h
	case "adler32":
		computed = adler32h
	default:
		return errtypes.BadRequest("unsupported checksum algorithm: " + algorithm)
	}
	return checkHash(expected, computed)
}

0 comments on commit 7217b85

Please sign in to comment.