From 57a129d6e56b5fb06fb8799a72fe1983490343aa Mon Sep 17 00:00:00 2001 From: jkoberg Date: Tue, 13 Aug 2024 16:00:33 +0200 Subject: [PATCH] feat(decomposedfs): check checksum in WriteChunk Signed-off-by: jkoberg --- changelog/unreleased/tusd-checksums.md | 5 ++ pkg/rhttp/datatx/manager/tus/tus.go | 5 ++ pkg/storage/utils/decomposedfs/node/node.go | 19 ++++--- .../utils/decomposedfs/upload/upload.go | 54 ++++++++++++------- 4 files changed, 57 insertions(+), 26 deletions(-) create mode 100644 changelog/unreleased/tusd-checksums.md diff --git a/changelog/unreleased/tusd-checksums.md b/changelog/unreleased/tusd-checksums.md new file mode 100644 index 0000000000..0e64d3cdc7 --- /dev/null +++ b/changelog/unreleased/tusd-checksums.md @@ -0,0 +1,5 @@ +Enhancement: Tusd PATCH checksums + +Check checksums also on chunked uploads during PATCH requests + +https://github.com/cs3org/reva/pull/4807 diff --git a/pkg/rhttp/datatx/manager/tus/tus.go b/pkg/rhttp/datatx/manager/tus/tus.go index 829440ecaf..58e79f5a52 100644 --- a/pkg/rhttp/datatx/manager/tus/tus.go +++ b/pkg/rhttp/datatx/manager/tus/tus.go @@ -174,6 +174,11 @@ func (m *manager) Handler(fs storage.FS) (http.Handler, error) { }() // set etag, mtime and file id setHeaders(fs, w, r) + // set checksum + if v := r.Header.Get("Upload-Checksum"); v != "" { + ctx := context.WithValue(r.Context(), "checksum", v) + r = r.WithContext(ctx) + } handler.PatchFile(w, r) case "DELETE": handler.DelFile(w, r) diff --git a/pkg/storage/utils/decomposedfs/node/node.go b/pkg/storage/utils/decomposedfs/node/node.go index 42a507e303..b67a21e74b 100644 --- a/pkg/storage/utils/decomposedfs/node/node.go +++ b/pkg/storage/utils/decomposedfs/node/node.go @@ -1354,10 +1354,6 @@ func enoughDiskSpace(path string, fileSize uint64) bool { // CalculateChecksums calculates the sha1, md5 and adler32 checksums of a file func CalculateChecksums(ctx context.Context, path string) (hash.Hash, hash.Hash, hash.Hash32, error) { - sha1h := sha1.New() - md5h := md5.New() - adler32h := adler32.New() - _, subspan := tracer.Start(ctx, "os.Open") f, err := os.Open(path) subspan.End() @@ -1366,11 +1362,20 @@ func CalculateChecksums(ctx context.Context, path string) (hash.Hash, hash.Hash, } defer f.Close() - r1 := io.TeeReader(f, sha1h) + return CalculateChecksumsFromReader(ctx, f) +} + +// CalculateChecksumsFromReader calculates the sha1, md5 and adler32 checksums of a io.Reader +func CalculateChecksumsFromReader(ctx context.Context, r io.Reader) (hash.Hash, hash.Hash, hash.Hash32, error) { + sha1h := sha1.New() + md5h := md5.New() + adler32h := adler32.New() + + r1 := io.TeeReader(r, sha1h) r2 := io.TeeReader(r1, md5h) - _, subspan = tracer.Start(ctx, "io.Copy") - _, err = io.Copy(adler32h, r2) + _, subspan := tracer.Start(ctx, "io.Copy") + _, err := io.Copy(adler32h, r2) subspan.End() if err != nil { return nil, nil, nil, err diff --git a/pkg/storage/utils/decomposedfs/upload/upload.go b/pkg/storage/utils/decomposedfs/upload/upload.go index 4a3bfc61c2..e81bff2b65 100644 --- a/pkg/storage/utils/decomposedfs/upload/upload.go +++ b/pkg/storage/utils/decomposedfs/upload/upload.go @@ -19,6 +19,7 @@ package upload import ( + "bytes" "context" "encoding/hex" "fmt" @@ -57,6 +58,22 @@ var defaultFilePerm = os.FileMode(0664) func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) { ctx, span := tracer.Start(session.Context(ctx), "WriteChunk") defer span.End() + + // calculate checksum here + if checksum, ok := ctx.Value("checksum").(string); ok { + // we need to copy the contents into memory so we can write it to disk later + b := bytes.NewBuffer(nil) + sha1, md5, adler32, err := node.CalculateChecksumsFromReader(ctx, io.TeeReader(src, b)) + if err != nil { + return 0, err + } + + if err := verifyChecksum(checksum, sha1, md5, adler32); err != nil { + return 0, err + } + src = b + } + _, subspan := tracer.Start(ctx, "os.OpenFile") file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm) subspan.End() @@ -65,10 +82,6 @@ func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io } defer file.Close() - // calculate cheksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum - // TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ... - // It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used - // but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ... _, subspan = tracer.Start(ctx, "io.Copy") n, err := io.Copy(file, src) subspan.End() @@ -116,21 +129,7 @@ func (session *OcisSession) FinishUpload(ctx context.Context) error { // compare if they match the sent checksum // TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads if session.info.MetaData["checksum"] != "" { - var err error - parts := strings.SplitN(session.info.MetaData["checksum"], " ", 2) - if len(parts) != 2 { - return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'") - } - switch parts[0] { - case "sha1": - err = checkHash(parts[1], sha1h) - case "md5": - err = checkHash(parts[1], md5h) - case "adler32": - err = checkHash(parts[1], adler32h) - default: - err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0]) - } + err := verifyChecksum(session.info.MetaData["checksum"], sha1h, md5h, adler32h) if err != nil { session.store.Cleanup(ctx, session, true, false, false) return err @@ -376,3 +375,20 @@ func joinurl(paths ...string) string { return s.String() } + +func verifyChecksum(checksum string, sha1h hash.Hash, md5h hash.Hash, adler32h hash.Hash32) error { + parts := strings.SplitN(checksum, " ", 2) + if len(parts) != 2 { + return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'") + } + switch strings.ToLower(parts[0]) { + case "sha1": + return checkHash(parts[1], sha1h) + case "md5": + return checkHash(parts[1], md5h) + case "adler32": + return checkHash(parts[1], adler32h) + default: + return errtypes.BadRequest("unsupported checksum algorithm: " + parts[0]) + } +}