diff --git a/changelog/unreleased/checksums.md b/changelog/unreleased/checksums.md new file mode 100644 index 0000000000..aa1cf28ab9 --- /dev/null +++ b/changelog/unreleased/checksums.md @@ -0,0 +1,8 @@ +Enhancement: Checksum support + +We now support checksums on file uploads and PROPFIND results. On uploads, the ocdav service now forwards the `OC-Checksum` (and the similar TUS `Upload-Checksum`) header to the storage provider. We added an internal http status code that allows storage drivers to return checksum errors. On PROPFINDs, ocdav now renders the `` header in a bug compatible way for oc10 backward compatability with existing clients. Finally, GET and HEAD requests now return the `OC-Checksum` header. + +https://github.com/cs3org/reva/pull/1400 +https://github.com/owncloud/core/pull/38304 +https://github.com/owncloud/ocis/issues/1291 +https://github.com/owncloud/ocis/issues/1316 \ No newline at end of file diff --git a/internal/grpc/services/storageprovider/storageprovider.go b/internal/grpc/services/storageprovider/storageprovider.go index fc2eadbc85..6c94cfc417 100644 --- a/internal/grpc/services/storageprovider/storageprovider.go +++ b/internal/grpc/services/storageprovider/storageprovider.go @@ -315,6 +315,11 @@ func (s *service) InitiateFileUpload(ctx context.Context, req *provider.Initiate }, nil } } + // TUS forward Upload-Checksum header as checksum, uses '[type] [hash]' format + if req.Opaque.Map["Upload-Checksum"] != nil { + metadata["checksum"] = string(req.Opaque.Map["Upload-Checksum"].Value) + } + // ownCloud mtime to set for the uploaded file if req.Opaque.Map["X-OC-Mtime"] != nil { metadata["mtime"] = string(req.Opaque.Map["X-OC-Mtime"].Value) } @@ -325,6 +330,17 @@ func (s *service) InitiateFileUpload(ctx context.Context, req *provider.Initiate switch err.(type) { case errtypes.IsNotFound: st = status.NewNotFound(ctx, "path not found when initiating upload") + case errtypes.IsBadRequest, errtypes.IsChecksumMismatch: + st = status.NewInvalidArg(ctx, err.Error()) + // TODO TUS uses a custom ChecksumMismatch 460 http status which is in an unnasigned range in + // https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml + // maybe 409 conflict is good enough + // someone is proposing `419 Checksum Error`, see https://stackoverflow.com/a/35665694 + // - it is also unassigned + // - ends in 9 as the 409 conflict + // - is near the 4xx errors about conditions: 415 Unsupported Media Type, 416 Range Not Satisfiable or 417 Expectation Failed + // owncloud only expects a 400 Bad request so InvalidArg is good enough for now + // seealso errtypes.StatusChecksumMismatch case errtypes.PermissionDenied: st = status.NewPermissionDenied(ctx, err, "permission denied") default: diff --git a/internal/http/services/owncloud/ocdav/error.go b/internal/http/services/owncloud/ocdav/error.go index 745918d466..e20fbc3aff 100644 --- a/internal/http/services/owncloud/ocdav/error.go +++ b/internal/http/services/owncloud/ocdav/error.go @@ -23,18 +23,23 @@ import ( "net/http" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + "github.com/pkg/errors" "github.com/rs/zerolog" ) type code int const ( + + // SabredavMethodBadRequest maps to HTTP 400 + SabredavMethodBadRequest code = iota // SabredavMethodNotAllowed maps to HTTP 405 - SabredavMethodNotAllowed code = iota + SabredavMethodNotAllowed ) var ( codesEnum = []string{ + "Sabre\\DAV\\Exception\\BadRequest", "Sabre\\DAV\\Exception\\MethodNotAllowed", } ) @@ -54,6 +59,18 @@ func Marshal(e exception) ([]byte, error) { }) } +// http://www.webdav.org/specs/rfc4918.html#ELEMENT_error +type errorXML struct { + XMLName xml.Name `xml:"d:error"` + Xmlnsd string `xml:"xmlns:d,attr"` + Xmlnss string `xml:"xmlns:s,attr"` + Exception string `xml:"s:exception"` + Message string `xml:"s:message"` + InnerXML []byte `xml:",innerxml"` +} + +var errInvalidPropfind = errors.New("webdav: invalid propfind") + // HandleErrorStatus checks the status code, logs a Debug or Error level message // and writes an appropriate http status func HandleErrorStatus(log *zerolog.Logger, w http.ResponseWriter, s *rpc.Status) { diff --git a/internal/http/services/owncloud/ocdav/get.go b/internal/http/services/owncloud/ocdav/get.go index 5e892e8337..d6ab7d6c89 100644 --- a/internal/http/services/owncloud/ocdav/get.go +++ b/internal/http/services/owncloud/ocdav/get.go @@ -19,12 +19,15 @@ package ocdav import ( + "fmt" "io" "net/http" "path" "strconv" + "strings" "time" + "github.com/cs3org/reva/internal/grpc/services/storageprovider" "github.com/cs3org/reva/internal/http/services/datagateway" "go.opencensus.io/trace" @@ -144,11 +147,9 @@ func (s *svc) handleGet(w http.ResponseWriter, r *http.Request, ns string) { } else { w.Header().Set("Content-Length", strconv.FormatUint(info.Size, 10)) } - /* - if md.Checksum != "" { - w.Header().Set("OC-Checksum", md.Checksum) - } - */ + if info.Checksum != nil { + w.Header().Set("OC-Checksum", fmt.Sprintf("%s:%s", strings.ToUpper(string(storageprovider.GRPC2PKGXS(info.Checksum.Type))), info.Checksum.Sum)) + } var c int64 if c, err = io.Copy(w, httpRes.Body); err != nil { sublog.Error().Err(err).Msg("error finishing copying data to response") diff --git a/internal/http/services/owncloud/ocdav/head.go b/internal/http/services/owncloud/ocdav/head.go index 13d4f1fb02..250da299bb 100644 --- a/internal/http/services/owncloud/ocdav/head.go +++ b/internal/http/services/owncloud/ocdav/head.go @@ -19,13 +19,16 @@ package ocdav import ( + "fmt" "net/http" "path" "strconv" + "strings" "time" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/internal/grpc/services/storageprovider" "github.com/cs3org/reva/pkg/appctx" "github.com/cs3org/reva/pkg/utils" "go.opencensus.io/trace" @@ -68,6 +71,9 @@ func (s *svc) handleHead(w http.ResponseWriter, r *http.Request, ns string) { w.Header().Set("ETag", info.Etag) w.Header().Set("OC-FileId", wrapResourceID(info.Id)) w.Header().Set("OC-ETag", info.Etag) + if info.Checksum != nil { + w.Header().Set("OC-Checksum", fmt.Sprintf("%s:%s", strings.ToUpper(string(storageprovider.GRPC2PKGXS(info.Checksum.Type))), info.Checksum.Sum)) + } t := utils.TSToTime(info.Mtime).UTC() lastModifiedString := t.Format(time.RFC1123Z) w.Header().Set("Last-Modified", lastModifiedString) diff --git a/internal/http/services/owncloud/ocdav/propfind.go b/internal/http/services/owncloud/ocdav/propfind.go index c56bb11bfb..a4a0716472 100644 --- a/internal/http/services/owncloud/ocdav/propfind.go +++ b/internal/http/services/owncloud/ocdav/propfind.go @@ -38,11 +38,11 @@ import ( rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" link "github.com/cs3org/go-cs3apis/cs3/sharing/link/v1beta1" provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/cs3org/reva/internal/grpc/services/storageprovider" "github.com/cs3org/reva/internal/http/services/owncloud/ocs/conversions" "github.com/cs3org/reva/pkg/appctx" ctxuser "github.com/cs3org/reva/pkg/user" "github.com/cs3org/reva/pkg/utils" - "github.com/pkg/errors" ) const ( @@ -106,6 +106,11 @@ func (s *svc) handlePropfind(w http.ResponseWriter, r *http.Request, ns string) metadataKeys := []string{} if pf.Allprop != nil { + // TODO this changes the behavior and returns all properties if allprops has been set, + // but allprops should only return some default properties + // see https://tools.ietf.org/html/rfc4918#section-9.1 + // the description of arbitrary_metadata_keys in https://cs3org.github.io/cs3apis/#cs3.storage.provider.v1beta1.ListContainerRequest an others may need clarification + // tracked in https://github.com/cs3org/cs3apis/issues/104 metadataKeys = append(metadataKeys, "*") } else { for i := range pf.Prop { @@ -207,7 +212,7 @@ func requiresExplicitFetching(n *xml.Name) bool { return false case _nsOwncloud: switch n.Local { - case "favorite", "share-types": + case "favorite", "share-types", "checksums": return true default: return false @@ -384,14 +389,35 @@ func (s *svc) mdToPropResponse(ctx context.Context, pf *propfindXML, md *provide lastModifiedString := t.Format(time.RFC1123Z) response.Propstat[0].Prop = append(response.Propstat[0].Prop, s.newProp("d:getlastmodified", lastModifiedString)) + // stay bug compatible with oc10, see https://github.com/owncloud/core/pull/38304#issuecomment-762185241 + var checksums strings.Builder if md.Checksum != nil { - // TODO(jfd): the actual value is an abomination like this: - // - // SHA1:9bd253a09d58be107bcb4169ebf338c8df34d086 MD5:d90bcc6bf847403d22a4abba64e79994 ADLER32:fca23ff5 - // - // yep, correct, space delimited key value pairs inside an oc:checksum tag inside an oc:checksums tag - value := fmt.Sprintf("%s:%s", md.Checksum.Type, md.Checksum.Sum) - response.Propstat[0].Prop = append(response.Propstat[0].Prop, s.newProp("oc:checksums", value)) + checksums.WriteString("") + checksums.WriteString(strings.ToUpper(string(storageprovider.GRPC2PKGXS(md.Checksum.Type)))) + checksums.WriteString(":") + checksums.WriteString(md.Checksum.Sum) + } + if md.Opaque != nil { + if e, ok := md.Opaque.Map["md5"]; ok { + if checksums.Len() == 0 { + checksums.WriteString("MD5:") + } else { + checksums.WriteString(" MD5:") + } + checksums.WriteString(string(e.Value)) + } + if e, ok := md.Opaque.Map["adler32"]; ok { + if checksums.Len() == 0 { + checksums.WriteString("ADLER32:") + } else { + checksums.WriteString(" ADLER32:") + } + checksums.WriteString(string(e.Value)) + } + } + if checksums.Len() > 0 { + checksums.WriteString("") + response.Propstat[0].Prop = append(response.Propstat[0].Prop, s.newProp("oc:checksums", checksums.String())) } // favorites from arbitrary metadata @@ -527,15 +553,37 @@ func (s *svc) mdToPropResponse(ctx context.Context, pf *propfindXML, md *provide } else { propstatOK.Prop = append(propstatOK.Prop, s.newProp("oc:favorite", "0")) } - case "checksums": // desktop + case "checksums": // desktop ... not really ... the desktop sends the OC-Checksum header + + // stay bug compatible with oc10, see https://github.com/owncloud/core/pull/38304#issuecomment-762185241 + var checksums strings.Builder if md.Checksum != nil { - // TODO(jfd): the actual value is an abomination like this: - // - // SHA1:9bd253a09d58be107bcb4169ebf338c8df34d086 MD5:d90bcc6bf847403d22a4abba64e79994 ADLER32:fca23ff5 - // - // yep, correct, space delimited key value pairs inside an oc:checksum tag inside an oc:checksums tag - value := fmt.Sprintf("%s:%s", md.Checksum.Type, md.Checksum.Sum) - propstatOK.Prop = append(propstatOK.Prop, s.newProp("oc:checksums", value)) + checksums.WriteString("") + checksums.WriteString(strings.ToUpper(string(storageprovider.GRPC2PKGXS(md.Checksum.Type)))) + checksums.WriteString(":") + checksums.WriteString(md.Checksum.Sum) + } + if md.Opaque != nil { + if e, ok := md.Opaque.Map["md5"]; ok { + if checksums.Len() == 0 { + checksums.WriteString("MD5:") + } else { + checksums.WriteString(" MD5:") + } + checksums.WriteString(string(e.Value)) + } + if e, ok := md.Opaque.Map["adler32"]; ok { + if checksums.Len() == 0 { + checksums.WriteString("ADLER32:") + } else { + checksums.WriteString(" ADLER32:") + } + checksums.WriteString(string(e.Value)) + } + } + if checksums.Len() > 13 { + checksums.WriteString("") + propstatOK.Prop = append(propstatOK.Prop, s.newProp("oc:checksums", checksums.String())) } else { propstatNotFound.Prop = append(propstatNotFound.Prop, s.newProp("oc:checksums", "")) } @@ -774,15 +822,3 @@ type propertyXML struct { // even including the DAV: namespace. InnerXML []byte `xml:",innerxml"` } - -// http://www.webdav.org/specs/rfc4918.html#ELEMENT_error -type errorXML struct { - XMLName xml.Name `xml:"d:error"` - Xmlnsd string `xml:"xmlns:d,attr"` - Xmlnss string `xml:"xmlns:s,attr"` - Exception string `xml:"s:exception"` - Message string `xml:"s:message"` - InnerXML []byte `xml:",innerxml"` -} - -var errInvalidPropfind = errors.New("webdav: invalid propfind") diff --git a/internal/http/services/owncloud/ocdav/put.go b/internal/http/services/owncloud/ocdav/put.go index 77ac6e8859..71a8c1b7b3 100644 --- a/internal/http/services/owncloud/ocdav/put.go +++ b/internal/http/services/owncloud/ocdav/put.go @@ -23,6 +23,7 @@ import ( "net/http" "path" "strconv" + "strings" "time" rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" @@ -30,6 +31,7 @@ import ( typespb "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" "github.com/cs3org/reva/internal/http/services/datagateway" "github.com/cs3org/reva/pkg/appctx" + "github.com/cs3org/reva/pkg/errtypes" "github.com/cs3org/reva/pkg/rhttp" "github.com/cs3org/reva/pkg/storage/utils/chunking" "github.com/cs3org/reva/pkg/utils" @@ -204,6 +206,36 @@ func (s *svc) handlePutHelper(w http.ResponseWriter, r *http.Request, content io w.Header().Set("X-OC-Mtime", "accepted") } + // curl -X PUT https://demo.owncloud.com/remote.php/webdav/testcs.bin -u demo:demo -d '123' -v -H 'OC-Checksum: SHA1:40bd001563085fc35165329ea1ff5c5ecbdbbeef' + + var cparts []string + // TUS Upload-Checksum header takes precedence + if checksum := r.Header.Get("Upload-Checksum"); checksum != "" { + cparts = strings.SplitN(checksum, " ", 2) + if len(cparts) != 2 { + sublog.Debug().Str("upload-checksum", checksum).Msg("invalid Upload-Checksum format, expected '[algorithm] [checksum]'") + w.WriteHeader(http.StatusBadRequest) + return + } + // Then try owncloud header + } else if checksum := r.Header.Get("OC-Checksum"); checksum != "" { + cparts = strings.SplitN(checksum, ":", 2) + if len(cparts) != 2 { + sublog.Debug().Str("oc-checksum", checksum).Msg("invalid OC-Checksum format, expected '[algorithm]:[checksum]'") + w.WriteHeader(http.StatusBadRequest) + return + } + } + // we do not check the algorithm here, because it might depend on the storage + if len(cparts) == 2 { + // Translate into TUS style Upload-Checksum header + opaqueMap["Upload-Checksum"] = &typespb.OpaqueEntry{ + Decoder: "plain", + // algorithm is always lowercase, checksum is separated by space + Value: []byte(strings.ToLower(cparts[0]) + " " + cparts[1]), + } + } + uReq := &provider.InitiateFileUploadRequest{ Ref: ref, Opaque: &typespb.Opaque{Map: opaqueMap}, @@ -249,8 +281,25 @@ func (s *svc) handlePutHelper(w http.ResponseWriter, r *http.Request, content io w.WriteHeader(http.StatusPartialContent) return } + if httpRes.StatusCode == errtypes.StatusChecksumMismatch { + w.WriteHeader(http.StatusBadRequest) + b, err := Marshal(exception{ + code: SabredavMethodBadRequest, + message: "The computed checksum does not match the one received from the client.", + }) + if err != nil { + sublog.Error().Msgf("error marshaling xml response: %s", b) + w.WriteHeader(http.StatusInternalServerError) + return + } + _, err = w.Write(b) + if err != nil { + sublog.Err(err).Msg("error writing response") + } + return + } sublog.Error().Err(err).Msg("PUT request to data server failed") - w.WriteHeader(http.StatusInternalServerError) + w.WriteHeader(httpRes.StatusCode) return } } diff --git a/internal/http/services/owncloud/ocdav/tus.go b/internal/http/services/owncloud/ocdav/tus.go index d173cb752a..2e25085866 100644 --- a/internal/http/services/owncloud/ocdav/tus.go +++ b/internal/http/services/owncloud/ocdav/tus.go @@ -56,6 +56,10 @@ func (s *svc) handleTusPost(w http.ResponseWriter, r *http.Request, ns string) { w.WriteHeader(http.StatusPreconditionFailed) return } + // r.Header.Get("OC-Checksum") + // TODO must be SHA1, ADLER32 or MD5 ... in capital letters???? + // curl -X PUT https://demo.owncloud.com/remote.php/webdav/testcs.bin -u demo:demo -d '123' -v -H 'OC-Checksum: SHA1:40bd001563085fc35165329ea1ff5c5ecbdbbeef' + //TODO check Expect: 100-continue // read filename from metadata diff --git a/pkg/errtypes/errtypes.go b/pkg/errtypes/errtypes.go index aa50e7c7d6..d85b6f9055 100644 --- a/pkg/errtypes/errtypes.go +++ b/pkg/errtypes/errtypes.go @@ -94,6 +94,22 @@ func (e BadRequest) Error() string { return "error: bad request: " + string(e) } // IsBadRequest implements the IsBadRequest interface. func (e BadRequest) IsBadRequest() {} +// ChecksumMismatch is the error to use when the sent hash does not match the calculated hash. +type ChecksumMismatch string + +func (e ChecksumMismatch) Error() string { return "error: checksum mismatch: " + string(e) } + +// IsChecksumMismatch implements the IsChecksumMismatch interface. +func (e ChecksumMismatch) IsChecksumMismatch() {} + +// StatusChecksumMismatch 419 is an unofficial http status code in an unassigned range that is used for checksum mismatches +// Proposed by https://stackoverflow.com/a/35665694 +// Official HTTP status code registry: https://www.iana.org/assignments/http-status-codes/http-status-codes.xhtml +// Note: TUS uses unassigned 460 Checksum-Mismatch +// RFC proposal for checksum digest uses a `Want-Digest` header: https://tools.ietf.org/html/rfc3230 +// oc clienst issue: https://github.com/owncloud/core/issues/22711 +const StatusChecksumMismatch = 419 + // IsNotFound is the interface to implement // to specify that an a resource is not found. type IsNotFound interface { @@ -147,3 +163,9 @@ type IsPartialContent interface { type IsBadRequest interface { IsBadRequest() } + +// IsChecksumMismatch is the interface to implement +// to specify that a checksum does not match. +type IsChecksumMismatch interface { + IsChecksumMismatch() +} diff --git a/pkg/rhttp/datatx/manager/simple/simple.go b/pkg/rhttp/datatx/manager/simple/simple.go index 7de66a7ee9..8f3cddd392 100644 --- a/pkg/rhttp/datatx/manager/simple/simple.go +++ b/pkg/rhttp/datatx/manager/simple/simple.go @@ -76,18 +76,24 @@ func (m *manager) Handler(fs storage.FS) (http.Handler, error) { ref := &provider.Reference{Spec: &provider.Reference_Path{Path: fn}} err := fs.Upload(ctx, ref, r.Body) - if err != nil { - if _, ok := err.(errtypes.IsPartialContent); ok { - w.WriteHeader(http.StatusPartialContent) - return - } - sublog.Error().Err(err).Msg("error uploading file") + switch v := err.(type) { + case nil: + w.WriteHeader(http.StatusOK) + case errtypes.PartialContent: + w.WriteHeader(http.StatusPartialContent) + case errtypes.ChecksumMismatch: + w.WriteHeader(errtypes.StatusChecksumMismatch) + case errtypes.NotFound: + w.WriteHeader(http.StatusNotFound) + case errtypes.PermissionDenied: + w.WriteHeader(http.StatusForbidden) + case errtypes.InvalidCredentials: + w.WriteHeader(http.StatusUnauthorized) + default: + sublog.Error().Err(v).Msg("error uploading file") w.WriteHeader(http.StatusInternalServerError) - return } - - w.WriteHeader(http.StatusOK) - + return default: w.WriteHeader(http.StatusNotImplemented) } diff --git a/pkg/storage/fs/ocis/node.go b/pkg/storage/fs/ocis/node.go index c2ab5537e7..f79a07e0ed 100644 --- a/pkg/storage/fs/ocis/node.go +++ b/pkg/storage/fs/ocis/node.go @@ -21,7 +21,9 @@ package ocis import ( "context" "crypto/md5" + "encoding/hex" "fmt" + "hash" "io" "os" "path/filepath" @@ -31,6 +33,7 @@ import ( userpb "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" types "github.com/cs3org/go-cs3apis/cs3/types/v1beta1" + "github.com/cs3org/reva/internal/grpc/services/storageprovider" "github.com/cs3org/reva/pkg/appctx" "github.com/cs3org/reva/pkg/errtypes" "github.com/cs3org/reva/pkg/mime" @@ -45,7 +48,8 @@ const ( _shareTypesKey = "http://owncloud.org/ns/share-types" _userShareType = "0" - _favoriteKey = "http://owncloud.org/ns/favorite" + _favoriteKey = "http://owncloud.org/ns/favorite" + _checksumsKey = "http://owncloud.org/ns/checksums" ) // Node represents a node in the tree and provides methods to get a Parent or Child instance @@ -481,7 +485,7 @@ func (n *Node) AsResourceInfo(ctx context.Context, rp *provider.ResourcePermissi // use temporary etag if it is set if b, err := xattr.Get(nodePath, tmpEtagAttr); err == nil { - ri.Etag = fmt.Sprintf(`"%x"`, string(b)) + ri.Etag = fmt.Sprintf(`"%x"`, string(b)) // TODO why do we convert string(b)? is the temporary etag stored as string? -> should we use bytes? use hex.EncodeToString? } else if ri.Etag, err = calculateEtag(n.ID, tmTime); err != nil { sublog.Debug().Err(err).Msg("could not calculate etag") } @@ -535,24 +539,32 @@ func (n *Node) AsResourceInfo(ctx context.Context, rp *provider.ResourcePermissi } } + // checksums + if _, ok := mdKeysMap[_checksumsKey]; (nodeType == provider.ResourceType_RESOURCE_TYPE_FILE) && returnAllKeys || ok { + // TODO which checksum was requested? sha1 adler32 or md5? for now hardcode sha1? + readChecksumIntoResourceChecksum(ctx, nodePath, storageprovider.XSSHA1, ri) + readChecksumIntoOpaque(ctx, nodePath, storageprovider.XSMD5, ri) + readChecksumIntoOpaque(ctx, nodePath, storageprovider.XSAdler32, ri) + } + // only read the requested metadata attributes - list, err := xattr.List(nodePath) + attrs, err := xattr.List(nodePath) if err != nil { sublog.Error().Err(err).Msg("error getting list of extended attributes") } else { - for _, entry := range list { + for i := range attrs { // filter out non-custom properties - if !strings.HasPrefix(entry, metadataPrefix) { + if !strings.HasPrefix(attrs[i], metadataPrefix) { continue } // only read when key was requested - k := entry[len(metadataPrefix):] + k := attrs[i][len(metadataPrefix):] if _, ok := mdKeysMap[k]; returnAllKeys || ok { - if val, err := xattr.Get(nodePath, entry); err == nil { + if val, err := xattr.Get(nodePath, attrs[i]); err == nil { metadata[k] = string(val) } else { sublog.Error().Err(err). - Str("entry", entry). + Str("entry", attrs[i]). Msg("error retrieving xattr metadata") } } @@ -570,6 +582,44 @@ func (n *Node) AsResourceInfo(ctx context.Context, rp *provider.ResourcePermissi return ri, nil } +func readChecksumIntoResourceChecksum(ctx context.Context, nodePath, algo string, ri *provider.ResourceInfo) { + v, err := xattr.Get(nodePath, checksumPrefix+algo) + switch { + case err == nil: + ri.Checksum = &provider.ResourceChecksum{ + Type: storageprovider.PKG2GRPCXS(algo), + Sum: hex.EncodeToString(v), + } + case isNoData(err): + appctx.GetLogger(ctx).Debug().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("checksum not set") + case isNotFound(err): + appctx.GetLogger(ctx).Error().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("file not fount") + default: + appctx.GetLogger(ctx).Error().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("could not read checksum") + } +} +func readChecksumIntoOpaque(ctx context.Context, nodePath, algo string, ri *provider.ResourceInfo) { + v, err := xattr.Get(nodePath, checksumPrefix+algo) + switch { + case err == nil: + if ri.Opaque == nil { + ri.Opaque = &types.Opaque{ + Map: map[string]*types.OpaqueEntry{}, + } + } + ri.Opaque.Map[algo] = &types.OpaqueEntry{ + Decoder: "plain", + Value: []byte(hex.EncodeToString(v)), + } + case isNoData(err): + appctx.GetLogger(ctx).Debug().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("checksum not set") + case isNotFound(err): + appctx.GetLogger(ctx).Error().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("file not fount") + default: + appctx.GetLogger(ctx).Error().Err(err).Str("nodepath", nodePath).Str("algorithm", algo).Msg("could not read checksum") + } +} + // HasPropagation checks if the propagation attribute exists and is set to "1" func (n *Node) HasPropagation() (propagation bool) { if b, err := xattr.Get(n.lu.toInternalPath(n.ID), propagationAttr); err == nil { @@ -592,6 +642,11 @@ func (n *Node) SetTMTime(t time.Time) (err error) { return xattr.Set(n.lu.toInternalPath(n.ID), treeMTimeAttr, []byte(t.UTC().Format(time.RFC3339Nano))) } +// SetChecksum writes the checksum with the given checksum type to the extended attributes +func (n *Node) SetChecksum(csType string, h hash.Hash) (err error) { + return xattr.Set(n.lu.toInternalPath(n.ID), checksumPrefix+csType, h.Sum(nil)) +} + // UnsetTempEtag removes the temporary etag attribute func (n *Node) UnsetTempEtag() (err error) { if err = xattr.Remove(n.lu.toInternalPath(n.ID), tmpEtagAttr); err != nil { diff --git a/pkg/storage/fs/ocis/ocis.go b/pkg/storage/fs/ocis/ocis.go index 0d17aebf66..b3024b51ca 100644 --- a/pkg/storage/fs/ocis/ocis.go +++ b/pkg/storage/fs/ocis/ocis.go @@ -67,9 +67,9 @@ const ( favPrefix string = ocisPrefix + "fav." // a temporary etag for a folder that is removed when the mtime propagation happens - tmpEtagAttr string = ocisPrefix + "tmp.etag" - referenceAttr string = ocisPrefix + "cs3.ref" // arbitrary metadata - //checksumPrefix string = ocisPrefix + "cs." // TODO add checksum support + tmpEtagAttr string = ocisPrefix + "tmp.etag" + referenceAttr string = ocisPrefix + "cs3.ref" // target of a cs3 reference + checksumPrefix string = ocisPrefix + "cs." // followed by the algorithm, eg. ocis.cs.sha1 trashOriginAttr string = ocisPrefix + "trash.origin" // trash origin // we use a single attribute to enable or disable propagation of both: synctime and treesize diff --git a/pkg/storage/fs/ocis/upload.go b/pkg/storage/fs/ocis/upload.go index d79093ab0a..5d0261e569 100644 --- a/pkg/storage/fs/ocis/upload.go +++ b/pkg/storage/fs/ocis/upload.go @@ -20,11 +20,18 @@ package ocis import ( "context" + "crypto/md5" + "crypto/sha1" + "encoding/hex" "encoding/json" + "fmt" + "hash" + "hash/adler32" "io" "io/ioutil" "os" "path/filepath" + "strings" "time" userpb "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" @@ -36,11 +43,14 @@ import ( "github.com/cs3org/reva/pkg/user" "github.com/google/uuid" "github.com/pkg/errors" + "github.com/rs/zerolog" tusd "github.com/tus/tusd/pkg/handler" ) var defaultFilePerm = os.FileMode(0664) +// TODO Upload (and InitiateUpload) needs a way to receive the expected checksum. +// Maybe in metadata as 'checksum' => 'sha1 aeosvp45w5xaeoe' = lowercase, space separated? func (fs *ocisfs) Upload(ctx context.Context, ref *provider.Reference, r io.ReadCloser) (err error) { upload, err := fs.GetUpload(ctx, ref.GetPath()) if err != nil { @@ -61,7 +71,7 @@ func (fs *ocisfs) Upload(ctx context.Context, ref *provider.Reference, r io.Read uploadInfo := upload.(*fileUpload) p := uploadInfo.info.Storage["NodeName"] - ok, err := chunking.IsChunked(p) + ok, err := chunking.IsChunked(p) // check chunking v1 if err != nil { return errors.Wrap(err, "ocisfs: error checking path") } @@ -96,6 +106,7 @@ func (fs *ocisfs) Upload(ctx context.Context, ref *provider.Reference, r io.Read // InitiateUpload returns upload ids corresponding to different protocols it supports // TODO read optional content for small files in this request +// TODO InitiateUpload (and Upload) needs a way to receive the expected checksum. Maybe in metadata as 'checksum' => 'sha1 aeosvp45w5xaeoe' = lowercase, space separated? func (fs *ocisfs) InitiateUpload(ctx context.Context, ref *provider.Reference, uploadLength int64, metadata map[string]string) (map[string]string, error) { log := appctx.GetLogger(ctx) @@ -129,6 +140,18 @@ func (fs *ocisfs) InitiateUpload(ctx context.Context, ref *provider.Reference, u if _, ok := metadata["sizedeferred"]; ok { info.SizeIsDeferred = true } + if metadata["checksum"] != "" { + parts := strings.SplitN(metadata["checksum"], " ", 2) + if len(parts) != 2 { + return nil, errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'") + } + switch parts[0] { + case "sha1", "md5", "adler32": + info.MetaData["checksum"] = metadata["checksum"] + default: + return nil, errtypes.BadRequest("unsupported checksum algorithm: " + parts[0]) + } + } } log.Debug().Interface("info", info).Interface("node", n).Interface("metadata", metadata).Msg("ocisfs: resolved filename") @@ -353,6 +376,10 @@ func (upload *fileUpload) WriteChunk(ctx context.Context, offset int64, src io.R } defer file.Close() + // calculate cheksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum + // TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ... + // It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used + // but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ... n, err := io.Copy(file, src) // If the HTTP PATCH request gets interrupted in the middle (e.g. because @@ -387,7 +414,6 @@ func (upload *fileUpload) writeInfo() error { // FinishUpload finishes an upload and moves the file to the internal destination func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { - log := appctx.GetLogger(upload.ctx) n := &Node{ lu: upload.fs.lu, @@ -401,6 +427,54 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { } targetPath := upload.fs.lu.toInternalPath(n.ID) + sublog := appctx.GetLogger(upload.ctx).With().Interface("info", upload.info).Str("binPath", upload.binPath).Str("targetPath", targetPath).Logger() + + // calculate the checksum of the written bytes + // they will all be written to the metadata later, so we cannot omit any of them + // TODO only calculate the checksum in sync that was requested to match, the rest could be async ... but the tests currently expect all to be present + // TODO the hashes all implement BinaryMarshaler so we could try to persist the state for resumable upload. we would neet do keep track of the copied bytes ... + sha1h := sha1.New() + md5h := md5.New() + adler32h := adler32.New() + { + f, err := os.Open(upload.binPath) + if err != nil { + sublog.Err(err).Msg("ocisfs: could not open file for checksumming") + // we can continue if no oc checksum header is set + } + defer f.Close() + + r1 := io.TeeReader(f, sha1h) + r2 := io.TeeReader(r1, md5h) + + if _, err := io.Copy(adler32h, r2); err != nil { + sublog.Err(err).Msg("ocisfs: could not copy bytes for checksumming") + } + } + // compare if they match the sent checksum + // TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads + if upload.info.MetaData["checksum"] != "" { + parts := strings.SplitN(upload.info.MetaData["checksum"], " ", 2) + if len(parts) != 2 { + return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'") + } + switch parts[0] { + case "sha1": + err = upload.checkHash(parts[1], sha1h) + case "md5": + err = upload.checkHash(parts[1], md5h) + case "adler32": + err = upload.checkHash(parts[1], adler32h) + default: + err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0]) + } + if err != nil { + return err + } + } + + // defer writing the checksums until the node is in place + // if target exists create new version var fi os.FileInfo if fi, err = os.Stat(targetPath); err == nil { @@ -408,9 +482,8 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { versionsPath := upload.fs.lu.toInternalPath(n.ID + ".REV." + fi.ModTime().UTC().Format(time.RFC3339Nano)) if err = os.Rename(targetPath, versionsPath); err != nil { - log.Err(err).Interface("info", upload.info). - Str("binPath", upload.binPath). - Str("targetPath", targetPath). + sublog.Err(err). + Str("versionsPath", versionsPath). Msg("ocisfs: could not create version") return } @@ -420,14 +493,16 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { // TODO put uploads on the same underlying storage as the destination dir? // TODO trigger a workflow as the final rename might eg involve antivirus scanning if err = os.Rename(upload.binPath, targetPath); err != nil { - log := appctx.GetLogger(upload.ctx) - log.Err(err).Interface("info", upload.info). - Str("binPath", upload.binPath). - Str("targetPath", targetPath). + sublog.Err(err). Msg("ocisfs: could not rename") return } + // now try write all checksums + tryWritingChecksum(&sublog, n, "sha1", sha1h) + tryWritingChecksum(&sublog, n, "md5", md5h) + tryWritingChecksum(&sublog, n, "adler32", adler32h) + // who will become the owner? the owner of the parent actually ... not the currently logged in user err = n.writeMetadata(&userpb.UserId{ Idp: upload.info.Storage["OwnerIdp"], @@ -442,10 +517,8 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { var link string link, err = os.Readlink(childNameLink) if err == nil && link != "../"+n.ID { - log.Err(err). - Interface("info", upload.info). + sublog.Err(err). Interface("node", n). - Str("targetPath", targetPath). Str("childNameLink", childNameLink). Str("link", link). Msg("ocisfs: child name link has wrong target id, repairing") @@ -463,7 +536,7 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { // only delete the upload if it was successfully written to the storage if err = os.Remove(upload.infoPath); err != nil { if !os.IsNotExist(err) { - log.Err(err).Interface("info", upload.info).Msg("ocisfs: could not delete upload info") + sublog.Err(err).Msg("ocisfs: could not delete upload info") return } } @@ -481,6 +554,32 @@ func (upload *fileUpload) FinishUpload(ctx context.Context) (err error) { return upload.fs.tp.Propagate(upload.ctx, n) } +func (upload *fileUpload) checkHash(expected string, h hash.Hash) error { + if expected != hex.EncodeToString(h.Sum(nil)) { + upload.discardChunk() + return errtypes.ChecksumMismatch(fmt.Sprintf("invalid checksum: expected %s got %x", upload.info.MetaData["checksum"], h.Sum(nil))) + } + return nil +} +func tryWritingChecksum(log *zerolog.Logger, n *Node, algo string, h hash.Hash) { + if err := n.SetChecksum(algo, h); err != nil { + log.Err(err). + Str("csType", algo). + Bytes("hash", h.Sum(nil)). + Msg("ocisfs: could not write checksum") + // this is not critical, the bytes are there so we will continue + } +} + +func (upload *fileUpload) discardChunk() { + if err := os.Remove(upload.binPath); err != nil { + if !os.IsNotExist(err) { + appctx.GetLogger(upload.ctx).Err(err).Interface("info", upload.info).Str("binPath", upload.binPath).Interface("info", upload.info).Msg("ocisfs: could not discard chunk") + return + } + } +} + // To implement the termination extension as specified in https://tus.io/protocols/resumable-upload.html#termination // - the storage needs to implement AsTerminatableUpload // - the upload needs to implement Terminate diff --git a/tests/acceptance/expected-failures-on-OCIS-storage.md b/tests/acceptance/expected-failures-on-OCIS-storage.md index 5f3e8cf1f3..0362729fe6 100644 --- a/tests/acceptance/expected-failures-on-OCIS-storage.md +++ b/tests/acceptance/expected-failures-on-OCIS-storage.md @@ -4,19 +4,8 @@ - [apiWebdavProperties1/setFileProperties.feature:33](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiWebdavProperties1/setFileProperties.feature#L33) ### [Checksum feature](https://github.com/owncloud/ocis-reva/issues/196) -- [apiMain/checksums.feature:24](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L24) -- [apiMain/checksums.feature:25](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L25) -- [apiMain/checksums.feature:35](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L35) -- [apiMain/checksums.feature:36](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L36) -- [apiMain/checksums.feature:46](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L46) -- [apiMain/checksums.feature:47](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L47) -- [apiMain/checksums.feature:50](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L50) - [apiMain/checksums.feature:58](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L58) - [apiMain/checksums.feature:67](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L67) -- [apiMain/checksums.feature:99](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L99) -- [apiMain/checksums.feature:100](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L100) -- [apiMain/checksums.feature:103](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L103) -- [apiMain/checksums.feature:110](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L110) - [apiMain/checksums.feature:119](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L119) - [apiMain/checksums.feature:129](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L129) - [apiMain/checksums.feature:138](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L138) @@ -24,17 +13,7 @@ - [apiMain/checksums.feature:158](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L158) - [apiMain/checksums.feature:174](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L174) - [apiMain/checksums.feature:192](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L192) -- [apiMain/checksums.feature:217](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L217) -- [apiMain/checksums.feature:218](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L218) -- [apiMain/checksums.feature:239](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L239) -- [apiMain/checksums.feature:240](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L240) - [apiMain/checksums.feature:258](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L258) -- [apiMain/checksums.feature:279](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L279) -- [apiMain/checksums.feature:280](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L280) -- [apiMain/checksums.feature:295](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L295) -- [apiMain/checksums.feature:296](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L296) -- [apiMain/checksums.feature:308](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L308) -- [apiMain/checksums.feature:309](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L309) - [apiMain/checksums.feature:312](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L312) - [apiMain/checksums.feature:324](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiMain/checksums.feature#L324) @@ -900,7 +879,6 @@ special character username not valid - [apiVersions/fileVersions.feature:88](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L88) - [apiVersions/fileVersions.feature:89](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L89) - [apiVersions/fileVersions.feature:93](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L93) -- [apiVersions/fileVersions.feature:104](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L104) - [apiVersions/fileVersions.feature:288](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L288) - [apiVersions/fileVersions.feature:362](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L362) - [apiVersions/fileVersions.feature:373](https://github.com/owncloud/core/blob/master/tests/acceptance/features/apiVersions/fileVersions.feature#L373) diff --git a/tests/acceptance/features/apiOcisSpecific/apiMain-checksums.feature b/tests/acceptance/features/apiOcisSpecific/apiMain-checksums.feature index 2576d2a1d7..080aa7b57d 100644 --- a/tests/acceptance/features/apiOcisSpecific/apiMain-checksums.feature +++ b/tests/acceptance/features/apiOcisSpecific/apiMain-checksums.feature @@ -4,40 +4,6 @@ Feature: checksums Background: Given user "Alice" has been created with default attributes and without skeleton files - @issue-ocis-reva-98 - # after fixing all issues delete this Scenario and use the one from oC10 core - Scenario Outline: Uploading a file with checksum should return the checksum in the download header - Given using DAV path - And user "Alice" has uploaded file "filesForUpload/textfile.txt" to "/myChecksumFile.txt" with checksum "MD5:d70b40f177b14b470d1756a3c12b963a" - When user "Alice" downloads file "/myChecksumFile.txt" using the WebDAV API - Then the following headers should not be set - | header | - | OC-Checksum | - Examples: - | dav_version | - | old | - | new | - - @issue-ocis-reva-98 - # after fixing all issues delete this Scenario and use the one from oC10 core - Scenario: Copying file with checksum should return the checksum in the download header using new DAV path - Given using new DAV path - And user "Alice" has uploaded file "filesForUpload/textfile.txt" to "/myChecksumFile.txt" with checksum "MD5:d70b40f177b14b470d1756a3c12b963a" - When user "Alice" copies file "/myChecksumFile.txt" to "/myChecksumFileCopy.txt" using the WebDAV API - And user "Alice" downloads file "/myChecksumFileCopy.txt" using the WebDAV API - Then the following headers should not be set - | header | - | OC-Checksum | - - @issue-ocis-reva-99 - # after fixing all issues delete this Scenario and use the one from oC10 core - Scenario: Upload a file where checksum does not match (old DAV path) - Given using old DAV path - When user "Alice" uploads file with checksum "SHA1:f005ba11" and content "Some Text" to "/chksumtst.txt" using the WebDAV API - Then the HTTP status code should be "201" - And user "Alice" should see the following elements - | /chksumtst.txt | - @issue-ocis-reva-99 @skipOnOcis-OCIS-Storage # after fixing all issues delete this Scenario and use the one from oC10 core Scenario: Upload a file where checksum does not match (new DAV path) @@ -45,18 +11,4 @@ Feature: checksums When user "Alice" uploads file with checksum "SHA1:f005ba11" and content "Some Text" to "/chksumtst.txt" using the WebDAV API Then the HTTP status code should be "201" And user "Alice" should see the following elements - | /chksumtst.txt | - - @issue-ocis-reva-99 - # after fixing all issues delete this Scenario and use the one from oC10 core - Scenario Outline: Uploaded file should have the same checksum when downloaded - Given using DAV path - And user "Alice" has uploaded file with checksum "SHA1:ce5582148c6f0c1282335b87df5ed4be4b781399" and content "Some Text" to "/chksumtst.txt" - When user "Alice" downloads file "/chksumtst.txt" using the WebDAV API - Then the following headers should not be set - | header | - | OC-Checksum | - Examples: - | dav_version | - | old | - | new | + | /chksumtst.txt | \ No newline at end of file