From e2a19f8ed0a226257e93f221a062cdf09e0b37c1 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 12 Nov 2022 15:50:12 +0000 Subject: [PATCH 1/3] Adjust gitea doctor --run storages to check all storage types The doctor check `storages` currently only checks the attachment storage. This PR adds some basic garbage collection functionality for the other types of storage. Signed-off-by: Andrew Thornton --- models/git/lfs.go | 4 +- models/packages/package_blob.go | 7 + models/repo/archiver.go | 28 ++++ models/repo/attachment.go | 6 +- models/repo/avatar.go | 5 + models/user/avatar.go | 5 + modules/doctor/storage.go | 222 ++++++++++++++++++++++++++++---- modules/git/repo_archive.go | 12 ++ modules/util/string.go | 4 +- routers/web/repo/lfs.go | 2 +- 10 files changed, 265 insertions(+), 30 deletions(-) diff --git a/models/git/lfs.go b/models/git/lfs.go index 58042edfdbe1f..94d51d04bad7e 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -236,8 +236,8 @@ func LFSObjectAccessible(user *user_model.User, oid string) (bool, error) { } // LFSObjectIsAssociated checks if a provided Oid is associated -func LFSObjectIsAssociated(oid string) (bool, error) { - return db.GetEngine(db.DefaultContext).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}) +func LFSObjectIsAssociated(ctx context.Context, oid string) (bool, error) { + return db.GetEngine(ctx).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}) } // LFSAutoAssociate auto associates accessible LFSMetaObjects diff --git a/models/packages/package_blob.go b/models/packages/package_blob.go index 8c701d4285d09..fcb71a96ec677 100644 --- a/models/packages/package_blob.go +++ b/models/packages/package_blob.go @@ -62,6 +62,13 @@ func GetBlobByID(ctx context.Context, blobID int64) (*PackageBlob, error) { return pb, nil } +// ExistPackageBlobWithSHA returns if a package blob exists with the provided sha +func ExistPackageBlobWithSHA(ctx context.Context, blobSha256 string) (bool, error) { + return db.GetEngine(ctx).Exist(&PackageBlob{ + HashSHA256: blobSha256, + }) +} + // FindExpiredUnreferencedBlobs gets all blobs without associated files older than the specific duration func FindExpiredUnreferencedBlobs(ctx context.Context, olderThan time.Duration) ([]*PackageBlob, error) { pbs := make([]*PackageBlob, 0, 10) diff --git a/models/repo/archiver.go b/models/repo/archiver.go index 003911943f52a..da86eb6e8ca64 100644 --- a/models/repo/archiver.go +++ b/models/repo/archiver.go @@ -7,11 +7,14 @@ package repo import ( "context" "fmt" + "strconv" + "strings" "time" "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/timeutil" + "code.gitea.io/gitea/modules/util" "xorm.io/builder" ) @@ -65,6 +68,31 @@ func GetRepoArchiver(ctx context.Context, repoID int64, tp git.ArchiveType, comm return nil, nil } +// ExistsRepoArchiverWithStoragePath checks if there is a RepoArchiver for a given storage path +func ExistsRepoArchiverWithStoragePath(ctx context.Context, pth string) (bool, error) { + // fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()) + parts := strings.SplitN(pth, "/", 3) + if len(parts) != 3 { + return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} + } + repoID, err := strconv.ParseInt(parts[0], 10, 64) + if err != nil { + return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} + } + nameExts := strings.SplitN(parts[2], ".", 2) + if len(nameExts) != 2 { + return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} + } + + archiver := &RepoArchiver{ + RepoID: repoID, + CommitID: parts[1] + nameExts[0], + Type: git.ToArchiveType(nameExts[1]), + } + + return db.GetEngine(ctx).Exist(&archiver) +} + // AddRepoArchiver adds an archiver func AddRepoArchiver(ctx context.Context, archiver *RepoArchiver) error { _, err := db.GetEngine(ctx).Insert(archiver) diff --git a/models/repo/attachment.go b/models/repo/attachment.go index 180d7730ba715..df7528df09c9c 100644 --- a/models/repo/attachment.go +++ b/models/repo/attachment.go @@ -122,9 +122,9 @@ func GetAttachmentsByUUIDs(ctx context.Context, uuids []string) ([]*Attachment, return attachments, db.GetEngine(ctx).In("uuid", uuids).Find(&attachments) } -// ExistAttachmentsByUUID returns true if attachment is exist by given UUID -func ExistAttachmentsByUUID(uuid string) (bool, error) { - return db.GetEngine(db.DefaultContext).Where("`uuid`=?", uuid).Exist(new(Attachment)) +// ExistAttachmentsByUUID returns true if attachment exists with the given UUID +func ExistAttachmentsByUUID(ctx context.Context, uuid string) (bool, error) { + return db.GetEngine(ctx).Where("`uuid`=?", uuid).Exist(new(Attachment)) } // GetAttachmentsByIssueID returns all attachments of an issue. diff --git a/models/repo/avatar.go b/models/repo/avatar.go index 1bc37598feef9..6e6dd46712ab3 100644 --- a/models/repo/avatar.go +++ b/models/repo/avatar.go @@ -24,6 +24,11 @@ func (repo *Repository) CustomAvatarRelativePath() string { return repo.Avatar } +// ExistRepoWithAvatar returns true if there is a user with this Avatar +func ExistRepoWithAvatar(ctx context.Context, avatar string) (bool, error) { + return db.GetEngine(ctx).Where("`avatar`=?", avatar).Exist(new(Repository)) +} + // RelAvatarLink returns a relative link to the repository's avatar. func (repo *Repository) RelAvatarLink() string { return repo.relAvatarLink(db.DefaultContext) diff --git a/models/user/avatar.go b/models/user/avatar.go index 102206f3a208e..c56abee11a85c 100644 --- a/models/user/avatar.go +++ b/models/user/avatar.go @@ -111,3 +111,8 @@ func (u *User) IsUploadAvatarChanged(data []byte) bool { avatarID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%d-%x", u.ID, md5.Sum(data))))) return u.Avatar != avatarID } + +// ExistUserWithAvatar returns true if there is a user with this Avatar +func ExistUserWithAvatar(ctx context.Context, avatar string) (bool, error) { + return db.GetEngine(ctx).Where("`avatar`=?", avatar).Exist(new(User)) +} diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go index dafd989cf03b9..62c3044c28f6a 100644 --- a/modules/doctor/storage.go +++ b/modules/doctor/storage.go @@ -6,71 +6,247 @@ package doctor import ( "context" + "errors" + "io/fs" + "strings" + "code.gitea.io/gitea/models/git" + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/util" ) -func checkAttachmentStorageFiles(logger log.Logger, autofix bool) error { - var total, garbageNum int - var deletePaths []string - if err := storage.Attachments.IterateObjects(func(p string, obj storage.Object) error { +type commonStorageCheckOptions struct { + storer storage.ObjectStorage + isAssociated func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) + name string +} + +func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error { + totalCount, unassociatedCount := 0, 0 + totalSize, unassociatedSize := int64(0), int64(0) + + var pathsToDelete []string + if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error { defer obj.Close() - total++ + totalCount++ stat, err := obj.Stat() if err != nil { return err } - exist, err := repo_model.ExistAttachmentsByUUID(stat.Name()) + totalSize += stat.Size() + + associated, err := opts.isAssociated(p, obj, stat) if err != nil { return err } - if !exist { - garbageNum++ + if !associated { + unassociatedCount++ + unassociatedSize += stat.Size() if autofix { - deletePaths = append(deletePaths, p) + pathsToDelete = append(pathsToDelete, p) } } return nil }); err != nil { - logger.Error("storage.Attachments.IterateObjects failed: %v", err) + logger.Error("Error whilst iterating %s storage: %v", opts.name, err) return err } - if garbageNum > 0 { + if unassociatedCount > 0 { if autofix { var deletedNum int - for _, p := range deletePaths { - if err := storage.Attachments.Delete(p); err != nil { - log.Error("Delete attachment %s failed: %v", p, err) + for _, p := range pathsToDelete { + if err := opts.storer.Delete(p); err != nil { + log.Error("Error whilst deleting %s from %s storage: %v", p, opts.name, err) } else { deletedNum++ } } - logger.Info("%d missed information attachment detected, %d deleted.", garbageNum, deletedNum) + logger.Info("Deleted %d/%d unassociated %s(s)", deletedNum, unassociatedCount, opts.name) } else { - logger.Warn("Checked %d attachment, %d missed information.", total, garbageNum) + logger.Warn("Found %d/%d (%s/%s) unassociated %s(s)", unassociatedCount, totalCount, base.FileSize(unassociatedSize), base.FileSize(totalSize), opts.name) } + } else { + logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name) } return nil } -func checkStorageFiles(ctx context.Context, logger log.Logger, autofix bool) error { - if err := storage.Init(); err != nil { - logger.Error("storage.Init failed: %v", err) - return err +type storageCheckOptions struct { + All bool + Attachments bool + LFS bool + Avatars bool + RepoAvatars bool + RepoArchives bool + Packages bool +} + +func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger log.Logger, autofix bool) error { + return func(ctx context.Context, logger log.Logger, autofix bool) error { + if err := storage.Init(); err != nil { + logger.Error("storage.Init failed: %v", err) + return err + } + + if opts.Attachments || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.Attachments, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + return repo_model.ExistAttachmentsByUUID(ctx, stat.Name()) + }, + name: "attachment", + }); err != nil { + return err + } + } + + if opts.LFS || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.LFS, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + // The oid of an LFS stored object is the name but with all the path.Separators removed + oid := strings.ReplaceAll(path, "/", "") + + return git.LFSObjectIsAssociated(ctx, oid) + }, + name: "LFS file", + }); err != nil { + return err + } + } + + if opts.Avatars || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.Avatars, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + return user.ExistUserWithAvatar(ctx, path) + }, + name: "avatar", + }); err != nil { + return err + } + } + + if opts.RepoAvatars || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.RepoAvatars, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + return repo.ExistRepoWithAvatar(ctx, path) + }, + name: "repo avatar", + }); err != nil { + return err + } + } + + if opts.RepoArchives || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.RepoAvatars, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + has, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path) + if err == nil || errors.Is(err, util.ErrInvalidArgument) { + // invalid arguments mean that the object is not a valid repo archiver and it should be removed + return has, nil + } + return has, err + }, + name: "repo archive", + }); err != nil { + return err + } + } + + if opts.Packages || opts.All { + if err := commonCheckStorage(ctx, logger, autofix, + &commonStorageCheckOptions{ + storer: storage.Packages, + isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + parts := strings.SplitN(path, "/", 3) + if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] { + return false, nil + } + + return packages.ExistPackageBlobWithSHA(ctx, parts[2]) + }, + name: "package blob", + }); err != nil { + return err + } + } + + return nil } - return checkAttachmentStorageFiles(logger, autofix) } func init() { Register(&Check{ - Title: "Check if there is garbage storage files", + Title: "Check if there are unassociated storage files", Name: "storages", IsDefault: false, - Run: checkStorageFiles, + Run: checkStorage(&storageCheckOptions{All: true}), + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) + + Register(&Check{ + Title: "Check if there are unassociated attachments in storage", + Name: "storage-attachments", + IsDefault: false, + Run: checkStorage(&storageCheckOptions{Attachments: true}), + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) + + Register(&Check{ + Title: "Check if there are unassociated lfs files in storage", + Name: "storage-lfs", + IsDefault: false, + Run: checkStorage(&storageCheckOptions{LFS: true}), + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) + + Register(&Check{ + Title: "Check if there are unassociated avatars in storage", + Name: "storage-avatars", + IsDefault: false, + Run: checkStorage(&storageCheckOptions{Avatars: true, RepoAvatars: true}), + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) + + Register(&Check{ + Title: "Check if there are unassociated archives in storage", + Name: "storage-archives", + IsDefault: false, + Run: checkStorage(&storageCheckOptions{RepoArchives: true}), + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) + + Register(&Check{ + Title: "Check if there are unassociated package blobs in storage", + Name: "storage-packages", + IsDefault: false, + Run: checkStorage(&storageCheckOptions{Packages: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, diff --git a/modules/git/repo_archive.go b/modules/git/repo_archive.go index a0cbfba5d965d..13be2004ca2f4 100644 --- a/modules/git/repo_archive.go +++ b/modules/git/repo_archive.go @@ -38,6 +38,18 @@ func (a ArchiveType) String() string { return "unknown" } +func ToArchiveType(s string) ArchiveType { + switch s { + case "zip": + return ZIP + case "tar.gz": + return TARGZ + case "bundle": + return BUNDLE + } + return 0 +} + // CreateArchive create archive content to the target path func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error { if format.String() == "unknown" { diff --git a/modules/util/string.go b/modules/util/string.go index 2da2bc5dc4eb9..13f90c2e701ae 100644 --- a/modules/util/string.go +++ b/modules/util/string.go @@ -4,7 +4,9 @@ package util -import "github.com/yuin/goldmark/util" +import ( + "github.com/yuin/goldmark/util" +) func isSnakeCaseUpper(c byte) bool { return 'A' <= c && c <= 'Z' diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 67cb6837a56a6..5b41ddae502cc 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -478,7 +478,7 @@ func LFSPointerFiles(ctx *context.Context) { return err } if !result.Associatable { - associated, err := git_model.LFSObjectIsAssociated(pointerBlob.Oid) + associated, err := git_model.LFSObjectIsAssociated(ctx, pointerBlob.Oid) if err != nil { return err } From 1922726f7086bf8ac69b2ebd424e641766432793 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 13 Nov 2022 12:05:08 +0000 Subject: [PATCH 2/3] as per review Signed-off-by: Andrew Thornton --- models/git/lfs.go | 4 +- models/repo/archiver.go | 46 ++++++++++------- models/repo/avatar.go | 8 +-- models/user/avatar.go | 8 +-- modules/doctor/storage.go | 86 +++++++++++++++++-------------- modules/packages/content_store.go | 12 +++++ modules/util/string.go | 4 +- routers/web/repo/lfs.go | 2 +- 8 files changed, 100 insertions(+), 70 deletions(-) diff --git a/models/git/lfs.go b/models/git/lfs.go index 94d51d04bad7e..5f5a30798853d 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -235,8 +235,8 @@ func LFSObjectAccessible(user *user_model.User, oid string) (bool, error) { return count > 0, err } -// LFSObjectIsAssociated checks if a provided Oid is associated -func LFSObjectIsAssociated(ctx context.Context, oid string) (bool, error) { +// ExistsLFSObject checks if a provided Oid exists within the DB +func ExistsLFSObject(ctx context.Context, oid string) (bool, error) { return db.GetEngine(ctx).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}) } diff --git a/models/repo/archiver.go b/models/repo/archiver.go index da86eb6e8ca64..84358ce0dc690 100644 --- a/models/repo/archiver.go +++ b/models/repo/archiver.go @@ -47,6 +47,28 @@ func (archiver *RepoArchiver) RelativePath() string { return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()) } +// repoArchiverForRelativePath takes a relativePath created from (archiver *RepoArchiver) RelativePath() and creates a shell repoArchiver struct representing it +func repoArchiverForRelativePath(relativePath string) (*RepoArchiver, error) { + parts := strings.SplitN(relativePath, "/", 3) + if len(parts) != 3 { + return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument} + } + repoID, err := strconv.ParseInt(parts[0], 10, 64) + if err != nil { + return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument} + } + nameExts := strings.SplitN(parts[2], ".", 2) + if len(nameExts) != 2 { + return nil, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument} + } + + return &RepoArchiver{ + RepoID: repoID, + CommitID: parts[1] + nameExts[0], + Type: git.ToArchiveType(nameExts[1]), + }, nil +} + var delRepoArchiver = new(RepoArchiver) // DeleteRepoArchiver delete archiver @@ -69,28 +91,14 @@ func GetRepoArchiver(ctx context.Context, repoID int64, tp git.ArchiveType, comm } // ExistsRepoArchiverWithStoragePath checks if there is a RepoArchiver for a given storage path -func ExistsRepoArchiverWithStoragePath(ctx context.Context, pth string) (bool, error) { - // fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()) - parts := strings.SplitN(pth, "/", 3) - if len(parts) != 3 { - return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} - } - repoID, err := strconv.ParseInt(parts[0], 10, 64) +func ExistsRepoArchiverWithStoragePath(ctx context.Context, storagePath string) (bool, error) { + // We need to invert the path provided func (archiver *RepoArchiver) RelativePath() above + archiver, err := repoArchiverForRelativePath(storagePath) if err != nil { - return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} - } - nameExts := strings.SplitN(parts[2], ".", 2) - if len(nameExts) != 2 { - return false, util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", pth), Err: util.ErrInvalidArgument} - } - - archiver := &RepoArchiver{ - RepoID: repoID, - CommitID: parts[1] + nameExts[0], - Type: git.ToArchiveType(nameExts[1]), + return false, err } - return db.GetEngine(ctx).Exist(&archiver) + return db.GetEngine(ctx).Exist(archiver) } // AddRepoArchiver adds an archiver diff --git a/models/repo/avatar.go b/models/repo/avatar.go index 6e6dd46712ab3..84b9f5ac21eb6 100644 --- a/models/repo/avatar.go +++ b/models/repo/avatar.go @@ -24,9 +24,11 @@ func (repo *Repository) CustomAvatarRelativePath() string { return repo.Avatar } -// ExistRepoWithAvatar returns true if there is a user with this Avatar -func ExistRepoWithAvatar(ctx context.Context, avatar string) (bool, error) { - return db.GetEngine(ctx).Where("`avatar`=?", avatar).Exist(new(Repository)) +// ExistsWithAvatarAtStoragePath returns true if there is a user with this Avatar +func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) { + // See func (repo *Repository) CustomAvatarRelativePath() + // repo.Avatar is used directly as the storage path - therefore we can check for existence directly using the path + return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(Repository)) } // RelAvatarLink returns a relative link to the repository's avatar. diff --git a/models/user/avatar.go b/models/user/avatar.go index c56abee11a85c..f5237667464d4 100644 --- a/models/user/avatar.go +++ b/models/user/avatar.go @@ -112,7 +112,9 @@ func (u *User) IsUploadAvatarChanged(data []byte) bool { return u.Avatar != avatarID } -// ExistUserWithAvatar returns true if there is a user with this Avatar -func ExistUserWithAvatar(ctx context.Context, avatar string) (bool, error) { - return db.GetEngine(ctx).Where("`avatar`=?", avatar).Exist(new(User)) +// ExistsWithAvatarAtStoragePath returns true if there is a user with this Avatar +func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) { + // See func (u *User) CustomAvatarRelativePath() + // u.Avatar is used directly as the storage path - therefore we can check for existence directly using the path + return db.GetEngine(ctx).Where("`avatar`=?", storagePath).Exist(new(User)) } diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go index 62c3044c28f6a..478ca5b8278b8 100644 --- a/modules/doctor/storage.go +++ b/modules/doctor/storage.go @@ -13,23 +13,23 @@ import ( "code.gitea.io/gitea/models/git" "code.gitea.io/gitea/models/packages" "code.gitea.io/gitea/models/repo" - repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/log" + packages_module "code.gitea.io/gitea/modules/packages" "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/util" ) type commonStorageCheckOptions struct { - storer storage.ObjectStorage - isAssociated func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) - name string + storer storage.ObjectStorage + isOrphaned func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) + name string } func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error { - totalCount, unassociatedCount := 0, 0 - totalSize, unassociatedSize := int64(0), int64(0) + totalCount, orphanedCount := 0, 0 + totalSize, orphanedSize := int64(0), int64(0) var pathsToDelete []string if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error { @@ -42,13 +42,13 @@ func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, op } totalSize += stat.Size() - associated, err := opts.isAssociated(p, obj, stat) + orphaned, err := opts.isOrphaned(p, obj, stat) if err != nil { return err } - if !associated { - unassociatedCount++ - unassociatedSize += stat.Size() + if orphaned { + orphanedCount++ + orphanedSize += stat.Size() if autofix { pathsToDelete = append(pathsToDelete, p) } @@ -59,7 +59,7 @@ func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, op return err } - if unassociatedCount > 0 { + if orphanedCount > 0 { if autofix { var deletedNum int for _, p := range pathsToDelete { @@ -69,9 +69,9 @@ func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, op deletedNum++ } } - logger.Info("Deleted %d/%d unassociated %s(s)", deletedNum, unassociatedCount, opts.name) + logger.Info("Deleted %d/%d orphaned %s(s)", deletedNum, orphanedCount, opts.name) } else { - logger.Warn("Found %d/%d (%s/%s) unassociated %s(s)", unassociatedCount, totalCount, base.FileSize(unassociatedSize), base.FileSize(totalSize), opts.name) + logger.Warn("Found %d/%d (%s/%s) orphaned %s(s)", orphanedCount, totalCount, base.FileSize(orphanedSize), base.FileSize(totalSize), opts.name) } } else { logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name) @@ -79,7 +79,7 @@ func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, op return nil } -type storageCheckOptions struct { +type checkStorageOptions struct { All bool Attachments bool LFS bool @@ -89,7 +89,8 @@ type storageCheckOptions struct { Packages bool } -func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger log.Logger, autofix bool) error { +// checkStorage will return a doctor check function to check the requested storage types for "orphaned" stored object/files and optionally delete them +func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger log.Logger, autofix bool) error { return func(ctx context.Context, logger log.Logger, autofix bool) error { if err := storage.Init(); err != nil { logger.Error("storage.Init failed: %v", err) @@ -100,8 +101,9 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.Attachments, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { - return repo_model.ExistAttachmentsByUUID(ctx, stat.Name()) + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + exists, err := repo.ExistAttachmentsByUUID(ctx, stat.Name()) + return !exists, err }, name: "attachment", }); err != nil { @@ -113,11 +115,11 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.LFS, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { // The oid of an LFS stored object is the name but with all the path.Separators removed oid := strings.ReplaceAll(path, "/", "") - - return git.LFSObjectIsAssociated(ctx, oid) + exists, err := git.ExistsLFSObject(ctx, oid) + return !exists, err }, name: "LFS file", }); err != nil { @@ -129,8 +131,9 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.Avatars, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { - return user.ExistUserWithAvatar(ctx, path) + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + exists, err := user.ExistsWithAvatarAtStoragePath(ctx, path) + return !exists, err }, name: "avatar", }); err != nil { @@ -142,8 +145,9 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.RepoAvatars, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { - return repo.ExistRepoWithAvatar(ctx, path) + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + exists, err := repo.ExistsWithAvatarAtStoragePath(ctx, path) + return !exists, err }, name: "repo avatar", }); err != nil { @@ -155,13 +159,13 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.RepoAvatars, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { - has, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path) + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + exists, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path) if err == nil || errors.Is(err, util.ErrInvalidArgument) { // invalid arguments mean that the object is not a valid repo archiver and it should be removed - return has, nil + return !exists, nil } - return has, err + return !exists, err }, name: "repo archive", }); err != nil { @@ -173,13 +177,17 @@ func checkStorage(opts *storageCheckOptions) func(ctx context.Context, logger lo if err := commonCheckStorage(ctx, logger, autofix, &commonStorageCheckOptions{ storer: storage.Packages, - isAssociated: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { - parts := strings.SplitN(path, "/", 3) - if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] { - return false, nil + isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) { + key, err := packages_module.RelativePathToKey(path) + if err != nil { + // If there is an error here then the relative path does not match a valid package + // Therefore it is orphaned by default + return true, nil } - return packages.ExistPackageBlobWithSHA(ctx, parts[2]) + exists, err := packages.ExistPackageBlobWithSHA(ctx, string(key)) + + return !exists, err }, name: "package blob", }); err != nil { @@ -196,7 +204,7 @@ func init() { Title: "Check if there are unassociated storage files", Name: "storages", IsDefault: false, - Run: checkStorage(&storageCheckOptions{All: true}), + Run: checkStorage(&checkStorageOptions{All: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, @@ -206,7 +214,7 @@ func init() { Title: "Check if there are unassociated attachments in storage", Name: "storage-attachments", IsDefault: false, - Run: checkStorage(&storageCheckOptions{Attachments: true}), + Run: checkStorage(&checkStorageOptions{Attachments: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, @@ -216,7 +224,7 @@ func init() { Title: "Check if there are unassociated lfs files in storage", Name: "storage-lfs", IsDefault: false, - Run: checkStorage(&storageCheckOptions{LFS: true}), + Run: checkStorage(&checkStorageOptions{LFS: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, @@ -226,7 +234,7 @@ func init() { Title: "Check if there are unassociated avatars in storage", Name: "storage-avatars", IsDefault: false, - Run: checkStorage(&storageCheckOptions{Avatars: true, RepoAvatars: true}), + Run: checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, @@ -236,7 +244,7 @@ func init() { Title: "Check if there are unassociated archives in storage", Name: "storage-archives", IsDefault: false, - Run: checkStorage(&storageCheckOptions{RepoArchives: true}), + Run: checkStorage(&checkStorageOptions{RepoArchives: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, @@ -246,7 +254,7 @@ func init() { Title: "Check if there are unassociated package blobs in storage", Name: "storage-packages", IsDefault: false, - Run: checkStorage(&storageCheckOptions{Packages: true}), + Run: checkStorage(&checkStorageOptions{Packages: true}), AbortIfFailed: false, SkipDatabaseInitialization: false, Priority: 1, diff --git a/modules/packages/content_store.go b/modules/packages/content_store.go index a3a5d1a6663c8..be416ac269350 100644 --- a/modules/packages/content_store.go +++ b/modules/packages/content_store.go @@ -7,8 +7,10 @@ package packages import ( "io" "path" + "strings" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/util" ) // BlobHash256Key is the key to address a blob content @@ -45,3 +47,13 @@ func (s *ContentStore) Delete(key BlobHash256Key) error { func KeyToRelativePath(key BlobHash256Key) string { return path.Join(string(key)[0:2], string(key)[2:4], string(key)) } + +// RelativePathToKey converts a relative path aa/bb/aabb000000... to the sha256 key aabb000000... +func RelativePathToKey(relativePath string) (BlobHash256Key, error) { + parts := strings.SplitN(relativePath, "/", 3) + if len(parts) != 3 || len(parts[0]) != 2 || len(parts[1]) != 2 || len(parts[2]) < 4 || parts[0]+parts[1] != parts[2][0:4] { + return "", util.ErrInvalidArgument + } + + return BlobHash256Key(parts[2]), nil +} diff --git a/modules/util/string.go b/modules/util/string.go index 13f90c2e701ae..2da2bc5dc4eb9 100644 --- a/modules/util/string.go +++ b/modules/util/string.go @@ -4,9 +4,7 @@ package util -import ( - "github.com/yuin/goldmark/util" -) +import "github.com/yuin/goldmark/util" func isSnakeCaseUpper(c byte) bool { return 'A' <= c && c <= 'Z' diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 5b41ddae502cc..9bf4307bfe092 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -478,7 +478,7 @@ func LFSPointerFiles(ctx *context.Context) { return err } if !result.Associatable { - associated, err := git_model.LFSObjectIsAssociated(ctx, pointerBlob.Oid) + associated, err := git_model.ExistsLFSObject(ctx, pointerBlob.Oid) if err != nil { return err } From 7868e57ee0a5fd61a76b628963486cf2bb955df7 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 13 Nov 2022 12:14:46 +0000 Subject: [PATCH 3/3] also change to use orphaned in the descriptions Signed-off-by: Andrew Thornton --- modules/doctor/storage.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/doctor/storage.go b/modules/doctor/storage.go index 478ca5b8278b8..8ae9168ea6e52 100644 --- a/modules/doctor/storage.go +++ b/modules/doctor/storage.go @@ -201,7 +201,7 @@ func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger lo func init() { Register(&Check{ - Title: "Check if there are unassociated storage files", + Title: "Check if there are orphaned storage files", Name: "storages", IsDefault: false, Run: checkStorage(&checkStorageOptions{All: true}), @@ -211,7 +211,7 @@ func init() { }) Register(&Check{ - Title: "Check if there are unassociated attachments in storage", + Title: "Check if there are orphaned attachments in storage", Name: "storage-attachments", IsDefault: false, Run: checkStorage(&checkStorageOptions{Attachments: true}), @@ -221,7 +221,7 @@ func init() { }) Register(&Check{ - Title: "Check if there are unassociated lfs files in storage", + Title: "Check if there are orphaned lfs files in storage", Name: "storage-lfs", IsDefault: false, Run: checkStorage(&checkStorageOptions{LFS: true}), @@ -231,7 +231,7 @@ func init() { }) Register(&Check{ - Title: "Check if there are unassociated avatars in storage", + Title: "Check if there are orphaned avatars in storage", Name: "storage-avatars", IsDefault: false, Run: checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}), @@ -241,7 +241,7 @@ func init() { }) Register(&Check{ - Title: "Check if there are unassociated archives in storage", + Title: "Check if there are orphaned archives in storage", Name: "storage-archives", IsDefault: false, Run: checkStorage(&checkStorageOptions{RepoArchives: true}), @@ -251,7 +251,7 @@ func init() { }) Register(&Check{ - Title: "Check if there are unassociated package blobs in storage", + Title: "Check if there are orphaned package blobs in storage", Name: "storage-packages", IsDefault: false, Run: checkStorage(&checkStorageOptions{Packages: true}),