diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index a7476ad1be818..1f22a752b2bdf 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2645,7 +2645,12 @@ LEVEL = Info ;LIMIT_SIZE_HELM = -1 ;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_MAVEN = -1 +;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive. +;; The cleanup of expired packages/data then removes the older build files within all Maven snapshot versions. +;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1 ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) +;; Enable debug mode for the Maven snapshot cleanup. While enabled, no snapshot version artifacts are deleted; the files that would have been deleted are only logged. +;DEBUG_MAVEN_CLEANUP = true ;LIMIT_SIZE_NPM = -1 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NUGET = -1 diff --git a/models/packages/package_file.go b/models/packages/package_file.go index 270cb32fdf6b5..f940efc90e0ec 100644 --- a/models/packages/package_file.go +++ b/models/packages/package_file.go @@ -5,6 +5,8 @@ package packages import ( "context" + "errors" + "fmt" "strconv" "strings" "time" @@ -21,6 +23,8 @@ func init() { } var ( + // ErrMetadataFile indicates a metadata file + ErrMetadataFile = errors.New("metadata file") // ErrDuplicatePackageFile indicates a duplicated package file error ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists") // ErrPackageFileNotExist indicates a package file not exist error @@ -226,6 +230,74 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error) return db.Exist[PackageFile](ctx, opts.toConds()) } +// GetFilesBelowBuildNumber retrieves all files of a Maven snapshot version whose build number is <= maxBuildNumber. 
+// Returns two slices: one for filtered files and one for skipped files. +func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) { + if maxBuildNumber <= 0 { + return nil, nil, errors.New("maxBuildNumber must be a positive integer") + } + + files, err := GetFilesByVersionID(ctx, versionID) + if err != nil { + return nil, nil, fmt.Errorf("failed to retrieve files: %w", err) + } + + var filteredFiles, skippedFiles []*PackageFile + for _, file := range files { + buildNumber, err := extractBuildNumberFromFileName(file.Name, classifiers...) + if err != nil { + if !errors.Is(err, ErrMetadataFile) { + skippedFiles = append(skippedFiles, file) + } + continue + } + if buildNumber <= maxBuildNumber { + filteredFiles = append(filteredFiles, file) + } + } + + return filteredFiles, skippedFiles, nil +} + +// extractBuildNumberFromFileName extracts the build number from a Maven snapshot file name. +// Expected formats: +// +// "artifact-1.0.0-20250311.083409-9.tgz" returns 9 +// "artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10 +func extractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) { + if strings.Contains(filename, "maven-metadata.xml") { + return 0, ErrMetadataFile + } + + dotIdx := strings.LastIndex(filename, ".") + if dotIdx == -1 { + return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename) + } + base := filename[:dotIdx] + + // Remove classifier suffix if present. + for _, classifier := range classifiers { + suffix := "-" + classifier + if strings.HasSuffix(base, suffix) { + base = base[:len(base)-len(suffix)] + break + } + } + + // The build number should be the token after the last dash. 
+ lastDash := strings.LastIndex(base, "-") + if lastDash == -1 { + return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename) + } + buildNumberStr := base[lastDash+1:] + buildNumber, err := strconv.Atoi(buildNumberStr) + if err != nil { + return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %v", buildNumberStr, filename, err) + } + + return buildNumber, nil +} + // CalculateFileSize sums up all blob sizes matching the search options. // It does NOT respect the deduplication of blobs. func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) { diff --git a/models/packages/package_version.go b/models/packages/package_version.go index bb7fd895f81da..45a7715abbaf6 100644 --- a/models/packages/package_version.go +++ b/models/packages/package_version.go @@ -120,11 +120,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType // GetVersionsByPackageType gets all versions of a specific type func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) { - pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{ - OwnerID: ownerID, + opts := &PackageSearchOptions{ Type: packageType, IsInternal: optional.Some(false), - }) + } + + if ownerID != 0 { + opts.OwnerID = ownerID + } + + pvs, _, err := SearchVersions(ctx, opts) return pvs, err } diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go index a61a62c086208..8dcbf14d18135 100644 --- a/modules/packages/maven/metadata.go +++ b/modules/packages/maven/metadata.go @@ -5,7 +5,9 @@ package maven import ( "encoding/xml" + "errors" "io" + "strconv" "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/validation" @@ -31,6 +33,12 @@ type Dependency struct { Version string `json:"version,omitempty"` } +// SnapshotMetadata struct holds the build number and the list of classifiers 
for a snapshot version +type SnapshotMetadata struct { + BuildNumber int `json:"build_number,omitempty"` + Classifiers []string `json:"classifiers,omitempty"` +} + type pomStruct struct { XMLName xml.Name `xml:"project"` @@ -61,6 +69,26 @@ type pomStruct struct { } `xml:"dependencies>dependency"` } +type snapshotMetadataStruct struct { + XMLName xml.Name `xml:"metadata"` + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` + Versioning struct { + LastUpdated string `xml:"lastUpdated"` + Snapshot struct { + Timestamp string `xml:"timestamp"` + BuildNumber string `xml:"buildNumber"` + } `xml:"snapshot"` + SnapshotVersions []struct { + Extension string `xml:"extension"` + Classifier string `xml:"classifier"` + Value string `xml:"value"` + Updated string `xml:"updated"` + } `xml:"snapshotVersions>snapshotVersion"` + } `xml:"versioning"` +} + // ParsePackageMetaData parses the metadata of a pom file func ParsePackageMetaData(r io.Reader) (*Metadata, error) { var pom pomStruct @@ -109,3 +137,31 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) { Dependencies: dependencies, }, nil } + +// ParseSnapshotVersionMetaData parses the Maven snapshot version metadata to extract the build number and the list of available classifiers. 
+func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) { + var metadata snapshotMetadataStruct + + dec := xml.NewDecoder(r) + dec.CharsetReader = charset.NewReaderLabel + if err := dec.Decode(&metadata); err != nil { + return nil, err + } + + buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber) + if err != nil { + return nil, errors.New("invalid or missing build number in snapshot metadata") + } + + var classifiers []string + for _, snapshotVersion := range metadata.Versioning.SnapshotVersions { + if snapshotVersion.Classifier != "" { + classifiers = append(classifiers, snapshotVersion.Classifier) + } + } + + return &SnapshotMetadata{ + BuildNumber: buildNumber, + Classifiers: classifiers, + }, nil +} diff --git a/modules/setting/packages.go b/modules/setting/packages.go index b598424064832..c1628a8ca80b3 100644 --- a/modules/setting/packages.go +++ b/modules/setting/packages.go @@ -41,10 +41,13 @@ var ( LimitSizeSwift int64 LimitSizeVagrant int64 - DefaultRPMSignEnabled bool + DefaultRPMSignEnabled bool + RetainMavenSnapshotBuilds int + DebugMavenCleanup bool }{ - Enabled: true, - LimitTotalOwnerCount: -1, + Enabled: true, + LimitTotalOwnerCount: -1, + RetainMavenSnapshotBuilds: -1, } ) @@ -88,6 +91,8 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) { Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT") Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT") Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false) + Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds) + Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true) return nil } diff --git a/services/packages/cleanup/cleanup.go b/services/packages/cleanup/cleanup.go index b7ba2b6ac4afc..7d70afbd804b3 100644 --- a/services/packages/cleanup/cleanup.go +++ b/services/packages/cleanup/cleanup.go @@ -1,7 +1,7 @@ // Copyright 
2022 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package container +package cleanup import ( "context" @@ -20,6 +20,7 @@ import ( cargo_service "code.gitea.io/gitea/services/packages/cargo" container_service "code.gitea.io/gitea/services/packages/container" debian_service "code.gitea.io/gitea/services/packages/debian" + maven_service "code.gitea.io/gitea/services/packages/maven" rpm_service "code.gitea.io/gitea/services/packages/rpm" ) @@ -166,6 +167,10 @@ func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error return err } + if err := maven_service.CleanupSnapshotVersions(ctx); err != nil { + return err + } + ps, err := packages_model.FindUnreferencedPackages(ctx) if err != nil { return err diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go new file mode 100644 index 0000000000000..f7a14cdc4c323 --- /dev/null +++ b/services/packages/maven/cleanup.go @@ -0,0 +1,133 @@ +package maven + +import ( + "context" + "fmt" + "strings" + + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/packages/maven" + "code.gitea.io/gitea/modules/setting" + packages_service "code.gitea.io/gitea/services/packages" +) + +// CleanupSnapshotVersions removes outdated files of the SNAPSHOT versions of all Maven packages. +func CleanupSnapshotVersions(ctx context.Context) error { + retainBuilds := setting.Packages.RetainMavenSnapshotBuilds + debugSession := setting.Packages.DebugMavenCleanup + log.Debug("Starting Maven CleanupSnapshotVersions with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession) + + if retainBuilds == -1 { + log.Info("Maven CleanupSnapshotVersions skipped because retainBuilds is set to -1") + return nil + } + + if retainBuilds < 1 { + return fmt.Errorf("Maven CleanupSnapshotVersions: forbidden value for retainBuilds: %d. 
Minimum 1 build should be retained", retainBuilds) + } + + versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven) + if err != nil { + return fmt.Errorf("Maven CleanupSnapshotVersions: failed to retrieve Maven package versions: %w", err) + } + + var errors []error + + for _, version := range versions { + if !isSnapshotVersion(version.Version) { + continue + } + + if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil { + errors = append(errors, fmt.Errorf("Maven CleanupSnapshotVersions: version '%s' (ID: %d): %w", version.Version, version.ID, err)) + } + } + + if len(errors) > 0 { + for _, err := range errors { + log.Warn("Maven CleanupSnapshotVersions: Error during cleanup: %v", err) + } + return fmt.Errorf("Maven CleanupSnapshotVersions: cleanup completed with errors: %v", errors) + } + + log.Debug("Completed Maven CleanupSnapshotVersions") + return nil +} + +func isSnapshotVersion(version string) bool { + return strings.HasSuffix(version, "-SNAPSHOT") +} + +func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error { + log.Debug("Starting Maven cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession) + + metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey) + if err != nil { + return fmt.Errorf("cleanSnapshotFiles: failed to retrieve Maven metadata file for version ID %d: %w", versionID, err) + } + + maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile) + if err != nil { + return fmt.Errorf("cleanSnapshotFiles: failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err) + } + + thresholdBuildNumber := maxBuildNumber - retainBuilds + if thresholdBuildNumber <= 0 { + log.Debug("cleanSnapshotFiles: No files to clean up, as the threshold build number is less than or equal to zero for 
versionID %d", versionID) + return nil + } + + filesToRemove, skippedFiles, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...) + if err != nil { + return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version ID %d: %w", versionID, err) + } + + if debugSession { + var fileNamesToRemove, skippedFileNames []string + + for _, file := range filesToRemove { + fileNamesToRemove = append(fileNamesToRemove, file.Name) + } + + for _, file := range skippedFiles { + skippedFileNames = append(skippedFileNames, file.Name) + } + + log.Info("cleanSnapshotFiles: Debug session active. Files to remove: %v, Skipped files: %v", fileNamesToRemove, skippedFileNames) + return nil + } + + for _, file := range filesToRemove { + log.Debug("Removing file '%s' below threshold %d", file.Name, thresholdBuildNumber) + if err := packages_service.DeletePackageFile(ctx, file); err != nil { + return fmt.Errorf("Maven cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err) + } + } + + log.Debug("Completed Maven cleanSnapshotFiles for versionID: %d", versionID) + return nil +} + +func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) { + pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID) + if err != nil { + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package blob: %w", err) + } + + content, _, _, err := packages_service.GetPackageBlobStream(ctx, metadataFile, pb, nil, true) + if err != nil { + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to get package file stream: %w", err) + } + defer content.Close() + + snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content) + if err != nil { + return 0, nil, fmt.Errorf("extractMaxBuildNumber: failed to parse maven-metadata.xml: %w", err) + } + + buildNumber := snapshotMetadata.BuildNumber + classifiers := snapshotMetadata.Classifiers + + return buildNumber, classifiers, nil +} diff --git 
a/services/packages/packages.go b/services/packages/packages.go index bd1d460fd3ba8..7f505622423d4 100644 --- a/services/packages/packages.go +++ b/services/packages/packages.go @@ -600,8 +600,8 @@ func GetPackageFileStream(ctx context.Context, pf *packages_model.PackageFile) ( } // GetPackageBlobStream returns the content of the specific package blob -// If the storage supports direct serving and it's enabled, only the direct serving url is returned. -func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, serveDirectReqParams url.Values) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { +// If the storage supports direct serving and it's enabled, only the direct serving url is returned, unless forceInternalServe is set to true, in which case direct serving is skipped. +func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, serveDirectReqParams url.Values, forceInternalServe ...bool) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { key := packages_module.BlobHash256Key(pb.HashSHA256) cs := packages_module.NewContentStore() @@ -610,7 +610,9 @@ func GetPackageBlobStream(ctx context.Context, pf *packages_model.PackageFile, p var u *url.URL var err error - if cs.ShouldServeDirect() { + internalServe := len(forceInternalServe) > 0 && forceInternalServe[0] + + if !internalServe && cs.ShouldServeDirect() { u, err = cs.GetServeDirectURL(key, pf.Name, serveDirectReqParams) if err != nil && !errors.Is(err, storage.ErrURLNotSupported) { log.Error("Error getting serve direct url: %v", err)