forgejo/services/packages/cleanup/cleanup.go
Mathieu Fenniak ac487e93d9 fix: reduce runtime of container cleanup by relying on mass digest cleanup (#10297)
The package cleanup routine checks every container version for whether it is referenced by a multi-platform manifest, which appears to be a performance problem indicated by CPU profiling collected in #9358 on SQLite systems.  This PR removes that check completely, which isn't necessary since #4698 added a much more performant mass-cleanup of these dangling platform versions.

May fix #9358 completely, but it leaves fundamental scalability concerns with SQLite due to long-running transactions.  The transactions will be shorter with this change.  Requires end-user testing to confirm if sufficiently fixed.

## Checklist

The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org).

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
  - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes...
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [ ] I do not want this change to show in the release notes.
- [x] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be be used for the release notes instead of the title.

<!--start release-notes-assistant-->

## Release notes
<!--URL:https://codeberg.org/forgejo/forgejo-->
- Bug fixes
  - [PR](https://codeberg.org/forgejo/forgejo/pulls/10297): <!--number 10297 --><!--line 0 --><!--description cmVkdWNlIHJ1bnRpbWUgb2YgY29udGFpbmVyIGNsZWFudXAgYnkgcmVseWluZyBvbiBtYXNzIGRpZ2VzdCBjbGVhbnVw-->reduce runtime of container cleanup by relying on mass digest cleanup<!--description-->
<!--end release-notes-assistant-->

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/10297
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-12-05 15:45:47 +01:00

234 lines
7.4 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package container
import (
"context"
"fmt"
"time"
"forgejo.org/models/db"
packages_model "forgejo.org/models/packages"
user_model "forgejo.org/models/user"
"forgejo.org/modules/log"
"forgejo.org/modules/optional"
packages_module "forgejo.org/modules/packages"
packages_service "forgejo.org/services/packages"
alpine_service "forgejo.org/services/packages/alpine"
alt_service "forgejo.org/services/packages/alt"
arch_service "forgejo.org/services/packages/arch"
cargo_service "forgejo.org/services/packages/cargo"
container_service "forgejo.org/services/packages/container"
debian_service "forgejo.org/services/packages/debian"
rpm_service "forgejo.org/services/packages/rpm"
)
// Task method to execute cleanup rules and cleanup expired package data
func CleanupTask(ctx context.Context, olderThan time.Duration) error {
if err := ExecuteCleanupRules(ctx); err != nil {
return err
}
return CleanupExpiredData(ctx, olderThan)
}
func ExecuteCleanupRules(outerCtx context.Context) error {
ctx, committer, err := db.TxContext(outerCtx)
if err != nil {
return err
}
defer committer.Close()
err = packages_model.IterateEnabledCleanupRules(ctx, func(ctx context.Context, pcr *packages_model.PackageCleanupRule) error {
select {
case <-outerCtx.Done():
return db.ErrCancelledf("While processing package cleanup rules")
default:
}
versionsToRemove, err := GetCleanupTargets(ctx, pcr, true)
if err != nil {
return fmt.Errorf("CleanupRule [%d]: GetCleanupTargets failed: %w", pcr.ID, err)
}
anyVersionDeleted := false
packageWithVersionDeleted := make(map[int64]bool) // set of Package.ID's where at least one package version was removed
for _, ct := range versionsToRemove {
if err := packages_service.DeletePackageVersionAndReferences(ctx, ct.PackageVersion); err != nil {
return fmt.Errorf("CleanupRule [%d]: DeletePackageVersionAndReferences failed: %w", pcr.ID, err)
}
packageWithVersionDeleted[ct.Package.ID] = true
anyVersionDeleted = true
}
if pcr.Type == packages_model.TypeCargo {
for packageID := range packageWithVersionDeleted {
owner, err := user_model.GetUserByID(ctx, pcr.OwnerID)
if err != nil {
return fmt.Errorf("GetUserByID failed: %w", err)
}
if err := cargo_service.UpdatePackageIndexIfExists(ctx, owner, owner, packageID); err != nil {
return fmt.Errorf("CleanupRule [%d]: cargo.UpdatePackageIndexIfExists failed: %w", pcr.ID, err)
}
}
}
if anyVersionDeleted {
switch pcr.Type {
case packages_model.TypeDebian:
if err := debian_service.BuildAllRepositoryFiles(ctx, pcr.OwnerID); err != nil {
return fmt.Errorf("CleanupRule [%d]: debian.BuildAllRepositoryFiles failed: %w", pcr.ID, err)
}
case packages_model.TypeAlpine:
if err := alpine_service.BuildAllRepositoryFiles(ctx, pcr.OwnerID); err != nil {
return fmt.Errorf("CleanupRule [%d]: alpine.BuildAllRepositoryFiles failed: %w", pcr.ID, err)
}
case packages_model.TypeRpm:
if err := rpm_service.BuildAllRepositoryFiles(ctx, pcr.OwnerID); err != nil {
return fmt.Errorf("CleanupRule [%d]: rpm.BuildAllRepositoryFiles failed: %w", pcr.ID, err)
}
case packages_model.TypeArch:
if err := arch_service.BuildAllRepositoryFiles(ctx, pcr.OwnerID); err != nil {
return fmt.Errorf("CleanupRule [%d]: arch.BuildAllRepositoryFiles failed: %w", pcr.ID, err)
}
case packages_model.TypeAlt:
if err := alt_service.BuildAllRepositoryFiles(ctx, pcr.OwnerID); err != nil {
return fmt.Errorf("CleanupRule [%d]: alt.BuildAllRepositoryFiles failed: %w", pcr.ID, err)
}
}
}
return nil
})
if err != nil {
return err
}
return committer.Commit()
}
type CleanupTarget struct {
Package *packages_model.Package
PackageVersion *packages_model.PackageVersion
PackageDescriptor *packages_model.PackageDescriptor
}
func GetCleanupTargets(ctx context.Context, pcr *packages_model.PackageCleanupRule, skipPackageDescriptor bool) ([]*CleanupTarget, error) {
if err := pcr.CompiledPattern(); err != nil {
return nil, err
}
olderThan := time.Now().AddDate(0, 0, -pcr.RemoveDays)
packages, err := packages_model.GetPackagesByType(ctx, pcr.OwnerID, pcr.Type)
if err != nil {
return nil, fmt.Errorf("failure to GetPackagesByType for package cleanup rule: %w", err)
}
versionsToRemove := make([]*CleanupTarget, 0, 10)
for _, p := range packages {
pvs, _, err := packages_model.SearchVersions(ctx, &packages_model.PackageSearchOptions{
PackageID: p.ID,
IsInternal: optional.Some(false),
Sort: packages_model.SortCreatedDesc,
})
if err != nil {
return nil, fmt.Errorf("failure to SearchVersions for package cleanup rule: %w", err)
}
keep := min(len(pvs), pcr.KeepCount)
for _, pv := range pvs[keep:] {
if pcr.Type == packages_model.TypeContainer {
if skip := container_service.ShouldBeSkipped(pv); skip {
log.Debug("Rule[%d]: keep '%s/%s' (container)", pcr.ID, p.Name, pv.Version)
continue
}
}
toMatch := pv.LowerVersion
if pcr.MatchFullName {
toMatch = p.LowerName + "/" + pv.LowerVersion
}
if pcr.KeepPatternMatcher != nil && pcr.KeepPatternMatcher.MatchString(toMatch) {
log.Debug("Rule[%d]: keep '%s/%s' (keep pattern)", pcr.ID, p.Name, pv.Version)
continue
}
if pv.CreatedUnix.AsLocalTime().After(olderThan) {
log.Debug("Rule[%d]: keep '%s/%s' (remove days)", pcr.ID, p.Name, pv.Version)
continue
}
if pcr.RemovePatternMatcher != nil && !pcr.RemovePatternMatcher.MatchString(toMatch) {
log.Debug("Rule[%d]: keep '%s/%s' (remove pattern)", pcr.ID, p.Name, pv.Version)
continue
}
log.Debug("Rule[%d]: remove '%s/%s'", pcr.ID, p.Name, pv.Version)
var pd *packages_model.PackageDescriptor
// GetPackageDescriptor is a bit expensive and can be skipped; only used for cleanup preview to display the package to the UI
if !skipPackageDescriptor {
pd, err = packages_model.GetPackageDescriptor(ctx, pv)
if err != nil {
return nil, fmt.Errorf("failure to GetPackageDescriptor for package cleanup rule: %w", err)
}
}
versionsToRemove = append(versionsToRemove, &CleanupTarget{
Package: p,
PackageVersion: pv,
PackageDescriptor: pd,
})
}
}
return versionsToRemove, nil
}
func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error {
ctx, committer, err := db.TxContext(outerCtx)
if err != nil {
return err
}
defer committer.Close()
if err := container_service.Cleanup(ctx, olderThan); err != nil {
return err
}
pIDs, err := packages_model.FindUnreferencedPackages(ctx)
if err != nil {
return err
}
for _, pID := range pIDs {
if err := packages_model.DeleteAllProperties(ctx, packages_model.PropertyTypePackage, pID); err != nil {
return err
}
if err := packages_model.DeletePackageByID(ctx, pID); err != nil {
return err
}
}
pbs, err := packages_model.FindExpiredUnreferencedBlobs(ctx, olderThan)
if err != nil {
return err
}
for _, pb := range pbs {
if err := packages_model.DeleteBlobByID(ctx, pb.ID); err != nil {
return err
}
}
if err := committer.Commit(); err != nil {
return err
}
contentStore := packages_module.NewContentStore()
for _, pb := range pbs {
if err := contentStore.Delete(packages_module.BlobHash256Key(pb.HashSHA256)); err != nil {
log.Error("Error deleting package blob [%v]: %v", pb.ID, err)
}
}
return nil
}