restic check with snapshot filters (#5469)

---------

Co-authored-by: Michael Eischer <michael.eischer@fau.de>
This commit is contained in:
Winfried Plappert 2025-11-28 19:12:38 +00:00 committed by GitHub
parent 8fdbdc57a0
commit ce57961f14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 170 additions and 18 deletions

View file

@ -0,0 +1,8 @@
Enhancement: `restic check` for specified snapshot(s) via snapshot filtering
Snapshots can now be specified for the command `restic check` on the command line
via the standard snapshot filters (`--tag`, `--host`, `--path` or specifying
snapshot IDs directly). Only the packfiles used by these snapshots are then checked.
https://github.com/restic/restic/issues/3326
https://github.com/restic/restic/pull/5213

View file

@ -15,6 +15,7 @@ import (
"github.com/restic/restic/internal/backend/cache"
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/data"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/global"
"github.com/restic/restic/internal/repository"
@ -71,6 +72,7 @@ type CheckOptions struct {
ReadDataSubset string
CheckUnused bool
WithCache bool
data.SnapshotFilter
}
func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
@ -84,6 +86,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) {
panic(err)
}
f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository")
initMultiSnapshotFilter(f, &opts.SnapshotFilter, true)
}
func checkFlags(opts CheckOptions) error {
@ -220,9 +223,6 @@ func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progres
func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args []string, term ui.Terminal) (checkSummary, error) {
summary := checkSummary{MessageType: "summary"}
if len(args) != 0 {
return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags")
}
var printer progress.Printer
if !gopts.JSON {
@ -231,11 +231,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
printer = newJSONErrorPrinter(term)
}
readDataFilter, err := buildPacksFilter(opts, printer)
if err != nil {
return summary, err
}
cleanup := prepareCheckCache(opts, &gopts, printer)
defer cleanup()
@ -249,7 +244,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
defer unlock()
chkr := checker.New(repo, opts.CheckUnused)
err = chkr.LoadSnapshots(ctx)
err = chkr.LoadSnapshots(ctx, &opts.SnapshotFilter, args)
if err != nil {
return summary, err
}
@ -365,6 +360,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
return summary, ctx.Err()
}
// the following block is only used for tests
if opts.CheckUnused {
unused, err := chkr.UnusedBlobs(ctx)
if err != nil {
@ -376,6 +372,11 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
}
}
readDataFilter, err := buildPacksFilter(opts, printer, chkr.IsFiltered())
if err != nil {
return summary, err
}
if readDataFilter != nil {
p := printer.NewCounter("packs")
errChan := make(chan error)
@ -416,11 +417,16 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args
return summary, nil
}
func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) {
func buildPacksFilter(opts CheckOptions, printer progress.Printer,
filteredStatus bool) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) {
typeData := ""
if filteredStatus {
typeData = "filtered "
}
switch {
case opts.ReadData:
return func(packs map[restic.ID]int64) map[restic.ID]int64 {
printer.P("read all data\n")
printer.P("read all %sdata", typeData)
return packs
}, nil
case opts.ReadDataSubset != "":
@ -431,7 +437,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
return func(packs map[restic.ID]int64) map[restic.ID]int64 {
packCount := uint64(len(packs))
packs = selectPacksByBucket(packs, bucket, totalBuckets)
printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, len(packs), packCount, totalBuckets)
printer.P("read group #%d of %d %sdata packs (out of total %d packs in %d groups", bucket, len(packs), typeData, packCount, totalBuckets)
return packs
}, nil
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
@ -440,7 +446,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
return nil, err
}
return func(packs map[restic.ID]int64) map[restic.ID]int64 {
printer.P("read %.1f%% of data packs\n", percentage)
printer.P("read %.1f%% of %spackfiles", percentage, typeData)
return selectRandomPacksByPercentage(packs, percentage)
}, nil
}
@ -461,7 +467,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m
if repoSize == 0 {
percentage = 100
}
printer.P("read %d bytes (%.1f%%) of data packs\n", subsetSize, percentage)
printer.P("read %d bytes (%.1f%%) of %sdata packs\n", subsetSize, percentage, typeData)
return packs
}, nil
}

View file

@ -2,6 +2,7 @@ package main
import (
"context"
"strings"
"testing"
"github.com/restic/restic/internal/global"
@ -34,3 +35,67 @@ func testRunCheckOutput(t testing.TB, gopts global.Options, checkUnused bool) (s
})
return buf.String(), err
}
// testRunCheckOutputWithOpts runs the check command with the given options
// and positional args and returns the output collected by withCaptureStdout
// together with the error returned by runCheck.
func testRunCheckOutputWithOpts(t testing.TB, gopts global.Options, opts CheckOptions, args []string) (string, error) {
	buf, err := withCaptureStdout(t, gopts, func(ctx context.Context, gopts global.Options) error {
		// NOTE(review): verbosity 2 is presumably needed so that the
		// "N / M packs" counters show up in the captured output - confirm.
		gopts.Verbosity = 2
		// Use the context supplied to the callback rather than a detached
		// context.TODO(), so cancellation by the harness reaches runCheck.
		_, err := runCheck(ctx, opts, gopts, args, gopts.Term)
		return err
	})
	return buf.String(), err
}
func TestCheckWithSnaphotFilter(t *testing.T) {
testCases := []struct {
opts CheckOptions
args []string
expectedOutput string
}{
{ // full --read-data, all snapshots
CheckOptions{ReadData: true},
nil,
"4 / 4 packs",
},
{ // full --read-data, all snapshots
CheckOptions{ReadData: true},
nil,
"2 / 2 snapshots",
},
{ // full --read-data, latest snapshot
CheckOptions{ReadData: true},
[]string{"latest"},
"2 / 2 packs",
},
{ // full --read-data, latest snapshot
CheckOptions{ReadData: true},
[]string{"latest"},
"1 / 1 snapshots",
},
{ // --read-data-subset, latest snapshot
CheckOptions{ReadDataSubset: "1%"},
[]string{"latest"},
"1 / 1 packs",
},
{ // --read-data-subset, latest snapshot
CheckOptions{ReadDataSubset: "1%"},
[]string{"latest"},
"filtered",
},
}
env, cleanup := withTestEnvironment(t)
defer cleanup()
testSetupBackupData(t, env)
opts := BackupOptions{}
testRunBackup(t, env.testdata+"/0", []string{"for_cmd_ls"}, opts, env.gopts)
testRunBackup(t, env.testdata+"/0", []string{"0/9"}, opts, env.gopts)
for _, testCase := range testCases {
output, err := testRunCheckOutputWithOpts(t, env.gopts, testCase.opts, testCase.args)
rtest.OK(t, err)
hasOutput := strings.Contains(output, testCase.expectedOutput)
rtest.Assert(t, hasOutput, `expected to find substring %q, but did not find it`, testCase.expectedOutput)
}
}

View file

@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep
them using the ``repair pack`` command. Use that command instead of the "Repair the
index" section in this guide.
If you only want to check specific snapshots, you can now use the standard
snapshot filter options ``--host``, ``--path`` and ``--tag``, or alternatively
name the snapshot ID(s) explicitly. The selected subset of packfiles
will then be checked for consistency and read when either ``--read-data`` or
``--read-data-subset`` is given.
2. Backup the repository
************************

View file

@ -31,6 +31,10 @@ type Checker struct {
snapshots restic.Lister
repo restic.Repository
// when snapshot filtering is being used
snapshotFilter *data.SnapshotFilter
args []string
}
type checkerRepository interface {
@ -51,12 +55,19 @@ func New(repo checkerRepository, trackUnused bool) *Checker {
return c
}
func (c *Checker) LoadSnapshots(ctx context.Context) error {
// LoadSnapshots stores the result of restic.MemorizeList for the snapshot
// files of the repository on the checker and records the snapshot filter and
// the positional args for later use. The error from MemorizeList is returned
// as is.
func (c *Checker) LoadSnapshots(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) error {
	c.snapshotFilter, c.args = snapshotFilter, args

	snapshots, err := restic.MemorizeList(ctx, c.repo, restic.SnapshotFile)
	// the list is stored even when an error is reported
	c.snapshots = snapshots
	return err
}
// IsFiltered reports whether snapshot filtering is active, that is whether
// positional args were given or the stored snapshot filter is not empty.
// A nil filter (for example before LoadSnapshots has been called) counts as
// "no filtering" instead of being dereferenced.
func (c *Checker) IsFiltered() bool {
	if len(c.args) != 0 {
		return true
	}
	return c.snapshotFilter != nil && !c.snapshotFilter.Empty()
}
// Error is an error that occurred while checking a repository.
type Error struct {
TreeID restic.ID
@ -124,11 +135,39 @@ func loadSnapshotTreeIDs(ctx context.Context, lister restic.Lister, repo restic.
return ids, errs
}
// loadActiveTrees returns the root tree IDs of the snapshots that have to be
// checked, plus the errors encountered while collecting them.
//
// Without active snapshot filtering it defers to loadSnapshotTreeIDs. With
// filtering it collects the trees of the snapshots reported by
// snapshotFilter.FindAll for args; if FindAll fails, no trees are returned.
// As a side effect, a successful filtered run switches on c.trackUnused.
func (c *Checker) loadActiveTrees(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) (trees restic.IDs, errs []error) {
	if !c.IsFiltered() {
		return loadSnapshotTreeIDs(ctx, c.snapshots, c.repo)
	}

	trees = restic.IDs{}
	errs = []error{}

	// collect gathers the tree of every snapshot handed to it; the first
	// error is recorded and returned to FindAll.
	collect := func(_ string, sn *data.Snapshot, err error) error {
		switch {
		case err != nil:
			errs = append(errs, err)
			return err
		case sn != nil:
			trees = append(trees, *sn.Tree)
		}
		return nil
	}

	if err := snapshotFilter.FindAll(ctx, c.snapshots, c.repo, args, collect); err != nil {
		// NOTE(review): if FindAll hands back the callback's own error, it
		// ends up in errs twice - confirm against FindAll.
		errs = append(errs, err)
		return nil, errs
	}

	// track blobs to learn which packs need to be checked
	c.trackUnused = true
	return trees, errs
}
// Structure checks that for all snapshots all referenced data blobs and
// subtrees are available in the index. errChan is closed after all trees have
// been traversed.
func (c *Checker) Structure(ctx context.Context, p *progress.Counter, errChan chan<- error) {
trees, errs := loadSnapshotTreeIDs(ctx, c.snapshots, c.repo)
trees, errs := c.loadActiveTrees(ctx, c.snapshotFilter, c.args)
p.SetMax(uint64(len(trees)))
debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs))
@ -259,3 +298,30 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er
return blobs, err
}
// ReadPacks wraps the ReadPacks method of the embedded Checker.
//
// Without active snapshot filtering the given filter is passed through
// unchanged. With filtering, the set of all packs is first reduced to the
// packs that contain the blobs recorded in c.blobRefs (sizes are taken from
// the full pack set), and only that reduced set is handed to filter.
func (c *Checker) ReadPacks(ctx context.Context, filter func(packs map[restic.ID]int64) map[restic.ID]int64, p *progress.Counter, errChan chan<- error) {
	effectiveFilter := filter

	if c.IsFiltered() {
		effectiveFilter = func(allPacks map[restic.ID]int64) map[restic.ID]int64 {
			referenced := make(map[restic.ID]int64)
			// map every used blob to the pack(s) it is stored in
			for handle := range c.blobRefs.M.Keys() {
				for _, packedBlob := range c.repo.LookupBlob(handle.Type, handle.ID) {
					referenced[packedBlob.PackID] = allPacks[packedBlob.PackID]
				}
			}
			return filter(referenced)
		}
	}

	c.Checker.ReadPacks(ctx, effectiveFilter, p, errChan)
}

View file

@ -46,7 +46,7 @@ func checkPacks(chkr *checker.Checker) []error {
}
func checkStruct(chkr *checker.Checker) []error {
err := chkr.LoadSnapshots(context.TODO())
err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil)
if err != nil {
return []error{err}
}

View file

@ -4,6 +4,7 @@ import (
"context"
"testing"
"github.com/restic/restic/internal/data"
"github.com/restic/restic/internal/restic"
)
@ -20,7 +21,7 @@ func TestCheckRepo(t testing.TB, repo checkerRepository) {
t.Fatalf("errors loading index: %v", hints)
}
err := chkr.LoadSnapshots(context.TODO())
err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil)
if err != nil {
t.Error(err)
}