diff --git a/changelog/unreleased/issue-3326 b/changelog/unreleased/issue-3326 new file mode 100644 index 000000000..72b25ea4b --- /dev/null +++ b/changelog/unreleased/issue-3326 @@ -0,0 +1,8 @@ +Enhancement: `restic check` for specified snapshot(s) via snapshot filtering + +Snapshots can now be specified for the command `restic check` on the command line +via the standard snapshot filter (`--tag`, `--host`, `--path` or specifying +snapshot IDs directly); only the packfiles used by these snapshots are then checked. + +https://github.com/restic/restic/issues/3326 +https://github.com/restic/restic/pull/5213 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index f93006f14..7b85ceff8 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -15,6 +15,7 @@ import ( "github.com/restic/restic/internal/backend/cache" "github.com/restic/restic/internal/checker" + "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/global" "github.com/restic/restic/internal/repository" @@ -71,6 +72,7 @@ type CheckOptions struct { ReadDataSubset string CheckUnused bool WithCache bool + data.SnapshotFilter } func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { @@ -84,6 +86,7 @@ func (opts *CheckOptions) AddFlags(f *pflag.FlagSet) { panic(err) } f.BoolVar(&opts.WithCache, "with-cache", false, "use existing cache, only read uncached data from repository") + initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } func checkFlags(opts CheckOptions) error { @@ -220,9 +223,6 @@ func prepareCheckCache(opts CheckOptions, gopts *global.Options, printer progres func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args []string, term ui.Terminal) (checkSummary, error) { summary := checkSummary{MessageType: "summary"} - if len(args) != 0 { - return summary, errors.Fatal("the check command expects no arguments, only options - please see `restic help check` for usage and flags") - } 
var printer progress.Printer if !gopts.JSON { @@ -231,11 +231,6 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args printer = newJSONErrorPrinter(term) } - readDataFilter, err := buildPacksFilter(opts, printer) - if err != nil { - return summary, err - } - cleanup := prepareCheckCache(opts, &gopts, printer) defer cleanup() @@ -249,7 +244,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args defer unlock() chkr := checker.New(repo, opts.CheckUnused) - err = chkr.LoadSnapshots(ctx) + err = chkr.LoadSnapshots(ctx, &opts.SnapshotFilter, args) if err != nil { return summary, err } @@ -365,6 +360,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args return summary, ctx.Err() } + // the following block is only used for tests if opts.CheckUnused { unused, err := chkr.UnusedBlobs(ctx) if err != nil { @@ -376,6 +372,11 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args } } + readDataFilter, err := buildPacksFilter(opts, printer, chkr.IsFiltered()) + if err != nil { + return summary, err + } + if readDataFilter != nil { p := printer.NewCounter("packs") errChan := make(chan error) @@ -416,11 +417,16 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts global.Options, args return summary, nil } -func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) { +func buildPacksFilter(opts CheckOptions, printer progress.Printer, + filteredStatus bool) (func(packs map[restic.ID]int64) map[restic.ID]int64, error) { + typeData := "" + if filteredStatus { + typeData = "filtered " + } switch { case opts.ReadData: return func(packs map[restic.ID]int64) map[restic.ID]int64 { - printer.P("read all data\n") + printer.P("read all %sdata", typeData) return packs }, nil case opts.ReadDataSubset != "": @@ -431,7 +437,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) 
(func(packs m return func(packs map[restic.ID]int64) map[restic.ID]int64 { packCount := uint64(len(packs)) packs = selectPacksByBucket(packs, bucket, totalBuckets) - printer.P("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, len(packs), packCount, totalBuckets) + printer.P("read group #%d of %d %sdata packs (out of total %d packs in %d groups)", bucket, len(packs), typeData, packCount, totalBuckets) return packs }, nil } else if strings.HasSuffix(opts.ReadDataSubset, "%") { @@ -440,7 +446,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m return nil, err } return func(packs map[restic.ID]int64) map[restic.ID]int64 { - printer.P("read %.1f%% of data packs\n", percentage) + printer.P("read %.1f%% of %spackfiles", percentage, typeData) return selectRandomPacksByPercentage(packs, percentage) }, nil } @@ -461,7 +467,7 @@ func buildPacksFilter(opts CheckOptions, printer progress.Printer) (func(packs m if repoSize == 0 { percentage = 100 } - printer.P("read %d bytes (%.1f%%) of data packs\n", subsetSize, percentage) + printer.P("read %d bytes (%.1f%%) of %sdata packs\n", subsetSize, percentage, typeData) return packs }, nil } diff --git a/cmd/restic/cmd_check_integration_test.go b/cmd/restic/cmd_check_integration_test.go index c4580100e..0a5bc3521 100644 --- a/cmd/restic/cmd_check_integration_test.go +++ b/cmd/restic/cmd_check_integration_test.go @@ -2,6 +2,7 @@ package main import ( "context" + "strings" "testing" "github.com/restic/restic/internal/global" @@ -34,3 +35,67 @@ func testRunCheckOutput(t testing.TB, gopts global.Options, checkUnused bool) (s }) return buf.String(), err } + +func testRunCheckOutputWithOpts(t testing.TB, gopts global.Options, opts CheckOptions, args []string) (string, error) { + buf, err := withCaptureStdout(t, gopts, func(ctx context.Context, gopts global.Options) error { + gopts.Verbosity = 2 + _, err := runCheck(context.TODO(), opts, gopts, args, gopts.Term) + return err + 
}) + return buf.String(), err +} + +func TestCheckWithSnapshotFilter(t *testing.T) { + testCases := []struct { + opts CheckOptions + args []string + expectedOutput string + }{ + { // full --read-data, all snapshots: pack count + CheckOptions{ReadData: true}, + nil, + "4 / 4 packs", + }, + { // full --read-data, all snapshots: snapshot count + CheckOptions{ReadData: true}, + nil, + "2 / 2 snapshots", + }, + { // full --read-data, latest snapshot: pack count + CheckOptions{ReadData: true}, + []string{"latest"}, + "2 / 2 packs", + }, + { // full --read-data, latest snapshot: snapshot count + CheckOptions{ReadData: true}, + []string{"latest"}, + "1 / 1 snapshots", + }, + { // --read-data-subset, latest snapshot: pack count + CheckOptions{ReadDataSubset: "1%"}, + []string{"latest"}, + "1 / 1 packs", + }, + { // --read-data-subset, latest snapshot: message mentions filtering + CheckOptions{ReadDataSubset: "1%"}, + []string{"latest"}, + "filtered", + }, + } + + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + opts := BackupOptions{} + testRunBackup(t, env.testdata+"/0", []string{"for_cmd_ls"}, opts, env.gopts) + testRunBackup(t, env.testdata+"/0", []string{"0/9"}, opts, env.gopts) + + for _, testCase := range testCases { + output, err := testRunCheckOutputWithOpts(t, env.gopts, testCase.opts, testCase.args) + rtest.OK(t, err) + + hasOutput := strings.Contains(output, testCase.expectedOutput) + rtest.Assert(t, hasOutput, `expected to find substring %q, but did not find it`, testCase.expectedOutput) + } +} diff --git a/doc/077_troubleshooting.rst b/doc/077_troubleshooting.rst index 36c9d63ec..fd19f121d 100644 --- a/doc/077_troubleshooting.rst +++ b/doc/077_troubleshooting.rst @@ -82,6 +82,12 @@ If ``check`` detects damaged pack files, it will show instructions on how to rep them using the ``repair pack`` command. Use that command instead of the "Repair the index" section in this guide. 
+If you are interested in checking only specific snapshots, you can +use the standard snapshot filter method specifying ``--host``, ``--path``, ``--tag`` or +alternatively naming snapshot ID(s) explicitly. The selected subset of packfiles +will then be checked for consistency and read when either ``--read-data`` or +``--read-data-subset`` is given. + 2. Backup the repository ************************ diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 5ef4a52b5..c985951fd 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -31,6 +31,10 @@ type Checker struct { snapshots restic.Lister repo restic.Repository + + // when snapshot filtering is being used + snapshotFilter *data.SnapshotFilter + args []string } type checkerRepository interface { @@ -51,12 +55,19 @@ func New(repo checkerRepository, trackUnused bool) *Checker { return c } -func (c *Checker) LoadSnapshots(ctx context.Context) error { +func (c *Checker) LoadSnapshots(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) error { var err error c.snapshots, err = restic.MemorizeList(ctx, c.repo, restic.SnapshotFile) + c.args = args + c.snapshotFilter = snapshotFilter return err } +// IsFiltered returns true if snapshot filtering is active +func (c *Checker) IsFiltered() bool { + return len(c.args) != 0 || !c.snapshotFilter.Empty() +} + // Error is an error that occurred while checking a repository. type Error struct { TreeID restic.ID @@ -124,11 +135,39 @@ func loadSnapshotTreeIDs(ctx context.Context, lister restic.Lister, repo restic. 
return ids, errs } +func (c *Checker) loadActiveTrees(ctx context.Context, snapshotFilter *data.SnapshotFilter, args []string) (trees restic.IDs, errs []error) { + trees = []restic.ID{} + errs = []error{} + + if !c.IsFiltered() { + return loadSnapshotTreeIDs(ctx, c.snapshots, c.repo) + } + + err := snapshotFilter.FindAll(ctx, c.snapshots, c.repo, args, func(_ string, sn *data.Snapshot, err error) error { + if err != nil { + errs = append(errs, err) + return err + } else if sn != nil { + trees = append(trees, *sn.Tree) + } + return nil + }) + + if err != nil { + errs = append(errs, err) + return nil, errs + } + + // track blobs to learn which packs need to be checked + c.trackUnused = true + return trees, errs +} + // Structure checks that for all snapshots all referenced data blobs and // subtrees are available in the index. errChan is closed after all trees have // been traversed. func (c *Checker) Structure(ctx context.Context, p *progress.Counter, errChan chan<- error) { - trees, errs := loadSnapshotTreeIDs(ctx, c.snapshots, c.repo) + trees, errs := c.loadActiveTrees(ctx, c.snapshotFilter, c.args) p.SetMax(uint64(len(trees))) debug.Log("need to check %d trees from snapshots, %d errs returned", len(trees), len(errs)) @@ -259,3 +298,30 @@ func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles, er return blobs, err } + +// ReadPacks wraps repository.ReadPacks. +// If snapshot filtering is not active, it calls repository.ReadPacks() +// with an unmodified parameter list. +// Otherwise, it calculates the packfiles needed, gets their sizes from the full +// packfile set and submits them to repository.ReadPacks(). +func (c *Checker) ReadPacks(ctx context.Context, filter func(packs map[restic.ID]int64) map[restic.ID]int64, p *progress.Counter, errChan chan<- error) { + // no snapshot filtering, pass through + if !c.IsFiltered() { + c.Checker.ReadPacks(ctx, filter, p, errChan) + return + } + + packfileFilter := func(allPacks map[restic.ID]int64) 
map[restic.ID]int64 { + filteredPacks := make(map[restic.ID]int64) + // map each used blob to the packfile(s) holding it; NOTE(review): a pack ID missing from allPacks is stored with size 0 - confirm this is intended + for bh := range c.blobRefs.M.Keys() { + for _, pb := range c.repo.LookupBlob(bh.Type, bh.ID) { + filteredPacks[pb.PackID] = allPacks[pb.PackID] + } + } + + return filter(filteredPacks) + } + + c.Checker.ReadPacks(ctx, packfileFilter, p, errChan) +} diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 8c78f4395..ea20b7302 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -46,7 +46,7 @@ func checkPacks(chkr *checker.Checker) []error { } func checkStruct(chkr *checker.Checker) []error { - err := chkr.LoadSnapshots(context.TODO()) + err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil) if err != nil { return []error{err} } diff --git a/internal/checker/testing.go b/internal/checker/testing.go index 1358bf362..eaa16382a 100644 --- a/internal/checker/testing.go +++ b/internal/checker/testing.go @@ -4,6 +4,7 @@ import ( "context" "testing" + "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/restic" ) @@ -20,7 +21,7 @@ func TestCheckRepo(t testing.TB, repo checkerRepository) { t.Fatalf("errors loading index: %v", hints) } - err := chkr.LoadSnapshots(context.TODO()) + err := chkr.LoadSnapshots(context.TODO(), &data.SnapshotFilter{}, nil) if err != nil { t.Error(err) }