From b87f7586e4fef964deb034e74efb239187668f7c Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 19 Nov 2025 07:09:24 +0000 Subject: [PATCH 01/16] restic copy --batch: a fresh start from commit 382616747 Instead of rebasing my code, I decided to start fresh, since WithBlobUploader() has been introduced. changelog/unreleased/issue-5453: doc/045_working_with_repos.rst: the usual cmd/restic/cmd_copy.go: gather all snaps to be collected - collectAllSnapshots() run overall copy step - func copyTreeBatched() helper copySaveSnapshot() to save the corresponding snapshot internal/repository/repack.go: introduce wrapper CopyBlobs(), which passes parameter `uploader restic.BlobSaver` from WithBlobUploader() via copyTreeBatched() to repack(). internal/backend/local/local_windows.go: I did not touch it, but gofmt did: whitespace --- changelog/unreleased/issue-5453 | 12 ++ cmd/restic/cmd_copy.go | 156 ++++++++++++++++--- cmd/restic/cmd_copy_integration_test.go | 194 ++++++++++++++++++++++++ doc/045_working_with_repos.rst | 10 +- internal/backend/local/local_windows.go | 2 +- internal/repository/repack.go | 20 +++ 6 files changed, 370 insertions(+), 24 deletions(-) create mode 100644 changelog/unreleased/issue-5453 diff --git a/changelog/unreleased/issue-5453 b/changelog/unreleased/issue-5453 new file mode 100644 index 000000000..686474597 --- /dev/null +++ b/changelog/unreleased/issue-5453 @@ -0,0 +1,12 @@ +Enhancement: `restic copy` can now spool packfiles across multiple snapshots + +`restic copy` used to save all newly created packfiles when finishing one snapshot, +even when the actual packfile size was quite small. This applied particularly to +incremental backups, when there were only small changes between individual backups. + +When using the new option `--batch`, `restic copy` now creates one large request list +which contains all blobs from all snapshots to be copied and then executes the +copy operation.
+ +https://github.com/restic/restic/issues/5175 +https://github.com/restic/restic/pull/5464 diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index 498d6f75d..f9a75108b 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "slices" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/debug" @@ -63,13 +64,52 @@ Exit status is 12 if the password is incorrect. type CopyOptions struct { global.SecondaryRepoOptions data.SnapshotFilter + batch bool } func (opts *CopyOptions) AddFlags(f *pflag.FlagSet) { + f.BoolVar(&opts.batch, "batch", false, "batch all snapshots to be copied into one step to optimize use of packfiles") opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from") initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } +// collectAllSnapshots: select all snapshot trees to be copied +func collectAllSnapshots(ctx context.Context, opts CopyOptions, + srcSnapshotLister restic.Lister, srcRepo restic.Repository, + dstSnapshotByOriginal map[restic.ID][]*data.Snapshot, args []string, printer progress.Printer) (selectedSnapshots []*data.Snapshot) { + + selectedSnapshots = make([]*data.Snapshot, 0, 10) + for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) { + // check whether the destination has a snapshot with the same persistent ID which has similar snapshot fields + srcOriginal := *sn.ID() + if sn.Original != nil { + srcOriginal = *sn.Original + } + + if originalSns, ok := dstSnapshotByOriginal[srcOriginal]; ok { + isCopy := false + for _, originalSn := range originalSns { + if similarSnapshots(originalSn, sn) { + printer.V("\n%v\n", sn) + printer.V("skipping source snapshot %s, was already copied to snapshot %s\n", sn.ID().Str(), originalSn.ID().Str()) + isCopy = true + break + } + } + if isCopy { + continue + } + } + selectedSnapshots = append(selectedSnapshots, sn) + } + + 
slices.SortStableFunc(selectedSnapshots, func(a, b *data.Snapshot) int { + return a.Time.Compare(b.Time) + }) + + return selectedSnapshots +} + func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args []string, term ui.Terminal) error { printer := ui.NewProgressPrinter(false, gopts.Verbosity, term) secondaryGopts, isFromRepo, err := opts.SecondaryRepoOptions.FillGlobalOpts(ctx, gopts, "destination") @@ -124,10 +164,11 @@ func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args [ return ctx.Err() } + selectedSnapshots := collectAllSnapshots(ctx, opts, srcSnapshotLister, srcRepo, dstSnapshotByOriginal, args, printer) + // remember already processed trees across all snapshots visitedTrees := restic.NewIDSet() - - for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) { + for _, sn := range selectedSnapshots { // check whether the destination has a snapshot with the same persistent ID which has similar snapshot fields srcOriginal := *sn.ID() if sn.Original != nil { @@ -148,25 +189,12 @@ func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args [ continue } } - printer.P("\n%v", sn) - printer.P(" copy started, this may take a while...") - if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer); err != nil { - return err - } - debug.Log("tree copied") - - // save snapshot - sn.Parent = nil // Parent does not have relevance in the new repo. 
- // Use Original as a persistent snapshot ID - if sn.Original == nil { - sn.Original = sn.ID() - } - newID, err := data.SaveSnapshot(ctx, dstRepo, sn) - if err != nil { - return err - } - printer.P("snapshot %s saved", newID.Str()) } + + if err := copyTreeBatched(ctx, srcRepo, dstRepo, visitedTrees, selectedSnapshots, opts, printer); err != nil { + return err + } + return ctx.Err() } @@ -190,7 +218,7 @@ func similarSnapshots(sna *data.Snapshot, snb *data.Snapshot) bool { } func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, - visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer) error { + visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver, seenBlobs restic.IDSet) error { wg, wgCtx := errgroup.WithContext(ctx) @@ -204,11 +232,15 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep packList := restic.NewIDSet() enqueue := func(h restic.BlobHandle) { + if seenBlobs.Has(h.ID) { + return + } pb := srcRepo.LookupBlob(h.Type, h.ID) copyBlobs.Insert(h) for _, p := range pb { packList.Insert(p.PackID) } + seenBlobs.Insert(h.ID) } wg.Go(func() error { @@ -244,7 +276,9 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep copyStats(srcRepo, copyBlobs, packList, printer) bar := printer.NewCounter("packs copied") - err = repository.Repack(ctx, srcRepo, dstRepo, packList, copyBlobs, bar, printer.P) + bar.SetMax(uint64(len(packList))) + err = repository.CopyBlobs(ctx, srcRepo, dstRepo, uploader, packList, copyBlobs, bar, printer.P) + bar.Done() if err != nil { return errors.Fatalf("%s", err) } @@ -268,3 +302,81 @@ func copyStats(srcRepo restic.Repository, copyBlobs restic.BlobSet, packList res printer.V(" copy %d blobs with disk size %s in %d packfiles\n", countBlobs, ui.FormatBytes(uint64(sizeBlobs)), len(packList)) } + +func copySaveSnapshot(ctx context.Context, sn *data.Snapshot, dstRepo restic.Repository, 
printer progress.Printer) error { + sn.Parent = nil // Parent does not have relevance in the new repo. + // Use Original as a persistent snapshot ID + if sn.Original == nil { + sn.Original = sn.ID() + } + newID, err := data.SaveSnapshot(ctx, dstRepo, sn) + if err != nil { + return err + } + printer.P("snapshot %s saved", newID.Str()) + return nil +} + +// copyTreeBatched: copy multiple snapshot trees in one go, using calls to +// repository.RepackInner() for all selected snapshot trees and thereby packing the packfiles optimally. +// Usually each snapshot creates at least one tree packfile and one data packfile. +func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, + visitedTrees restic.IDSet, selectedSnapshots []*data.Snapshot, opts CopyOptions, + printer progress.Printer) error { + + // seenBlobs is necessary in about 1 of 10000 blobs, in the other 99.99% the check + // dstRepo.LookupBlobSize() is working + seenBlobs := restic.NewIDSet() + // dependent on opts.batch the package Uploader is started either for + // each snapshot to be copied or once for all snapshots + + if opts.batch { + // call WithBlobUploader() once and then loop over all selectedSnapshots + err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + for _, sn := range selectedSnapshots { + printer.P("\n%v", sn) + printer.P(" copy started, this may take a while...") + err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader, seenBlobs) + if err != nil { + return err + } + debug.Log("tree copied") + } + + // save all the snapshots + for _, sn := range selectedSnapshots { + err := copySaveSnapshot(ctx, sn, dstRepo, printer) + if err != nil { + return err + } + } + return nil + }) + + return err + } + + // no batch option, loop over selectedSnapshots and call WithBlobUploader() + // inside the loop + for _, sn := range selectedSnapshots { + printer.P("\n%v", sn) + printer.P(" copy 
started, this may take a while...") + err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader, seenBlobs); err != nil { + return err + } + debug.Log("tree copied") + return nil + }) + if err != nil { + return err + } + + err = copySaveSnapshot(ctx, sn, dstRepo, printer) + if err != nil { + return err + } + } + + return nil +} diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go index c35e960ff..41b8355da 100644 --- a/cmd/restic/cmd_copy_integration_test.go +++ b/cmd/restic/cmd_copy_integration_test.go @@ -6,8 +6,11 @@ import ( "path/filepath" "testing" + "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/global" + "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" + "github.com/restic/restic/internal/ui" ) func testRunCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) { @@ -28,6 +31,25 @@ func testRunCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) })) } +func testRunCopyBatched(t testing.TB, srcGopts global.Options, dstGopts global.Options) { + gopts := srcGopts + gopts.Repo = dstGopts.Repo + gopts.Password = dstGopts.Password + gopts.InsecureNoPassword = dstGopts.InsecureNoPassword + copyOpts := CopyOptions{ + SecondaryRepoOptions: global.SecondaryRepoOptions{ + Repo: srcGopts.Repo, + Password: srcGopts.Password, + InsecureNoPassword: srcGopts.InsecureNoPassword, + }, + batch: true, + } + + rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error { + return runCopy(context.TODO(), copyOpts, gopts, nil, gopts.Term) + })) +} + func TestCopy(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() @@ -85,6 +107,178 @@ func TestCopy(t *testing.T) { rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots") } +// packfile with size 
and type +type packInfo struct { + Type string + size int64 + numberBlobs int +} + +// testGetUsedBlobs: call data.FindUsedBlobs for all snapshots in repositpry +func testGetUsedBlobs(t *testing.T, repo restic.Repository) (usedBlobs restic.BlobSet) { + selectedTrees := make([]restic.ID, 0, 3) + usedBlobs = restic.NewBlobSet() + + snapshotLister, err := restic.MemorizeList(context.TODO(), repo, restic.SnapshotFile) + rtest.OK(t, err) + rtest.OK(t, repo.LoadIndex(context.TODO(), nil)) + + // gather all snapshots + nullFilter := &data.SnapshotFilter{} + err = nullFilter.FindAll(context.TODO(), snapshotLister, repo, nil, func(_ string, sn *data.Snapshot, err error) error { + rtest.OK(t, err) + selectedTrees = append(selectedTrees, *sn.Tree) + return nil + }) + rtest.OK(t, err) + + rtest.OK(t, data.FindUsedBlobs(context.TODO(), repo, selectedTrees, usedBlobs, nil)) + + return usedBlobs +} + +// getPackfileInfo: get packfiles, their length, type and number of blobs in packfile +func getPackfileInfo(t *testing.T, repo restic.Repository) (packfiles map[restic.ID]packInfo) { + packfiles = make(map[restic.ID]packInfo) + + rtest.OK(t, repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error { + blobs, _, err := repo.ListPack(context.TODO(), id, size) + rtest.OK(t, err) + rtest.Assert(t, len(blobs) > 0, "a packfile should contain at least one blob") + + Type := "" + if len(blobs) > 0 { + Type = blobs[0].Type.String() + } + + packfiles[id] = packInfo{ + Type: Type, + size: size, + numberBlobs: len(blobs), + } + + return nil + })) + + return packfiles +} + +// get various counts from the packfiles in the repository +func getCounts(t *testing.T, repo restic.Repository) (int, int, int) { + countTreePacks := 0 + countDataPacks := 0 + countBlobs := 0 + for _, item := range getPackfileInfo(t, repo) { + switch item.Type { + case "tree": + countTreePacks++ + case "data": + countDataPacks++ + } + countBlobs += item.numberBlobs + } + + return countTreePacks, 
countDataPacks, countBlobs +} + +func TestCopyBatched(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + env3, cleanup3 := withTestEnvironment(t) + defer cleanup3() + + testSetupBackupData(t, env) + opts := BackupOptions{} + testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, opts, env.gopts) + testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts) + testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts) + testRunCheck(t, env.gopts) + + // batch copy + testRunInit(t, env3.gopts) + testRunCopyBatched(t, env.gopts, env3.gopts) + + // check integrity of the copy + testRunCheck(t, env3.gopts) + + snapshotIDs := testListSnapshots(t, env.gopts, 3) + copiedSnapshotIDs := testListSnapshots(t, env3.gopts, 3) + + // check that the copied snapshots have the same tree contents as the old ones (= identical tree hash) + origRestores := make(map[string]struct{}) + for i, snapshotID := range snapshotIDs { + restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i)) + origRestores[restoredir] = struct{}{} + testRunRestore(t, env.gopts, restoredir, snapshotID.String()) + } + + for i, snapshotID := range copiedSnapshotIDs { + restoredir := filepath.Join(env3.base, fmt.Sprintf("restore%d", i)) + testRunRestore(t, env3.gopts, restoredir, snapshotID.String()) + foundMatch := false + for cmpdir := range origRestores { + diff := directoriesContentsDiff(t, restoredir, cmpdir) + if diff == "" { + delete(origRestores, cmpdir) + foundMatch = true + } + } + + rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID) + } + + rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots") + + // get access to the repositories + var repo1 restic.Repository + var unlock1 func() + var err error + rtest.OK(t, withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error { + printer := 
ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) + _, repo1, unlock1, err = openWithReadLock(ctx, gopts, false, printer) + rtest.OK(t, err) + defer unlock1() + return err + })) + + var repo3 restic.Repository + var unlock3 func() + rtest.OK(t, withTermStatus(t, env3.gopts, func(ctx context.Context, gopts global.Options) error { + printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) + _, repo3, unlock3, err = openWithReadLock(ctx, gopts, false, printer) + rtest.OK(t, err) + defer unlock3() + return err + })) + + usedBlobs1 := testGetUsedBlobs(t, repo1) + usedBlobs3 := testGetUsedBlobs(t, repo3) + rtest.Assert(t, len(usedBlobs1) == len(usedBlobs3), + "used blob length must be identical in both repositories, but is not: (normal) %d <=> (batched) %d", + len(usedBlobs1), len(usedBlobs3)) + + // compare usedBlobs1 <=> usedBlobs3 + good := true + for bh := range usedBlobs1 { + if !usedBlobs3.Has(bh) { + good = false + break + } + } + rtest.Assert(t, good, "all blobs in both repositories should be equal but they are not") + + _, _, countBlobs1 := getCounts(t, repo1) + countTreePacks3, countDataPacks3, countBlobs3 := getCounts(t, repo3) + + rtest.Assert(t, countBlobs1 == countBlobs3, + "expected 1 blob count in boths repos to be equal, but got %d and %d blobs", + countBlobs1, countBlobs3) + + rtest.Assert(t, countTreePacks3 == 1 && countDataPacks3 == 1, + "expected 1 data packfile and 1 tree packfile, but got %d trees and %d data packfiles", + countTreePacks3, countDataPacks3) +} + func TestCopyIncremental(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 75a7e79f1..10f898444 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -216,6 +216,14 @@ example from a local to a remote repository, you can use the ``copy`` command: snapshot 4e5d5487 of [/home/user/work] at 2020-05-01 22:44:07.012113 +0200 CEST by 
user@kasimir skipping snapshot 4e5d5487, was already copied to snapshot 50eb62b7 +In case you want to copy a repository which contains many backups with only small changes +between ``restic backup`` runs, you can use the option ``--batch`` to make full use of +the ``--pack-size`` option. By default, newly created packfiles are saved when the ``copy`` +operation for one snapshot finishes. The option ``--batch`` disregards these snapshot boundaries +and creates optimally filled packfiles. You can always achieve the same effect +by running ``restic prune`` after a ``restic copy`` operation, but this involves the extra +``prune`` step. + The example command copies all snapshots from the source repository ``/srv/restic-repo`` to the destination repository ``/srv/restic-repo-copy``. Snapshots which have previously been copied between repositories will @@ -353,7 +361,7 @@ modifying the repository. Instead restic will only print the actions it would perform. .. note:: The ``rewrite`` command verifies that it does not modify snapshots in - unexpected ways and fails with an ``cannot encode tree at "[...]" without losing information`` + unexpected ways and fails with an ``cannot encode tree at "[...]" without loosing information`` error otherwise. This can occur when rewriting a snapshot created by a newer version of restic or some third-party implementation.
diff --git a/internal/backend/local/local_windows.go b/internal/backend/local/local_windows.go index fa21d8240..b3677b0ef 100644 --- a/internal/backend/local/local_windows.go +++ b/internal/backend/local/local_windows.go @@ -24,7 +24,7 @@ func removeFile(f string) error { // as Windows won't let you delete a read-only file err := os.Chmod(f, 0666) if err != nil && !os.IsPermission(err) { - return errors.WithStack(err) + return errors.WithStack(err) } return os.Remove(f) diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 730325afd..17f4fbf3f 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -54,6 +54,26 @@ func Repack( }) } +/* the following code is a terrible hack, but there is currently no other way + of calling the functionality in repack() without a lot duplication of code. + + Repack() is still called from `restic prune` via plan.Execute() inside prune.go +*/ +// CopyBlobs is a wrapper around repack(). The parameter 'uploader' is passed through +// from WithBlobUploader() to CopyBlobs() via cmd/restic/cmd_copy.copyTree(). +func CopyBlobs( + ctx context.Context, + repo restic.Repository, + dstRepo restic.Repository, + uploader restic.BlobSaver, + packs restic.IDSet, + keepBlobs repackBlobSet, + p *progress.Counter, + logf LogFunc, +) error { + return repack(ctx, repo, dstRepo, uploader, packs, keepBlobs, p, logf) +} + func repack( ctx context.Context, repo restic.Repository, From fc3de018bcd5f309144f14ac7aec31d29ecf226a Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 19 Nov 2025 07:29:09 +0000 Subject: [PATCH 02/16] restic copy --batch - fussy linter internal/repository/repack.go: I have to please the mighty linter. 
--- internal/repository/repack.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 17f4fbf3f..298c708ce 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -54,7 +54,7 @@ func Repack( }) } -/* the following code is a terrible hack, but there is currently no other way +/* CopyBlobs: the following code is a terrible hack, but there is currently no other way of calling the functionality in repack() without a lot duplication of code. Repack() is still called from `restic prune` via plan.Execute() inside prune.go From b24b088978c3547807ab6bd99b515db5d62abf68 Mon Sep 17 00:00:00 2001 From: Winfried Plappert <18740761+wplapper@users.noreply.github.com> Date: Wed, 19 Nov 2025 07:34:39 +0000 Subject: [PATCH 03/16] restic copy --batch: The mighty linter I cave in - no double comment --- internal/repository/repack.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 298c708ce..7ebc2b2ac 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -54,11 +54,6 @@ func Repack( }) } -/* CopyBlobs: the following code is a terrible hack, but there is currently no other way - of calling the functionality in repack() without a lot duplication of code. - - Repack() is still called from `restic prune` via plan.Execute() inside prune.go -*/ // CopyBlobs is a wrapper around repack(). The parameter 'uploader' is passed through // from WithBlobUploader() to CopyBlobs() via cmd/restic/cmd_copy.copyTree(). 
func CopyBlobs( From 81d8bc4ade9d256b9a92b18dd5f18fac3586fecb Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 16:06:29 +0100 Subject: [PATCH 04/16] repository: replace CopyBlobs with Repack implementation --- cmd/restic/cmd_copy.go | 2 -- internal/repository/prune.go | 4 +++- internal/repository/repack.go | 26 +++++--------------------- internal/repository/repack_test.go | 16 ++++++++++++---- 4 files changed, 20 insertions(+), 28 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index f9a75108b..446abb6cb 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -276,9 +276,7 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep copyStats(srcRepo, copyBlobs, packList, printer) bar := printer.NewCounter("packs copied") - bar.SetMax(uint64(len(packList))) err = repository.CopyBlobs(ctx, srcRepo, dstRepo, uploader, packList, copyBlobs, bar, printer.P) - bar.Done() if err != nil { return errors.Fatalf("%s", err) } diff --git a/internal/repository/prune.go b/internal/repository/prune.go index 250ab9846..cc36c7a96 100644 --- a/internal/repository/prune.go +++ b/internal/repository/prune.go @@ -563,7 +563,9 @@ func (plan *PrunePlan) Execute(ctx context.Context, printer progress.Printer) er if len(plan.repackPacks) != 0 { printer.P("repacking packs\n") bar := printer.NewCounter("packs repacked") - err := Repack(ctx, repo, repo, plan.repackPacks, plan.keepBlobs, bar, printer.P) + err := repo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { + return CopyBlobs(ctx, repo, repo, uploader, plan.repackPacks, plan.keepBlobs, bar, printer.P) + }) if err != nil { return errors.Fatalf("%s", err) } diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 7ebc2b2ac..ca0a8a48b 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -21,17 +21,18 @@ type repackBlobSet interface { type LogFunc func(msg string, args 
...interface{}) -// Repack takes a list of packs together with a list of blobs contained in +// CopyBlobs takes a list of packs together with a list of blobs contained in // these packs. Each pack is loaded and the blobs listed in keepBlobs is saved // into a new pack. Returned is the list of obsolete packs which can then // be removed. // -// The map keepBlobs is modified by Repack, it is used to keep track of which +// The map keepBlobs is modified by CopyBlobs, it is used to keep track of which // blobs have been processed. -func Repack( +func CopyBlobs( ctx context.Context, repo restic.Repository, dstRepo restic.Repository, + dstUploader restic.BlobSaver, packs restic.IDSet, keepBlobs repackBlobSet, p *progress.Counter, @@ -49,24 +50,7 @@ func Repack( return errors.New("repack step requires a backend connection limit of at least two") } - return dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { - return repack(ctx, repo, dstRepo, uploader, packs, keepBlobs, p, logf) - }) -} - -// CopyBlobs is a wrapper around repack(). The parameter 'uploader' is passed through -// from WithBlobUploader() to CopyBlobs() via cmd/restic/cmd_copy.copyTree(). 
-func CopyBlobs( - ctx context.Context, - repo restic.Repository, - dstRepo restic.Repository, - uploader restic.BlobSaver, - packs restic.IDSet, - keepBlobs repackBlobSet, - p *progress.Counter, - logf LogFunc, -) error { - return repack(ctx, repo, dstRepo, uploader, packs, keepBlobs, p, logf) + return repack(ctx, repo, dstRepo, dstUploader, packs, keepBlobs, p, logf) } func repack( diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 4d285681f..bedacaa7e 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -150,7 +150,9 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe } func repack(t *testing.T, repo restic.Repository, be backend.Backend, packs restic.IDSet, blobs restic.BlobSet) { - rtest.OK(t, repository.Repack(context.TODO(), repo, repo, packs, blobs, nil, nil)) + rtest.OK(t, repo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + return repository.CopyBlobs(ctx, repo, repo, uploader, packs, blobs, nil, nil) + })) for id := range packs { rtest.OK(t, be.Remove(context.TODO(), backend.Handle{Type: restic.PackFile, Name: id.String()})) @@ -263,7 +265,9 @@ func testRepackCopy(t *testing.T, version uint) { _, keepBlobs := selectBlobs(t, random, repo, 0.2) copyPacks := findPacksForBlobs(t, repo, keepBlobs) - rtest.OK(t, repository.Repack(context.TODO(), repoWrapped, dstRepoWrapped, copyPacks, keepBlobs, nil, nil)) + rtest.OK(t, repoWrapped.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + return repository.CopyBlobs(ctx, repoWrapped, dstRepoWrapped, uploader, copyPacks, keepBlobs, nil, nil) + })) rebuildAndReloadIndex(t, dstRepo) for h := range keepBlobs { @@ -299,7 +303,9 @@ func testRepackWrongBlob(t *testing.T, version uint) { _, keepBlobs := selectBlobs(t, random, repo, 0) rewritePacks := findPacksForBlobs(t, repo, keepBlobs) - err := 
repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil) + err := repo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + return repository.CopyBlobs(ctx, repo, repo, uploader, rewritePacks, keepBlobs, nil, nil) + }) if err == nil { t.Fatal("expected repack to fail but got no error") } @@ -346,7 +352,9 @@ func testRepackBlobFallback(t *testing.T, version uint) { })) // repack must fallback to valid copy - rtest.OK(t, repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil)) + rtest.OK(t, repo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + return repository.CopyBlobs(ctx, repo, repo, uploader, rewritePacks, keepBlobs, nil, nil) + })) keepBlobs = restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id}) packs := findPacksForBlobs(t, repo, keepBlobs) From 4395a77154c742c6db711c13629d8aa735c6bad7 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 16:06:45 +0100 Subject: [PATCH 05/16] copy: remove bugous seenBlobs set --- cmd/restic/cmd_copy.go | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index 446abb6cb..ab4be5c46 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -218,7 +218,7 @@ func similarSnapshots(sna *data.Snapshot, snb *data.Snapshot) bool { } func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, - visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver, seenBlobs restic.IDSet) error { + visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver) error { wg, wgCtx := errgroup.WithContext(ctx) @@ -232,15 +232,11 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep packList := restic.NewIDSet() enqueue := func(h restic.BlobHandle) { - if 
seenBlobs.Has(h.ID) { - return - } pb := srcRepo.LookupBlob(h.Type, h.ID) copyBlobs.Insert(h) for _, p := range pb { packList.Insert(p.PackID) } - seenBlobs.Insert(h.ID) } wg.Go(func() error { @@ -322,19 +318,15 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res visitedTrees restic.IDSet, selectedSnapshots []*data.Snapshot, opts CopyOptions, printer progress.Printer) error { - // seenBlobs is necessary in about 1 of 10000 blobs, in the other 99.99% the check - // dstRepo.LookupBlobSize() is working - seenBlobs := restic.NewIDSet() - // dependent on opts.batch the package Uploader is started either for + // dependent on opts.batch the pack uploader is started either for // each snapshot to be copied or once for all snapshots - if opts.batch { // call WithBlobUploader() once and then loop over all selectedSnapshots err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { for _, sn := range selectedSnapshots { printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") - err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader, seenBlobs) + err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) if err != nil { return err } @@ -360,7 +352,7 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { - if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader, seenBlobs); err != nil { + if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader); err != nil { return err } debug.Log("tree copied") From e775192fe72cd8696181c7deb65cb673bbc0336f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 16:18:40 +0100 Subject: [PATCH 06/16] don't sort 
snapshots, drop duplicate code and cleanup copyTreeBatched function signature --- cmd/restic/cmd_copy.go | 161 +++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 94 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index ab4be5c46..e81db3915 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -3,7 +3,6 @@ package main import ( "context" "fmt" - "slices" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/debug" @@ -76,7 +75,8 @@ func (opts *CopyOptions) AddFlags(f *pflag.FlagSet) { // collectAllSnapshots: select all snapshot trees to be copied func collectAllSnapshots(ctx context.Context, opts CopyOptions, srcSnapshotLister restic.Lister, srcRepo restic.Repository, - dstSnapshotByOriginal map[restic.ID][]*data.Snapshot, args []string, printer progress.Printer) (selectedSnapshots []*data.Snapshot) { + dstSnapshotByOriginal map[restic.ID][]*data.Snapshot, args []string, printer progress.Printer, +) (selectedSnapshots []*data.Snapshot) { selectedSnapshots = make([]*data.Snapshot, 0, 10) for sn := range FindFilteredSnapshots(ctx, srcSnapshotLister, srcRepo, &opts.SnapshotFilter, args, printer) { @@ -90,8 +90,8 @@ func collectAllSnapshots(ctx context.Context, opts CopyOptions, isCopy := false for _, originalSn := range originalSns { if similarSnapshots(originalSn, sn) { - printer.V("\n%v\n", sn) - printer.V("skipping source snapshot %s, was already copied to snapshot %s\n", sn.ID().Str(), originalSn.ID().Str()) + printer.V("\n%v", sn) + printer.V("skipping source snapshot %s, was already copied to snapshot %s", sn.ID().Str(), originalSn.ID().Str()) isCopy = true break } @@ -103,10 +103,6 @@ func collectAllSnapshots(ctx context.Context, opts CopyOptions, selectedSnapshots = append(selectedSnapshots, sn) } - slices.SortStableFunc(selectedSnapshots, func(a, b *data.Snapshot) int { - return a.Time.Compare(b.Time) - }) - return selectedSnapshots } @@ -166,32 +162,7 @@ func runCopy(ctx 
context.Context, opts CopyOptions, gopts global.Options, args [ selectedSnapshots := collectAllSnapshots(ctx, opts, srcSnapshotLister, srcRepo, dstSnapshotByOriginal, args, printer) - // remember already processed trees across all snapshots - visitedTrees := restic.NewIDSet() - for _, sn := range selectedSnapshots { - // check whether the destination has a snapshot with the same persistent ID which has similar snapshot fields - srcOriginal := *sn.ID() - if sn.Original != nil { - srcOriginal = *sn.Original - } - - if originalSns, ok := dstSnapshotByOriginal[srcOriginal]; ok { - isCopy := false - for _, originalSn := range originalSns { - if similarSnapshots(originalSn, sn) { - printer.V("\n%v", sn) - printer.V("skipping source snapshot %s, was already copied to snapshot %s", sn.ID().Str(), originalSn.ID().Str()) - isCopy = true - break - } - } - if isCopy { - continue - } - } - } - - if err := copyTreeBatched(ctx, srcRepo, dstRepo, visitedTrees, selectedSnapshots, opts, printer); err != nil { + if err := copyTreeBatched(ctx, srcRepo, dstRepo, selectedSnapshots, opts, printer); err != nil { return err } @@ -217,6 +188,68 @@ func similarSnapshots(sna *data.Snapshot, snb *data.Snapshot) bool { return true } +// copyTreeBatched: copy multiple snapshot trees in one go, using calls to +// repository.RepackInner() for all selected snapshot trees and thereby packing the packfiles optimally. +// Usually each snapshot creates at least one tree packfile and one data packfile. 
+func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, + selectedSnapshots []*data.Snapshot, opts CopyOptions, printer progress.Printer) error { + + // remember already processed trees across all snapshots + visitedTrees := restic.NewIDSet() + + // dependent on opts.batch the pack uploader is started either for + // each snapshot to be copied or once for all snapshots + if opts.batch { + // call WithBlobUploader() once and then loop over all selectedSnapshots + err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + for _, sn := range selectedSnapshots { + printer.P("\n%v", sn) + printer.P(" copy started, this may take a while...") + err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) + if err != nil { + return err + } + debug.Log("tree copied") + } + + // save all the snapshots + for _, sn := range selectedSnapshots { + err := copySaveSnapshot(ctx, sn, dstRepo, printer) + if err != nil { + return err + } + } + return nil + }) + + return err + } + + // no batch option, loop over selectedSnapshots and call WithBlobUploader() + // inside the loop + for _, sn := range selectedSnapshots { + printer.P("\n%v", sn) + printer.P(" copy started, this may take a while...") + err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader); err != nil { + return err + } + debug.Log("tree copied") + return nil + }) + if err != nil { + return err + } + + err = copySaveSnapshot(ctx, sn, dstRepo, printer) + if err != nil { + return err + } + } + + return nil +} + func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver) error { @@ -310,63 +343,3 @@ func copySaveSnapshot(ctx context.Context, sn 
*data.Snapshot, dstRepo restic.Rep printer.P("snapshot %s saved", newID.Str()) return nil } - -// copyTreeBatched: copy multiple snapshot trees in one go, using calls to -// repository.RepackInner() for all selected snapshot trees and thereby packing the packfiles optimally. -// Usually each snapshot creates at least one tree packfile and one data packfile. -func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, - visitedTrees restic.IDSet, selectedSnapshots []*data.Snapshot, opts CopyOptions, - printer progress.Printer) error { - - // dependent on opts.batch the pack uploader is started either for - // each snapshot to be copied or once for all snapshots - if opts.batch { - // call WithBlobUploader() once and then loop over all selectedSnapshots - err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { - for _, sn := range selectedSnapshots { - printer.P("\n%v", sn) - printer.P(" copy started, this may take a while...") - err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) - if err != nil { - return err - } - debug.Log("tree copied") - } - - // save all the snapshots - for _, sn := range selectedSnapshots { - err := copySaveSnapshot(ctx, sn, dstRepo, printer) - if err != nil { - return err - } - } - return nil - }) - - return err - } - - // no batch option, loop over selectedSnapshots and call WithBlobUploader() - // inside the loop - for _, sn := range selectedSnapshots { - printer.P("\n%v", sn) - printer.P(" copy started, this may take a while...") - err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { - if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader); err != nil { - return err - } - debug.Log("tree copied") - return nil - }) - if err != nil { - return err - } - - err = copySaveSnapshot(ctx, sn, dstRepo, printer) - if err != nil { - return err - } - 
} - - return nil -} From 05364500b6e0b9e1ec20acc34ddfa441c827bab8 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 16:25:09 +0100 Subject: [PATCH 07/16] use correct context --- cmd/restic/cmd_copy.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index e81db3915..db9ac1157 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -85,7 +85,6 @@ func collectAllSnapshots(ctx context.Context, opts CopyOptions, if sn.Original != nil { srcOriginal = *sn.Original } - if originalSns, ok := dstSnapshotByOriginal[srcOriginal]; ok { isCopy := false for _, originalSn := range originalSns { @@ -201,7 +200,7 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res // each snapshot to be copied or once for all snapshots if opts.batch { // call WithBlobUploader() once and then loop over all selectedSnapshots - err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { for _, sn := range selectedSnapshots { printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") @@ -230,7 +229,7 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res for _, sn := range selectedSnapshots { printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") - err := dstRepo.WithBlobUploader(context.TODO(), func(ctx context.Context, uploader restic.BlobSaver) error { + err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader); err != nil { return err } From 405813f250019ff7fd8c15c83bf418cfd83af625 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 17:09:07 +0100 Subject: [PATCH 08/16] repository: fix LookupBlobSize to also report 
pending blobs --- internal/repository/index/master_index.go | 38 ++++++------------- .../repository/index/master_index_test.go | 13 +------ internal/repository/repository.go | 4 +- 3 files changed, 15 insertions(+), 40 deletions(-) diff --git a/internal/repository/index/master_index.go b/internal/repository/index/master_index.go index 62ccc4f71..f410ebf61 100644 --- a/internal/repository/index/master_index.go +++ b/internal/repository/index/master_index.go @@ -16,13 +16,13 @@ import ( // MasterIndex is a collection of indexes and IDs of chunks that are in the process of being saved. type MasterIndex struct { idx []*Index - pendingBlobs restic.BlobSet + pendingBlobs map[restic.BlobHandle]uint idxMutex sync.RWMutex } // NewMasterIndex creates a new master index. func NewMasterIndex() *MasterIndex { - mi := &MasterIndex{pendingBlobs: restic.NewBlobSet()} + mi := &MasterIndex{pendingBlobs: make(map[restic.BlobHandle]uint)} mi.clear() return mi } @@ -46,10 +46,16 @@ func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) { } // LookupSize queries all known Indexes for the ID and returns the first match. +// Also returns true if the ID is pending. func (mi *MasterIndex) LookupSize(bh restic.BlobHandle) (uint, bool) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() + // also return true if blob is pending + if size, ok := mi.pendingBlobs[bh]; ok { + return size, true + } + for _, idx := range mi.idx { if size, found := idx.LookupSize(bh); found { return size, found @@ -63,13 +69,13 @@ func (mi *MasterIndex) LookupSize(bh restic.BlobHandle) (uint, bool) { // Before doing so it checks if this blob is already known. 
// Returns true if adding was successful and false if the blob // was already known -func (mi *MasterIndex) AddPending(bh restic.BlobHandle) bool { +func (mi *MasterIndex) AddPending(bh restic.BlobHandle, size uint) bool { mi.idxMutex.Lock() defer mi.idxMutex.Unlock() // Check if blob is pending or in index - if mi.pendingBlobs.Has(bh) { + if _, ok := mi.pendingBlobs[bh]; ok { return false } @@ -80,30 +86,10 @@ func (mi *MasterIndex) AddPending(bh restic.BlobHandle) bool { } // really not known -> insert - mi.pendingBlobs.Insert(bh) + mi.pendingBlobs[bh] = size return true } -// Has queries all known Indexes for the ID and returns the first match. -// Also returns true if the ID is pending. -func (mi *MasterIndex) Has(bh restic.BlobHandle) bool { - mi.idxMutex.RLock() - defer mi.idxMutex.RUnlock() - - // also return true if blob is pending - if mi.pendingBlobs.Has(bh) { - return true - } - - for _, idx := range mi.idx { - if idx.Has(bh) { - return true - } - } - - return false -} - // IDs returns the IDs of all indexes contained in the index. 
func (mi *MasterIndex) IDs() restic.IDSet { mi.idxMutex.RLock() @@ -165,7 +151,7 @@ func (mi *MasterIndex) storePack(id restic.ID, blobs []restic.Blob) { // delete blobs from pending for _, blob := range blobs { - mi.pendingBlobs.Delete(restic.BlobHandle{Type: blob.Type, ID: blob.ID}) + delete(mi.pendingBlobs, restic.BlobHandle{Type: blob.Type, ID: blob.ID}) } for _, idx := range mi.idx { diff --git a/internal/repository/index/master_index_test.go b/internal/repository/index/master_index_test.go index edf2067b9..98cfe9ac6 100644 --- a/internal/repository/index/master_index_test.go +++ b/internal/repository/index/master_index_test.go @@ -74,9 +74,6 @@ func TestMasterIndex(t *testing.T) { mIdx.Insert(idx2) // test idInIdx1 - found := mIdx.Has(bhInIdx1) - rtest.Equals(t, true, found) - blobs := mIdx.Lookup(bhInIdx1) rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) @@ -85,9 +82,6 @@ func TestMasterIndex(t *testing.T) { rtest.Equals(t, uint(10), size) // test idInIdx2 - found = mIdx.Has(bhInIdx2) - rtest.Equals(t, true, found) - blobs = mIdx.Lookup(bhInIdx2) rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) @@ -96,9 +90,6 @@ func TestMasterIndex(t *testing.T) { rtest.Equals(t, uint(200), size) // test idInIdx12 - found = mIdx.Has(bhInIdx12) - rtest.Equals(t, true, found) - blobs = mIdx.Lookup(bhInIdx12) rtest.Equals(t, 2, len(blobs)) @@ -121,8 +112,6 @@ func TestMasterIndex(t *testing.T) { rtest.Equals(t, uint(80), size) // test not in index - found = mIdx.Has(restic.BlobHandle{ID: restic.NewRandomID(), Type: restic.TreeBlob}) - rtest.Assert(t, !found, "Expected no blobs when fetching with a random id") blobs = mIdx.Lookup(restic.NewRandomBlobHandle()) rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") _, found = mIdx.LookupSize(restic.NewRandomBlobHandle()) @@ -521,7 +510,7 @@ func TestRewriteOversizedIndex(t *testing.T) { // verify that blobs are still in the index for _, blob := range blobs { - found := mi2.Has(blob.BlobHandle) + _, 
found := mi2.LookupSize(blob.BlobHandle) rtest.Assert(t, found, "blob %v missing after rewrite", blob.ID) } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index bb9c6c3ba..d0da2e108 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -640,7 +640,7 @@ func (r *Repository) LookupBlob(tpe restic.BlobType, id restic.ID) []restic.Pack return r.idx.Lookup(restic.BlobHandle{Type: tpe, ID: id}) } -// LookupBlobSize returns the size of blob id. +// LookupBlobSize returns the size of blob id. Also returns pending blobs. func (r *Repository) LookupBlobSize(tpe restic.BlobType, id restic.ID) (uint, bool) { return r.idx.LookupSize(restic.BlobHandle{Type: tpe, ID: id}) } @@ -968,7 +968,7 @@ func (r *Repository) saveBlob(ctx context.Context, t restic.BlobType, buf []byte } // first try to add to pending blobs; if not successful, this blob is already known - known = !r.idx.AddPending(restic.BlobHandle{ID: newID, Type: t}) + known = !r.idx.AddPending(restic.BlobHandle{ID: newID, Type: t}, uint(len(buf))) // only save when needed or explicitly told if !known || storeDuplicate { From 63bc1405eae3322b13ed6d29cb636a443c15c753 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 17:12:54 +0100 Subject: [PATCH 09/16] unify snapshot copy codepaths --- cmd/restic/cmd_copy.go | 47 +++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index db9ac1157..711caa804 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -195,13 +195,20 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res // remember already processed trees across all snapshots visitedTrees := restic.NewIDSet() - - // dependent on opts.batch the pack uploader is started either for - // each snapshot to be copied or once for all snapshots + batchSize := 1 if opts.batch { + batchSize = 
len(selectedSnapshots) + } + + for len(selectedSnapshots) > 0 { + var batch []*data.Snapshot // call WithBlobUploader() once and then loop over all selectedSnapshots err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { - for _, sn := range selectedSnapshots { + for len(selectedSnapshots) > 0 && len(batch) < batchSize { + sn := selectedSnapshots[0] + selectedSnapshots = selectedSnapshots[1:] + batch = append(batch, sn) + printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) @@ -211,38 +218,18 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res debug.Log("tree copied") } - // save all the snapshots - for _, sn := range selectedSnapshots { - err := copySaveSnapshot(ctx, sn, dstRepo, printer) - if err != nil { - return err - } - } - return nil - }) - - return err - } - - // no batch option, loop over selectedSnapshots and call WithBlobUploader() - // inside the loop - for _, sn := range selectedSnapshots { - printer.P("\n%v", sn) - printer.P(" copy started, this may take a while...") - err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { - if err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader); err != nil { - return err - } - debug.Log("tree copied") return nil }) if err != nil { return err } - err = copySaveSnapshot(ctx, sn, dstRepo, printer) - if err != nil { - return err + // save all the snapshots + for _, sn := range batch { + err := copySaveSnapshot(ctx, sn, dstRepo, printer) + if err != nil { + return err + } } } From f95dc73d380d13c9e4804d9bfa465a945a8ca8b3 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 17:13:10 +0100 Subject: [PATCH 10/16] deduplicate blob enqueuing --- cmd/restic/cmd_copy.go | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git 
a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index 711caa804..82de6040e 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -251,10 +251,12 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep packList := restic.NewIDSet() enqueue := func(h restic.BlobHandle) { - pb := srcRepo.LookupBlob(h.Type, h.ID) - copyBlobs.Insert(h) - for _, p := range pb { - packList.Insert(p.PackID) + if _, ok := dstRepo.LookupBlobSize(h.Type, h.ID); !ok { + pb := srcRepo.LookupBlob(h.Type, h.ID) + copyBlobs.Insert(h) + for _, p := range pb { + packList.Insert(p.PackID) + } } } @@ -264,21 +266,14 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error) } - // Do we already have this tree blob? - treeHandle := restic.BlobHandle{ID: tree.ID, Type: restic.TreeBlob} - if _, ok := dstRepo.LookupBlobSize(treeHandle.Type, treeHandle.ID); !ok { - // copy raw tree bytes to avoid problems if the serialization changes - enqueue(treeHandle) - } + // copy raw tree bytes to avoid problems if the serialization changes + enqueue(restic.BlobHandle{ID: tree.ID, Type: restic.TreeBlob}) for _, entry := range tree.Nodes { // Recursion into directories is handled by StreamTrees // Copy the blobs for this file. 
for _, blobID := range entry.Content { - h := restic.BlobHandle{Type: restic.DataBlob, ID: blobID} - if _, ok := dstRepo.LookupBlobSize(h.Type, h.ID); !ok { - enqueue(h) - } + enqueue(restic.BlobHandle{Type: restic.DataBlob, ID: blobID}) } } } From cf409b7c66319ac37306164874e34738f88fc984 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 17:40:37 +0100 Subject: [PATCH 11/16] automatically batch snapshots in copy --- cmd/restic/cmd_copy.go | 36 ++++++++++++++----------------- internal/repository/prune.go | 6 +++--- internal/repository/repository.go | 8 +++---- internal/restic/repository.go | 1 + 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index 82de6040e..efd637668 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -63,11 +63,9 @@ Exit status is 12 if the password is incorrect. type CopyOptions struct { global.SecondaryRepoOptions data.SnapshotFilter - batch bool } func (opts *CopyOptions) AddFlags(f *pflag.FlagSet) { - f.BoolVar(&opts.batch, "batch", false, "batch all snapshots to be copied into one step to optimize use of packfiles") opts.SecondaryRepoOptions.AddFlags(f, "destination", "to copy snapshots from") initMultiSnapshotFilter(f, &opts.SnapshotFilter, true) } @@ -161,7 +159,7 @@ func runCopy(ctx context.Context, opts CopyOptions, gopts global.Options, args [ selectedSnapshots := collectAllSnapshots(ctx, opts, srcSnapshotLister, srcRepo, dstSnapshotByOriginal, args, printer) - if err := copyTreeBatched(ctx, srcRepo, dstRepo, selectedSnapshots, opts, printer); err != nil { + if err := copyTreeBatched(ctx, srcRepo, dstRepo, selectedSnapshots, printer); err != nil { return err } @@ -187,35 +185,33 @@ func similarSnapshots(sna *data.Snapshot, snb *data.Snapshot) bool { return true } -// copyTreeBatched: copy multiple snapshot trees in one go, using calls to -// repository.RepackInner() for all selected snapshot trees and thereby packing the packfiles 
optimally. -// Usually each snapshot creates at least one tree packfile and one data packfile. +// copyTreeBatched copies multiple snapshots in one go. Snapshots are written after +// data equivalent to at least 10 packfiles was written. func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, - selectedSnapshots []*data.Snapshot, opts CopyOptions, printer progress.Printer) error { + selectedSnapshots []*data.Snapshot, printer progress.Printer) error { // remember already processed trees across all snapshots visitedTrees := restic.NewIDSet() - batchSize := 1 - if opts.batch { - batchSize = len(selectedSnapshots) - } for len(selectedSnapshots) > 0 { var batch []*data.Snapshot + batchSize := uint64(0) + targetSize := uint64(dstRepo.PackSize()) * 10 // call WithBlobUploader() once and then loop over all selectedSnapshots err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { - for len(selectedSnapshots) > 0 && len(batch) < batchSize { + for len(selectedSnapshots) > 0 && batchSize < targetSize { sn := selectedSnapshots[0] selectedSnapshots = selectedSnapshots[1:] batch = append(batch, sn) printer.P("\n%v", sn) printer.P(" copy started, this may take a while...") - err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) + sizeBlobs, err := copyTree(ctx, srcRepo, dstRepo, visitedTrees, *sn.Tree, printer, uploader) if err != nil { return err } debug.Log("tree copied") + batchSize += sizeBlobs } return nil @@ -237,7 +233,7 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res } func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Repository, - visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver) error { + visitedTrees restic.IDSet, rootTreeID restic.ID, printer progress.Printer, uploader restic.BlobSaver) (uint64, error) { wg, wgCtx := errgroup.WithContext(ctx) @@ 
-281,21 +277,20 @@ func copyTree(ctx context.Context, srcRepo restic.Repository, dstRepo restic.Rep }) err := wg.Wait() if err != nil { - return err + return 0, err } - copyStats(srcRepo, copyBlobs, packList, printer) + sizeBlobs := copyStats(srcRepo, copyBlobs, packList, printer) bar := printer.NewCounter("packs copied") err = repository.CopyBlobs(ctx, srcRepo, dstRepo, uploader, packList, copyBlobs, bar, printer.P) if err != nil { - return errors.Fatalf("%s", err) + return 0, errors.Fatalf("%s", err) } - return nil + return sizeBlobs, nil } // copyStats: print statistics for the blobs to be copied -func copyStats(srcRepo restic.Repository, copyBlobs restic.BlobSet, packList restic.IDSet, printer progress.Printer) { - +func copyStats(srcRepo restic.Repository, copyBlobs restic.BlobSet, packList restic.IDSet, printer progress.Printer) uint64 { // count and size countBlobs := 0 sizeBlobs := uint64(0) @@ -309,6 +304,7 @@ func copyStats(srcRepo restic.Repository, copyBlobs restic.BlobSet, packList res printer.V(" copy %d blobs with disk size %s in %d packfiles\n", countBlobs, ui.FormatBytes(uint64(sizeBlobs)), len(packList)) + return sizeBlobs } func copySaveSnapshot(ctx context.Context, sn *data.Snapshot, dstRepo restic.Repository, printer progress.Printer) error { diff --git a/internal/repository/prune.go b/internal/repository/prune.go index cc36c7a96..772765129 100644 --- a/internal/repository/prune.go +++ b/internal/repository/prune.go @@ -105,7 +105,7 @@ func PlanPrune(ctx context.Context, opts PruneOptions, repo *Repository, getUsed if repo.Config().Version < 2 && opts.RepackUncompressed { return nil, fmt.Errorf("compression requires at least repository format version 2") } - if opts.SmallPackBytes > uint64(repo.packSize()) { + if opts.SmallPackBytes > uint64(repo.PackSize()) { return nil, fmt.Errorf("repack-smaller-than exceeds repository packsize") } @@ -329,12 +329,12 @@ func decidePackAction(ctx context.Context, opts PruneOptions, repo *Repository, var 
repackSmallCandidates []packInfoWithID repoVersion := repo.Config().Version // only repack very small files by default - targetPackSize := repo.packSize() / 25 + targetPackSize := repo.PackSize() / 25 if opts.SmallPackBytes > 0 { targetPackSize = uint(opts.SmallPackBytes) } else if opts.RepackSmall { // consider files with at least 80% of the target size as large enough - targetPackSize = repo.packSize() / 5 * 4 + targetPackSize = repo.PackSize() / 5 * 4 } // loop over all packs and decide what to do diff --git a/internal/repository/repository.go b/internal/repository/repository.go index d0da2e108..3b26b1f90 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -154,8 +154,8 @@ func (r *Repository) Config() restic.Config { return r.cfg } -// packSize return the target size of a pack file when uploading -func (r *Repository) packSize() uint { +// PackSize returns the target size of a pack file when uploading +func (r *Repository) PackSize() uint { return r.opts.PackSize } @@ -590,8 +590,8 @@ func (r *Repository) startPackUploader(ctx context.Context, wg *errgroup.Group) innerWg, ctx := errgroup.WithContext(ctx) r.packerWg = innerWg r.uploader = newPackerUploader(ctx, innerWg, r, r.Connections()) - r.treePM = newPackerManager(r.key, restic.TreeBlob, r.packSize(), r.packerCount, r.uploader.QueuePacker) - r.dataPM = newPackerManager(r.key, restic.DataBlob, r.packSize(), r.packerCount, r.uploader.QueuePacker) + r.treePM = newPackerManager(r.key, restic.TreeBlob, r.PackSize(), r.packerCount, r.uploader.QueuePacker) + r.dataPM = newPackerManager(r.key, restic.DataBlob, r.PackSize(), r.packerCount, r.uploader.QueuePacker) wg.Go(func() error { return innerWg.Wait() diff --git a/internal/restic/repository.go b/internal/restic/repository.go index cf3ec7834..2f1373641 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -18,6 +18,7 @@ type Repository interface { // Connections returns the maximum number of
concurrent backend operations Connections() uint Config() Config + PackSize() uint Key() *crypto.Key LoadIndex(ctx context.Context, p TerminalCounterFactory) error From 7d08c9282a42d26c55aa63597ddae579e7838cfc Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 17:47:46 +0100 Subject: [PATCH 12/16] align docs --- changelog/unreleased/issue-5453 | 12 +++++------- doc/045_working_with_repos.rst | 8 -------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/changelog/unreleased/issue-5453 b/changelog/unreleased/issue-5453 index 686474597..12c09e0b0 100644 --- a/changelog/unreleased/issue-5453 +++ b/changelog/unreleased/issue-5453 @@ -1,12 +1,10 @@ -Enhancement: `restic copy` can now spool packfiles across muliple snapshots +Enhancement: `copy` copies snapshots in batches -When using `restic copy` used to save all newly created packfiles when finishing one snapshot, -even when the actual packfile size was quite small. This applied particularly to -incremental backups, when there was only small changes between individual backups. +The `copy` command used to copy snapshots individually, even if this resulted in creating pack files +smaller than the target pack size. In particular, this resulted in many small files +when copying small incremental snapshots. -When using the new option `--batch`, `restic copy` now creates one large request list -which contains all blobs from all snapshots to be copied and then executes the -copy operation. +Now, `copy` copies multiple snapshots at once to avoid creating small files. 
https://github.com/restic/restic/issues/5175 https://github.com/restic/restic/pull/5464 diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 10f898444..5fadac637 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -216,14 +216,6 @@ example from a local to a remote repository, you can use the ``copy`` command: snapshot 4e5d5487 of [/home/user/work] at 2020-05-01 22:44:07.012113 +0200 CEST by user@kasimir skipping snapshot 4e5d5487, was already copied to snapshot 50eb62b7 -In case you want to copy a repository which contains many backups with little changes -between ``restic backup`` runs, you can use the option ``--batch`` to make full use of -the ``--pack-size`` option. Newly created packfiles are saved when the ``copy`` -operation for one snapshot finishes. The option ``--batch`` disregards these snapshot boundaries -and creates optimally filled packfiles. You can always always achieve the same effect -by running ``restic prune`` after a ``restic copy`` operation, but this involves the extra -``prune`` step. - The example command copies all snapshots from the source repository ``/srv/restic-repo`` to the destination repository ``/srv/restic-repo-copy``. 
Snapshots which have previously been copied between repositories will From 39db78446f786b8f547f7435b8496a456f0f624c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 19:05:55 +0100 Subject: [PATCH 13/16] Simplify test --- cmd/restic/cmd_copy_integration_test.go | 199 +++++------------------- 1 file changed, 39 insertions(+), 160 deletions(-) diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go index 41b8355da..b2e430cc5 100644 --- a/cmd/restic/cmd_copy_integration_test.go +++ b/cmd/restic/cmd_copy_integration_test.go @@ -6,7 +6,6 @@ import ( "path/filepath" "testing" - "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/global" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" @@ -31,25 +30,6 @@ func testRunCopy(t testing.TB, srcGopts global.Options, dstGopts global.Options) })) } -func testRunCopyBatched(t testing.TB, srcGopts global.Options, dstGopts global.Options) { - gopts := srcGopts - gopts.Repo = dstGopts.Repo - gopts.Password = dstGopts.Password - gopts.InsecureNoPassword = dstGopts.InsecureNoPassword - copyOpts := CopyOptions{ - SecondaryRepoOptions: global.SecondaryRepoOptions{ - Repo: srcGopts.Repo, - Password: srcGopts.Password, - InsecureNoPassword: srcGopts.InsecureNoPassword, - }, - batch: true, - } - - rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error { - return runCopy(context.TODO(), copyOpts, gopts, nil, gopts.Term) - })) -} - func TestCopy(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() @@ -107,176 +87,75 @@ func TestCopy(t *testing.T) { rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots") } -// packfile with size and type -type packInfo struct { - Type string - size int64 - numberBlobs int -} - -// testGetUsedBlobs: call data.FindUsedBlobs for all snapshots in repositpry -func testGetUsedBlobs(t *testing.T, repo restic.Repository) 
(usedBlobs restic.BlobSet) { - selectedTrees := make([]restic.ID, 0, 3) - usedBlobs = restic.NewBlobSet() - - snapshotLister, err := restic.MemorizeList(context.TODO(), repo, restic.SnapshotFile) - rtest.OK(t, err) - rtest.OK(t, repo.LoadIndex(context.TODO(), nil)) - - // gather all snapshots - nullFilter := &data.SnapshotFilter{} - err = nullFilter.FindAll(context.TODO(), snapshotLister, repo, nil, func(_ string, sn *data.Snapshot, err error) error { +func testPackAndBlobCounts(t testing.TB, gopts global.Options) (countTreePacks int, countDataPacks int, countBlobs int) { + rtest.OK(t, withTermStatus(t, gopts, func(ctx context.Context, gopts global.Options) error { + printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) + _, repo, unlock, err := openWithReadLock(ctx, gopts, false, printer) rtest.OK(t, err) - selectedTrees = append(selectedTrees, *sn.Tree) - return nil - }) - rtest.OK(t, err) + defer unlock() - rtest.OK(t, data.FindUsedBlobs(context.TODO(), repo, selectedTrees, usedBlobs, nil)) - - return usedBlobs -} - -// getPackfileInfo: get packfiles, their length, type and number of blobs in packfile -func getPackfileInfo(t *testing.T, repo restic.Repository) (packfiles map[restic.ID]packInfo) { - packfiles = make(map[restic.ID]packInfo) - - rtest.OK(t, repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error { - blobs, _, err := repo.ListPack(context.TODO(), id, size) - rtest.OK(t, err) - rtest.Assert(t, len(blobs) > 0, "a packfile should contain at least one blob") - - Type := "" - if len(blobs) > 0 { - Type = blobs[0].Type.String() - } - - packfiles[id] = packInfo{ - Type: Type, - size: size, - numberBlobs: len(blobs), - } + rtest.OK(t, repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error { + blobs, _, err := repo.ListPack(context.TODO(), id, size) + rtest.OK(t, err) + rtest.Assert(t, len(blobs) > 0, "a packfile should contain at least one blob") + switch blobs[0].Type { + case 
restic.TreeBlob: + countTreePacks++ + case restic.DataBlob: + countDataPacks++ + } + countBlobs += len(blobs) + return nil + })) return nil })) - return packfiles -} - -// get various counts from the packfiles in the repository -func getCounts(t *testing.T, repo restic.Repository) (int, int, int) { - countTreePacks := 0 - countDataPacks := 0 - countBlobs := 0 - for _, item := range getPackfileInfo(t, repo) { - switch item.Type { - case "tree": - countTreePacks++ - case "data": - countDataPacks++ - } - countBlobs += item.numberBlobs - } - return countTreePacks, countDataPacks, countBlobs } func TestCopyBatched(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() - env3, cleanup3 := withTestEnvironment(t) - defer cleanup3() + envDst, cleanupDst := withTestEnvironment(t) + defer cleanupDst() testSetupBackupData(t, env) opts := BackupOptions{} testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, opts, env.gopts) testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts) testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts) - testRunCheck(t, env.gopts) // batch copy - testRunInit(t, env3.gopts) - testRunCopyBatched(t, env.gopts, env3.gopts) + testRunInit(t, envDst.gopts) + testRunCopy(t, env.gopts, envDst.gopts) // check integrity of the copy - testRunCheck(t, env3.gopts) - - snapshotIDs := testListSnapshots(t, env.gopts, 3) - copiedSnapshotIDs := testListSnapshots(t, env3.gopts, 3) + testRunCheck(t, envDst.gopts) // check that the copied snapshots have the same tree contents as the old ones (= identical tree hash) - origRestores := make(map[string]struct{}) - for i, snapshotID := range snapshotIDs { - restoredir := filepath.Join(env.base, fmt.Sprintf("restore%d", i)) - origRestores[restoredir] = struct{}{} - testRunRestore(t, env.gopts, restoredir, snapshotID.String()) + snapshotIDs := testListSnapshots(t, env.gopts, 3) + snapshotTrees := 
make(map[restic.ID]struct{}) + for _, snapshotID := range snapshotIDs { + snapshot := testLoadSnapshot(t, env.gopts, snapshotID) + snapshotTrees[*snapshot.Tree] = struct{}{} } - for i, snapshotID := range copiedSnapshotIDs { - restoredir := filepath.Join(env3.base, fmt.Sprintf("restore%d", i)) - testRunRestore(t, env3.gopts, restoredir, snapshotID.String()) - foundMatch := false - for cmpdir := range origRestores { - diff := directoriesContentsDiff(t, restoredir, cmpdir) - if diff == "" { - delete(origRestores, cmpdir) - foundMatch = true - } - } - - rtest.Assert(t, foundMatch, "found no counterpart for snapshot %v", snapshotID) + copiedSnapshotIDs := testListSnapshots(t, envDst.gopts, 3) + copiedSnapshotTrees := make(map[restic.ID]struct{}) + for _, snapshotID := range copiedSnapshotIDs { + snapshot := testLoadSnapshot(t, envDst.gopts, snapshotID) + copiedSnapshotTrees[*snapshot.Tree] = struct{}{} } - rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots") + rtest.Equals(t, snapshotTrees, copiedSnapshotTrees, "snapshot trees must be identical after copy") - // get access to the repositories - var repo1 restic.Repository - var unlock1 func() - var err error - rtest.OK(t, withTermStatus(t, env.gopts, func(ctx context.Context, gopts global.Options) error { - printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) - _, repo1, unlock1, err = openWithReadLock(ctx, gopts, false, printer) - rtest.OK(t, err) - defer unlock1() - return err - })) + _, _, countBlobs := testPackAndBlobCounts(t, env.gopts) + countTreePacksDst, countDataPacksDst, countBlobsDst := testPackAndBlobCounts(t, envDst.gopts) - var repo3 restic.Repository - var unlock3 func() - rtest.OK(t, withTermStatus(t, env3.gopts, func(ctx context.Context, gopts global.Options) error { - printer := ui.NewProgressPrinter(gopts.JSON, gopts.Verbosity, gopts.Term) - _, repo3, unlock3, err = openWithReadLock(ctx, gopts, false, printer) - rtest.OK(t, err) - defer unlock3() - return err - 
})) - - usedBlobs1 := testGetUsedBlobs(t, repo1) - usedBlobs3 := testGetUsedBlobs(t, repo3) - rtest.Assert(t, len(usedBlobs1) == len(usedBlobs3), - "used blob length must be identical in both repositories, but is not: (normal) %d <=> (batched) %d", - len(usedBlobs1), len(usedBlobs3)) - - // compare usedBlobs1 <=> usedBlobs3 - good := true - for bh := range usedBlobs1 { - if !usedBlobs3.Has(bh) { - good = false - break - } - } - rtest.Assert(t, good, "all blobs in both repositories should be equal but they are not") - - _, _, countBlobs1 := getCounts(t, repo1) - countTreePacks3, countDataPacks3, countBlobs3 := getCounts(t, repo3) - - rtest.Assert(t, countBlobs1 == countBlobs3, - "expected 1 blob count in boths repos to be equal, but got %d and %d blobs", - countBlobs1, countBlobs3) - - rtest.Assert(t, countTreePacks3 == 1 && countDataPacks3 == 1, - "expected 1 data packfile and 1 tree packfile, but got %d trees and %d data packfiles", - countTreePacks3, countDataPacks3) + rtest.Equals(t, countBlobs, countBlobsDst, "expected blob count in boths repos to be equal") + rtest.Equals(t, countTreePacksDst, 1, "expected 1 tree packfile") + rtest.Equals(t, countDataPacksDst, 1, "expected 1 data packfile") } func TestCopyIncremental(t *testing.T) { From 857b42fca495e99c05858430e539caea7ec5f6b1 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 19:08:49 +0100 Subject: [PATCH 14/16] merge into existing copy test --- cmd/restic/cmd_copy_integration_test.go | 52 ++++--------------------- 1 file changed, 8 insertions(+), 44 deletions(-) diff --git a/cmd/restic/cmd_copy_integration_test.go b/cmd/restic/cmd_copy_integration_test.go index b2e430cc5..6105acfe4 100644 --- a/cmd/restic/cmd_copy_integration_test.go +++ b/cmd/restic/cmd_copy_integration_test.go @@ -85,6 +85,14 @@ func TestCopy(t *testing.T) { } rtest.Assert(t, len(origRestores) == 0, "found not copied snapshots") + + // check that snapshots were properly batched while copying + _, _, countBlobs := 
testPackAndBlobCounts(t, env.gopts) + countTreePacksDst, countDataPacksDst, countBlobsDst := testPackAndBlobCounts(t, env2.gopts) + + rtest.Equals(t, countBlobs, countBlobsDst, "expected blob count in boths repos to be equal") + rtest.Equals(t, countTreePacksDst, 1, "expected 1 tree packfile") + rtest.Equals(t, countDataPacksDst, 1, "expected 1 data packfile") } func testPackAndBlobCounts(t testing.TB, gopts global.Options) (countTreePacks int, countDataPacks int, countBlobs int) { @@ -114,50 +122,6 @@ func testPackAndBlobCounts(t testing.TB, gopts global.Options) (countTreePacks i return countTreePacks, countDataPacks, countBlobs } -func TestCopyBatched(t *testing.T) { - env, cleanup := withTestEnvironment(t) - defer cleanup() - envDst, cleanupDst := withTestEnvironment(t) - defer cleanupDst() - - testSetupBackupData(t, env) - opts := BackupOptions{} - testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9")}, opts, env.gopts) - testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts) - testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts) - - // batch copy - testRunInit(t, envDst.gopts) - testRunCopy(t, env.gopts, envDst.gopts) - - // check integrity of the copy - testRunCheck(t, envDst.gopts) - - // check that the copied snapshots have the same tree contents as the old ones (= identical tree hash) - snapshotIDs := testListSnapshots(t, env.gopts, 3) - snapshotTrees := make(map[restic.ID]struct{}) - for _, snapshotID := range snapshotIDs { - snapshot := testLoadSnapshot(t, env.gopts, snapshotID) - snapshotTrees[*snapshot.Tree] = struct{}{} - } - - copiedSnapshotIDs := testListSnapshots(t, envDst.gopts, 3) - copiedSnapshotTrees := make(map[restic.ID]struct{}) - for _, snapshotID := range copiedSnapshotIDs { - snapshot := testLoadSnapshot(t, envDst.gopts, snapshotID) - copiedSnapshotTrees[*snapshot.Tree] = struct{}{} - } - - rtest.Equals(t, snapshotTrees, 
copiedSnapshotTrees, "snapshot trees must be identical after copy") - - _, _, countBlobs := testPackAndBlobCounts(t, env.gopts) - countTreePacksDst, countDataPacksDst, countBlobsDst := testPackAndBlobCounts(t, envDst.gopts) - - rtest.Equals(t, countBlobs, countBlobsDst, "expected blob count in boths repos to be equal") - rtest.Equals(t, countTreePacksDst, 1, "expected 1 tree packfile") - rtest.Equals(t, countDataPacksDst, 1, "expected 1 data packfile") -} - func TestCopyIncremental(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() From e79b01d82f1a048fee9e769c2382877f2400b15d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 21:46:03 +0100 Subject: [PATCH 15/16] more aggressive batching --- cmd/restic/cmd_copy.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index efd637668..948782f82 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "time" "github.com/restic/restic/internal/data" "github.com/restic/restic/internal/debug" @@ -193,13 +194,17 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res // remember already processed trees across all snapshots visitedTrees := restic.NewIDSet() + targetSize := uint64(dstRepo.PackSize()) * 100 + minDuration := 1 * time.Minute + for len(selectedSnapshots) > 0 { var batch []*data.Snapshot batchSize := uint64(0) - targetSize := uint64(dstRepo.PackSize()) * 10 + startTime := time.Now() + // call WithBlobUploader() once and then loop over all selectedSnapshots err := dstRepo.WithBlobUploader(ctx, func(ctx context.Context, uploader restic.BlobSaver) error { - for len(selectedSnapshots) > 0 && batchSize < targetSize { + for len(selectedSnapshots) > 0 && (batchSize < targetSize || time.Since(startTime) < minDuration) { sn := selectedSnapshots[0] selectedSnapshots = selectedSnapshots[1:] batch = append(batch, sn) From 
f9e5660e75de02e1a0b1fddec58470864d5a2ef3 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 23 Nov 2025 22:01:53 +0100 Subject: [PATCH 16/16] output which source and target snapshot belong together --- cmd/restic/cmd_copy.go | 6 +++++- doc/045_working_with_repos.rst | 13 ++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index 948782f82..bf86de8a7 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -225,6 +225,10 @@ func copyTreeBatched(ctx context.Context, srcRepo restic.Repository, dstRepo res return err } + // add a newline to separate saved snapshot messages from the other messages + if len(batch) > 1 { + printer.P("") + } // save all the snapshots for _, sn := range batch { err := copySaveSnapshot(ctx, sn, dstRepo, printer) @@ -322,6 +326,6 @@ func copySaveSnapshot(ctx context.Context, sn *data.Snapshot, dstRepo restic.Rep if err != nil { return err } - printer.P("snapshot %s saved", newID.Str()) + printer.P("snapshot %s saved, copied from source snapshot %s", newID.Str(), sn.ID().Str()) return nil } diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 5fadac637..797ea9f9d 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -205,21 +205,28 @@ example from a local to a remote repository, you can use the ``copy`` command: .. code-block:: console - $ restic -r /srv/restic-repo-copy copy --from-repo /srv/restic-repo + $ restic -r /srv/restic-repo-copy copy --from-repo /srv/restic-repo --verbose repository d6504c63 opened successfully repository 3dd0878c opened successfully + [0:00] 100.00% 2 / 2 index files loaded + [0:00] 100.00% 7 / 7 index files loaded snapshot 410b18a2 of [/home/user/work] at 2020-06-09 23:15:57.305305 +0200 CEST by user@kasimir copy started, this may take a while... 
- snapshot 7a746a07 saved + [0:00] 100.00% 13 / 13 packs copied snapshot 4e5d5487 of [/home/user/work] at 2020-05-01 22:44:07.012113 +0200 CEST by user@kasimir skipping snapshot 4e5d5487, was already copied to snapshot 50eb62b7 + snapshot 7a746a07 saved, copied from source snapshot 410b18a2 + The example command copies all snapshots from the source repository ``/srv/restic-repo`` to the destination repository ``/srv/restic-repo-copy``. Snapshots which have previously been copied between repositories will -be skipped by later copy runs. +be skipped by later copy runs. Information about skipped snapshots is only +printed when ``--verbose`` is passed to the command. For efficiency reasons, +the snapshots are copied in batches, so the ``snapshot [...] saved`` +messages may appear some time after the snapshot content was copied. .. important:: This process will have to both download (read) and upload (write) the entire snapshot(s) due to the different encryption keys used in the