From dd2d562b7bd7a6bf45b62cefbb406727c6944eee Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Wed, 1 Oct 2025 22:39:04 +0530 Subject: [PATCH 1/6] azure: enhanced upload with single PutBlob API and configurable upload methods --- internal/backend/azure/azure.go | 65 +++++++++++++++++++++++++++++--- internal/backend/azure/config.go | 21 +++++++++-- 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/internal/backend/azure/azure.go b/internal/backend/azure/azure.go index 46df66113..087c8840f 100644 --- a/internal/backend/azure/azure.go +++ b/internal/backend/azure/azure.go @@ -42,6 +42,7 @@ type Backend struct { } const saveLargeSize = 256 * 1024 * 1024 +const singleBlobMaxSize = 5000 * 1024 * 1024 // 5000 MiB - max size for Put Blob API in service version 2019-12-12+ const defaultListMaxItems = 5000 // make sure that *Backend implements backend.Backend @@ -53,6 +54,12 @@ func NewFactory() location.Factory { func open(cfg Config, rt http.RoundTripper) (*Backend, error) { debug.Log("open, config %#v", cfg) + + // Validate configuration + if err := cfg.Validate(); err != nil { + return nil, err + } + var client *azContainer.Client var err error @@ -255,12 +262,35 @@ func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.Rewind } var err error - if rd.Length() < saveLargeSize { - // if it's smaller than 256miB, then just create the file directly from the reader - err = be.saveSmall(ctx, objName, rd, accessTier) - } else { - // otherwise use the more complicated method - err = be.saveLarge(ctx, objName, rd, accessTier) + uploadMethod := strings.ToLower(be.cfg.UploadMethod) + fileSize := rd.Length() + + switch uploadMethod { + case "single": + // Always use single blob upload + if fileSize > singleBlobMaxSize { + return errors.Errorf("file size %d exceeds single blob limit of %d MiB", fileSize, singleBlobMaxSize/1024/1024) + } + err = be.saveSingleBlob(ctx, objName, rd, accessTier) + + case "blocks": + // Legacy block-based upload method + if fileSize < saveLargeSize { + err = be.saveSmall(ctx, objName, rd, accessTier) + } else { + err = be.saveLarge(ctx, objName, rd, accessTier) + } + + case "auto", "": + // Automatic selection: use single blob for files <= 5000 MiB, blocks for larger files + if fileSize <= singleBlobMaxSize { + err = be.saveSingleBlob(ctx, objName, rd, accessTier) + } else { + err = be.saveLarge(ctx, objName, rd, accessTier) + } + + default: + return errors.Errorf("invalid upload method %q, must be 'auto', 'single', or 'blocks'", uploadMethod) } return err @@ -348,6 +378,29 @@ func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.Rew return errors.Wrap(err, "CommitBlockList") } +// saveSingleBlob uploads data using a single Put Blob operation. +// This method is more efficient for files under 5000 MiB as it requires only one API call +// instead of the two calls (StageBlock + CommitBlockList) required by the block-based approach. 
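+// The payload is buffered completely in memory before the upload, which is
+// why callers cap this path at singleBlobMaxSize.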
+func (be *Backend) saveSingleBlob(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error { + blockBlobClient := be.container.NewBlockBlobClient(objName) + + buf := make([]byte, rd.Length()) + _, err := io.ReadFull(rd, buf) + if err != nil { + return errors.Wrap(err, "ReadFull") + } + + reader := bytes.NewReader(buf) + opts := &blockblob.UploadOptions{ + Tier: &accessTier, + TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()), + } + + debug.Log("Upload single blob %v with %d bytes", objName, len(buf)) + _, err = blockBlobClient.Upload(ctx, streaming.NopCloser(reader), opts) + return errors.Wrap(err, "Upload") +} + // Load runs fn with a reader that yields the contents of the file at h at the // given offset. func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error { diff --git a/internal/backend/azure/config.go b/internal/backend/azure/config.go index ee7ac51d8..51c59bf9d 100644 --- a/internal/backend/azure/config.go +++ b/internal/backend/azure/config.go @@ -22,14 +22,16 @@ type Config struct { Container string Prefix string - Connections uint `option:"connections" help:"set a limit for the number of concurrent connections (default: 5)"` - AccessTier string `option:"access-tier" help:"set the access tier for the blob storage (default: inferred from the storage account defaults)"` + Connections uint `option:"connections" help:"set a limit for the number of concurrent connections (default: 5)"` + AccessTier string `option:"access-tier" help:"set the access tier for the blob storage (default: inferred from the storage account defaults)"` + UploadMethod string `option:"upload-method" help:"blob upload method: 'auto' (single blob for <=5000 MiB), 'single' (always single blob), or 'blocks' (legacy block-based) (default: auto)"` } // NewConfig returns a new Config with the default values filled in. func NewConfig() Config { return Config{ - Connections: 5, + Connections: 5, + UploadMethod: "auto", } } @@ -85,3 +87,16 @@ func (cfg *Config) ApplyEnvironment(prefix string) { cfg.EndpointSuffix = os.Getenv(prefix + "AZURE_ENDPOINT_SUFFIX") } } + +// Validate checks the configuration for errors. 
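+// It lowercases UploadMethod and rejects any value other than "auto",
+// "single", "blocks", or the empty string (which falls back to the default).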
+func (cfg *Config) Validate() error { + // Normalize upload method to lowercase + uploadMethod := strings.ToLower(cfg.UploadMethod) + if uploadMethod != "auto" && uploadMethod != "single" && uploadMethod != "blocks" && uploadMethod != "" { + return errors.Errorf("invalid upload method %q, must be 'auto', 'single', or 'blocks'", cfg.UploadMethod) + } + if uploadMethod != "" { + cfg.UploadMethod = uploadMethod + } + return nil +} From e80e8321309879883d45c30e72c8ad0533f1df00 Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Sat, 4 Oct 2025 00:14:03 +0530 Subject: [PATCH 2/6] azure: remove saveSmall, use only PutBlob API --- internal/backend/azure/azure.go | 87 +++++++------------------------- internal/backend/azure/config.go | 21 ++------ 2 files changed, 20 insertions(+), 88 deletions(-) diff --git a/internal/backend/azure/azure.go b/internal/backend/azure/azure.go index 087c8840f..a833012c3 100644 --- a/internal/backend/azure/azure.go +++ b/internal/backend/azure/azure.go @@ -41,8 +41,8 @@ type Backend struct { accessTier blob.AccessTier } -const saveLargeSize = 256 * 1024 * 1024 -const singleBlobMaxSize = 5000 * 1024 * 1024 // 5000 MiB - max size for Put Blob API in service version 2019-12-12+ +const singleBlobMaxSize = 5000 * 1024 * 1024 // 5000 MiB - max size for Put Blob API in service version 2019-12-12+ +const singleBlockMaxSize = 4000 * 1024 * 1024 // 4000 MiB - max size for StageBlock API in service version 2019-12-12+ const defaultListMaxItems = 5000 // make sure that *Backend implements backend.Backend @@ -55,11 +55,6 @@ func NewFactory() location.Factory { func open(cfg Config, rt http.RoundTripper) (*Backend, error) { debug.Log("open, config %#v", cfg) - // Validate configuration - if err := cfg.Validate(); err != nil { - return nil, err - } - var client *azContainer.Client var err error @@ -262,46 +257,25 @@ func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.Rewind } var err error - uploadMethod := strings.ToLower(be.cfg.UploadMethod) fileSize := rd.Length() - switch uploadMethod { - case "single": - // Always use single blob upload - if fileSize > singleBlobMaxSize { - return errors.Errorf("file size %d exceeds single blob limit of %d MiB", fileSize, singleBlobMaxSize/1024/1024) - } + // If the file size is less than or equal to the max size for a single blob, use the single blob upload + // otherwise, use the block-based upload + if fileSize <= singleBlobMaxSize { err = be.saveSingleBlob(ctx, objName, rd, accessTier) - - case "blocks": - // Legacy block-based upload method - if fileSize < saveLargeSize { - err = be.saveSmall(ctx, objName, rd, accessTier) - } else { - err = be.saveLarge(ctx, objName, rd, accessTier) - } - - case "auto", "": - // Automatic selection: use single blob for files <= 5000 MiB, blocks for larger files - if fileSize <= singleBlobMaxSize { - err = be.saveSingleBlob(ctx, objName, rd, accessTier) - } else { - err = be.saveLarge(ctx, objName, rd, accessTier) - } - - default: - return errors.Errorf("invalid upload method %q, must be 'auto', 'single', or 'blocks'", uploadMethod) + } else { + err = be.saveLarge(ctx, objName, rd, accessTier) } return err } -func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error { +// saveSingleBlob uploads data using a single Put Blob operation. 
+// This method is more efficient for files under 5000 MiB as it requires only one API call +// instead of the two calls (StageBlock + CommitBlockList) required by the block-based approach. +func (be *Backend) saveSingleBlob(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error { blockBlobClient := be.container.NewBlockBlobClient(objName) - // upload it as a new "block", use the base64 hash for the ID - id := base64.StdEncoding.EncodeToString(rd.Hash()) - buf := make([]byte, rd.Length()) _, err := io.ReadFull(rd, buf) if err != nil { @@ -309,24 +283,20 @@ func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.Rew } reader := bytes.NewReader(buf) - _, err = blockBlobClient.StageBlock(ctx, id, streaming.NopCloser(reader), &blockblob.StageBlockOptions{ + opts := &blockblob.UploadOptions{ + Tier: &accessTier, TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()), - }) - if err != nil { - return errors.Wrap(err, "StageBlock") } - blocks := []string{id} - _, err = blockBlobClient.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{ - Tier: &accessTier, - }) - return errors.Wrap(err, "CommitBlockList") + debug.Log("Upload single blob %v with %d bytes", objName, len(buf)) + _, err = blockBlobClient.Upload(ctx, streaming.NopCloser(reader), opts) + return errors.Wrap(err, "Upload") } func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error { blockBlobClient := be.container.NewBlockBlobClient(objName) - buf := make([]byte, 100*1024*1024) + buf := make([]byte, singleBlockMaxSize) blocks := []string{} uploadedBytes := 0 @@ -378,29 +348,6 @@ func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.Rew return errors.Wrap(err, "CommitBlockList") } -// saveSingleBlob uploads data using a single Put Blob operation. -// This method is more efficient for files under 5000 MiB as it requires only one API call -// instead of the two calls (StageBlock + CommitBlockList) required by the block-based approach. -func (be *Backend) saveSingleBlob(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error { - blockBlobClient := be.container.NewBlockBlobClient(objName) - - buf := make([]byte, rd.Length()) - _, err := io.ReadFull(rd, buf) - if err != nil { - return errors.Wrap(err, "ReadFull") - } - - reader := bytes.NewReader(buf) - opts := &blockblob.UploadOptions{ - Tier: &accessTier, - TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()), - } - - debug.Log("Upload single blob %v with %d bytes", objName, len(buf)) - _, err = blockBlobClient.Upload(ctx, streaming.NopCloser(reader), opts) - return errors.Wrap(err, "Upload") -} - // Load runs fn with a reader that yields the contents of the file at h at the // given offset. 
func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error { diff --git a/internal/backend/azure/config.go b/internal/backend/azure/config.go index 51c59bf9d..ee7ac51d8 100644 --- a/internal/backend/azure/config.go +++ b/internal/backend/azure/config.go @@ -22,16 +22,14 @@ type Config struct { Container string Prefix string - Connections uint `option:"connections" help:"set a limit for the number of concurrent connections (default: 5)"` - AccessTier string `option:"access-tier" help:"set the access tier for the blob storage (default: inferred from the storage account defaults)"` - UploadMethod string `option:"upload-method" help:"blob upload method: 'auto' (single blob for <=5000 MiB), 'single' (always single blob), or 'blocks' (legacy block-based) (default: auto)"` + Connections uint `option:"connections" help:"set a limit for the number of concurrent connections (default: 5)"` + AccessTier string `option:"access-tier" help:"set the access tier for the blob storage (default: inferred from the storage account defaults)"` } // NewConfig returns a new Config with the default values filled in. func NewConfig() Config { return Config{ - Connections: 5, - UploadMethod: "auto", + Connections: 5, } } @@ -87,16 +85,3 @@ func (cfg *Config) ApplyEnvironment(prefix string) { cfg.EndpointSuffix = os.Getenv(prefix + "AZURE_ENDPOINT_SUFFIX") } } - -// Validate checks the configuration for errors. -func (cfg *Config) Validate() error { - // Normalize upload method to lowercase - uploadMethod := strings.ToLower(cfg.UploadMethod) - if uploadMethod != "auto" && uploadMethod != "single" && uploadMethod != "blocks" && uploadMethod != "" { - return errors.Errorf("invalid upload method %q, must be 'auto', 'single', or 'blocks'", cfg.UploadMethod) - } - if uploadMethod != "" { - cfg.UploadMethod = uploadMethod - } - return nil -} From 9bcd09bde0e1c82ca36250120cd353caf470579c Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Sat, 4 Oct 2025 00:26:07 +0530 Subject: [PATCH 3/6] azure: reduce singleBlockMaxSize to accommodate 32-bit systems --- internal/backend/azure/azure.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/backend/azure/azure.go b/internal/backend/azure/azure.go index a833012c3..a136b4ee6 100644 --- a/internal/backend/azure/azure.go +++ b/internal/backend/azure/azure.go @@ -42,7 +42,7 @@ type Backend struct { } const singleBlobMaxSize = 5000 * 1024 * 1024 // 5000 MiB - max size for Put Blob API in service version 2019-12-12+ -const singleBlockMaxSize = 4000 * 1024 * 1024 // 4000 MiB - max size for StageBlock API in service version 2019-12-12+ +const singleBlockMaxSize = 2000 * 1024 * 1024 // 2000 MiB - max size for a stage block API since on 32-bit systems, the max size for an int is 2047 MiB const defaultListMaxItems = 5000 // make sure that *Backend implements backend.Backend From 34a94afc48a2a6aa83c02d7675652d2594daa0eb Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Sun, 5 Oct 2025 11:56:52 +0530 Subject: [PATCH 4/6] azure: update upload size constants to reduce memory allocation --- internal/backend/azure/azure.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/backend/azure/azure.go b/internal/backend/azure/azure.go index a136b4ee6..6669a47cc 100644 --- a/internal/backend/azure/azure.go +++ b/internal/backend/azure/azure.go @@ -41,8 +41,8 @@ type Backend struct { accessTier 
blob.AccessTier } -const singleBlobMaxSize = 5000 * 1024 * 1024 // 5000 MiB - max size for Put Blob API in service version 2019-12-12+ -const singleBlockMaxSize = 2000 * 1024 * 1024 // 2000 MiB - max size for a stage block API since on 32-bit systems, the max size for an int is 2047 MiB +const singleUploadMaxSize = 256 * 1024 * 1024 +const singleBlockMaxSize = 32 * 1024 * 1024 const defaultListMaxItems = 5000 // make sure that *Backend implements backend.Backend @@ -261,7 +261,7 @@ func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.Rewind // If the file size is less than or equal to the max size for a single blob, use the single blob upload // otherwise, use the block-based upload - if fileSize <= singleBlobMaxSize { + if fileSize <= singleUploadMaxSize { err = be.saveSingleBlob(ctx, objName, rd, accessTier) } else { err = be.saveLarge(ctx, objName, rd, accessTier) From e65ee3cba8974a97ba09a39ffdf7412972e00550 Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Sun, 5 Oct 2025 18:35:19 +0530 Subject: [PATCH 5/6] fix: keep the PutBlock Size to 100 MiB No complaints in the past. --- internal/backend/azure/azure.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/backend/azure/azure.go b/internal/backend/azure/azure.go index 6669a47cc..576985b5b 100644 --- a/internal/backend/azure/azure.go +++ b/internal/backend/azure/azure.go @@ -42,7 +42,7 @@ type Backend struct { } const singleUploadMaxSize = 256 * 1024 * 1024 -const singleBlockMaxSize = 32 * 1024 * 1024 +const singleBlockMaxSize = 100 * 1024 * 1024 const defaultListMaxItems = 5000 // make sure that *Backend implements backend.Backend From f9ff2301e88faba989de7d267df90d126c838b72 Mon Sep 17 00:00:00 2001 From: Srigovind Nayak <5201843+konidev20@users.noreply.github.com> Date: Sun, 5 Oct 2025 21:48:02 +0530 Subject: [PATCH 6/6] changelog: add a changelog entry for azure PutBlob API changes --- changelog/unreleased/issue-5531 | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 changelog/unreleased/issue-5531 diff --git a/changelog/unreleased/issue-5531 b/changelog/unreleased/issue-5531 new file mode 100644 index 000000000..3cad4e624 --- /dev/null +++ b/changelog/unreleased/issue-5531 @@ -0,0 +1,15 @@ +Enhancement: Reduce Azure storage costs by optimizing upload method + +Restic previously used Azure's PutBlock and PutBlockList APIs for all file +uploads, which resulted in two transactions per file and doubled the storage +operation costs. For backups with many pack files, this could lead to +significant Azure storage transaction fees. + +Restic now uses the more efficient PutBlob API for files up to 256 MiB, +requiring only a single transaction per file. This reduces Azure storage +operation costs by approximately 50% for typical backup workloads. Files +larger than 256 MiB continue to use the block-based upload method as required +by Azure's API limits. + +https://github.com/restic/restic/issues/5531 +https://github.com/restic/restic/pull/5544
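
For readers following the series, the upload path it converges on can be
condensed into the sketch below. It is a minimal illustration, not the
committed code: putBlobIfSmall, the payload/md5 parameters, and the standalone
package layout are assumptions made for this example (in restic the bytes and
MD5 come from the backend's RewindReader), while the SDK calls
(NewBlockBlobClient, Upload, UploadOptions, TransferValidationTypeMD5) are the
same ones the patches use.

package sketch

import (
	"bytes"
	"context"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
	azContainer "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
)

// Files up to this size go through a single Put Blob transaction; anything
// larger falls back to StageBlock + CommitBlockList (two or more transactions).
const singleUploadMaxSize = 256 * 1024 * 1024

// putBlobIfSmall uploads payload with one Put Blob call when it fits under the
// threshold and reports whether it took that path. Larger payloads are left to
// the caller's block-based upload, mirroring the dispatch in Save.
// putBlobIfSmall is a hypothetical helper for illustration only.
func putBlobIfSmall(ctx context.Context, c *azContainer.Client, name string,
	payload, md5 []byte, tier blob.AccessTier) (bool, error) {
	if int64(len(payload)) > singleUploadMaxSize {
		return false, nil // caller should use the saveLarge path instead
	}
	_, err := c.NewBlockBlobClient(name).Upload(ctx,
		// NopCloser turns the in-memory reader into the io.ReadSeekCloser
		// the SDK expects, exactly as the patched saveSingleBlob does.
		streaming.NopCloser(bytes.NewReader(payload)),
		&blockblob.UploadOptions{
			Tier:                    &tier,
			TransactionalValidation: blob.TransferValidationTypeMD5(md5),
		})
	return true, err
}

Since restic pack files are normally well below 256 MiB, virtually all uploads
take the single-transaction branch, which is where the roughly 50% transaction
saving described in the changelog comes from: one Put Blob call instead of a
PutBlock + PutBlockList pair.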