| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | package repository | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bufio" | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							|  |  |  | 	"context" | 
					
						
							|  |  |  | 	"fmt" | 
					
						
							|  |  |  | 	"io" | 
					
						
							|  |  |  | 	"sort" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/klauspost/compress/zstd" | 
					
						
							|  |  |  | 	"github.com/minio/sha256-simd" | 
					
						
							|  |  |  | 	"github.com/restic/restic/internal/backend" | 
					
						
							|  |  |  | 	"github.com/restic/restic/internal/debug" | 
					
						
							|  |  |  | 	"github.com/restic/restic/internal/errors" | 
					
						
							| 
									
										
										
										
											2024-05-24 23:11:53 +02:00
										 |  |  | 	"github.com/restic/restic/internal/repository/hashing" | 
					
						
							| 
									
										
										
										
											2024-05-24 23:09:58 +02:00
										 |  |  | 	"github.com/restic/restic/internal/repository/pack" | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 	"github.com/restic/restic/internal/restic" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // ErrPackData is returned if errors are discovered while verifying a packfile | 
					
						
							|  |  |  | type ErrPackData struct { | 
					
						
							|  |  |  | 	PackID restic.ID | 
					
						
							|  |  |  | 	errs   []error | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (e *ErrPackData) Error() string { | 
					
						
							|  |  |  | 	return fmt.Sprintf("pack %v contains %v errors: %v", e.PackID, len(e.errs), e.errs) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type partialReadError struct { | 
					
						
							|  |  |  | 	err error | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (e *partialReadError) Error() string { | 
					
						
							|  |  |  | 	return e.err.Error() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // CheckPack reads a pack and checks the integrity of all blobs. | 
					
						
							|  |  |  | func CheckPack(ctx context.Context, r *Repository, id restic.ID, blobs []restic.Blob, size int64, bufRd *bufio.Reader, dec *zstd.Decoder) error { | 
					
						
							|  |  |  | 	err := checkPackInner(ctx, r, id, blobs, size, bufRd, dec) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							| 
									
										
										
										
											2024-05-10 17:49:32 +02:00
										 |  |  | 		if r.Cache != nil { | 
					
						
							|  |  |  | 			// ignore error as there's not much we can do here | 
					
						
							|  |  |  | 			_ = r.Cache.Forget(backend.Handle{Type: restic.PackFile, Name: id.String()}) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 		// retry pack verification to detect transient errors | 
					
						
							|  |  |  | 		err2 := checkPackInner(ctx, r, id, blobs, size, bufRd, dec) | 
					
						
							|  |  |  | 		if err2 != nil { | 
					
						
							|  |  |  | 			err = err2 | 
					
						
							|  |  |  | 		} else { | 
					
						
							|  |  |  | 			err = fmt.Errorf("check successful on second attempt, original error %w", err) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return err | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-10 17:05:16 +02:00
										 |  |  | func checkPackInner(ctx context.Context, r *Repository, id restic.ID, blobs []restic.Blob, size int64, bufRd *bufio.Reader, dec *zstd.Decoder) error { | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	debug.Log("checking pack %v", id.String()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if len(blobs) == 0 { | 
					
						
							|  |  |  | 		return &ErrPackData{PackID: id, errs: []error{errors.New("pack is empty or not indexed")}} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// sanity check blobs in index | 
					
						
							|  |  |  | 	sort.Slice(blobs, func(i, j int) bool { | 
					
						
							|  |  |  | 		return blobs[i].Offset < blobs[j].Offset | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	idxHdrSize := pack.CalculateHeaderSize(blobs) | 
					
						
							|  |  |  | 	lastBlobEnd := 0 | 
					
						
							|  |  |  | 	nonContinuousPack := false | 
					
						
							|  |  |  | 	for _, blob := range blobs { | 
					
						
							|  |  |  | 		if lastBlobEnd != int(blob.Offset) { | 
					
						
							|  |  |  | 			nonContinuousPack = true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		lastBlobEnd = int(blob.Offset + blob.Length) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	// size was calculated by masterindex.PackSize, thus there's no need to recalculate it here | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var errs []error | 
					
						
							|  |  |  | 	if nonContinuousPack { | 
					
						
							|  |  |  | 		debug.Log("Index for pack contains gaps / overlaps, blobs: %v", blobs) | 
					
						
							|  |  |  | 		errs = append(errs, errors.New("index for pack contains gaps / overlapping blobs")) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// calculate hash on-the-fly while reading the pack and capture pack header | 
					
						
							|  |  |  | 	var hash restic.ID | 
					
						
							|  |  |  | 	var hdrBuf []byte | 
					
						
							|  |  |  | 	h := backend.Handle{Type: backend.PackFile, Name: id.String()} | 
					
						
							| 
									
										
										
										
											2024-05-10 17:05:16 +02:00
										 |  |  | 	err := r.be.Load(ctx, h, int(size), 0, func(rd io.Reader) error { | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 		hrd := hashing.NewReader(rd, sha256.New()) | 
					
						
							|  |  |  | 		bufRd.Reset(hrd) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-10 16:29:48 +02:00
										 |  |  | 		it := newPackBlobIterator(id, newBufReader(bufRd), 0, blobs, r.Key(), dec) | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 		for { | 
					
						
							| 
									
										
										
										
											2024-07-31 19:30:47 +02:00
										 |  |  | 			if ctx.Err() != nil { | 
					
						
							|  |  |  | 				return ctx.Err() | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 			val, err := it.Next() | 
					
						
							| 
									
										
										
										
											2024-05-10 16:29:48 +02:00
										 |  |  | 			if err == errPackEOF { | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 				break | 
					
						
							|  |  |  | 			} else if err != nil { | 
					
						
							|  |  |  | 				return &partialReadError{err} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			debug.Log("  check blob %v: %v", val.Handle.ID, val.Handle) | 
					
						
							|  |  |  | 			if val.Err != nil { | 
					
						
							|  |  |  | 				debug.Log("  error verifying blob %v: %v", val.Handle.ID, val.Err) | 
					
						
							|  |  |  | 				errs = append(errs, errors.Errorf("blob %v: %v", val.Handle.ID, val.Err)) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// skip enough bytes until we reach the possible header start | 
					
						
							|  |  |  | 		curPos := lastBlobEnd | 
					
						
							|  |  |  | 		minHdrStart := int(size) - pack.MaxHeaderSize | 
					
						
							|  |  |  | 		if minHdrStart > curPos { | 
					
						
							|  |  |  | 			_, err := bufRd.Discard(minHdrStart - curPos) | 
					
						
							|  |  |  | 			if err != nil { | 
					
						
							|  |  |  | 				return &partialReadError{err} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			curPos += minHdrStart - curPos | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// read remainder, which should be the pack header | 
					
						
							|  |  |  | 		var err error | 
					
						
							|  |  |  | 		hdrBuf = make([]byte, int(size-int64(curPos))) | 
					
						
							|  |  |  | 		_, err = io.ReadFull(bufRd, hdrBuf) | 
					
						
							|  |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return &partialReadError{err} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		hash = restic.IDFromHash(hrd.Sum(nil)) | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		var e *partialReadError | 
					
						
							|  |  |  | 		isPartialReadError := errors.As(err, &e) | 
					
						
							|  |  |  | 		// failed to load the pack file, return as further checks cannot succeed anyways | 
					
						
							|  |  |  | 		debug.Log("  error streaming pack (partial %v): %v", isPartialReadError, err) | 
					
						
							|  |  |  | 		if isPartialReadError { | 
					
						
							|  |  |  | 			return &ErrPackData{PackID: id, errs: append(errs, fmt.Errorf("partial download error: %w", err))} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// The check command suggests to repair files for which a `ErrPackData` is returned. However, this file | 
					
						
							|  |  |  | 		// completely failed to download such that there's no point in repairing anything. | 
					
						
							|  |  |  | 		return fmt.Errorf("download error: %w", err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if !hash.Equal(id) { | 
					
						
							|  |  |  | 		debug.Log("pack ID does not match, want %v, got %v", id, hash) | 
					
						
							|  |  |  | 		return &ErrPackData{PackID: id, errs: append(errs, errors.Errorf("unexpected pack id %v", hash))} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	blobs, hdrSize, err := pack.List(r.Key(), bytes.NewReader(hdrBuf), int64(len(hdrBuf))) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return &ErrPackData{PackID: id, errs: append(errs, err)} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if uint32(idxHdrSize) != hdrSize { | 
					
						
							|  |  |  | 		debug.Log("Pack header size does not match, want %v, got %v", idxHdrSize, hdrSize) | 
					
						
							|  |  |  | 		errs = append(errs, errors.Errorf("pack header size does not match, want %v, got %v", idxHdrSize, hdrSize)) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, blob := range blobs { | 
					
						
							|  |  |  | 		// Check if blob is contained in index and position is correct | 
					
						
							|  |  |  | 		idxHas := false | 
					
						
							| 
									
										
										
										
											2024-05-19 14:56:17 +02:00
										 |  |  | 		for _, pb := range r.LookupBlob(blob.BlobHandle.Type, blob.BlobHandle.ID) { | 
					
						
							| 
									
										
										
										
											2024-05-10 16:28:23 +02:00
										 |  |  | 			if pb.PackID == id && pb.Blob == blob { | 
					
						
							|  |  |  | 				idxHas = true | 
					
						
							|  |  |  | 				break | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if !idxHas { | 
					
						
							|  |  |  | 			errs = append(errs, errors.Errorf("blob %v is not contained in index or position is incorrect", blob.ID)) | 
					
						
							|  |  |  | 			continue | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if len(errs) > 0 { | 
					
						
							|  |  |  | 		return &ErrPackData{PackID: id, errs: errs} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type bufReader struct { | 
					
						
							|  |  |  | 	rd  *bufio.Reader | 
					
						
							|  |  |  | 	buf []byte | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newBufReader(rd *bufio.Reader) *bufReader { | 
					
						
							|  |  |  | 	return &bufReader{ | 
					
						
							|  |  |  | 		rd: rd, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *bufReader) Discard(n int) (discarded int, err error) { | 
					
						
							|  |  |  | 	return b.rd.Discard(n) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (b *bufReader) ReadFull(n int) (buf []byte, err error) { | 
					
						
							|  |  |  | 	if cap(b.buf) < n { | 
					
						
							|  |  |  | 		b.buf = make([]byte, n) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.buf = b.buf[:n] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	_, err = io.ReadFull(b.rd, b.buf) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		return nil, err | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return b.buf, nil | 
					
						
							|  |  |  | } |