restic/internal/repository/index/associated_data.go

198 lines
4.2 KiB
Go
Raw Normal View History

package index
import (
"iter"
"slices"
"sort"
"github.com/restic/restic/internal/restic"
)
// associatedSetSub is the per-blob-type storage of an AssociatedSet. It keeps
// two parallel slices that are always allocated with the same length.
type associatedSetSub[T any] struct {
// value holds the data item stored in each slot.
value []T
// isSet reports, per slot, whether value currently holds an entry.
isSet []bool
}
// AssociatedSet is a memory-efficient implementation of a BlobSet that can
// store a small data item for each BlobHandle. It relies on a special property
// of our MasterIndex implementation: a BlobHandle can be permanently identified
// using an offset that never changes, as MasterIndex entries cannot be modified
// (only added).
//
// The AssociatedSet can thus use an array with the size of the MasterIndex to
// store its data. Access to an individual entry is possible by looking up the
// BlobHandle's offset from the MasterIndex.
//
// BlobHandles that are not part of the MasterIndex can be stored by placing them in
// an overflow set that is expected to be empty in the normal case.
type AssociatedSet[T any] struct {
// byType holds one slot array per blob type, addressed by the offset that
// the MasterIndex reports for a BlobHandle.
byType [restic.NumBlobTypes]associatedSetSub[T]
// overflow stores the entries of handles that have no usable slot in byType.
overflow map[restic.BlobHandle]T
// idx is the MasterIndex used to translate a BlobHandle into its offset.
idx *MasterIndex
}
// NewAssociatedSet creates an empty AssociatedSet that resolves blob handles
// through mi. For every blob type except type 0 it allocates as many slots as
// mi.stableLen reports for that type, plus one.
func NewAssociatedSet[T any](mi *MasterIndex) *AssociatedSet[T] {
	set := &AssociatedSet[T]{
		idx:      mi,
		overflow: make(map[restic.BlobHandle]T),
	}

	// Blob type 0 is skipped on purpose: its slices stay nil, so entries of
	// that type can only ever live in the overflow map.
	for typ := 1; typ < len(set.byType); typ++ {
		// index starts counting at 1, hence the additional slot
		size := mi.stableLen(restic.BlobType(typ)) + 1
		sub := &set.byType[typ]
		sub.value = make([]T, size)
		sub.isSet = make([]bool, size)
	}
	return set
}
// Get returns the value stored for bh together with true. If bh is not part of
// the set, it returns the zero value of T and false.
func (a *AssociatedSet[T]) Get(bh restic.BlobHandle) (T, bool) {
	// The overflow map takes precedence over the slot arrays.
	if v, found := a.overflow[bh]; found {
		return v, true
	}

	var zero T
	slot := a.idx.blobIndex(bh)
	sub := &a.byType[bh.Type]
	// An offset of -1 or one past the allocated slots means bh has no slot in
	// this set; a slot whose flag is cleared holds no entry either.
	if slot == -1 || slot >= len(sub.value) || !sub.isSet[slot] {
		return zero, false
	}
	return sub.value[slot], true
}
// Has reports whether bh is part of the set.
func (a *AssociatedSet[T]) Has(bh restic.BlobHandle) bool {
	_, found := a.Get(bh)
	return found
}
// Set stores val for bh, replacing any value stored before. Handles without a
// usable slot in the per-type arrays are kept in the overflow map.
func (a *AssociatedSet[T]) Set(bh restic.BlobHandle, val T) {
	// A handle that already lives in the overflow map stays there.
	if _, spilled := a.overflow[bh]; spilled {
		a.overflow[bh] = val
		return
	}

	slot := a.idx.blobIndex(bh)
	sub := &a.byType[bh.Type]
	if slot == -1 || slot >= len(sub.value) {
		// no slot available for this handle, fall back to the overflow map
		a.overflow[bh] = val
		return
	}

	sub.value[slot] = val
	sub.isSet[slot] = true
}
// Insert adds bh to the set with the zero value of T as its associated data.
// A value previously stored for bh is overwritten.
func (a *AssociatedSet[T]) Insert(bh restic.BlobHandle) {
	var empty T
	a.Set(bh, empty)
}
// Delete removes bh from the set. Deleting a handle that is not part of the
// set is a no-op.
func (a *AssociatedSet[T]) Delete(bh restic.BlobHandle) {
	if _, spilled := a.overflow[bh]; spilled {
		delete(a.overflow, bh)
		return
	}

	slot := a.idx.blobIndex(bh)
	sub := &a.byType[bh.Type]
	if slot == -1 || slot >= len(sub.value) {
		// bh has no slot here, so there is nothing to remove
		return
	}
	// Only the flag is cleared; the stale value stays in place and is
	// ignored by Get until the slot is set again.
	sub.isSet[slot] = false
}
// haser is the argument type accepted by Intersect and Sub. Any type that can
// answer membership queries for a BlobHandle via Has satisfies it.
type haser interface {
Has(bh restic.BlobHandle) bool
}
// Intersect returns a new set containing the handles that are present in both
// a and other. Each handle in the result keeps the value that a stores for it.
// The result is backed by the same MasterIndex as a.
func (a *AssociatedSet[T]) Intersect(other haser) *AssociatedSet[T] {
	result := NewAssociatedSet[T](a.idx)
	// Determining the smaller set already requires iterating over all keys
	// and thus provides no performance benefit.
	//
	// Iterate over All rather than Keys: Keys is a wrapper around All that
	// throws the value away, which would force a second lookup via Get for
	// every matching handle.
	for bh, val := range a.All() {
		if other.Has(bh) {
			result.Set(bh, val)
		}
	}
	return result
}
// Sub returns a new set containing all handles that are present in a but not in
// other. Each handle in the result keeps the value that a stores for it. The
// result is backed by the same MasterIndex as a.
func (a *AssociatedSet[T]) Sub(other haser) *AssociatedSet[T] {
	result := NewAssociatedSet[T](a.idx)
	// Iterate over All rather than Keys: Keys is a wrapper around All that
	// throws the value away, which would force a second lookup via Get for
	// every retained handle.
	for bh, val := range a.All() {
		if !other.Has(bh) {
			result.Set(bh, val)
		}
	}
	return result
}
// Len returns the number of entries in the set. It counts them by walking the
// All iterator, so its cost grows with the size of the underlying index.
func (a *AssociatedSet[T]) Len() int {
	var n int
	a.All()(func(restic.BlobHandle, T) bool {
		n++
		return true // never stop early, every entry must be counted
	})
	return n
}
// All returns an iterator over the handles in the set together with their
// values. Entries from the overflow map are yielded first, in map iteration
// order; afterwards the handles produced by the MasterIndex are visited and
// those that are set are yielded.
func (a *AssociatedSet[T]) All() iter.Seq2[restic.BlobHandle, T] {
	return func(yield func(restic.BlobHandle, T) bool) {
		for bh, val := range a.overflow {
			if !yield(bh, val) {
				return
			}
		}

		for pb := range a.idx.Values() {
			bh := pb.BlobHandle
			if _, spilled := a.overflow[bh]; spilled {
				// already reported via overflow set
				continue
			}
			if val, present := a.Get(bh); present && !yield(bh, val) {
				return
			}
		}
	}
}
// Keys returns an iterator over the handles in the set, in the same order as
// All but without the associated values.
func (a *AssociatedSet[T]) Keys() iter.Seq[restic.BlobHandle] {
	return func(yield func(restic.BlobHandle) bool) {
		// Forward every handle to yield; returning its result lets the
		// consumer stop the underlying iteration early.
		a.All()(func(bh restic.BlobHandle, _ T) bool {
			return yield(bh)
		})
	}
}
// String returns the handles of the set in sorted order, wrapped in curly
// braces. It reuses the string form of restic.BlobHandles and replaces its
// first and last byte (presumably the enclosing brackets) with braces.
func (a *AssociatedSet[T]) String() string {
	handles := restic.BlobHandles(slices.Collect(a.Keys()))
	sort.Sort(handles)

	s := handles.String()
	if len(s) < 2 {
		// too short to carry a pair of delimiters
		return "{}"
	}
	inner := s[1 : len(s)-1]
	return "{" + inner + "}"
}