Skip to content

Commit

Permalink
make storage analysis admin-only and more extensive
Browse files Browse the repository at this point in the history
  • Loading branch information
DocSavage committed Mar 24, 2024
1 parent 7190785 commit 92c6348
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 30 deletions.
20 changes: 12 additions & 8 deletions datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -790,25 +790,28 @@ type StorageSummary struct {
DataUUID string
RootUUID string
Bytes uint64
KeyUsage map[int]int
KeyUsage storage.KeyUsage
}

// GetStorageSummary returns JSON for all the data instances in the stores.
func GetStorageSummary() (string, error) {
func GetStorageSummary() {
stores, err := storage.AllStores()
if err != nil {
return "", err
dvid.Errorf("Error getting all stores: %v\n", err)
return
}

breakdown := make(map[string]map[dvid.InstanceID]StorageSummary, len(stores))
for alias, store := range stores {
sizes, err := storage.GetDataSizes(store, nil)
if err != nil {
return "", err
dvid.Errorf("Error getting data sizes for store %s: %v\n", store, err)
continue
}
keyUsage, err := storage.GetStoreKeyUsage(store)
if err != nil {
return "", err
dvid.Errorf("Error getting key usage for store %s: %v\n", store, err)
continue
}
if sizes == nil && keyUsage == nil {
continue
Expand Down Expand Up @@ -854,10 +857,11 @@ func GetStorageSummary() (string, error) {
// Convert data to JSON string
m, err := json.MarshalIndent(breakdown, "", " ")
if err != nil {
return "", err
dvid.Errorf("Error marshalling storage summary to JSON: %v\n", err)
dvid.Infof("Storage summary:\n%s\n", string(m))
} else {
dvid.Infof("Storage summary:\n%v\n", breakdown)
}
dvid.Infof("Key usage:\n%s\n", string(m))
return string(m), nil
}

// LogRepoOpToKafka logs a repo operation to kafka
Expand Down
13 changes: 7 additions & 6 deletions server/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -1516,14 +1516,15 @@ func latenciesHandler(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, string(m))
}

func serverStorageHandler(w http.ResponseWriter, r *http.Request) {
jsonStr, err := datastore.GetStorageSummary()
if err != nil {
BadRequest(w, r, err)
func serverStorageHandler(c *web.C, w http.ResponseWriter, r *http.Request) {
adminPriv := c.Env["adminPriv"].(bool)
if !adminPriv {
BadRequest(w, r, "Storage summary takes considerable resources and is only available to admin users.")
return
}
w.Header().Set("Content-Type", "application/json")
fmt.Fprint(w, jsonStr)

go datastore.GetStorageSummary()
fmt.Fprint(w, "Storage summary requested. Check log for details.\n")
}

func serverInfoHandler(w http.ResponseWriter, r *http.Request) {
Expand Down
32 changes: 22 additions & 10 deletions storage/badger/badger.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,45 +290,57 @@ func (db *BadgerDB) metadataExists() (bool, error) {

// ---- KeyUsageViewer interface ------

func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []map[int]int, err error) {
type versionTracker struct {
versions int
tombstones int
}

func (db *BadgerDB) GetKeyUsage(ranges []storage.KeyRange) (hitsPerInstance []storage.KeyUsage, err error) {
if db == nil {
err = fmt.Errorf("can't call GetKeyUsage on nil BadgerDB")
return
}
hitsPerInstance = make([]map[int]int, len(ranges))
hitsPerInstance = make([]storage.KeyUsage, len(ranges))
err = db.bdp.View(func(txn *badger.Txn) error {
opts := badger.DefaultIteratorOptions
opts.PrefetchValues = false
it := txn.NewIterator(opts)
defer it.Close()
dvid.Infof("Checking key usage for Badger @ %s ...\n", db.directory)
for i, kr := range ranges {
// Allocate histogram for this key range (i.e., a data instance)
hitsPerInstance[i] = make(map[int]int)

// Iterate and get all kv across versions for each key.
maxVersionKey := storage.MaxVersionDataKeyFromKey(kr.Start)
numVersions := 1
keyUsage := make(storage.KeyUsage)
versions := 0
tombstones := 0
for it.Seek(kr.Start); it.Valid(); it.Next() {
kv := new(storage.KeyValue)
item := it.Item()
kv.K = item.KeyCopy(nil)
storage.StoreKeyBytesRead <- len(kv.K)

// Add version to the stats for this key.
// If we now are in another TKey, record stats and reset version histogram.
if bytes.Compare(kv.K, maxVersionKey) > 0 {
tKeyClass := uint8(kv.K[5])
keyUsage.Add(tKeyClass, versions, tombstones)

maxVersionKey = storage.MaxVersionDataKeyFromKey(kv.K)
hitsPerInstance[i][numVersions]++
numVersions = 0
keyUsage = make(storage.KeyUsage)
versions = 0
tombstones = 0
}
versions++
if kv.K.IsTombstone() {
tombstones++
}
numVersions++

// Did we pass the final key?
if bytes.Compare(kv.K, kr.OpenEnd) > 0 {
break
}

}
hitsPerInstance[i] = keyUsage
}
dvid.Infof("Key usage for Badger @ %s:\n %v\n", db.directory, hitsPerInstance)
return nil
Expand Down
4 changes: 2 additions & 2 deletions storage/keyvalue.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ func getInstanceSizes(sv SizeViewer, instances []dvid.InstanceID) (map[dvid.Inst
return sizes, nil
}

func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.InstanceID]map[int]int, error) {
func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.InstanceID]KeyUsage, error) {
ranges := make([]KeyRange, len(instances))
for i, curID := range instances {
beg := constructDataKey(curID, 0, 0, minTKey)
Expand All @@ -462,7 +462,7 @@ func getKeyUsage(vw KeyUsageViewer, instances []dvid.InstanceID) (map[dvid.Insta
if len(s) != len(instances) {
return nil, fmt.Errorf("only got back %d instance key usages, not the requested %d instances", len(s), len(instances))
}
keyUsage := make(map[dvid.InstanceID]map[int]int, len(instances))
keyUsage := make(map[dvid.InstanceID]KeyUsage, len(instances))
for i, curID := range instances {
keyUsage[curID] = s[i]
}
Expand Down
41 changes: 37 additions & 4 deletions storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,18 +168,51 @@ func Repair(name, path string) error {
return repairer.Repair(path)
}

// VersionHistogram maps a version count to the number of keys that have
// exactly that many versions.
type VersionHistogram map[int]int

// Clone returns an independent copy of the histogram. The returned map is
// always non-nil, even when the receiver is nil or empty, and shares no
// storage with the receiver.
func (vh VersionHistogram) Clone() VersionHistogram {
	dup := make(VersionHistogram, len(vh))
	for numVersions := range vh {
		dup[numVersions] = vh[numVersions]
	}
	return dup
}

// KeyUsage maps a TKeyClass to the VersionHistogram of the keys in that class.
//
// Within each histogram, bucket N (N > 0) counts the keys that have N versions,
// while bucket 0 is used by Add as a running total of tombstones.
type KeyUsage map[uint8]VersionHistogram

// Clone returns a deep copy of the key usage: every per-class histogram is
// itself cloned, so mutating the copy never affects the receiver.
func (ku KeyUsage) Clone() KeyUsage {
	clone := make(KeyUsage, len(ku))
	for class, hist := range ku {
		clone[class] = hist.Clone()
	}
	return clone
}

// Add records one key of the given class that has the given number of
// versions, of which tombstones are tombstoned.
//
// The receiver must be a non-nil map; like any Go map, writing through a nil
// KeyUsage panics. A call with nothing to record (no versions and no
// tombstones) is a no-op.
func (ku KeyUsage) Add(class uint8, versions int, tombstones int) {
	// Nothing was observed for this key. Without this guard the version
	// bucket increment below would hit bucket 0, which doubles as the
	// tombstone tally, and report a tombstone that does not exist.
	if versions <= 0 && tombstones == 0 {
		return
	}

	// Allocate on a nil histogram as well as on a missing class so that an
	// entry explicitly stored as nil cannot cause a nil-map write.
	vh := ku[class]
	if vh == nil {
		vh = make(VersionHistogram)
		ku[class] = vh
	}

	if versions > 0 {
		vh[versions]++
	}
	vh[0] += tombstones
}

// KeyUsageViewer stores can return how many keys are stored and a histogram of the
// number of versions per key for each data instance given by the key ranges.
type KeyUsageViewer interface {
GetKeyUsage(ranges []KeyRange) (histPerInstance []map[int]int, err error)
GetKeyUsage(ranges []KeyRange) (histPerInstance []KeyUsage, err error)
}

// GetStoreKeyUsage returns a histogram of the number of versions per key for each
// GetStoreKeyUsage returns a histogram map[# versions][# keys] for each
// data instance in the store.
func GetStoreKeyUsage(store dvid.Store) (map[dvid.InstanceID]map[int]int, error) {
func GetStoreKeyUsage(store dvid.Store) (map[dvid.InstanceID]KeyUsage, error) {
db, ok := store.(OrderedKeyValueGetter)
if !ok {
dvid.Infof("Cannot get data sizes for store %s, which is not an OrderedKeyValueGetter store\n", db)
dvid.Infof("Cannot get key usage for store %s, which is not an OrderedKeyValueGetter store\n", db)
return nil, nil
}
viewer, ok := store.(KeyUsageViewer)
Expand Down

0 comments on commit 92c6348

Please sign in to comment.