Skip to content

Commit

Permalink
statistics: reduce memory usage in MergePartTopN2GlobalTopN (#45718
Browse files Browse the repository at this point in the history
)

close #45727
  • Loading branch information
hawkingrei authored Aug 10, 2023
1 parent 772275c commit 46534ff
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 15 deletions.
16 changes: 1 addition & 15 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -799,10 +799,7 @@ func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n
if checkEmptyTopNs(topNs) {
return nil, nil, hists, nil
}

partNum := len(topNs)
removeVals := make([][]TopNMeta, partNum)

// Different TopN structures may hold the same value, we have to merge them.
counter := make(map[hack.MutableString]float64)
// datumMap is used to store the mapping from the string type to datum type.
Expand Down Expand Up @@ -864,22 +861,11 @@ func MergePartTopN2GlobalTopN(loc *time.Location, version int, topNs []*TopN, n
if count != 0 {
counter[encodedVal] += count
// Remove the value corresponding to encodedVal from the histogram.
removeVals[j] = append(removeVals[j], TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
hists[j].BinarySearchRemoveVal(TopNMeta{Encoded: datum.GetBytes(), Count: uint64(count)})
}
}
}
}
// Remove the value from the Hists.
for i := 0; i < partNum; i++ {
if len(removeVals[i]) > 0 {
tmp := removeVals[i]
slices.SortFunc(tmp, func(i, j TopNMeta) bool {
cmpResult := bytes.Compare(i.Encoded, j.Encoded)
return cmpResult < 0
})
hists[i].RemoveVals(tmp)
}
}
numTop := len(counter)
if numTop == 0 {
return nil, nil, hists, nil
Expand Down
29 changes: 29 additions & 0 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,35 @@ func (hg *Histogram) BucketToString(bktID, idxCols int) string {
return fmt.Sprintf("num: %d lower_bound: %s upper_bound: %s repeats: %d ndv: %d", hg.bucketCount(bktID), lowerVal, upperVal, hg.Buckets[bktID].Repeat, hg.Buckets[bktID].NDV)
}

// BinarySearchRemoveVal removes a single value from the histogram, using a
// binary search over the bucket bounds to find the bucket that contains it.
//
// For bucket i the search reads row 2*i of hg.Bounds.Column(0) as the lower
// bound and row 2*i+1 as the upper bound, comparing raw bytes against
// valCntPairs.Encoded. It assumes buckets are ordered ascending by those
// bounds and do not overlap.
//
// When a bucket whose range covers the value is found:
//   - its NDV is decremented if it is positive;
//   - its Repeat is reset to 0 if the value equals the bucket's upper bound;
//   - its Count is reduced by valCntPairs.Count and clamped at 0.
//
// If no bucket covers the value, the histogram is left unchanged.
//
// NOTE(review): only the matched bucket's Count is adjusted here. If bucket
// counts are cumulative, later buckets may need the same adjustment — confirm
// against RemoveVals.
func (hg *Histogram) BinarySearchRemoveVal(valCntPairs TopNMeta) {
	lowIdx, highIdx := 0, hg.Len()-1
	for lowIdx <= highIdx {
		midIdx := lowIdx + (highIdx-lowIdx)/2
		bounds := hg.Bounds.Column(0)
		// Lower bound of the probed bucket is above the value: the value can
		// only live in an earlier bucket, so shrink the range from the right.
		if bytes.Compare(bounds.GetRaw(midIdx*2), valCntPairs.Encoded) > 0 {
			highIdx = midIdx - 1
			continue
		}
		// Upper bound of the probed bucket is below the value: the value can
		// only live in a later bucket, so shrink the range from the left.
		cmpResult := bytes.Compare(bounds.GetRaw(midIdx*2+1), valCntPairs.Encoded)
		if cmpResult < 0 {
			lowIdx = midIdx + 1
			continue
		}
		// lower <= value <= upper: this bucket holds the value.
		if hg.Buckets[midIdx].NDV > 0 {
			hg.Buckets[midIdx].NDV--
		}
		// Repeat is cleared only when the removed value is the upper bound.
		if cmpResult == 0 {
			hg.Buckets[midIdx].Repeat = 0
		}
		hg.Buckets[midIdx].Count -= int64(valCntPairs.Count)
		if hg.Buckets[midIdx].Count < 0 {
			hg.Buckets[midIdx].Count = 0
		}
		return
	}
}

// RemoveVals removes the given values from the histogram.
// This function contains an **ASSUMPTION**: valCntPairs is sorted in ascending order.
func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta) {
Expand Down

0 comments on commit 46534ff

Please sign in to comment.