Skip to content

Commit

Permalink
Improve content map, memory cache and dependency resolution
Browse files Browse the repository at this point in the history
TODO(bep) improve commit message.

Hugo has always been an active user of in-memory caches, but before this commit we did nothing to control the memory usage.

One failing example would be loading lots of big JSON data files and unmarshaling them via `transform.Unmarshal`.

This commit consolidates all these caches into one single LRU cache with an eviction strategy that also considers used vs. available memory.

Hugo will try to limit its memory usage to 1/4 of total system memory, but this can be controlled with the `HUGO_MEMORYLIMIT` environment variable (a float value representing Gigabytes).

A natural next step after this would be to use this cache for `.Content`.

Fixes gohugoio#10386
Fixes gohugoio#8307
Fixes gohugoio#8498
Fixes gohugoio#8927
Fixes gohugoio#9192
Fixes gohugoio#9189
Fixes gohugoio#7425
Fixes gohugoio#7437
Fixes gohugoio#7436
Fixes gohugoio#7882
Updates gohugoio#7544
Fixes gohugoio#9224
Fixes gohugoio#9324
Fixes gohugoio#9352
Fixes gohugoio#9343
Fixes gohugoio#9171
Fixes gohugoio#10104
Fixes gohugoio#10380
  • Loading branch information
bep committed Nov 19, 2022
1 parent 7477672 commit a241681
Show file tree
Hide file tree
Showing 256 changed files with 14,113 additions and 11,168 deletions.
Empty file added .hugo_build.lock
Empty file.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"autoHide.autoHidePanel": false
}
37 changes: 0 additions & 37 deletions bench.sh

This file was deleted.

12 changes: 0 additions & 12 deletions benchSite.sh

This file was deleted.

2 changes: 1 addition & 1 deletion benchbep.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
gobench -package=./hugolib -bench="BenchmarkSiteNew/Deep_content_tree"
gobench --package ./hugolib --bench "BenchmarkSiteNew/Regular_Deep" -base v0.89.4
1 change: 0 additions & 1 deletion bepdock.sh

This file was deleted.

3 changes: 2 additions & 1 deletion cache/filecache/filecache.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package filecache

import (
"bytes"
"context"
"errors"
"io"
"io/ioutil"
Expand Down Expand Up @@ -163,7 +164,7 @@ func (c *Cache) ReadOrCreate(id string,
// GetOrCreate tries to get the file with the given id from cache. If not found or expired, create will
// be invoked and the result cached.
// This method is protected by a named lock using the given id as identifier.
func (c *Cache) GetOrCreate(id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
func (c *Cache) GetOrCreate(ctx context.Context, id string, create func() (io.ReadCloser, error)) (ItemInfo, io.ReadCloser, error) {
id = cleanID(id)

c.nlocker.Lock(id)
Expand Down
7 changes: 4 additions & 3 deletions cache/filecache/filecache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package filecache

import (
"context"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -134,7 +135,7 @@ dir = ":cacheDir/c"

for _, ca := range []*Cache{caches.ImageCache(), caches.AssetsCache(), caches.GetJSONCache(), caches.GetCSVCache()} {
for i := 0; i < 2; i++ {
info, r, err := ca.GetOrCreate("a", rf("abc"))
info, r, err := ca.GetOrCreate(context.TODO(), "a", rf("abc"))
c.Assert(err, qt.IsNil)
c.Assert(r, qt.Not(qt.IsNil))
c.Assert(info.Name, qt.Equals, "a")
Expand All @@ -152,7 +153,7 @@ dir = ":cacheDir/c"
c.Assert(err, qt.IsNil)
c.Assert(string(b), qt.Equals, "abc")

_, r, err = ca.GetOrCreate("a", rf("bcd"))
_, r, err = ca.GetOrCreate(context.TODO(), "a", rf("bcd"))
c.Assert(err, qt.IsNil)
b, _ = ioutil.ReadAll(r)
r.Close()
Expand Down Expand Up @@ -229,7 +230,7 @@ dir = "/cache/c"
ca := caches.Get(cacheName)
c.Assert(ca, qt.Not(qt.IsNil))
filename, data := filenameData(i)
_, r, err := ca.GetOrCreate(filename, func() (io.ReadCloser, error) {
_, r, err := ca.GetOrCreate(context.TODO(), filename, func() (io.ReadCloser, error) {
return hugio.ToReadCloser(strings.NewReader(data)), nil
})
c.Assert(err, qt.IsNil)
Expand Down
275 changes: 275 additions & 0 deletions cache/memcache/memcache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
// Copyright 2022 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package memcache

import (
"context"
"math"
"runtime"
"sync"
"time"

"github.com/bep/lazycache"
"github.com/gohugoio/hugo/config"
"github.com/gohugoio/hugo/identity"
)

// minMaxSize is the smallest max size a partition can be resized to:
// Partition.adjustMaxSize raises any smaller requested size to this value.
const minMaxSize = 10

// Options holds the configuration passed to New when creating a Cache.
type Options struct {
// CheckInterval is the period of the ticker that triggers a re-evaluation
// of the cache's max size. New sets it to 2 seconds when it is zero.
CheckInterval time.Duration
// MaxSize is the configured max size of the whole cache; it is used as an
// entry count when sizing partitions. New sets it to 10000 when it is zero.
MaxSize int
// MinMaxSize is handed to the stats as the configured minimum for the max
// size. New sets it to 30 when it is zero.
MinMaxSize int
// Running is not read anywhere in this file.
// NOTE(review): presumably set when Hugo runs in server/watch mode — confirm.
Running bool
}

// OptionsPartition holds the configuration of a single cache partition.
type OptionsPartition struct {
// ClearWhen is not read in this file.
// NOTE(review): presumably selects the events on which the partition is
// cleared — confirm against the definition of ClearWhen.
ClearWhen ClearWhen

// Weight is a number between 1 and 100 that indicates, in general, how big
// this partition may get. GetOrCreatePartition panics on values outside
// that range.
Weight int
}

// WeightFraction returns Weight divided by 100, e.g. a Weight of 25 yields 0.25.
func (o OptionsPartition) WeightFraction() float64 {
	const fullWeight = 100
	weight := float64(o.Weight)
	return weight / fullWeight
}

// CalculateMaxSize returns maxSizePerPartition scaled by this partition's
// weight fraction, rounded down to a whole number.
func (o OptionsPartition) CalculateMaxSize(maxSizePerPartition int) int {
	scaled := o.WeightFraction() * float64(maxSizePerPartition)
	return int(math.Floor(scaled))
}

// Cache is a memory cache made up of named partitions. A background
// goroutine started by New periodically re-evaluates the partitions' max
// sizes; call Stop to end it.
type Cache struct {
// mu is held by GetOrCreatePartition while it reads or writes partitions.
mu sync.RWMutex

// partitions maps a partition name to its manager.
partitions map[string]PartitionManager
// opts are the options given to New, with defaults applied.
opts Options

// stats holds the memory statistics and the current max size used by
// adjustCurrentMaxSize.
stats *stats
// stopOnce makes Stop invoke stop at most once.
stopOnce sync.Once
// stop ends the background goroutine; it is the function returned by start.
stop func()
}

// ClearOn is a placeholder: for now it only validates when and panics with
// "invalid ClearWhen" if it is the zero value. Neither the cache nor
// changeset is touched yet.
func (c *Cache) ClearOn(when ClearWhen, changeset ...identity.Identity) {
	const unset = 0
	if when == unset {
		panic("invalid ClearWhen")
	}

	// TODO1: implement the actual clearing.
}

// calculateMaxSizePerPartition returns the base size to hand to
// OptionsPartition.CalculateMaxSize, i.e. the size a partition with weight
// 100 would get. It is chosen so that the partitions' weighted shares add up
// to (at most) maxItemsTotal:
//
//	maxItemsTotal/numPartitions * 100/(totalWeightQuantity/numPartitions)
//	  = maxItemsTotal * 100 / totalWeightQuantity
//
// The result is rounded down. maxItemsTotal is expected to be non-negative.
// It panics if numPartitions or totalWeightQuantity is not positive.
func calculateMaxSizePerPartition(maxItemsTotal, totalWeightQuantity, numPartitions int) int {
	if numPartitions <= 0 {
		panic("numPartitions must be > 0")
	}
	if totalWeightQuantity <= 0 {
		panic("totalWeightQuantity must be > 0")
	}

	// Use exact integer arithmetic. Evaluating the unsimplified formula in
	// floating point can land just below an integer and then floor to one
	// less than the true value (e.g. 772 items, total weight 80, 3 partitions).
	// int64 keeps the multiplication from overflowing on 32-bit platforms.
	return int(int64(maxItemsTotal) * 100 / int64(totalWeightQuantity))
}

// Stop invokes the cache's stop function. It may be called several times;
// only the first call has any effect.
func (c *Cache) Stop() {
	c.stopOnce.Do(c.stop)
}

// adjustCurrentMaxSize compares the current heap allocation with the
// available memory, updates the stats' adjustment factor accordingly and, if
// the stats report a change, resizes every partition to its weighted share of
// the new max size.
//
// It is called from the ticker goroutine started by start, so it must not
// touch c.partitions without holding c.mu.
func (c *Cache) adjustCurrentMaxSize() {
	// Snapshot the partitions under the read lock: GetOrCreatePartition may
	// insert into the map concurrently, and an unsynchronized map read or
	// iteration during a write is a data race. Resizing happens on the
	// snapshot so the lock is not held while partitions evict entries.
	c.mu.RLock()
	partitions := make([]PartitionManager, 0, len(c.partitions))
	totalWeight := 0
	for _, pm := range c.partitions {
		partitions = append(partitions, pm)
		totalWeight += pm.getOptions().Weight
	}
	c.mu.RUnlock()

	if len(partitions) == 0 {
		return
	}

	var m runtime.MemStats
	runtime.ReadMemStats(&m)
	s := c.stats
	s.memstatsCurrent = m

	// Grow slowly (+0.1) while allocation is within the available memory,
	// shrink quickly (-0.4) once it exceeds it.
	if s.availableMemory >= s.memstatsCurrent.Alloc {
		if s.adjustmentFactor <= 1.0 {
			s.adjustmentFactor += 0.1
		}
	} else {
		s.adjustmentFactor -= 0.4
	}

	// Never scale below 20%.
	if s.adjustmentFactor < 0.2 {
		s.adjustmentFactor = 0.2
	}

	// The stats decide whether the current max size needs to be changed.
	if !s.adjustCurrentMaxSize() {
		return
	}

	maxSizePerPartition := calculateMaxSizePerPartition(s.currentMaxSize, totalWeight, len(partitions))

	evicted := 0
	for _, p := range partitions {
		evicted += p.adjustMaxSize(p.getOptions().CalculateMaxSize(maxSizePerPartition))
	}

	// TODO1: report the number of evicted items.
	_ = evicted
}

// start launches a goroutine that calls adjustCurrentMaxSize on every tick of
// a ticker with period c.opts.CheckInterval. The returned function closes the
// quit channel, which makes the goroutine stop the ticker and exit; it must
// be called at most once.
func (c *Cache) start() func() {
	done := make(chan struct{})
	tick := time.NewTicker(c.opts.CheckInterval)

	loop := func() {
		defer tick.Stop()
		for {
			select {
			case <-done:
				return
			case <-tick.C:
				c.adjustCurrentMaxSize()
			}
		}
	}
	go loop()

	return func() { close(done) }
}

// GetOrCreatePartition returns the partition registered in c under name,
// creating it with opts if it does not exist yet.
//
// It panics if c is nil, if opts.Weight is outside 1..100, or if a partition
// with the same name already exists with different type parameters (the type
// assertion fails).
func GetOrCreatePartition[K comparable, V any](c *Cache, name string, opts OptionsPartition) *Partition[K, V] {
	if c == nil {
		panic("nil Cache")
	}
	if opts.Weight < 1 || opts.Weight > 100 {
		panic("invalid Weight, must be between 1 and 100")
	}

	// Fast path: the partition usually exists already, so try with the read
	// lock first.
	c.mu.RLock()
	p, found := c.partitions[name]
	c.mu.RUnlock()
	if found {
		return p.(*Partition[K, V])
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// Check again: another goroutine may have created the partition between
	// releasing the read lock and acquiring the write lock.
	if p, found = c.partitions[name]; found {
		return p.(*Partition[K, V])
	}

	// At this point, we don't know the number of partitions or their
	// configuration, but this will be re-adjusted later.
	const numberOfPartitionsEstimate = 10
	maxSize := opts.CalculateMaxSize(c.opts.MaxSize / numberOfPartitionsEstimate)
	// Apply the same lower bound as adjustMaxSize, so a small MaxSize or
	// Weight cannot produce a partition with room for zero entries.
	if maxSize < minMaxSize {
		maxSize = minMaxSize
	}

	// Create a new partition and cache it.
	partition := &Partition[K, V]{
		c:       lazycache.New[K, V](lazycache.Options{MaxEntries: maxSize}),
		maxSize: maxSize,
		opts:    opts,
	}
	c.partitions[name] = partition

	return partition
}

// New creates a Cache from opts and starts its background goroutine, which
// re-evaluates the max size every opts.CheckInterval. Call Stop on the
// returned Cache to end that goroutine.
//
// Non-positive option values are replaced by defaults: CheckInterval by
// 2 seconds, MaxSize by 10000 and MinMaxSize by 30.
func New(opts Options) *Cache {
	// A non-positive interval would make time.NewTicker panic in start.
	if opts.CheckInterval <= 0 {
		opts.CheckInterval = time.Second * 2
	}

	if opts.MaxSize <= 0 {
		opts.MaxSize = 10000
	}

	if opts.MinMaxSize <= 0 {
		opts.MinMaxSize = 30
	}

	stats := &stats{
		configuredMaxSize:    opts.MaxSize,
		configuredMinMaxSize: opts.MinMaxSize,
		currentMaxSize:       opts.MaxSize,
		availableMemory:      config.GetMemoryLimit(),
	}

	c := &Cache{
		partitions: make(map[string]PartitionManager),
		opts:       opts,
		stats:      stats,
	}

	// start returns the function that ends the goroutine; Stop invokes it.
	c.stop = c.start()

	return c
}

// Partition is one named, typed section of a Cache. Its entries are stored
// in a lazycache.Cache of its own.
type Partition[K comparable, V any] struct {
// c is the underlying cache holding the entries.
c *lazycache.Cache[K, V]

// opts is the configuration the partition was created with.
opts OptionsPartition

// maxSize is the max size most recently applied to c.
maxSize int
}

// GetOrCreate delegates to the underlying cache's GetOrCreate with key and
// create and returns its result unchanged. ctx is currently unused.
func (p *Partition[K, V]) GetOrCreate(ctx context.Context, key K, create func(key K) (V, error)) (V, error) {
	// TODO1: track the dependency via ctx when running
	// (g.c.trackDependencyIfRunning(ctx, v)).
	value, err := p.c.GetOrCreate(key, create)
	return value, err
}

// adjustMaxSize sets the max size of the partition to newMaxSize, raised to
// minMaxSize if it is smaller, and returns the number of items evicted by
// the resize.
func (p *Partition[K, V]) adjustMaxSize(newMaxSize int) int {
	size := newMaxSize
	if size < minMaxSize {
		size = minMaxSize
	}

	p.maxSize = size
	evicted := p.c.Resize(size)
	return evicted
}

// getMaxSize returns the partition's stored max size, i.e. the value set at
// creation or by the latest adjustMaxSize call.
func (p *Partition[K, V]) getMaxSize() int {
	size := p.maxSize
	return size
}

// getOptions returns a copy of the options the partition was created with.
func (p *Partition[K, V]) getOptions() OptionsPartition {
	options := p.opts
	return options
}

// Clear is part of the PartitionManager interface. It is currently a no-op
// placeholder and leaves the partition's entries untouched.
func (p *Partition[K, V]) Clear() {
// TODO1: not implemented yet.
}

// Get delegates to the underlying cache's Get with key and returns its
// result unchanged. ctx is currently unused.
func (p *Partition[K, V]) Get(ctx context.Context, key K) (V, bool) {
	// TODO1: track the dependency via ctx when running
	// (g.c.trackDependencyIfRunning(ctx, v)).
	value, found := p.c.Get(key)
	return value, found
}

// PartitionManager is the type-parameter-free view of a Partition that Cache
// uses to keep partitions of different key and value types in one map.
type PartitionManager interface {
	// Clear is the hook for clearing the partition.
	Clear()

	// adjustMaxSize sets the partition's max size to newMaxSize (an absolute
	// size, not a delta) and returns the number of items evicted.
	adjustMaxSize(newMaxSize int) int
	// getMaxSize returns the partition's current max size.
	getMaxSize() int
	// getOptions returns the options the partition was created with.
	getOptions() OptionsPartition
}
Loading

0 comments on commit a241681

Please sign in to comment.