From 55c86231a4fcc66d924bd568e301c5c8447fdb22 Mon Sep 17 00:00:00 2001 From: Andrew Durbin Date: Fri, 25 Oct 2024 08:04:50 -0600 Subject: [PATCH] Allow reporting cpu physical core count. cpu.Info() reports a list of cpus and the list will be double in length when hyperthreading is enabled. This difference also scales cpu utilization. New config property: cpu.stats.physicalcore.enable Default set to false to allow opt-in usage. When set to true domainmgr will: Take the last cpu.InfoStat's CoreID Add 1 (since CoreID starts at 0) Set HostMemory.Ncpus to that value. Signed-off-by: Andrew Durbin --- docs/CONFIG-PROPERTIES.md | 1 + pkg/pillar/cmd/domainmgr/domainmgr.go | 4 +++- pkg/pillar/cmd/domainmgr/metric.go | 5 +++-- pkg/pillar/hypervisor/containerd.go | 4 ++-- pkg/pillar/hypervisor/hypervisor.go | 19 +++++++++++++++++-- pkg/pillar/hypervisor/null.go | 4 ++-- pkg/pillar/hypervisor/xen.go | 2 +- pkg/pillar/types/global.go | 3 +++ pkg/pillar/types/global_test.go | 1 + 9 files changed, 33 insertions(+), 10 deletions(-) diff --git a/docs/CONFIG-PROPERTIES.md b/docs/CONFIG-PROPERTIES.md index b7122386ec..9592e7e8b2 100644 --- a/docs/CONFIG-PROPERTIES.md +++ b/docs/CONFIG-PROPERTIES.md @@ -4,6 +4,7 @@ | ---- | ---- | ------- | ----------- | | app.allow.vnc | boolean | false (only local access) | allow access to EVE's VNC ports from external IPs | | app.fml.resolution | string | notset | Set system-wide value of forced resolution for applications running in FML mode, it can be one of [predefined](/pkg/pillar/types/global.go) FmlResolution* values. 
| +| cpu.stats.physicalcore.enable | boolean | false | Report Ncpus as physical cores instead of HyperThread/SMT cores | | timer.config.interval | integer in seconds | 60 | how frequently device gets config | | timer.cert.interval | integer in seconds | 1 day (24*3600) | how frequently device checks for new controller certificates | | timer.metric.interval | integer in seconds | 60 | how frequently device reports metrics | diff --git a/pkg/pillar/cmd/domainmgr/domainmgr.go b/pkg/pillar/cmd/domainmgr/domainmgr.go index fcfabd3e86..ee8a8b0d4f 100644 --- a/pkg/pillar/cmd/domainmgr/domainmgr.go +++ b/pkg/pillar/cmd/domainmgr/domainmgr.go @@ -107,6 +107,7 @@ type domainContext struct { setInitialVgaAccess bool consoleAccess bool setInitialConsoleAccess bool + reportPhyCores bool GCInitialized bool domainBootRetryTime uint32 // In seconds @@ -543,7 +544,7 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar var resources types.HostMemory for i := 0; true; i++ { delay := 10 - resources, err = hyper.GetHostCPUMem() + resources, err = hyper.GetHostCPUMem(domainCtx.reportPhyCores) if err == nil { break } @@ -2639,6 +2640,7 @@ func handleGlobalConfigImpl(ctxArg interface{}, key string, ctx.metricInterval = metricInterval } ctx.processCloudInitMultiPart = gcp.GlobalValueBool(types.ProcessCloudInitMultiPart) + ctx.reportPhyCores = gcp.GlobalValueBool(types.CPUStatsPhysicalCoreEnable) ctx.GCInitialized = true } log.Functionf("handleGlobalConfigImpl done for %s. 
"+ diff --git a/pkg/pillar/cmd/domainmgr/metric.go b/pkg/pillar/cmd/domainmgr/metric.go index 5d52571286..e9528ec4f5 100644 --- a/pkg/pillar/cmd/domainmgr/metric.go +++ b/pkg/pillar/cmd/domainmgr/metric.go @@ -5,9 +5,10 @@ package domainmgr import ( "fmt" - "github.com/lf-edge/eve/pkg/pillar/hypervisor" "time" + "github.com/lf-edge/eve/pkg/pillar/hypervisor" + "github.com/lf-edge/eve/pkg/pillar/flextimer" "github.com/lf-edge/eve/pkg/pillar/types" "github.com/shirou/gopsutil/cpu" @@ -93,7 +94,7 @@ func logWatermarks(ctx *domainContext, status *types.DomainStatus, dm *types.Dom func getAndPublishMetrics(ctx *domainContext, hyper hypervisor.Hypervisor) { dmList, _ := hyper.GetDomsCPUMem() - hm, err := hyper.GetHostCPUMem() + hm, err := hyper.GetHostCPUMem(ctx.reportPhyCores) if err != nil { log.Errorf("Cannot obtain HostCPUMem: %s", err) return diff --git a/pkg/pillar/hypervisor/containerd.go b/pkg/pillar/hypervisor/containerd.go index e5a6c08647..c0a1c5240b 100644 --- a/pkg/pillar/hypervisor/containerd.go +++ b/pkg/pillar/hypervisor/containerd.go @@ -278,8 +278,8 @@ func (ctx ctrdContext) PCISameController(id1 string, id2 string) bool { return types.PCISameController(id1, id2) } -func (ctx ctrdContext) GetHostCPUMem() (types.HostMemory, error) { - return selfDomCPUMem() +func (ctx ctrdContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) { + return selfDomCPUMem(reportPhyCores) } const nanoSecToSec uint64 = 1000000000 diff --git a/pkg/pillar/hypervisor/hypervisor.go b/pkg/pillar/hypervisor/hypervisor.go index 4993c431ba..69b4048cb6 100644 --- a/pkg/pillar/hypervisor/hypervisor.go +++ b/pkg/pillar/hypervisor/hypervisor.go @@ -9,6 +9,7 @@ import ( "fmt" "os" "path/filepath" + "strconv" "strings" "github.com/lf-edge/eve/pkg/pillar/base" @@ -33,7 +34,7 @@ type Hypervisor interface { PCIRelease(string) error PCISameController(string, string) bool - GetHostCPUMem() (types.HostMemory, error) + GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) 
GetDomsCPUMem() (map[string]types.DomainMetric, error) GetCapabilities() (*types.Capabilities, error) @@ -111,7 +112,7 @@ func GetAvailableHypervisors() (all []string, enabled []string) { return } -func selfDomCPUMem() (types.HostMemory, error) { +func selfDomCPUMem(reportPhysCores bool) (types.HostMemory, error) { hm := types.HostMemory{} vm, err := mem.VirtualMemory() if err != nil { @@ -139,6 +140,20 @@ func selfDomCPUMem() (types.HostMemory, error) { return hm, err } hm.Ncpus = uint32(len(info)) + if reportPhysCores { + if len(info) < 1 { + return hm, nil + } + // The list should be ordered so that CoreIds for a 4 core / 8 thread + // CPU would be eg. 0,1,2,3,0,1,2,3. Pull the last entry: + lastInfoStat := info[len(info)-1] + val, err := strconv.ParseInt(lastInfoStat.CoreID, 10, 32) + if err != nil { + return hm, err + } + // Account for coreid 0 + hm.Ncpus = uint32(val) + 1 + } return hm, nil } diff --git a/pkg/pillar/hypervisor/null.go b/pkg/pillar/hypervisor/null.go index 899c35ed27..ab43ce340d 100644 --- a/pkg/pillar/hypervisor/null.go +++ b/pkg/pillar/hypervisor/null.go @@ -166,8 +166,8 @@ func (ctx nullContext) PCISameController(id1 string, id2 string) bool { return types.PCISameController(id1, id2) } -func (ctx nullContext) GetHostCPUMem() (types.HostMemory, error) { - return selfDomCPUMem() +func (ctx nullContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) { + return selfDomCPUMem(reportPhyCores) } func (ctx nullContext) GetDomsCPUMem() (map[string]types.DomainMetric, error) { diff --git a/pkg/pillar/hypervisor/xen.go b/pkg/pillar/hypervisor/xen.go index f25fa63339..507af79a4e 100644 --- a/pkg/pillar/hypervisor/xen.go +++ b/pkg/pillar/hypervisor/xen.go @@ -623,7 +623,7 @@ func (ctx xenContext) PCISameController(id1 string, id2 string) bool { return false } -func (ctx xenContext) GetHostCPUMem() (types.HostMemory, error) { +func (ctx xenContext) GetHostCPUMem(reportPhyCores bool) (types.HostMemory, error) { hm := types.HostMemory{} 
ctrdSystemCtx, done := ctx.ctrdClient.CtrNewSystemServicesCtx() defer done() diff --git a/pkg/pillar/types/global.go b/pkg/pillar/types/global.go index 9b33134f48..9447660fbc 100644 --- a/pkg/pillar/types/global.go +++ b/pkg/pillar/types/global.go @@ -256,6 +256,8 @@ const ( EnableARPSnoop GlobalSettingKey = "network.switch.enable.arpsnoop" // WwanQueryVisibleProviders : periodically query visible cellular service providers WwanQueryVisibleProviders GlobalSettingKey = "wwan.query.visible.providers" + // CPUStatsPhysicalCoreEnable: report Ncpus as Physical Cores instead of Hyperthread/SMT + CPUStatsPhysicalCoreEnable GlobalSettingKey = "cpu.stats.physicalcore.enable" // TriState Items // NetworkFallbackAnyEth global setting key @@ -947,6 +949,7 @@ func NewConfigItemSpecMap() ConfigItemSpecMap { configItemSpecMap.AddBoolItem(EnableARPSnoop, true) configItemSpecMap.AddBoolItem(WwanQueryVisibleProviders, false) configItemSpecMap.AddBoolItem(NetworkLocalLegacyMACAddress, false) + configItemSpecMap.AddBoolItem(CPUStatsPhysicalCoreEnable, false) // Add TriState Items configItemSpecMap.AddTriStateItem(NetworkFallbackAnyEth, TS_DISABLED) diff --git a/pkg/pillar/types/global_test.go b/pkg/pillar/types/global_test.go index e7325cd898..1a2eb97064 100644 --- a/pkg/pillar/types/global_test.go +++ b/pkg/pillar/types/global_test.go @@ -197,6 +197,7 @@ func TestNewConfigItemSpecMap(t *testing.T) { EnableARPSnoop, WwanQueryVisibleProviders, NetworkLocalLegacyMACAddress, + CPUStatsPhysicalCoreEnable, // TriState Items NetworkFallbackAnyEth, MaintenanceMode,