Skip to content

Commit

Permalink
feat: implement PCI device bus enumeration
Browse files Browse the repository at this point in the history
Fixes #8826

From the QEMU VM:

```shell
$ talosctl -n 172.20.0.5 get pcidevice
NODE         NAMESPACE   TYPE        ID             VERSION   CLASS                       SUBCLASS                    VENDOR              PRODUCT
172.20.0.5   hardware    PCIDevice   0000:00:00.0   1         Bridge                      Host bridge                 Intel Corporation   82G33/G31/P35/P31 Express DRAM Controller
172.20.0.5   hardware    PCIDevice   0000:00:01.0   1         Display controller          VGA compatible controller
172.20.0.5   hardware    PCIDevice   0000:00:02.0   1         Network controller          Ethernet controller         Red Hat, Inc.       Virtio network device
172.20.0.5   hardware    PCIDevice   0000:00:03.0   1         Unclassified device                                     Red Hat, Inc.       Virtio RNG
172.20.0.5   hardware    PCIDevice   0000:00:04.0   1         Unclassified device                                     Red Hat, Inc.       Virtio memory balloon
172.20.0.5   hardware    PCIDevice   0000:00:05.0   1         Communication controller    Communication controller    Red Hat, Inc.       Virtio console
172.20.0.5   hardware    PCIDevice   0000:00:06.0   1         Generic system peripheral   System peripheral           Intel Corporation   6300ESB Watchdog Timer
172.20.0.5   hardware    PCIDevice   0000:00:07.0   1         Mass storage controller     SCSI storage controller     Red Hat, Inc.       Virtio block device
172.20.0.5   hardware    PCIDevice   0000:00:1f.0   1         Bridge                      ISA bridge                  Intel Corporation   82801IB (ICH9) LPC Interface Controller
172.20.0.5   hardware    PCIDevice   0000:00:1f.2   1         Mass storage controller     SATA controller             Intel Corporation   82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode]
172.20.0.5   hardware    PCIDevice   0000:00:1f.3   1         Serial bus controller       SMBus                       Intel Corporation   82801I (ICH9 Family) SMBus Controller
```

```yaml
node: 172.20.0.5
metadata:
    namespace: hardware
    type: PCIDevices.hardware.talos.dev
    id: 0000:00:1f.3
    version: 1
    owner: hardware.PCIDevicesController
    phase: running
    created: 2024-05-30T12:09:05Z
    updated: 2024-05-30T12:09:05Z
spec:
    class: Serial bus controller
    subclass: SMBus
    vendor: Intel Corporation
    product: 82801I (ICH9 Family) SMBus Controller
    class_id: "0x0c"
    subclass_id: "0x05"
    vendor_id: "0x8086"
    product_id: "0x2930"
```

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed May 31, 2024
1 parent b0466e0 commit 4dd0aa7
Show file tree
Hide file tree
Showing 15 changed files with 990 additions and 64 deletions.
12 changes: 12 additions & 0 deletions api/resource/definitions/hardware/hardware.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ message MemoryModuleSpec {
string product_name = 8;
}

// PCIDeviceSpec represents a single PCI device.
message PCIDeviceSpec {
// Human-readable names; the controller fills them from PCI database lookups and ignores the found flag.
string class = 1;
string subclass = 2;
string vendor = 3;
string product = 4;
// Identifiers formatted as 0x-prefixed hexadecimal strings, e.g. "0x0c" or "0x8086".
string class_id = 5;
string subclass_id = 6;
string vendor_id = 7;
string product_id = 8;
}

// ProcessorSpec represents a single processor.
message ProcessorSpec {
string socket = 1;
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ require (
github.com/siderolabs/go-kubeconfig v0.1.0
github.com/siderolabs/go-kubernetes v0.2.9
github.com/siderolabs/go-loadbalancer v0.3.3
github.com/siderolabs/go-pcidb v0.2.0
github.com/siderolabs/go-pcidb v0.3.0
github.com/siderolabs/go-pointer v1.0.0
github.com/siderolabs/go-procfs v0.1.2
github.com/siderolabs/go-retry v0.3.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,8 @@ github.com/siderolabs/go-kubernetes v0.2.9 h1:EtaOcni9P0etJz+UDlIKQkgsTjCg2MWI2p
github.com/siderolabs/go-kubernetes v0.2.9/go.mod h1:AAydnLZrqG+MJrKTa82AszkWIytkqwDBt7PL+bfbupI=
github.com/siderolabs/go-loadbalancer v0.3.3 h1:D6ONnP9Erlh4TS6kV9L7ocnfrNYCA/58i6ZF0QweLJk=
github.com/siderolabs/go-loadbalancer v0.3.3/go.mod h1:7j4Q9peU/UFuTNSFfwhKLQ028CNkyMkAdGnSi1Dm7Jw=
github.com/siderolabs/go-pcidb v0.2.0 h1:ZCkF1cz6UjoEIHpP7+aeTI5BwmSxE627Jl1Wy2VZAwU=
github.com/siderolabs/go-pcidb v0.2.0/go.mod h1:XstZrp8xnganxzIc3UQKfCs1fQFgYWH2lqtWeqBwRok=
github.com/siderolabs/go-pcidb v0.3.0 h1:jR4w1YLNY8Cv1o5jnoQ2Q+pbxcosO2FVFrAAp1RURnw=
github.com/siderolabs/go-pcidb v0.3.0/go.mod h1:4XYdmnR/o9kSzMe8dKK17wLBhPNIsisjqmU3QD1FjRk=
github.com/siderolabs/go-pointer v1.0.0 h1:6TshPKep2doDQJAAtHUuHWXbca8ZfyRySjSBT/4GsMU=
github.com/siderolabs/go-pointer v1.0.0/go.mod h1:HTRFUNYa3R+k0FFKNv11zgkaCLzEkWVzoYZ433P3kHc=
github.com/siderolabs/go-procfs v0.1.2 h1:bDs9hHyYGE2HO1frpmUsD60yg80VIEDrx31fkbi4C8M=
Expand Down
6 changes: 6 additions & 0 deletions hack/release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ machine:
```
Please note that on running cluster you will have to kill CoreDNS pods for this change to apply.
"""

[notes.lspci]
title = "PCI Devices"
description = """\
A list of PCI devices can now be obtained via the `PCIDevices` resource, e.g. `talosctl get pcidevices`.
"""

[make_deps]
Expand Down
138 changes: 138 additions & 0 deletions internal/app/machined/pkg/controllers/hardware/pcidevices.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

package hardware

import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"strconv"

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/siderolabs/go-pcidb/pkg/pcidb"
"go.uber.org/zap"

runtimetalos "github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
)

// PCIDevicesController populates PCI device information.
//
// It scans /sys/bus/pci/devices and publishes one PCIDevice resource per entry found there.
type PCIDevicesController struct {
// V1Alpha1Mode is the runtime mode; Run returns immediately when it equals runtimetalos.ModeContainer.
V1Alpha1Mode runtimetalos.Mode
}

// Name implements controller.Controller interface.
//
// It returns the fixed name of this controller.
func (ctrl *PCIDevicesController) Name() string {
	const name = "hardware.PCIDevicesController"

	return name
}

// Inputs implements controller.Controller interface.
//
// The controller declares no inputs, so a nil slice is returned.
func (ctrl *PCIDevicesController) Inputs() []controller.Input {
	var noInputs []controller.Input

	return noInputs
}

// Outputs implements controller.Controller interface.
//
// The controller declares PCIDevice resources as its single, exclusive output.
func (ctrl *PCIDevicesController) Outputs() []controller.Output {
	pciDevices := controller.Output{
		Type: hardware.PCIDeviceType,
		Kind: controller.OutputExclusive,
	}

	return []controller.Output{pciDevices}
}

// Run implements controller.Controller interface.
//
// On every runtime event it lists /sys/bus/pci/devices, reads the class, vendor and device
// attributes of each entry and writes one PCIDevice resource per device, keyed by the sysfs
// entry name. Outputs which were not touched during the scan are cleaned up afterwards.
//
// A missing /sys/bus/pci/devices directory is treated as "no PCI devices" rather than as a
// controller failure. Run returns nil when the context is canceled or when running in
// container mode, and a wrapped error if scanning, parsing or writing a resource fails.
//
//nolint:gocyclo
func (ctrl *PCIDevicesController) Run(ctx context.Context, r controller.Runtime, logger *zap.Logger) error {
	// PCI device info doesn't make sense inside a container, so skip the controller
	if ctrl.V1Alpha1Mode == runtimetalos.ModeContainer {
		return nil
	}

	// [TODO]: a single run for now, need to figure out how to trigger rescan
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.EventCh():
		}

		deviceIDs, err := os.ReadDir("/sys/bus/pci/devices")
		if err != nil {
			if !os.IsNotExist(err) {
				return fmt.Errorf("error scanning devices: %w", err)
			}

			// the sysfs directory is absent (no PCI bus): continue with an empty device list,
			// so that the controller doesn't fail and any stale outputs still get cleaned up
			deviceIDs = nil
		}

		logger.Debug("found PCI devices", zap.Int("count", len(deviceIDs)))

		r.StartTrackingOutputs()

		for _, deviceID := range deviceIDs {
			// a device might disappear between the directory listing and the attribute reads,
			// so a missing attribute file skips the device instead of failing the scan
			class, err := readHexPCIInfo(deviceID.Name(), "class")
			if err != nil {
				if os.IsNotExist(err) {
					continue
				}

				return fmt.Errorf("error parsing device %s class: %w", deviceID.Name(), err)
			}

			vendor, err := readHexPCIInfo(deviceID.Name(), "vendor")
			if err != nil {
				if os.IsNotExist(err) {
					continue
				}

				return fmt.Errorf("error parsing device %s vendor: %w", deviceID.Name(), err)
			}

			product, err := readHexPCIInfo(deviceID.Name(), "device")
			if err != nil {
				if os.IsNotExist(err) {
					continue
				}

				return fmt.Errorf("error parsing device %s product: %w", deviceID.Name(), err)
			}

			// the class attribute packs the class into bits 16-23 and the subclass into bits 8-15
			classID := pcidb.Class((class >> 16) & 0xff)
			subclassID := pcidb.Subclass((class >> 8) & 0xff)
			vendorID := pcidb.Vendor(vendor)
			productID := pcidb.Product(product)

			// the callback argument is deliberately not named r to avoid shadowing the controller runtime
			if err := safe.WriterModify(ctx, r, hardware.NewPCIDeviceInfo(deviceID.Name()), func(res *hardware.PCIDevice) error {
				res.TypedSpec().ClassID = fmt.Sprintf("0x%02x", classID)
				res.TypedSpec().SubclassID = fmt.Sprintf("0x%02x", subclassID)
				res.TypedSpec().VendorID = fmt.Sprintf("0x%04x", vendorID)
				res.TypedSpec().ProductID = fmt.Sprintf("0x%04x", productID)

				// the second return value of the lookups is ignored on purpose: the name is
				// stored as returned, whether or not the PCI database knows the ID
				res.TypedSpec().Class, _ = pcidb.LookupClass(classID)
				res.TypedSpec().Subclass, _ = pcidb.LookupSubclass(classID, subclassID)
				res.TypedSpec().Vendor, _ = pcidb.LookupVendor(vendorID)
				res.TypedSpec().Product, _ = pcidb.LookupProduct(vendorID, productID)

				return nil
			}); err != nil {
				return fmt.Errorf("error modifying output resource: %w", err)
			}
		}

		if err = safe.CleanupOutputs[*hardware.PCIDevice](ctx, r); err != nil {
			return fmt.Errorf("error cleaning up outputs: %w", err)
		}
	}
}

// readHexPCIInfo reads the sysfs attribute file info of the PCI device deviceID
// (under /sys/bus/pci/devices) and parses its whitespace-trimmed contents as an
// unsigned integer.
//
// The number base is detected from the prefix (base 0), so a "0x" prefix yields hexadecimal.
// Errors from reading the file are returned unwrapped, so callers can test them with os.IsNotExist.
func readHexPCIInfo(deviceID, info string) (uint64, error) {
	attrPath := filepath.Join("/sys/bus/pci/devices", deviceID, info)

	raw, err := os.ReadFile(attrPath)
	if err != nil {
		return 0, err
	}

	value := string(bytes.TrimSpace(raw))

	return strconv.ParseUint(value, 0, 64)
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ func (ctrl *Controller) Run(ctx context.Context, drainer *runtime.Drainer) error
EtcPath: "/etc",
ShadowPath: constants.SystemEtcPath,
},
&hardware.PCIDevicesController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
&hardware.SystemInfoController{
V1Alpha1Mode: ctrl.v1alpha1Runtime.State().Platform().Mode(),
},
Expand Down
3 changes: 2 additions & 1 deletion internal/app/machined/pkg/runtime/v1alpha2/v1alpha2_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ func NewState() (*State, error) {
&etcd.Member{},
&files.EtcFileSpec{},
&files.EtcFileStatus{},
&hardware.Processor{},
&hardware.MemoryModule{},
&hardware.PCIDevice{},
&hardware.Processor{},
&hardware.SystemInformation{},
&k8s.AdmissionControlConfig{},
&k8s.AuditPolicyConfig{},
Expand Down
80 changes: 80 additions & 0 deletions internal/integration/api/hardware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

//go:build integration_api

package api

import (
"context"
"time"

"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/google/uuid"

"github.com/siderolabs/talos/internal/integration/base"
"github.com/siderolabs/talos/pkg/machinery/client"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
)

// HardwareSuite verifies that hardware resources (system information, memory modules,
// processors, PCI devices) are populated.
type HardwareSuite struct {
base.APISuite

// ctx is the per-test context created in SetupTest with a 15 second timeout.
ctx context.Context //nolint:containedctx
// ctxCancel releases ctx; it is called from TearDownTest.
ctxCancel context.CancelFunc
}

// SuiteName returns the name of the suite.
func (suite *HardwareSuite) SuiteName() string {
	const name = "api.HardwareSuite"

	return name
}

// SetupTest skips the test when the cluster doesn't run the Talos kernel and
// otherwise prepares a per-test context with a 15 second timeout.
func (suite *HardwareSuite) SetupTest() {
	if caps := suite.Capabilities(); !caps.RunsTalosKernel {
		suite.T().Skipf("doesn't run Talos kernel, skipping")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)

	suite.ctx = ctx
	suite.ctxCancel = cancel
}

// TearDownTest cancels the per-test context, if one was created.
func (suite *HardwareSuite) TearDownTest() {
	if cancel := suite.ctxCancel; cancel != nil {
		cancel()
	}
}

// TestSystemInformation tests that SystemInformation is populated.
//
// It fetches the SystemInformation resource from a random discovered node and
// asserts that its UUID is neither empty nor the all-zero UUID.
func (suite *HardwareSuite) TestSystemInformation() {
	node := suite.RandomDiscoveredNodeInternalIP()
	nodeCtx := client.WithNode(suite.ctx, node)

	sysInfo, err := safe.StateGetByID[*hardware.SystemInformation](nodeCtx, suite.Client.COSI, hardware.SystemInformationID)
	suite.Require().NoError(err)

	reportedUUID := sysInfo.TypedSpec().UUID
	zeroUUID := (uuid.UUID{}).String()

	suite.Assert().NotEmpty(reportedUUID)
	suite.Assert().NotEqual(zeroUUID, reportedUUID)
}

// TestHardwareInfo tests that hardware info is populated.
//
// For a random discovered node it lists memory module, processor and PCI device
// resources in the hardware namespace and asserts that each list is non-empty.
func (suite *HardwareSuite) TestHardwareInfo() {
	node := suite.RandomDiscoveredNodeInternalIP()
	nodeCtx := client.WithNode(suite.ctx, node)

	resourceTypes := []resource.Type{
		hardware.MemoryModuleType,
		hardware.ProcessorType,
		hardware.PCIDeviceType,
	}

	for _, resourceType := range resourceTypes {
		listMD := resource.NewMetadata(hardware.NamespaceName, resourceType, "", resource.VersionUndefined)

		items, err := suite.Client.COSI.List(nodeCtx, listMD)
		suite.Require().NoError(err)

		suite.Assert().NotEmpty(items.Items, "resource type %s is not populated", resourceType)
	}
}

func init() {
allSuites = append(allSuites, new(HardwareSuite))
}
Loading

0 comments on commit 4dd0aa7

Please sign in to comment.